diff --git a/docs/content/docs.md b/docs/content/docs.md
index 66777e1b8..7cfffe2b4 100644
--- a/docs/content/docs.md
+++ b/docs/content/docs.md
@@ -425,6 +425,126 @@ This can be used when scripting to make aged backups efficiently, e.g.
 
     rclone sync -i remote:current-backup remote:previous-backup
     rclone sync -i /path/to/files remote:current-backup
 
+## Metadata support {#metadata}
+
+Metadata is data about a file which isn't the contents of the file.
+Normally rclone only preserves the modification time and the content
+(MIME) type where possible.
+
+Rclone supports preserving all the available metadata on files (not
+directories) when using the `--metadata` or `-M` flag.
+
+Exactly what metadata is supported and what that support means depends
+on the backend. Backends that support metadata have a metadata section
+in their docs and are listed in the [features table](/overview/#features)
+(e.g. [local](/local/#metadata), [s3](/s3/#metadata)).
+
+Rclone only supports a one-time sync of metadata. This means that
+metadata will be synced from the source object to the destination
+object only when the source object has changed and needs to be
+re-uploaded. If the metadata subsequently changes on the source object
+without changing the object itself then it won't be synced to the
+destination object. This is in line with the way rclone syncs
+`Content-Type` without the `--metadata` flag.
+
+Using `--metadata` when syncing from local to local will preserve file
+attributes such as file mode, owner and extended attributes (not on
+Windows).
+
+Note that arbitrary metadata may be added to objects using the
+`--upload-metadata key=value` flag when the object is first uploaded.
+This flag can be repeated as many times as necessary.
+
+### Types of metadata
+
+Metadata is divided into two types: system metadata and user metadata.
+
+Metadata which the backend uses itself is called system metadata. For
+example on the local backend the system metadata `uid` will store the
+user ID of the file when used on a Unix based platform.
+
+Arbitrary metadata is called user metadata and this can be set however
+is desired.
+
+When objects are copied from backend to backend, rclone will attempt to
+interpret system metadata if it is supplied. Metadata may change from
+being user metadata to system metadata as objects are copied between
+different backends. For example copying an object from s3 sets the
+`content-type` metadata. In a backend which understands this (like
+`azureblob`) this will become the Content-Type of the object. In a
+backend which doesn't understand this (like the `local` backend) this
+will become user metadata. However should the local object be copied
+back to s3, the Content-Type will be set correctly.
+
+### Metadata framework
+
+Rclone implements a metadata framework which can read metadata from an
+object and write it to the object when (and only when) it is being
+uploaded.
+
+This metadata is stored as a dictionary with string keys and string
+values.
+
+There are some limits on the names of the keys (these may be clarified
+further in the future):
+
+- must be lower case
+- may only contain `a-z`, `0-9`, `.`, `-` or `_`
+- length is backend dependent
+
+Each backend can provide system metadata that it understands. Some
+backends can also store arbitrary user metadata.
+
+Where possible the key names are standardized, so, for example, it is
+possible to copy object metadata from s3 to azureblob and the metadata
+will be translated appropriately.
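+
+To illustrate the shape of this dictionary, here is a minimal,
+standalone Go sketch (not rclone's actual implementation, but
+mirroring the internal `fs.Metadata` type); the keys shown are
+illustrative examples, not a complete list:
+
+```go
+package main
+
+import "fmt"
+
+// Metadata mirrors fs.Metadata: a plain map of string keys to
+// string values.
+type Metadata map[string]string
+
+// Set assigns k to v, allocating the map first if it is nil, in the
+// same way as fs.Metadata.Set.
+func (m *Metadata) Set(k, v string) {
+	if *m == nil {
+		*m = make(Metadata, 1)
+	}
+	(*m)[k] = v
+}
+
+func main() {
+	var m Metadata
+	// System metadata uses standardized lower case keys, with times
+	// in RFC 3339 format.
+	m.Set("mtime", "2006-01-02T15:04:05.999999999Z")
+	m.Set("content-type", "text/plain")
+	// User metadata is arbitrary and is stored alongside it.
+	m.Set("rclone-test", "potato")
+	fmt.Println(m)
+}
+```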
+
+Some backends have limits on the size of the metadata and rclone will
+give errors on upload if they are exceeded.
+
+### Metadata preservation
+
+The goals of the implementation are to
+
+1. Preserve metadata if at all possible
+2. Interpret metadata if at all possible
+
+The consequence of 1 is that you can copy an S3 object to a local
+disk then back to S3 losslessly. Likewise you can copy a local file
+with file attributes and xattrs from local disk to s3 and back again
+losslessly.
+
+The consequence of 2 is that you can copy an S3 object with metadata
+to Azureblob (say) and have the metadata appear on the Azureblob
+object also.
+
+### Standard system metadata
+
+Here is a table of standard system metadata which, if appropriate, a
+backend may implement.
+
+| key | description | example |
+|---------------------|-------------|---------|
+| mode | File type and mode: octal, unix style | 0100664 |
+| uid | User ID of owner: decimal number | 500 |
+| gid | Group ID of owner: decimal number | 500 |
+| rdev | Device ID (if special file): hexadecimal | 0 |
+| atime | Time of last access: RFC 3339 | 2006-01-02T15:04:05.999999999Z07:00 |
+| mtime | Time of last modification: RFC 3339 | 2006-01-02T15:04:05.999999999Z07:00 |
+| btime | Time of file creation (birth): RFC 3339 | 2006-01-02T15:04:05.999999999Z07:00 |
+| cache-control | Cache-Control header | no-cache |
+| content-disposition | Content-Disposition header | inline |
+| content-encoding | Content-Encoding header | gzip |
+| content-language | Content-Language header | en-US |
+| content-type | Content-Type header | text/plain |
+
+The metadata keys `mtime` and `content-type` will take precedence if
+supplied in the metadata over reading the `Content-Type` or
+modification time of the source object.
+
+Hashes are not included in system metadata as there is a well defined
+way of reading those already.
+
 Options
 -------
 
@@ -1206,6 +1326,12 @@ When the limit is reached all transfers will stop immediately.
 
 Rclone will exit with exit code 8 if the transfer limit is reached.
 
+### --metadata / -M ###
+
+Setting this flag enables rclone to copy the metadata from the source
+to the destination. For local backends this is ownership, permissions,
+xattr, etc. See the [metadata section](#metadata) for more info.
+
 ### --cutoff-mode=hard|soft|cautious ###
 
 This modifies the behavior of `--max-transfer`
diff --git a/docs/content/overview.md b/docs/content/overview.md
index c107723ea..74194f28f 100644
--- a/docs/content/overview.md
+++ b/docs/content/overview.md
@@ -14,48 +14,48 @@ show through.
 
 Here is an overview of the major features of each cloud storage system.
 
-| Name | Hash | ModTime | Case Insensitive | Duplicate Files | MIME Type |
-| ---------------------------- |:----------------:|:-------:|:----------------:|:---------------:|:---------:|
-| 1Fichier | Whirlpool | - | No | Yes | R |
-| Akamai Netstorage | MD5, SHA256 | R/W | No | No | R |
-| Amazon Drive | MD5 | - | Yes | No | R |
-| Amazon S3 (or S3 compatible) | MD5 | R/W | No | No | R/W |
-| Backblaze B2 | SHA1 | R/W | No | No | R/W |
-| Box | SHA1 | R/W | Yes | No | - |
-| Citrix ShareFile | MD5 | R/W | Yes | No | - |
-| Dropbox | DBHASH ¹ | R | Yes | No | - |
-| Enterprise File Fabric | - | R/W | Yes | No | R/W |
-| FTP | - | R/W ¹⁰ | No | No | - |
-| Google Cloud Storage | MD5 | R/W | No | No | R/W |
-| Google Drive | MD5 | R/W | No | Yes | R/W |
-| Google Photos | - | - | No | Yes | R |
-| HDFS | - | R/W | No | No | - |
-| HTTP | - | R | No | No | R |
-| Hubic | MD5 | R/W | No | No | R/W |
-| Internet Archive | MD5, SHA1, CRC32 | R/W ¹¹ | No | No | - |
-| Jottacloud | MD5 | R/W | Yes | No | R |
-| Koofr | MD5 | - | Yes | No | - |
-| Mail.ru Cloud | Mailru ⁶ | R/W | Yes | No | - |
-| Mega | - | - | No | Yes | - |
-| Memory | MD5 | R/W | No | No | - |
-| Microsoft Azure Blob Storage | MD5 | R/W | No | No | R/W |
-| Microsoft OneDrive | SHA1 ⁵ | R/W | Yes | No | R |
-| OpenDrive | MD5 | R/W | Yes | Partial ⁸ | - |
-| OpenStack Swift | MD5 | R/W | No | No | R/W |
-| pCloud | MD5, SHA1 ⁷ | R | No | No | W |
-| premiumize.me | - | - | Yes | No | R |
-| put.io | CRC-32 | R/W | No | Yes | R |
-| QingStor | MD5 | - ⁹ | No | No | R/W |
-| Seafile | - | - | No | No | - |
-| SFTP | MD5, SHA1 ² | R/W | Depends | No | - |
-| Sia | - | - | No | No | - |
-| SugarSync | - | - | No | No | - |
-| Storj | - | R | No | No | - |
-| Uptobox | - | - | No | Yes | - |
-| WebDAV | MD5, SHA1 ³ | R ⁴ | Depends | No | - |
-| Yandex Disk | MD5 | R/W | No | No | R |
-| Zoho WorkDrive | - | - | No | No | - |
-| The local filesystem | All | R/W | Depends | No | - |
+| Name | Hash | ModTime | Case Insensitive | Duplicate Files | MIME Type | Metadata |
+| ---------------------------- |:----------------:|:-------:|:----------------:|:---------------:|:---------:|:--------:|
+| 1Fichier | Whirlpool | - | No | Yes | R | - |
+| Akamai Netstorage | MD5, SHA256 | R/W | No | No | R | - |
+| Amazon Drive | MD5 | - | Yes | No | R | - |
+| Amazon S3 (or S3 compatible) | MD5 | R/W | No | No | R/W | RWU |
+| Backblaze B2 | SHA1 | R/W | No | No | R/W | - |
+| Box | SHA1 | R/W | Yes | No | - | - |
+| Citrix ShareFile | MD5 | R/W | Yes | No | - | - |
+| Dropbox | DBHASH ¹ | R | Yes | No | - | - |
+| Enterprise File Fabric | - | R/W | Yes | No | R/W | - |
+| FTP | - | R/W ¹⁰ | No | No | - | - |
+| Google Cloud Storage | MD5 | R/W | No | No | R/W | - |
+| Google Drive | MD5 | R/W | No | Yes | R/W | - |
+| Google Photos | - | - | No | Yes | R | - |
+| HDFS | - | R/W | No | No | - | - |
+| HTTP | - | R | No | No | R | - |
+| Hubic | MD5 | R/W | No | No | R/W | - |
+| Internet Archive | MD5, SHA1, CRC32 | R/W ¹¹ | No | No | - | - |
+| Jottacloud | MD5 | R/W | Yes | No | R | - |
+| Koofr | MD5 | - | Yes | No | - | - |
+| Mail.ru Cloud | Mailru ⁶ | R/W | Yes | No | - | - |
+| Mega | - | - | No | Yes | - | - |
+| Memory | MD5 | R/W | No | No | - | - |
+| Microsoft Azure Blob Storage | MD5 | R/W | No | No | R/W | - |
+| Microsoft OneDrive | SHA1 ⁵ | R/W | Yes | No | R | - |
+| OpenDrive | MD5 | R/W | Yes | Partial ⁸ | - | - |
+| OpenStack Swift | MD5 | R/W | No | No | R/W | - |
+| pCloud | MD5, SHA1 ⁷ | R | No | No | W | - |
+| premiumize.me | - | - | Yes | No | R | - |
+| put.io | CRC-32 | R/W | No | Yes | R | - |
+| QingStor | MD5 | - ⁹ | No | No | R/W | - |
+| Seafile | - | - | No | No | - | - |
+| SFTP | MD5, SHA1 ² | R/W | Depends | No | - | - |
+| Sia | - | - | No | No | - | - |
+| SugarSync | - | - | No | No | - | - |
+| Storj | - | R | No | No | - | - |
+| Uptobox | - | - | No | Yes | - | - |
+| WebDAV | MD5, SHA1 ³ | R ⁴ | Depends | No | - | - |
+| Yandex Disk | MD5 | R/W | No | No | R | - |
+| Zoho WorkDrive | - | - | No | No | - | - |
+| The local filesystem | All | R/W | Depends | No | - | RWU |
 
 ### Notes
 
@@ -438,6 +438,22 @@ remote which supports writing (`W`) then rclone will preserve the MIME
 types. Otherwise they will be guessed from the extension, or the
 remote itself may assign the MIME type.
 
+### Metadata
+
+Backends may or may not support reading or writing metadata. They may
+support reading and writing system metadata (metadata intrinsic to
+that backend) and/or user metadata (general purpose metadata).
+
+The levels of metadata support are:
+
+| Key | Explanation |
+|-----|-------------|
+| `R` | Read only System Metadata |
+| `RW` | Read and write System Metadata |
+| `RWU` | Read and write System Metadata and read and write User Metadata |
+
+See [the metadata docs](/docs/#metadata) for more info.
+
 ## Optional Features ##
 
 All rclone remotes support a base command set. Other features depend
diff --git a/fs/features.go b/fs/features.go
index 6db6bd548..6fce02d60 100644
--- a/fs/features.go
+++ b/fs/features.go
@@ -26,6 +26,9 @@ type Features struct {
 	IsLocal       bool // is the local backend
 	SlowModTime   bool // if calling ModTime() generally takes an extra transaction
 	SlowHash      bool // if calling Hash() generally takes an extra transaction
+	ReadMetadata  bool // can read metadata from objects
+	WriteMetadata bool // can write metadata to objects
+	UserMetadata  bool // can read/write general purpose metadata
 
 	// Purge all files in the directory specified
 	//
@@ -305,6 +308,9 @@ func (ft *Features) Mask(ctx context.Context, f Fs) *Features {
 	ft.DuplicateFiles = ft.DuplicateFiles && mask.DuplicateFiles
 	ft.ReadMimeType = ft.ReadMimeType && mask.ReadMimeType
 	ft.WriteMimeType = ft.WriteMimeType && mask.WriteMimeType
+	ft.ReadMetadata = ft.ReadMetadata && mask.ReadMetadata
+	ft.WriteMetadata = ft.WriteMetadata && mask.WriteMetadata
+	ft.UserMetadata = ft.UserMetadata && mask.UserMetadata
 	ft.CanHaveEmptyDirectories = ft.CanHaveEmptyDirectories && mask.CanHaveEmptyDirectories
 	ft.BucketBased = ft.BucketBased && mask.BucketBased
 	ft.BucketBasedRootOK = ft.BucketBasedRootOK && mask.BucketBasedRootOK
diff --git a/fs/metadata.go b/fs/metadata.go
new file mode 100644
index 000000000..dcdb517dc
--- /dev/null
+++ b/fs/metadata.go
@@ -0,0 +1,29 @@
+package fs
+
+import "context"
+
+// Metadata represents Object metadata in a standardised form
+//
+// See docs/content/metadata.md for the interpretation of the keys
+type Metadata map[string]string
+
+// Set k to v on m
+//
+// If m is nil, then it will get made
+func (m *Metadata) Set(k, v string) {
+	if *m == nil {
+		*m = make(Metadata, 1)
+	}
+	(*m)[k] = v
+}
+
+// GetMetadata from an ObjectInfo
+//
+// If the object has no metadata then metadata will be nil
+func GetMetadata(ctx context.Context, o ObjectInfo) (metadata Metadata, err error) {
+	do, ok := o.(Metadataer)
+	if !ok {
+		return nil, nil
+	}
+	return do.Metadata(ctx)
+}
diff --git a/fs/metadata_test.go b/fs/metadata_test.go
new file mode 100644
index 000000000..bc72c3964
--- /dev/null
+++ b/fs/metadata_test.go
@@ -0,0 +1,17 @@
+package fs
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestMetadataSet(t *testing.T) {
+	var m Metadata
+	assert.Nil(t, m)
+	m.Set("key", "value")
+	assert.NotNil(t, m)
+	assert.Equal(t, "value", m["key"])
+	m.Set("key", "value2")
+	assert.Equal(t, "value2", m["key"])
+}
diff --git a/fs/operations/operations.go b/fs/operations/operations.go
index bdeeaf036..a9c2b524c 100644
--- a/fs/operations/operations.go
+++ b/fs/operations/operations.go
@@ -352,6 +352,16 @@ func (o *OverrideRemote) GetTier() string {
 	return ""
 }
 
+// Metadata returns metadata for an object
+//
+// It should return nil if there is no Metadata
+func (o *OverrideRemote) Metadata(ctx context.Context) (fs.Metadata, error) {
+	if do, ok := o.ObjectInfo.(fs.Metadataer); ok {
+		return do.Metadata(ctx)
+	}
+	return nil, nil
+}
+
 // Check all optional interfaces satisfied
 var _ fs.FullObjectInfo = (*OverrideRemote)(nil)
diff --git a/fs/types.go b/fs/types.go
index b340ce67b..88ff5e8cd 100644
--- a/fs/types.go
+++ b/fs/types.go
@@ -183,6 +183,14 @@ type GetTierer interface {
 	GetTier() string
 }
 
+// Metadataer is an optional interface for Object
+type Metadataer interface {
+	// Metadata returns metadata for an object
+	//
+	// It should return nil if there is no Metadata
+	Metadata(ctx context.Context) (Metadata, error)
+}
+
 // FullObjectInfo contains all the read-only optional interfaces
 //
 // Use for checking making wrapping ObjectInfos implement everything
@@ -192,6 +200,7 @@ type FullObjectInfo interface {
 	IDer
 	ObjectUnWrapper
 	GetTierer
+	Metadataer
 }
 
 // FullObject contains all the optional interfaces for Object
@@ -204,6 +213,7 @@ type FullObject interface {
 	ObjectUnWrapper
 	GetTierer
 	SetTierer
+	Metadataer
 }
 
 // ObjectOptionalInterfaces returns the names of supported and
@@ -232,6 +242,9 @@ func ObjectOptionalInterfaces(o Object) (supported, unsupported []string) {
 	_, ok = o.(GetTierer)
 	store(ok, "GetTier")
 
+	_, ok = o.(Metadataer)
+	store(ok, "Metadata")
+
 	return supported, unsupported
 }
diff --git a/fstest/fstests/fstests.go b/fstest/fstests/fstests.go
index d9de908c1..47b841bca 100644
--- a/fstest/fstests/fstests.go
+++ b/fstest/fstests/fstests.go
@@ -157,13 +157,15 @@ func retry(t *testing.T, what string, f func() error) {
 type objectInfoWithMimeType struct {
 	fs.ObjectInfo
 	mimeType string
+	metadata fs.Metadata
 }
 
 // Return a wrapped fs.ObjectInfo which returns the mime type given
-func overrideMimeType(o fs.ObjectInfo, mimeType string) fs.ObjectInfo {
+func overrideMimeType(o fs.ObjectInfo, mimeType string, metadata fs.Metadata) fs.ObjectInfo {
 	return &objectInfoWithMimeType{
 		ObjectInfo: o,
 		mimeType:   mimeType,
+		metadata:   metadata,
 	}
 }
 
@@ -172,13 +174,25 @@ func (o *objectInfoWithMimeType) MimeType(ctx context.Context) string {
 	return o.mimeType
 }
 
-// check interface
-var _ fs.MimeTyper = (*objectInfoWithMimeType)(nil)
+// Metadata that was overridden
+func (o *objectInfoWithMimeType) Metadata(ctx context.Context) (fs.Metadata, error) {
+	return o.metadata, nil
+}
 
-// putTestContentsMimeType puts file with given contents to the remote and checks it but unlike TestPutLarge doesn't remove
+// check interfaces
+var (
+	_ fs.MimeTyper  = (*objectInfoWithMimeType)(nil)
+	_ fs.Metadataer = (*objectInfoWithMimeType)(nil)
+)
+
+// check interface
+
+// PutTestContentsMetadata puts file with given contents to the remote and checks it but unlike TestPutLarge doesn't remove
 //
-// it uploads the object with the mimeType passed in if set
-func putTestContentsMimeType(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item, contents string, check bool, mimeType string) fs.Object {
+// It uploads the object with the mimeType and metadata passed in if set
+//
+// It returns the object which will have been checked if check is set
+func PutTestContentsMetadata(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item, contents string, check bool, mimeType string, metadata fs.Metadata) fs.Object {
 	var (
 		err error
 		obj fs.Object
@@ -191,14 +205,21 @@ func putTestContentsMimeType(ctx context.Context, t *testing.T, f fs.Fs, file *f
 
 		file.Size = int64(buf.Len())
 		obji := object.NewStaticObjectInfo(file.Path, file.ModTime, file.Size, true, nil, nil)
-		if mimeType != "" {
-			obji = overrideMimeType(obji, mimeType)
+		if mimeType != "" || metadata != nil {
+			obji = overrideMimeType(obji, mimeType, metadata)
 		}
 		obj, err = f.Put(ctx, in, obji)
 		return err
 	})
 	file.Hashes = uploadHash.Sums()
 	if check {
+		// Overwrite time with that in metadata if it is already specified
+		mtime, ok := metadata["mtime"]
+		if ok {
+			modTime, err := time.Parse(time.RFC3339Nano, mtime)
+			require.NoError(t, err)
+			file.ModTime = modTime
+		}
 		file.Check(t, obj, f.Precision())
 		// Re-read the object and check again
 		obj = findObject(ctx, t, f, file.Path)
@@ -209,7 +230,7 @@ func putTestContentsMimeType(ctx context.Context, t *testing.T, f fs.Fs, file *f
 
 // PutTestContents puts file with given contents to the remote and checks it but unlike TestPutLarge doesn't remove
 func PutTestContents(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item, contents string, check bool) fs.Object {
-	return putTestContentsMimeType(ctx, t, f, file, contents, check, "")
+	return PutTestContentsMetadata(ctx, t, f, file, contents, check, "", nil)
 }
 
 // testPut puts file with random contents to the remote
@@ -219,9 +240,9 @@ func testPut(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item) (str
 }
 
 // testPutMimeType puts file with random contents to the remote and the mime type given
-func testPutMimeType(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item, mimeType string) (string, fs.Object) {
+func testPutMimeType(ctx context.Context, t *testing.T, f fs.Fs, file *fstest.Item, mimeType string, metadata fs.Metadata) (string, fs.Object) {
 	contents := random.String(100)
-	return contents, putTestContentsMimeType(ctx, t, f, file, contents, true, mimeType)
+	return contents, PutTestContentsMetadata(ctx, t, f, file, contents, true, mimeType, metadata)
 }
 
 // TestPutLarge puts file to the remote, checks it and removes it on success.
@@ -350,6 +371,7 @@ func Run(t *testing.T, opt *Opt) {
 		}
 		file1Contents string
 		file1MimeType = "text/csv"
+		file1Metadata = fs.Metadata{"rclone-test": "potato"}
 		file2         = fstest.Item{
 			ModTime: fstest.Time("2001-02-03T04:05:10.123123123Z"),
 			Path:    `hello? sausage/êé/Hello, 世界/ " ' @ < > & ? + ≠/z.txt`,
@@ -868,7 +890,7 @@ func Run(t *testing.T, opt *Opt) {
 		skipIfNotOk(t)
 		file1Contents, _ = testPut(ctx, t, f, &file1)
 		/* file2Contents = */ testPut(ctx, t, f, &file2)
-		file1Contents, _ = testPutMimeType(ctx, t, f, &file1, file1MimeType)
+		file1Contents, _ = testPutMimeType(ctx, t, f, &file1, file1MimeType, file1Metadata)
 		// Note that the next test will check there are no duplicated file names
 
 	// TestFsListDirFile2 tests the files are correctly uploaded by doing
@@ -1357,6 +1379,76 @@ func Run(t *testing.T, opt *Opt) {
 		}
 	})
 
+	// TestObjectMetadata tests the Metadata of the object is correct
+	t.Run("ObjectMetadata", func(t *testing.T) {
+		skipIfNotOk(t)
+		features := f.Features()
+		obj := findObject(ctx, t, f, file1.Path)
+		do, ok := obj.(fs.Metadataer)
+		if !ok {
+			require.False(t, features.ReadMetadata, "Features.ReadMetadata is set but Object.Metadata method not found")
+			t.Skip("Metadata method not supported")
+		}
+		metadata, err := do.Metadata(ctx)
+		require.NoError(t, err)
+		// check standard metadata
+		for k, v := range metadata {
+			switch k {
+			case "atime", "btime", "mtime":
+				mtime, err := time.Parse(time.RFC3339Nano, v)
+				require.NoError(t, err)
+				if k == "mtime" {
+					fstest.AssertTimeEqualWithPrecision(t, file1.Path, file1.ModTime, mtime, f.Precision())
+				}
+			}
+		}
+		if !features.ReadMetadata {
+			if metadata != nil {
+				require.Equal(t, "", metadata, "Features.ReadMetadata is not set but Object.Metadata returned a non nil Metadata")
+			}
+		} else if features.WriteMetadata {
+			require.NotNil(t, metadata)
+			if features.UserMetadata {
+				// check all the metadata bits we uploaded are present - there may be more we didn't write
+				for k, v := range file1Metadata {
+					assert.Equal(t, v, metadata[k], "can read and write metadata but failed on key %q", k)
+				}
+			}
+			// Now test we can set the mtime and content-type via the metadata and these take precedence
+			t.Run("mtime", func(t *testing.T) {
+				path := "metadatatest"
+				mtimeModTime := fstest.Time("2002-02-03T04:05:06.499999999Z")
+				modTime := fstest.Time("2003-02-03T04:05:06.499999999Z")
+				item := fstest.NewItem(path, path, modTime)
+				metaMimeType := "application/zip"
+				mimeType := "application/gzip"
+				metadata := fs.Metadata{
+					"mtime":        mtimeModTime.Format(time.RFC3339Nano),
+					"content-type": metaMimeType,
+				}
+				// This checks the mtime is correct also and returns the re-read object
+				_, obj := testPutMimeType(ctx, t, f, &item, mimeType, metadata)
+				defer func() {
+					assert.NoError(t, obj.Remove(ctx))
+				}()
+				// Check content-type got updated too
+				if features.ReadMimeType && features.WriteMimeType {
+					// read the object from scratch
+					o, err := f.NewObject(ctx, path)
+					require.NoError(t, err)
+
+					// Check the mimetype is correct
+					do, ok := o.(fs.MimeTyper)
+					require.True(t, ok)
+					gotMimeType := do.MimeType(ctx)
+					assert.Equal(t, metaMimeType, gotMimeType)
+				}
+			})
+		} else {
+			// Have some metadata here we didn't write - can't really check it!
+		}
+	})
+
 	// TestObjectSetModTime tests that SetModTime works
 	t.Run("ObjectSetModTime", func(t *testing.T) {
 		skipIfNotOk(t)