chunker: finish meta-format before release
Changes:
- chunker: remove GetTier and SetTier
- remove wdmrcompat metaformat
- remove fastopen strategy
- make hash_type option non-advanced
- advertise hash support when possible
- add metadata field "ver", run strict checks
- describe internal behavior in comments
- improve documentation

Note: wdmrcompat used to write the file name in the metadata, so the maximum
metadata size was 1K; removing it allows capping the size at 200 bytes now.
Parent: c41812fc88
Commit: ccecfa9cb1
@@ -36,13 +36,11 @@ const (
 // WARNING: this optimization is not transaction safe!
 optimizeFirstChunk = false

-// Normally metadata is a small (less than 1KB) piece of JSON.
+// Normally metadata is a small (100-200 bytes) piece of JSON.
 // Valid metadata size should not exceed this limit.
-maxMetaDataSize = 1023
+maxMetaDataSize = 199

-// fastopen strategy opens all chunks immediately, but reads sequentially.
-// linear strategy opens and reads chunks sequentially, without read-ahead.
-downloadStrategy = "linear"
+metaDataVersion = 1
 )

 // Formatting of temporary chunk names. Temporary suffix *follows* chunk
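The new 199-byte cap is possible because `simplejson` metadata no longer stores a file name. A minimal, hypothetical sketch (not rclone code) of how the two new constants can gate metadata parsing:

```go
package main

import (
	"encoding/json"
	"fmt"
)

const (
	maxMetaDataSize = 199 // cap from the hunk above
	metaDataVersion = 1
)

// looksLikeValidMeta is a hypothetical pre-check, not rclone code: it rejects
// blobs that are too large to be chunker metadata or carry an unknown version.
func looksLikeValidMeta(data []byte) bool {
	if len(data) > maxMetaDataSize {
		return false // cannot be simplejson metadata under the new cap
	}
	var probe struct {
		Ver int `json:"ver"`
	}
	if err := json.Unmarshal(data, &probe); err != nil {
		return false
	}
	return probe.Ver == metaDataVersion
}

func main() {
	fmt.Println(looksLikeValidMeta([]byte(`{"ver":1,"size":100,"nchunks":1}`))) // true
	fmt.Println(looksLikeValidMeta([]byte(`{"ver":2}`)))                        // false
}
```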
@@ -52,6 +50,13 @@ var (
 tempChunkRegexp = regexp.MustCompile(`^(.+)\.\.tmp_([0-9]{10,19})$`)
 )

+// Note: metadata logic is tightly coupled with chunker code in many
+// places of the code, eg. in checks whether a file can have meta object
+// or is eligible for chunking.
+// If more metadata formats (or versions of a format) are added in future,
+// it may be advisable to factor it into a "metadata strategy" interface
+// similar to chunkingReader or linearReader below.
+
 // Register with Fs
 func init() {
 fs.Register(&fs.RegInfo{
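The note above only hints at the factoring; one possible shape for such a "metadata strategy" interface, purely as a sketch and not part of this commit, might be:

```go
package chunker

// ObjectInfo stands in here for chunker's internal metadata carrier.
type ObjectInfo struct {
	size    int64
	nChunks int
	md5     string
	sha1    string
}

// metaStrategy is a hypothetical interface that would isolate a metadata
// format (its marshaling and validity rules) from the rest of the chunker
// code, as the comment above suggests.
type metaStrategy interface {
	Marshal(size int64, nChunks int, md5, sha1 string) ([]byte, error)
	Unmarshal(data []byte) (*ObjectInfo, error)
}
```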
@@ -98,16 +103,10 @@ Metadata is a small JSON file named after the composite file.`,
 Value: "simplejson",
 Help: `Simple JSON supports hash sums and chunk validation.
 It has the following fields: size, nchunks, md5, sha1.`,
-}, {
-Value: "wdmrcompat",
-Help: `This format brings compatibility with WebDavMailRuCloud.
-It does not support hash sums or validation, most fields are ignored.
-It has the following fields: Name, Size, PublicKey, CreationDate.
-Requires hash type "none".`,
 }},
 }, {
 Name: "hash_type",
-Advanced: true,
+Advanced: false,
 Default: "md5",
 Help: `Choose how chunker handles hash sums.`,
 Examples: []fs.OptionExample{{
@@ -122,8 +121,8 @@ for a single-chunk file but returns nothing otherwise.`,
 Help: `SHA1 for multi-chunk files. Requires "simplejson".`,
 }, {
 Value: "md5quick",
-Help: `When a file is copied on to chunker, MD5 is taken from its source
-falling back to SHA1 if the source doesn't support it. Requires "simplejson".`,
+Help: `Copying a file to chunker will request MD5 from the source
+falling back to SHA1 if unsupported. Requires "simplejson".`,
 }, {
 Value: "sha1quick",
 Help: `Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".`,
@@ -188,7 +187,7 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
 switch opt.MetaFormat {
 case "none":
 f.useMeta = false
-case "simplejson", "wdmrcompat":
+case "simplejson":
 f.useMeta = true
 default:
 return nil, fmt.Errorf("unsupported meta format '%s'", opt.MetaFormat)
@@ -243,8 +242,6 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) {
 WriteMimeType: true,
 BucketBased: true,
 CanHaveEmptyDirectories: true,
-SetTier: true,
-GetTier: true,
 ServerSideAcrossConfigs: true,
 }).Fill(f).Mask(baseFs).WrapsFs(f, baseFs)

@@ -393,6 +390,19 @@ func (f *Fs) parseChunkName(name string) (mainName string, chunkNo int, tempNo i
 //
 // This should return ErrDirNotFound if the directory isn't
 // found.
+//
+// Commands normally cleanup all temporary chunks in case of a failure.
+// However, if rclone dies unexpectedly, it can leave behind a bunch of
+// hidden temporary chunks. List and its underlying chunkEntries()
+// silently skip all temporary chunks in the directory. It's okay if
+// they belong to an unfinished command running in parallel.
+//
+// However, there is no way to discover dead temporary chunks a.t.m.
+// As a workaround users can use `purge` to forcibly remove the whole
+// directory together with dead chunks.
+// In future a flag named like `--chunker-list-hidden` may be added to
+// rclone that will tell List to reveal hidden chunks.
+//
 func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
 entries, err = f.base.List(ctx, dir)
 if err != nil {
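To illustrate what List is skipping, here is a small self-contained program that applies the same `tempChunkRegexp` pattern from the `var` block above to made-up file names (the names are purely illustrative):

```go
package main

import (
	"fmt"
	"regexp"
)

// Same pattern as chunker's tempChunkRegexp above.
var tempChunkRegexp = regexp.MustCompile(`^(.+)\.\.tmp_([0-9]{10,19})$`)

func main() {
	names := []string{
		"data.txt.rclone-chunk.001..tmp_0123456789", // hidden temporary chunk
		"data.txt.rclone-chunk.001",                 // active chunk
	}
	for _, name := range names {
		if m := tempChunkRegexp.FindStringSubmatch(name); m != nil {
			fmt.Printf("hide %q (main name %q, tag %s)\n", name, m[1], m[2])
		} else {
			fmt.Printf("show %q\n", name)
		}
	}
}
```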
@@ -428,7 +438,8 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (
 })
 }

-// Add some directory entries. This alters entries returning it as newEntries.
+// chunkEntries is called by List(R). It merges chunk entries from
+// wrapped remote into composite directory entries.
 func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardErrors bool) (chunkedEntries fs.DirEntries, err error) {
 // sort entries, so that meta objects (if any) appear before their chunks
 sortedEntries := make(fs.DirEntries, len(origEntries))
@@ -514,6 +525,11 @@ func (f *Fs) chunkEntries(ctx context.Context, origEntries fs.DirEntries, hardEr
 }

 // NewObject finds the Object at remote.
+//
+// Please note that every NewObject invocation will scan the whole directory.
+// Using here something like fs.DirCache might improve performance (and make
+// logic more complex though).
+//
 func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
 if mainRemote, _, _ := f.parseChunkName(remote); mainRemote != "" {
 return nil, fmt.Errorf("%q should be meta object, not a chunk", remote)
@@ -622,23 +638,14 @@ func (o *Object) readMetaData(ctx context.Context) error {
 case "simplejson":
 metaInfo, err := unmarshalSimpleJSON(ctx, metaObject, metaData)
 if err != nil {
-// TODO: maybe it's a small single chunk?
-return err
+// TODO: in a rare case we might mistake a small file for metadata
+return errors.Wrap(err, "invalid metadata")
 }
 if o.size != metaInfo.Size() || len(o.chunks) != metaInfo.nChunks {
-return errors.New("invalid simplejson metadata")
+return errors.New("metadata doesn't match file size")
 }
 o.md5 = metaInfo.md5
 o.sha1 = metaInfo.sha1
-case "wdmrcompat":
-metaInfo, err := unmarshalWDMRCompat(ctx, metaObject, metaData)
-if err != nil {
-// TODO: maybe it's a small single chunk?
-return err
-}
-if o.size != metaInfo.Size() {
-return errors.New("invalid wdmrcompat metadata")
-}
 }

 o.isFull = true
@@ -784,9 +791,6 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st
 case "simplejson":
 c.updateHashes()
 metaData, err = marshalSimpleJSON(ctx, sizeTotal, len(c.chunks), c.md5, c.sha1)
-case "wdmrcompat":
-fileInfo := f.wrapInfo(src, baseRemote, sizeTotal)
-metaData, err = marshalWDMRCompat(ctx, fileInfo)
 }
 if err == nil {
 metaInfo := f.wrapInfo(src, baseRemote, int64(len(metaData)))
@@ -951,6 +955,9 @@ func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, opt

 // Update in to the object with the modTime given of the given size
 func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
+if err := o.readMetaData(ctx); err != nil {
+return err
+}
 basePut := o.f.base.Put
 if src.Size() < 0 {
 basePut = o.f.base.Features().PutStream
@@ -989,8 +996,17 @@ func (f *Fs) Precision() time.Duration {
 }

 // Hashes returns the supported hash sets.
+// Chunker advertises a hash type if and only if it can be calculated
+// for files of any size, multi-chunked or small.
 func (f *Fs) Hashes() hash.Set {
-return hash.Set(hash.None)
+// composites && all of them && small files supported by wrapped remote
+if f.useMD5 && !f.quickHash && f.base.Hashes().Contains(hash.MD5) {
+return hash.NewHashSet(hash.MD5)
+}
+if f.useSHA1 && !f.quickHash && f.base.Hashes().Contains(hash.SHA1) {
+return hash.NewHashSet(hash.SHA1)
+}
+return hash.NewHashSet() // can't provide strong guarantees
 }

 // Mkdir makes the directory (container, bucket)
@@ -1012,7 +1028,12 @@ func (f *Fs) Rmdir(ctx context.Context, dir string) error {
 // Implement this if you have a way of deleting all the files
 // quicker than just running Remove() on the result of List()
 //
-// Return an error if it doesn't exist
+// Return an error if it doesn't exist.
+//
+// This command will chain to `purge` from wrapped remote.
+// As a result it removes not only chunker files with their
+// active chunks but also all hidden chunks in the directory.
+//
 func (f *Fs) Purge(ctx context.Context) error {
 do := f.base.Features().Purge
 if do == nil {
@@ -1021,7 +1042,25 @@ func (f *Fs) Purge(ctx context.Context) error {
 return do(ctx)
 }

-// Remove an object
+// Remove an object (chunks and metadata, if any)
+//
+// Remove deletes only active chunks of the object.
+// It does not try to look for temporary chunks because they could belong
+// to another command modifying this composite file in parallel.
+//
+// Commands normally cleanup all temporary chunks in case of a failure.
+// However, if rclone dies unexpectedly, it can leave hidden temporary
+// chunks, which cannot be discovered using the `list` command.
+// Remove does not try to search for such chunks or delete them.
+// Sometimes this can lead to strange results eg. when `list` shows that
+// directory is empty but `rmdir` refuses to remove it because on the
+// level of wrapped remote it's actually *not* empty.
+// As a workaround users can use `purge` to forcibly remove it.
+//
+// In future, a flag `--chunker-delete-hidden` may be added which tells
+// Remove to search directory for hidden chunks and remove them too
+// (at the risk of breaking parallel commands).
+//
 func (o *Object) Remove(ctx context.Context) (err error) {
 if o.main != nil {
 err = o.main.Remove(ctx)
@@ -1095,13 +1134,6 @@ func (f *Fs) copyOrMove(ctx context.Context, o *Object, remote string, do copyMo
 metaInfo := f.wrapInfo(metaObject, "", int64(len(metaData)))
 err = newObj.main.Update(ctx, bytes.NewReader(metaData), metaInfo)
 }
-case "wdmrcompat":
-newInfo := f.wrapInfo(metaObject, "", newObj.size)
-metaData, err = marshalWDMRCompat(ctx, newInfo)
-if err == nil {
-metaInfo := f.wrapInfo(metaObject, "", int64(len(metaData)))
-err = newObj.main.Update(ctx, bytes.NewReader(metaData), metaInfo)
-}
 case "none":
 if newObj.main != nil {
 err = newObj.main.Remove(ctx)
@@ -1436,7 +1468,22 @@ func (o *Object) SetModTime(ctx context.Context, mtime time.Time) error {

 // Hash returns the selected checksum of the file.
 // If no checksum is available it returns "".
-// It prefers the wrapped hashsum for a non-chunked file, then tries saved one.
+//
+// Hash prefers wrapped hashsum for a non-chunked file, then tries to
+// read it from metadata. This in theory handles an unusual case when
+// a small file is modified on the lower level by wrapped remote
+// but chunker is not yet aware of changes.
+//
+// Currently metadata (if not configured as 'none') is kept only for
+// multi-chunk files, but for small files chunker obtains hashsums from
+// wrapped remote. If a particular hashsum type is not supported,
+// chunker won't fail with `unsupported` error but return empty hash.
+//
+// In future metadata logic can be extended: if a normal (non-quick)
+// hash type is configured, chunker will check whether wrapped remote
+// supports it (see Fs.Hashes as an example). If not, it will add metadata
+// to small files as well, thus providing hashsums for all files.
+//
 func (o *Object) Hash(ctx context.Context, hashType hash.Type) (string, error) {
 if !o.isChunked() {
 // First, chain to the single wrapped chunk, if possible.
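A hedged sketch of the preference order this comment describes, with a simplified stand-in type rather than chunker's real `Object`:

```go
package main

import "fmt"

// object is a simplified stand-in for chunker's Object, not the real type.
type object struct {
	chunked     bool
	wrappedHash string // hash reported by the wrapped remote (small files)
	metaMD5     string // hash saved in simplejson metadata (composite files)
}

// hashOf mirrors the order described above: prefer the wrapped hashsum for
// a non-chunked file, then fall back to the value read from metadata.
// An empty result means "no hash available", not an error.
func hashOf(o object) string {
	if !o.chunked && o.wrappedHash != "" {
		return o.wrappedHash
	}
	return o.metaMD5 // may be empty: chunker returns "" rather than failing
}

func main() {
	small := object{chunked: false, wrappedHash: "abc123"}
	big := object{chunked: true, metaMD5: "def456"}
	fmt.Println(hashOf(small)) // abc123 (taken from the wrapped remote)
	fmt.Println(hashOf(big))   // def456 (taken from metadata)
}
```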
@@ -1500,78 +1547,10 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.Read
 limit = o.size - offset
 }

-switch downloadStrategy {
-case "linear":
-return o.newLinearReader(ctx, offset, limit, openOptions)
-case "fastopen":
-return o.newFastopenReader(ctx, offset, limit, openOptions)
-default:
-return nil, errors.New("invalid download strategy")
-}
+return o.newLinearReader(ctx, offset, limit, openOptions)
 }

-// fastopenReader opens all chunks immediately, but reads sequentlially
-type fastopenReader struct {
-readClosers []io.ReadCloser
-multiReader io.Reader
-}
-
-func (o *Object) newFastopenReader(ctx context.Context, offset, limit int64, options []fs.OpenOption) (io.ReadCloser, error) {
-var (
-readers []io.Reader
-readClosers []io.ReadCloser
-)
-for _, chunk := range o.chunks {
-if limit <= 0 {
-break
-}
-count := chunk.Size()
-if offset >= count {
-offset -= count
-continue
-}
-count -= offset
-if limit < count {
-count = limit
-}
-
-end := offset + count - 1
-chunkOptions := append(options, &fs.RangeOption{Start: offset, End: end})
-rc, err := chunk.Open(ctx, chunkOptions...)
-if err != nil {
-r := fastopenReader{readClosers: readClosers}
-_ = r.Close() // ignore error
-return nil, err
-}
-readClosers = append(readClosers, rc)
-readers = append(readers, rc)
-
-offset = 0
-limit -= count
-}
-
-r := &fastopenReader{
-readClosers: readClosers,
-multiReader: io.MultiReader(readers...),
-}
-return r, nil
-}
-
-func (r *fastopenReader) Read(p []byte) (n int, err error) {
-return r.multiReader.Read(p)
-}
-
-func (r *fastopenReader) Close() (err error) {
-for _, rc := range r.readClosers {
-chunkErr := rc.Close()
-if err == nil {
-err = chunkErr
-}
-}
-return
-}
-
-// linearReader opens and reads chunks sequentially, without read-ahead
+// linearReader opens and reads file chunks sequentially, without read-ahead
 type linearReader struct {
 ctx context.Context
 chunks []fs.Object
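The removed fastopen reader above also documents the offset arithmetic that any chunked reader needs. A standalone sketch of that skip-and-clamp loop, assuming a plain list of chunk sizes:

```go
package main

import "fmt"

// seekChunks shows how a byte range maps onto a chunk list: skip whole
// chunks before offset, then clamp each read to the remaining limit.
// This mirrors the loop of the removed fastopen reader; the surviving
// linearReader walks chunks the same way, just one at a time.
func seekChunks(sizes []int64, offset, limit int64) {
	for i, size := range sizes {
		if limit <= 0 {
			return
		}
		if offset >= size {
			offset -= size // range starts past this chunk
			continue
		}
		count := size - offset
		if limit < count {
			count = limit
		}
		fmt.Printf("chunk %d: read %d bytes at offset %d\n", i, count, offset)
		offset = 0
		limit -= count
	}
}

func main() {
	// Three 100-byte chunks, read 120 bytes starting at byte 150.
	seekChunks([]int64{100, 100, 100}, 150, 120)
	// Output:
	// chunk 1: read 50 bytes at offset 50
	// chunk 2: read 70 bytes at offset 0
}
```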
@@ -1771,25 +1750,9 @@ func (o *Object) ID() string {
 return ""
 }

-// SetTier performs changing storage tier of the Object if
-// multiple storage classes supported
-func (o *Object) SetTier(tier string) error {
-if doer, ok := o.mainChunk().(fs.SetTierer); ok {
-return doer.SetTier(tier)
-}
-return errors.New("chunker: wrapped remote does not support SetTier")
-}
-
-// GetTier returns storage tier or class of the Object
-func (o *Object) GetTier() string {
-if doer, ok := o.mainChunk().(fs.GetTierer); ok {
-return doer.GetTier()
-}
-return ""
-}
-
 // Meta format `simplejson`
 type metaSimpleJSON struct {
+Version int `json:"ver"`
 Size int64 `json:"size"`
 NChunks int `json:"nchunks"`
 MD5 string `json:"md5"`
@@ -1798,6 +1761,7 @@ type metaSimpleJSON struct {

 func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) (data []byte, err error) {
 metaData := &metaSimpleJSON{
+Version: metaDataVersion,
 Size: size,
 NChunks: nChunks,
 MD5: md5,
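With the `ver` field added, a marshaled meta object looks like the JSON below (the values are invented; the field layout mirrors `metaSimpleJSON`). Note the result stays far below the 199-byte `maxMetaDataSize` cap:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Same field layout as chunker's metaSimpleJSON after this commit.
type metaSimpleJSON struct {
	Version int    `json:"ver"`
	Size    int64  `json:"size"`
	NChunks int    `json:"nchunks"`
	MD5     string `json:"md5"`
	SHA1    string `json:"sha1"`
}

func main() {
	data, _ := json.Marshal(&metaSimpleJSON{
		Version: 1,
		Size:    2305843009, // ~2.3G file split into two 2G chunks
		NChunks: 2,
		MD5:     "9e107d9d372bb6826bd81d3542a419d6", // example digest
	})
	fmt.Println(string(data))
	// {"ver":1,"size":2305843009,"nchunks":2,"md5":"9e107d9d372bb6826bd81d3542a419d6","sha1":""}
}
```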
@@ -1806,47 +1770,56 @@ func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 s
 return json.Marshal(&metaData)
 }

+// Note: only metadata format version 1 is supported a.t.m.
+//
+// Current implementation creates metadata only for files larger than
+// configured chunk size. This approach has drawback: availability of
+// configured hashsum type for small files depends on the wrapped remote.
+// Future versions of chunker may change approach as described in comment
+// to the Hash method. They can transparently migrate older metadata.
+// New format will have a higher version number and cannot be correctly
+// handled by current implementation.
+// The version check below will then explicitly ask user to upgrade rclone.
+//
 func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, err error) {
 var metaData *metaSimpleJSON
 err = json.Unmarshal(data, &metaData)
 if err != nil {
-return
+return nil, err
 }

+// Perform strict checks, avoid corruption of future metadata formats.
+if metaData.Size < 0 {
+return nil, errors.New("negative file size")
+}
+if metaData.NChunks <= 0 {
+return nil, errors.New("wrong number of chunks")
+}
+if metaData.MD5 != "" {
+_, err = hex.DecodeString(metaData.MD5)
+if len(metaData.MD5) != 32 || err != nil {
+return nil, errors.New("wrong md5 hash")
+}
+}
+if metaData.SHA1 != "" {
+_, err = hex.DecodeString(metaData.SHA1)
+if len(metaData.SHA1) != 40 || err != nil {
+return nil, errors.New("wrong sha1 hash")
+}
+}
+if metaData.Version <= 0 {
+return nil, errors.New("wrong version number")
+}
+if metaData.Version != metaDataVersion {
+return nil, errors.Errorf("version %d is not supported, please upgrade rclone", metaData.Version)
+}
+
 var nilFs *Fs // nil object triggers appropriate type method
 info = nilFs.wrapInfo(metaObject, "", metaData.Size)
 info.md5 = metaData.MD5
 info.sha1 = metaData.SHA1
 info.nChunks = metaData.NChunks
-return
+return info, nil
 }
-
-// Meta format `wdmrcompat`
-type metaWDMRCompat struct {
-Name string `json:"Name"`
-Size int64 `json:"Size"`
-PublicKey interface{} `json:"PublicKey"` // ignored, can be nil
-CreationDate time.Time `json:"CreationDate"` // modification time, ignored
-}
-
-func marshalWDMRCompat(ctx context.Context, srcInfo fs.ObjectInfo) (data []byte, err error) {
-metaData := &metaWDMRCompat{
-Name: path.Base(srcInfo.Remote()),
-Size: srcInfo.Size(),
-PublicKey: nil,
-CreationDate: srcInfo.ModTime(ctx).UTC(),
-}
-return json.Marshal(&metaData)
-}
-
-func unmarshalWDMRCompat(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, err error) {
-var metaData *metaWDMRCompat
-err = json.Unmarshal(data, &metaData)
-if err != nil {
-return
-}
-var nilFs *Fs // nil object triggers appropriate type method
-info = nilFs.wrapInfo(metaObject, "", metaData.Size)
-return
-}

 // Check the interfaces are satisfied
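A usage sketch of the strict checks above, reduced to the size, count, and version rules (md5/sha1 validation omitted for brevity), showing how a future `ver` triggers the explicit upgrade message:

```go
package main

import (
	"encoding/json"
	"fmt"
)

const metaDataVersion = 1

type metaSimpleJSON struct {
	Version int   `json:"ver"`
	Size    int64 `json:"size"`
	NChunks int   `json:"nchunks"`
}

// validate applies the same strictness idea as the hunk above: corrupt or
// future-format metadata is rejected outright instead of being half-parsed.
func validate(data []byte) error {
	var m metaSimpleJSON
	if err := json.Unmarshal(data, &m); err != nil {
		return err
	}
	switch {
	case m.Size < 0:
		return fmt.Errorf("negative file size")
	case m.NChunks <= 0:
		return fmt.Errorf("wrong number of chunks")
	case m.Version <= 0:
		return fmt.Errorf("wrong version number")
	case m.Version != metaDataVersion:
		return fmt.Errorf("version %d is not supported, please upgrade rclone", m.Version)
	}
	return nil
}

func main() {
	fmt.Println(validate([]byte(`{"ver":1,"size":100,"nchunks":2}`))) // <nil>
	fmt.Println(validate([]byte(`{"ver":2,"size":100,"nchunks":2}`))) // upgrade message
}
```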
@@ -1868,6 +1841,4 @@ var (
 _ fs.Object = (*Object)(nil)
 _ fs.ObjectUnWrapper = (*Object)(nil)
 _ fs.IDer = (*Object)(nil)
-_ fs.SetTierer = (*Object)(nil)
-_ fs.GetTierer = (*Object)(nil)
 )
@@ -28,10 +28,14 @@ var (
 // dynamic chunker overlay wrapping a local temporary directory.
 func TestIntegration(t *testing.T) {
 opt := fstests.Opt{
 RemoteName: *fstest.RemoteName,
 NilObject: (*chunker.Object)(nil),
 SkipBadWindowsCharacters: !*UseBadChars,
-UnimplementableObjectMethods: []string{"MimeType"},
+UnimplementableObjectMethods: []string{
+"MimeType",
+"GetTier",
+"SetTier",
+},
 UnimplementableFsMethods: []string{
 "PublicLink",
 "OpenWriterAt",
@@ -4,11 +4,11 @@ description: "Split-chunking overlay remote"
 date: "2019-08-30"
 ---

-<i class="fa fa-cut"></i>Chunker
+<i class="fa fa-cut"></i>Chunker (BETA)
 ----------------------------------------

 The `chunker` overlay transparently splits large files into smaller chunks
-during the upload to wrapped remote and transparently assembles them back
+during upload to wrapped remote and transparently assembles them back
 when the file is downloaded. This allows to effectively overcome size limits
 imposed by storage providers.

@@ -41,10 +41,27 @@ Storage> chunker
 Remote to chunk/unchunk.
 Normally should contain a ':' and a path, eg "myremote:path/to/dir",
 "myremote:bucket" or maybe "myremote:" (not recommended).
+Enter a string value. Press Enter for the default ("").
 remote> remote:path
-Files larger than chunk_size will be split in chunks. By default 2 Gb.
+Files larger than chunk size will be split in chunks.
 Enter a size with suffix k,M,G,T. Press Enter for the default ("2G").
-chunk_size> 1G
+chunk_size> 100M
+Choose how chunker handles hash sums.
+Enter a string value. Press Enter for the default ("md5").
+Choose a number from below, or type in your own value
+   / Chunker can pass any hash supported by wrapped remote
+ 1 | for a single-chunk file but returns nothing otherwise.
+   \ "none"
+ 2 / MD5 for multi-chunk files. Requires "simplejson".
+   \ "md5"
+ 3 / SHA1 for multi-chunk files. Requires "simplejson".
+   \ "sha1"
+   / Copying a file to chunker will request MD5 from the source
+ 4 | falling back to SHA1 if unsupported. Requires "simplejson".
+   \ "md5quick"
+ 5 / Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".
+   \ "sha1quick"
+hash_type> md5
 Edit advanced config? (y/n)
 y) Yes
 n) No
@@ -53,8 +70,9 @@ Remote config
 --------------------
 [overlay]
 type = chunker
-remote = TestLocal:
-chunk_size = 2G
+remote = remote:bucket
+chunk_size = 100M
+hash_type = md5
 --------------------
 y) Yes this is OK
 e) Edit this remote
@@ -73,8 +91,8 @@ will put files in a directory called `name` in the current directory.

 ### Chunking

-When rclone starts a file upload, chunker checks the file size.
-If it doesn't exceed the configured chunk size, chunker will just pass it
+When rclone starts a file upload, chunker checks the file size. If it
+doesn't exceed the configured chunk size, chunker will just pass the file
 to the wrapped remote. If a file is large, chunker will transparently cut
 data in pieces with temporary names and stream them one by one, on the fly.
 Each chunk will contain the specified number of data bytes, except for the
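For instance, with the 100M chunk size configured earlier, a hypothetical 250M upload would be cut as follows (illustrative arithmetic only, not rclone code):

```go
package main

import "fmt"

func main() {
	const chunkSize = int64(100 << 20) // 100M, as configured above
	fileSize := int64(250 << 20)       // hypothetical 250M upload

	if fileSize <= chunkSize {
		fmt.Println("small file: passed through to the wrapped remote as is")
		return
	}
	full := fileSize / chunkSize
	last := fileSize % chunkSize
	fmt.Printf("%d full chunks of %d bytes + last chunk of %d bytes\n", full, chunkSize, last)
	// 2 full chunks of 104857600 bytes + last chunk of 52428800 bytes
}
```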
@@ -84,7 +102,7 @@ a temporary copy, record its size and repeat the above process.
 When upload completes, temporary chunk files are finally renamed.
 This scheme guarantees that operations look from outside as atomic.
 A similar method with hidden temporary chunks is used for other operations
-(copy/move/rename etc). If operation fails, hidden chunks are normally
+(copy/move/rename etc). If an operation fails, hidden chunks are normally
 destroyed, and the destination composite file stays intact.

 #### Chunk names
@@ -94,58 +112,52 @@ By default chunk names are `BIG_FILE_NAME.rclone-chunk.001`,
 format is `*.rclone-chunk.###`. You can configure another name format
 using the `--chunker-name-format` option. The format uses asterisk
 `*` as a placeholder for the base file name and one or more consecutive
-hash characters `#` as a placeholder for the chunk number. There must be
-one and only one asterisk. The number of consecutive hashes defines the
-minimum length of a string representing a chunk number. If a chunk number
-has less digits than the number of hashes, it is left-padded by zeros.
-If there are more digits in the number, they are left as is.
+hash characters `#` as a placeholder for sequential chunk number.
+There must be one and only one asterisk. The number of consecutive hash
+characters defines the minimum length of a string representing a chunk number.
+If decimal chunk number has less digits than the number of hashes, it is
+left-padded by zeros. If the number string is longer, it is left intact.
 By default numbering starts from 1 but there is another option that allows
 user to start from 0, eg. for compatibility with legacy software.

-For example, if name format is `big_*-##.part`, and original file was
-named `data.txt` and numbering starts from 0, then the first chunk will be
-named `big_data.txt-00.part`, the 99th chunk will be `big_data.txt-98.part`
-and the 302nd chunk will be `big_data.txt-301.part`.
+For example, if name format is `big_*-##.part` and original file name is
+`data.txt` and numbering starts from 0, then the first chunk will be named
+`big_data.txt-00.part`, the 99th chunk will be `big_data.txt-98.part`
+and the 302nd chunk will become `big_data.txt-301.part`.

-Would-be chunk files are ignored if their name does not match given format.
-The list command might encounter composite files with missinng or invalid
-chunks. By default, if chunker detects a missing chunk it will silently
-ignore the whole group. Use the `--chunker-fail-on-bad-chunks` flag
-to make it fail with an error message.
+When the `list` rclone command scans a directory on wrapped remote, the
+potential chunk files are accounted for and merged into composite directory
+entries only if their names match the configured format. All other files
+are ignored, including temporary chunks.
+The list command might encounter composite files with missing or invalid
+chunks. If chunker detects a missing chunk it will by default silently
+ignore the whole group. You can use the `--chunker-fail-on-bad-chunks`
+command line flag to make `list` fail with an error message.

 ### Metadata

 By default when a file is large enough, chunker will create a metadata
 object besides data chunks. The object is named after the original file.
-Chunker allows to choose between few metadata formats. Please note that
-currently metadata is not created for files smaller than configured
-chunk size. This may change in future as new formats are developed.
+Chunker allows user to disable metadata completely (the `none` format).
+Please note that currently metadata is not created for files smaller
+than configured chunk size. This may change in future as new formats
+are developed.

 #### Simple JSON metadata format

 This is the default format. It supports hash sums and chunk validation
 for composite files. Meta objects carry the following fields:

-- `size` - total size of chunks
-- `nchunks` - number of chunks
-- `md5` - MD5 hashsum (if present)
+- `ver` - version of format, currently `1`
+- `size` - total size of composite file
+- `nchunks` - number of chunks in the file
+- `md5` - MD5 hashsum of composite file (if present)
 - `sha1` - SHA1 hashsum (if present)

 There is no field for composite file name as it's simply equal to the name
 of meta object on the wrapped remote. Please refer to respective sections
-for detils on hashsums and modified time handling.
+for details on hashsums and handling of modified time.

-#### WedDavMailRu compatible metadata format
-
-The `wdmrcompat` metadata format is only useful to support historical files
-created by [WebDriveMailru](https://github.com/yar229/WebDavMailRuCloud).
-It keeps the following fields (most are ignored, though):
-
-- `Name` - name of the composite file (always equal to the meta file name)
-- `Size` - total size of chunks
-- `PublicKey` - ignored, always "null"
-- `CreationDate` - last modification (sic!) time, ignored.
-
 #### No metadata
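A hypothetical helper demonstrating the padding rules just described (this is not rclone's implementation, only the arithmetic from the `big_*-##.part` example above):

```go
package main

import (
	"fmt"
	"strings"
)

// chunkName expands a name format like "big_*-##.part" for a given base file
// name and chunk number: `*` is the base name, `#`s set the minimum width.
func chunkName(format, baseName string, chunkNo int) string {
	name := strings.Replace(format, "*", baseName, 1)
	hashes := strings.Count(format, "#")
	digits := fmt.Sprintf("%0*d", hashes, chunkNo) // left-pad; longer numbers stay intact
	return strings.Replace(name, strings.Repeat("#", hashes), digits, 1)
}

func main() {
	fmt.Println(chunkName("big_*-##.part", "data.txt", 0))   // big_data.txt-00.part
	fmt.Println(chunkName("big_*-##.part", "data.txt", 98))  // big_data.txt-98.part
	fmt.Println(chunkName("big_*-##.part", "data.txt", 301)) // big_data.txt-301.part
}
```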
@@ -161,8 +173,8 @@ errors (especially missing last chunk) than metadata-enabled formats.
 ### Hashsums

 Chunker supports hashsums only when a compatible metadata is present.
-Thus, if you choose metadata format of `none` or `wdmrcompat`, chunker
-will return `UNSUPPORTED` as hashsum.
+Thus, if you choose metadata format of `none`, chunker will return
+`UNSUPPORTED` as hashsum.

 Please note that metadata is stored only for composite files. If a file
 is small (smaller than configured chunk size), chunker will transparently
@@ -175,16 +187,16 @@ Currently you can choose one or another but not both.
 MD5 is set by default as the most supported type.
 Since chunker keeps hashes for composite files and falls back to the
 wrapped remote hash for small ones, we advise you to choose the same
-hash type as wrapped remote, so your file listings look coherent.
+hash type as wrapped remote so that your file listings look coherent.

-Normally, when a file is copied to chunker controlled remote, chunker
-will ask its source for compatible file hash and revert to on-the-fly
+Normally, when a file is copied to a chunker controlled remote, chunker
+will ask the file source for compatible file hash and revert to on-the-fly
 calculation if none is found. This involves some CPU overhead but provides
 a guarantee that given hashsum is available. Also, chunker will reject
 a server-side copy or move operation if source and destination hashsum
 types are different, resulting in the extra network bandwidth, too.
 In some rare cases this may be undesired, so chunker provides two optional
-choices: `sha1quick` and `md5quick`. If source does not have the primary
+choices: `sha1quick` and `md5quick`. If the source does not support primary
 hash type and the quick mode is enabled, chunker will try to fall back to
 the secondary type. This will save CPU and bandwidth but can result in empty
 hashsums at destination. Beware of consequences: the `sync` command will
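A sketch of the quick-mode decision described above, under the assumption that the source exposes its hashes as a simple map (rclone's real API differs):

```go
package main

import "fmt"

// pickHash sketches the quick-mode fallback: prefer the configured primary
// type, fall back to the secondary if the source supports it, otherwise
// accept an empty hashsum instead of recomputing on the fly.
func pickHash(srcHashes map[string]string, primary, secondary string, quick bool) string {
	if sum, ok := srcHashes[primary]; ok {
		return sum
	}
	if quick {
		if sum, ok := srcHashes[secondary]; ok {
			return sum // e.g. md5quick taking a SHA1 sum from the source
		}
		return "" // empty hashsum at destination, no CPU spent
	}
	return "" // non-quick mode would recalculate the hash on the fly here
}

func main() {
	src := map[string]string{"sha1": "2fd4e1c67a2d28fced849ee1bb76e7391b93eb12"}
	fmt.Println(pickHash(src, "md5", "sha1", true))  // falls back to the SHA1 sum
	fmt.Println(pickHash(src, "md5", "sha1", false)) // "" (would trigger recalculation)
}
```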
@@ -215,13 +227,14 @@ chunk naming scheme is to:
 hash type, chunk naming etc.
 - Now run `rclone sync oldchunks: newchunks:` and all your data
 will be transparently converted at transfer.
-This may take some time.
+This may take some time, yet chunker will try server-side
+copy if possible.
 - After checking data integrity you may remove configuration section
 of the old remote.

 If rclone gets killed during a long operation on a big composite file,
 hidden temporary chunks may stay in the directory. They will not be
-shown by the list command but will eat up your account quota.
+shown by the `list` command but will eat up your account quota.
 Please note that the `deletefile` rclone command deletes only active
 chunks of a file. As a workaround, you can use remote of the wrapped
 file system to see them.
@@ -234,17 +247,18 @@ remove everything including garbage.
 ### Caveats and Limitations

 Chunker requires wrapped remote to support server side `move` (or `copy` +
-delete) operations, otherwise it will explicitly refuse to start.
+`delete`) operations, otherwise it will explicitly refuse to start.
 This is because it internally renames temporary chunk files to their final
 names when an operation completes successfully.

-Note that moves done using the copy-and-delete method may incur double
-charging with some cloud storage providers.
+Note that a move implemented using the copy-and-delete method may incur
+double charging with some cloud storage providers.

-Chunker will not automatically rename existing chunks when you change the
-chunk name format. Beware that in result of this some files which have been
-treated as chunks before the change can pop up in directory listings as
-normal files and vice versa. The same warning holds for the chunk size.
+Chunker will not automatically rename existing chunks when you run
+`rclone config` on a live remote and change the chunk name format.
+Beware that in result of this some files which have been treated as chunks
+before the change can pop up in directory listings as normal files
+and vice versa. The same warning holds for the chunk size.
 If you desperately need to change critical chunking settings, you should
 run data migration as described in a dedicated section.

@@ -278,6 +292,28 @@ Files larger than chunk size will be split in chunks.
 - Type: SizeSuffix
 - Default: 2G

+#### --chunker-hash-type
+
+Choose how chunker handles hash sums.
+
+- Config: hash_type
+- Env Var: RCLONE_CHUNKER_HASH_TYPE
+- Type: string
+- Default: "md5"
+- Examples:
+    - "none"
+        - Chunker can pass any hash supported by wrapped remote
+        - for a single-chunk file but returns nothing otherwise.
+    - "md5"
+        - MD5 for multi-chunk files. Requires "simplejson".
+    - "sha1"
+        - SHA1 for multi-chunk files. Requires "simplejson".
+    - "md5quick"
+        - Copying a file to chunker will request MD5 from the source
+        - falling back to SHA1 if unsupported. Requires "simplejson".
+    - "sha1quick"
+        - Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".
+
 ### Advanced Options

 Here are the advanced options specific to chunker (Transparently chunk/split large files).
@@ -321,33 +357,6 @@ Metadata is a small JSON file named after the composite file.
 - "simplejson"
     - Simple JSON supports hash sums and chunk validation.
     - It has the following fields: size, nchunks, md5, sha1.
-- "wdmrcompat"
-    - This format brings compatibility with WebDavMailRuCloud.
-    - It does not support hash sums or validation, most fields are ignored.
-    - It has the following fields: Name, Size, PublicKey, CreationDate.
-    - Requires hash type "none".
-
-#### --chunker-hash-type
-
-Choose how chunker handles hash sums.
-
-- Config: hash_type
-- Env Var: RCLONE_CHUNKER_HASH_TYPE
-- Type: string
-- Default: "md5"
-- Examples:
-    - "none"
-        - Chunker can pass any hash supported by wrapped remote
-        - for a single-chunk file but returns nothing otherwise.
-    - "md5"
-        - MD5 for multi-chunk files. Requires "simplejson".
-    - "sha1"
-        - SHA1 for multi-chunk files. Requires "simplejson".
-    - "md5quick"
-        - When a file is copied on to chunker, MD5 is taken from its source
-        - falling back to SHA1 if the source doesn't support it. Requires "simplejson".
-    - "sha1quick"
-        - Similar to "md5quick" but prefers SHA1 over MD5. Requires "simplejson".

 #### --chunker-fail-on-bad-chunks
@@ -986,7 +986,6 @@ func TestSyncWithTrackRenames(t *testing.T) {
 fs.Config.TrackRenames = true
 defer func() {
 fs.Config.TrackRenames = false
-
 }()

 haveHash := r.Fremote.Hashes().Overlap(r.Flocal.Hashes()).GetOne() != hash.None
@@ -1010,45 +1009,64 @@ func TestSyncWithTrackRenames(t *testing.T) {
 fstest.CheckItems(t, r.Fremote, f1, f2)

-if canTrackRenames {
-if r.Fremote.Features().Move == nil || r.Fremote.Name() == "TestUnion" { // union remote can Move but returns CantMove error
-// If no server side Move, we are falling back to Copy + Delete
-assert.Equal(t, int64(1), accounting.GlobalStats().GetTransfers()) // 1 copy
-assert.Equal(t, int64(4), accounting.GlobalStats().GetChecks()) // 2 file checks + 1 move + 1 delete
-} else {
-assert.Equal(t, int64(0), accounting.GlobalStats().GetTransfers()) // 0 copy
-assert.Equal(t, int64(3), accounting.GlobalStats().GetChecks()) // 2 file checks + 1 move
-}
-} else {
-if toyFileChecks(r) != -1 {
-assert.Equal(t, toyFileChecks(r), accounting.GlobalStats().GetChecks())
-}
-assert.Equal(t, toyFileTransfers(r), accounting.GlobalStats().GetTransfers())
-}
-}
-
-func toyFileChecks(r *fstest.Run) int64 {
+// As currently there is no Fs interface providing number of chunks
+// in a file, this test depends on the well-known names of test remotes.
 remote := r.Fremote.Name()
-// Numbers below are calculated for a 14 byte file.
-if !strings.HasPrefix(remote, "TestChunker") {
-return 2
-}
-// Chunker makes more internal checks.
+// Union remote can Move but returns CantMove error.
+moveAsCopyDelete := r.Fremote.Features().Move == nil || remote == "TestUnion"
+chunker := strings.HasPrefix(remote, "TestChunker")
+wrappedMoveAsCopyDelete := chunker && strings.HasSuffix(remote, "S3")
+
+chunk3b := chunker && strings.Contains(remote, "Chunk3b") // chunker with 3 byte chunks
+chunk50b := chunker && strings.Contains(remote, "Chunk50b") // chunker with 50 byte chunks
+chunkDefault := chunker && !strings.Contains(remote, "ChunkerChunk") // default big chunk size
+chunkBig := chunk50b || chunkDefault // file is smaller than chunk size
+
+// Verify number of checks for a toy 14 byte file.
+// The order of cases matters!
 var checks int
 switch {
-case strings.Contains(remote, "Chunk3b"): // chunk 3 bytes
-checks = 6
-case strings.Contains(remote, "Chunk50b"): // chunk 50 bytes
-checks = 3
-case strings.Contains(remote, "ChunkerChunk"): // unknown chunk size
-return -1
+case canTrackRenames && chunk3b:
+checks = 8 // chunker makes extra checks for each small chunk
+case canTrackRenames && chunkBig:
+checks = 4 // chunker makes 1 extra check for a single big chunk
+case canTrackRenames && moveAsCopyDelete:
+checks = 4 // 2 file checks + 1 move + 1 delete
+case canTrackRenames:
+checks = 3 // 2 file checks + 1 move
+case !chunker:
+checks = 2 // 2 file checks on a generic non-chunking remote
+case chunk3b:
+checks = 6 // chunker makes extra checks for each small chunk
+case chunkBig && wrappedMoveAsCopyDelete:
+checks = 4 // one more extra check because S3 emulates Move as Copy+Delete
+case chunkBig:
+checks = 3 // chunker makes 1 extra check for a single big chunk
 default:
-checks = 3 // large chunks (eventually no chunking)
+checks = -1 // skip verification for chunker with unknown chunk size
 }
-if strings.HasSuffix(remote, "S3") {
-checks++ // Extra check because S3 emulates Move as Copy+Delete.
+if checks != -1 { // "-1" allows remotes to bypass this check
+assert.Equal(t, int64(checks), accounting.GlobalStats().GetChecks())
+}
+
+// Verify number of copy operations for a toy 14 byte file.
+// The order of cases matters!
+var copies int64
+switch {
+case canTrackRenames && moveAsCopyDelete:
+copies = 1 // 1 copy
+case canTrackRenames:
+copies = 0 // 0 copy
+case chunkBig && wrappedMoveAsCopyDelete:
+copies = 2 // extra Copy because S3 emulates Move as Copy+Delete.
+default:
+copies = 1
+}
+if copies != -1 { // "-1" allows remotes to bypass this check
+assert.Equal(t, copies, accounting.GlobalStats().GetTransfers())
 }
-return int64(checks)
 }

 func toyFileTransfers(r *fstest.Run) int64 {
@@ -33,9 +33,6 @@ backends:
 - backend: "chunker"
   remote: "TestChunkerNometaLocal:"
   fastlist: true
-- backend: "chunker"
-  remote: "TestChunkerCompatLocal:"
-  fastlist: true
 - backend: "chunker"
   remote: "TestChunkerChunk3bLocal:"
   fastlist: true
@@ -44,10 +41,6 @@ backends:
   remote: "TestChunkerChunk3bNometaLocal:"
   fastlist: true
   maxfile: 6k
-- backend: "chunker"
-  remote: "TestChunkerChunk3bCompatLocal:"
-  fastlist: true
-  maxfile: 6k
 - backend: "chunker"
   remote: "TestChunkerMailru:"
   fastlist: true
@@ -66,30 +59,26 @@ backends:
 - backend: "chunker"
   remote: "TestChunkerS3:"
   fastlist: true
-  ignore:
-    - TestIntegration/FsMkdir/FsPutFiles/SetTier
 - backend: "chunker"
   remote: "TestChunkerChunk50bS3:"
   fastlist: true
   maxfile: 1k
-  ignore:
-    - TestIntegration/FsMkdir/FsPutFiles/SetTier
-#- backend: "chunker"
-# remote: "TestChunkerChunk50bMD5HashS3:"
-# fastlist: true
-# maxfile: 1k
-#- backend: "chunker"
-# remote: "TestChunkerChunk50bMD5QuickS3:"
-# fastlist: true
-# maxfile: 1k
-#- backend: "chunker"
-# remote: "TestChunkerChunk50bSHA1HashS3:"
-# fastlist: true
-# maxfile: 1k
-#- backend: "chunker"
-# remote: "TestChunkerChunk50bSHA1QuickS3:"
-# fastlist: true
-# maxfile: 1k
+- backend: "chunker"
+  remote: "TestChunkerChunk50bMD5HashS3:"
+  fastlist: true
+  maxfile: 1k
+- backend: "chunker"
+  remote: "TestChunkerChunk50bSHA1HashS3:"
+  fastlist: true
+  maxfile: 1k
+- backend: "chunker"
+  remote: "TestChunkerChunk50bMD5QuickS3:"
+  fastlist: true
+  maxfile: 1k
+- backend: "chunker"
+  remote: "TestChunkerChunk50bSHA1QuickS3:"
+  fastlist: true
+  maxfile: 1k
 ## end chunker
 - backend: "drive"
   remote: "TestDrive:"