rclone/cmd/bisync/checkfn.go

package bisync

import (
	"bytes"
	"context"
	"fmt"
	"strings"

	"github.com/rclone/rclone/backend/crypt"
	"github.com/rclone/rclone/cmd/bisync/bilib"
	"github.com/rclone/rclone/cmd/check"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/filter"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/fs/operations"
)

// hashType is the hash type used by CryptCheckFn. WhichCheck assigns it from
// the hashes supported by the Fs wrapped by the crypt remote.
var hashType hash.Type

// fsrc and fdst are the two sides recorded by WhichCheck when a cryptcheck is
// selected (they are swapped for the reverse cryptcheck). CryptCheckFn reads
// them only to label its debug message.
var fsrc, fdst fs.Fs

// fcrypt is the crypt Fs that CryptCheckFn uses to compute the expected
// encrypted hash. It is assigned by WhichCheck.
// NOTE(review): these are package-level and mutable, so the values seen by
// CryptCheckFn are those from the most recent WhichCheck call.
var fcrypt *crypt.Fs

// WhichCheck picks the per-object comparison function best suited to the two
// Fs in opt, stores it in opt.Check and returns the same opt.
//
// The selection order is:
//   - the plain hash check (CheckFn) when both sides share a hash type, or
//     when --size-only / --ignore-checksum is set;
//   - a cryptcheck when one side is a crypt remote whose wrapped Fs
//     supports a hash;
//   - a download comparison otherwise.
//
// Unlike a plain check it therefore never silently degrades to a size-only
// comparison.
func WhichCheck(ctx context.Context, opt *operations.CheckOpt) *operations.CheckOpt {
	ci := fs.GetConfig(ctx)
	sharedHashes := opt.Fsrc.Hashes().Overlap(opt.Fdst.Hashes())

	// ci.IgnoreChecksum does not alter how CheckFn behaves; it merely lets the
	// user opt out of the cryptcheck / download fallbacks.
	if ci.IgnoreChecksum || ci.SizeOnly || sharedHashes.Count() > 0 {
		opt.Check = CheckFn
		return opt
	}

	cryptSrc, srcIsCrypt := opt.Fsrc.(*crypt.Fs)
	cryptDst, dstIsCrypt := opt.Fdst.(*crypt.Fs)

	switch {
	case dstIsCrypt:
		// dst is crypt (src may or may not be): regular cryptcheck
		hashType = cryptDst.UnWrap().Hashes().GetOne()
		if hashType != hash.None {
			fsrc, fdst = opt.Fsrc, opt.Fdst
			fcrypt = cryptDst
			fs.Infof(fdst, "Crypt detected! Using cryptcheck instead of check. (Use --size-only or --ignore-checksum to disable)")
			opt.Check = CryptCheckFn
			return opt
		}
	case srcIsCrypt:
		// only src is crypt: cryptcheck with the two sides swapped
		hashType = cryptSrc.UnWrap().Hashes().GetOne()
		if hashType != hash.None {
			fsrc, fdst = opt.Fdst, opt.Fsrc
			fcrypt = cryptSrc
			fs.Infof(fdst, "Crypt detected! Using cryptcheck instead of check. (Use --size-only or --ignore-checksum to disable)")
			opt.Check = ReverseCryptCheckFn
			return opt
		}
	}

	// Neither check nor cryptcheck can work here, so compare by downloading.
	// NOTE(review): fdst is the package-level variable, which is only assigned
	// in the crypt branches above -- confirm it is the intended log subject.
	fs.Infof(fdst, "Can't compare hashes, so using check --download for safety. (Use --size-only or --ignore-checksum to disable)")
	opt.Check = DownloadCheckFn
	return opt
}

// CheckFn compares src and dst by hash via operations.CheckHashes.
//
// It returns differ=true when the hashes do not match (after logging the
// mismatch against src), noHash=true when no hash type was available, and a
// non-nil err only when the hash comparison itself failed.
func CheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {
	equal, hashKind, checkErr := operations.CheckHashes(ctx, src, dst)
	switch {
	case checkErr != nil:
		return true, false, checkErr
	case hashKind == hash.None:
		return false, true, nil
	case equal:
		return false, false, nil
	}
	// a mismatch is logged but deliberately not returned as an error
	mismatch := fmt.Errorf("%v differ", hashKind)
	fs.Errorf(src, "%v", mismatch)
	return true, false, nil
}

// CryptCheckFn is a slightly modified version of CryptCheck.
//
// dst must be an object of a crypt remote. The hash of its underlying
// (encrypted) object is compared with the hash that fcrypt computes for src.
// The package-level hashType, fcrypt, fsrc and fdst set by WhichCheck are used.
//
// It returns differ=true when the hashes do not match, noHash=true when either
// hash is unavailable, and a non-nil err when dst is not a crypt object or a
// hash could not be read or computed.
func CryptCheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {
	cryptDst, ok := dst.(*crypt.Object)
	if !ok {
		// report instead of panicking on an unexpected object type
		return true, false, fmt.Errorf("cryptcheck: expected a crypt object but got %T", dst)
	}
	underlyingDst := cryptDst.UnWrap()
	underlyingHash, err := underlyingDst.Hash(ctx, hashType)
	if err != nil {
		return true, false, fmt.Errorf("error reading hash from underlying %v: %w", underlyingDst, err)
	}
	if underlyingHash == "" {
		return false, true, nil
	}
	cryptHash, err := fcrypt.ComputeHash(ctx, cryptDst, src, hashType)
	if err != nil {
		return true, false, fmt.Errorf("error computing hash: %w", err)
	}
	if cryptHash == "" {
		return false, true, nil
	}
	if cryptHash != underlyingHash {
		err = fmt.Errorf("hashes differ (%s:%s) %q vs (%s:%s) %q", fdst.Name(), fdst.Root(), cryptHash, fsrc.Name(), fsrc.Root(), underlyingHash)
		// log through a constant format string so that a '%' in a remote
		// name or root is not interpreted as a formatting verb
		fs.Debugf(src, "%v", err)
		// using same error msg as CheckFn so integration tests match
		err = fmt.Errorf("%v differ", hashType)
		fs.Errorf(src, "%v", err)
		return true, false, nil
	}
	return false, false, nil
}

// ReverseCryptCheckFn runs CryptCheckFn with the two objects swapped, for the
// case where src is the crypt side and dst is the non-crypt side.
func ReverseCryptCheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {
	// CryptCheckFn expects the crypt object first, so hand it src as "dst"
	differ, noHash, err = CryptCheckFn(ctx, src, dst)
	return differ, noHash, err
}

// DownloadCheckFn compares a and b with operations.CheckIdenticalDownload,
// like check --download.
//
// On failure it reports differ=true and noHash=true together with the
// wrapped error; otherwise it returns the comparison result with noHash=false.
func DownloadCheckFn(ctx context.Context, a, b fs.Object) (differ bool, noHash bool, err error) {
	mismatch, dlErr := operations.CheckIdenticalDownload(ctx, a, b)
	if dlErr == nil {
		return mismatch, false, nil
	}
	return true, true, fmt.Errorf("failed to download: %w", dlErr)
}

// checkconflicts checks the potential conflicts selected by filterCheck so
// that files which are already identical on both sides need not be renamed.
//
// It returns the names reported as matching by the check. When the check
// options cannot be created, the run is marked critical and retryable and the
// error is returned. Nothing is checked if filterCheck has no files-from list.
func (b *bisyncRun) checkconflicts(ctxCheck context.Context, filterCheck *filter.Filter, fs1, fs2 fs.Fs) (bilib.Names, error) {
	matches := bilib.Names{}
	if !filterCheck.HaveFilesFrom() {
		return matches, nil
	}
	fs.Debugf(nil, "There are potential conflicts to check.")

	opt, closeOpt, optErr := check.GetCheckOpt(b.fs1, b.fs2)
	if optErr != nil {
		b.critical = true
		b.retryable = true
		fs.Debugf(nil, "GetCheckOpt error: %v", optErr)
		return matches, optErr
	}
	defer closeOpt()

	// collect the names of matching files in memory
	opt.Match = new(bytes.Buffer)
	opt = WhichCheck(ctxCheck, opt)

	fs.Infof(nil, "Checking potential conflicts...")
	// the result of the check is only logged, never returned
	checkErr := operations.CheckFn(ctxCheck, opt)
	fs.Infof(nil, "Finished checking the potential conflicts. %s", checkErr)

	// check errors must not be counted as bisync errors
	accounting.Stats(ctxCheck).ResetErrors()

	// the identical files are returned so they can be checked against later
	if identical := fmt.Sprint(opt.Match); len(identical) > 0 {
		matches = bilib.ToNames(strings.Split(identical, "\n"))
	}
	if !matches.NotEmpty() {
		fs.Debugf(nil, "None of the conflicts were determined to be identical.")
		return matches, nil
	}
	fs.Debugf(nil, "The following potential conflicts were determined to be identical. %v", matches)
	return matches, nil
}

// WhichEqual is similar to WhichCheck, but checks a single object.
//
// It builds check options for Fsrc and Fdst, lets WhichCheck choose the
// comparison function and applies it to src and dst.
//
// Returns true if the objects are equal, false if they differ or if we don't
// know (the options could not be created, the check failed, or no hash was
// available).
func WhichEqual(ctx context.Context, src, dst fs.Object, Fsrc, Fdst fs.Fs) bool {
	opt, closeOpt, checkopterr := check.GetCheckOpt(Fsrc, Fdst)
	if checkopterr != nil {
		fs.Debugf(nil, "GetCheckOpt error: %v", checkopterr)
		// opt and closeOpt cannot be relied on after an error, so stop here
		// rather than dereferencing them; "don't know" is reported as false
		return false
	}
	defer closeOpt()

	opt = WhichCheck(ctx, opt)
	differ, noHash, err := opt.Check(ctx, dst, src)
	if err != nil {
		fs.Errorf(src, "failed to check: %v", err)
		return false
	}
	if noHash {
		fs.Errorf(src, "failed to check as hash is missing")
		return false
	}
	return !differ
}

// EqualFn returns a context carrying an equality function that replaces the
// standard Equal with one that also considers checksums.
//
// The replacement first defers to operations.Equal (with the logger silenced
// so results are not logged twice) and, only if that reports equality,
// additionally compares the hashes of both sides. The outcome is reported to
// the logger found in the context as Match or Differ.
// Note that it also updates the modtime the same way as Sync.
func (b *bisyncRun) EqualFn(ctx context.Context) context.Context {
	ci := fs.GetConfig(ctx)
	// checksum is forced off so that modtime is evaluated if needed; the
	// modtime and size settings should already be set correctly for Equal
	ci.CheckSum = false

	// pickHashType returns the hash type to use for f, falling back to any
	// hash f supports when slow hashes are limited to sync and this is not a
	// resync.
	pickHashType := func(f fs.Info) hash.Type {
		ht := getHashType(f.Name())
		if ht == hash.None && b.opt.Compare.SlowHashSyncOnly && !b.opt.Resync {
			ht = f.Hashes().GetOne()
		}
		return ht
	}

	var equalFn operations.EqualFn = func(ctx context.Context, src fs.ObjectInfo, dst fs.Object) bool {
		fs.Debugf(src, "evaluating...")
		logger, _ := operations.GetLogger(ctx)
		// run Equal with a stand-in logger so it does not duplicate our result
		silenced := operations.WithLogger(ctx, func(ctx context.Context, sigil operations.Sigil, src, dst fs.DirEntry, err error) {
			fs.Debugf(src, "equal skipped")
		})

		same := operations.Equal(silenced, src, dst)
		if same {
			// hash errors are ignored here; the hashes are passed on as-is
			srcHash, _ := src.Hash(ctx, pickHashType(src.Fs()))
			dstHash, _ := dst.Hash(ctx, pickHashType(dst.Fs()))
			srcHash, _ = tryDownloadHash(ctx, src, srcHash)
			dstHash, _ = tryDownloadHash(ctx, dst, dstHash)
			same = !hashDiffers(srcHash, dstHash, pickHashType(src.Fs()), pickHashType(dst.Fs()), src.Size(), dst.Size())
		}

		if !same {
			logger(ctx, operations.Differ, src, dst, nil)
			fs.Debugf(src, "EqualFn: files are NOT equal")
			return false
		}
		logger(ctx, operations.Match, src, dst, nil)
		fs.Debugf(src, "EqualFn: files are equal")
		return true
	}
	return operations.WithEqualFn(ctx, equalFn)
}
bisync: fallback to cryptcheck or --download when can't check hash Bisync checks file equality before renaming sync conflicts by comparing checksums. Before this change, backends without checksum support (notably Crypt) would fall back to --size-only for these checks, which is not a very safe method (differing files can sometimes have the same size, especially if they're small.) After this change, Crypt remotes fallback to using Cryptcheck so that checksums can be compared. As a last resort when neither Check nor Cryptcheck are available, files are compared using --download so that we can be certain the files are identical regardless of checksum support. 2023-11-22 06:43:17 +08:00			`package bisync`

			`import (`
			`"bytes"`
			`"context"`
			`"fmt"`
			`"strings"`

			`"github.com/rclone/rclone/backend/crypt"`
			`"github.com/rclone/rclone/cmd/bisync/bilib"`
			`"github.com/rclone/rclone/cmd/check"`
			`"github.com/rclone/rclone/fs"`
			`"github.com/rclone/rclone/fs/accounting"`
			`"github.com/rclone/rclone/fs/filter"`
			`"github.com/rclone/rclone/fs/hash"`
			`"github.com/rclone/rclone/fs/operations"`
			`)`

			`var hashType hash.Type`
			`var fsrc, fdst fs.Fs`
			`var fcrypt *crypt.Fs`

			`// WhichCheck determines which CheckFn we should use based on the Fs types`
			`// It is more robust and accurate than Check because`
			`// it will fallback to CryptCheck or DownloadCheck instead of --size-only!`
			`// it returns the *operations.CheckOpt with the CheckFn set.`
			`func WhichCheck(ctx context.Context, opt operations.CheckOpt) operations.CheckOpt {`
			`ci := fs.GetConfig(ctx)`
			`common := opt.Fsrc.Hashes().Overlap(opt.Fdst.Hashes())`

			`// note that ci.IgnoreChecksum doesn't change the behavior of Check -- it's just a way to opt-out of cryptcheck/download`
			`if common.Count() > 0 \|\| ci.SizeOnly \|\| ci.IgnoreChecksum {`
			`// use normal check`
			`opt.Check = CheckFn`
			`return opt`
			`}`

			`FsrcCrypt, srcIsCrypt := opt.Fsrc.(*crypt.Fs)`
			`FdstCrypt, dstIsCrypt := opt.Fdst.(*crypt.Fs)`

			`if (srcIsCrypt && dstIsCrypt) \|\| (!srcIsCrypt && dstIsCrypt) {`
			`// if both are crypt or only dst is crypt`
			`hashType = FdstCrypt.UnWrap().Hashes().GetOne()`
			`if hashType != hash.None {`
			`// use cryptcheck`
			`fsrc = opt.Fsrc`
			`fdst = opt.Fdst`
			`fcrypt = FdstCrypt`
			`fs.Infof(fdst, "Crypt detected! Using cryptcheck instead of check. (Use --size-only or --ignore-checksum to disable)")`
			`opt.Check = CryptCheckFn`
			`return opt`
			`}`
			`} else if srcIsCrypt && !dstIsCrypt {`
			`// if only src is crypt`
			`hashType = FsrcCrypt.UnWrap().Hashes().GetOne()`
			`if hashType != hash.None {`
			`// use reverse cryptcheck`
			`fsrc = opt.Fdst`
			`fdst = opt.Fsrc`
			`fcrypt = FsrcCrypt`
			`fs.Infof(fdst, "Crypt detected! Using cryptcheck instead of check. (Use --size-only or --ignore-checksum to disable)")`
			`opt.Check = ReverseCryptCheckFn`
			`return opt`
			`}`
			`}`

			`// if we've gotten this far, niether check or cryptcheck will work, so use --download`
			`fs.Infof(fdst, "Can't compare hashes, so using check --download for safety. (Use --size-only or --ignore-checksum to disable)")`
			`opt.Check = DownloadCheckFn`
			`return opt`
			`}`

			`// CheckFn is a slightly modified version of Check`
			`func CheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {`
			`same, ht, err := operations.CheckHashes(ctx, src, dst)`
			`if err != nil {`
			`return true, false, err`
			`}`
			`if ht == hash.None {`
			`return false, true, nil`
			`}`
			`if !same {`
			`err = fmt.Errorf("%v differ", ht)`
			`fs.Errorf(src, "%v", err)`
			`return true, false, nil`
			`}`
			`return false, false, nil`
			`}`

			`// CryptCheckFn is a slightly modified version of CryptCheck`
			`func CryptCheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {`
			`cryptDst := dst.(*crypt.Object)`
			`underlyingDst := cryptDst.UnWrap()`
			`underlyingHash, err := underlyingDst.Hash(ctx, hashType)`
			`if err != nil {`
			`return true, false, fmt.Errorf("error reading hash from underlying %v: %w", underlyingDst, err)`
			`}`
			`if underlyingHash == "" {`
			`return false, true, nil`
			`}`
			`cryptHash, err := fcrypt.ComputeHash(ctx, cryptDst, src, hashType)`
			`if err != nil {`
			`return true, false, fmt.Errorf("error computing hash: %w", err)`
			`}`
			`if cryptHash == "" {`
			`return false, true, nil`
			`}`
			`if cryptHash != underlyingHash {`
			`err = fmt.Errorf("hashes differ (%s:%s) %q vs (%s:%s) %q", fdst.Name(), fdst.Root(), cryptHash, fsrc.Name(), fsrc.Root(), underlyingHash)`
bisync: account for differences in backend features on integration tests - see #5679 Before this change, integration tests often could not be run on backends with differing features from the local system that goldenized them. In particular, differences in modtime precision, checksum support, and encoding would cause false positives. After this change, the tests more accurately account for the features of the backend being tested, which allows us to see true positives more clearly, and more meaningfully assess whether a backend is supported. 2023-11-18 01:14:38 +08:00			`fs.Debugf(src, err.Error())`
			`// using same error msg as CheckFn so integration tests match`
			`err = fmt.Errorf("%v differ", hashType)`
bisync: fallback to cryptcheck or --download when can't check hash Bisync checks file equality before renaming sync conflicts by comparing checksums. Before this change, backends without checksum support (notably Crypt) would fall back to --size-only for these checks, which is not a very safe method (differing files can sometimes have the same size, especially if they're small.) After this change, Crypt remotes fallback to using Cryptcheck so that checksums can be compared. As a last resort when neither Check nor Cryptcheck are available, files are compared using --download so that we can be certain the files are identical regardless of checksum support. 2023-11-22 06:43:17 +08:00			`fs.Errorf(src, err.Error())`
			`return true, false, nil`
			`}`
			`return false, false, nil`
			`}`

			`// ReverseCryptCheckFn is like CryptCheckFn except src and dst are switched`
			`// result: src is crypt, dst is non-crypt`
			`func ReverseCryptCheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {`
			`return CryptCheckFn(ctx, src, dst)`
			`}`

			`// DownloadCheckFn is a slightly modified version of Check with --download`
			`func DownloadCheckFn(ctx context.Context, a, b fs.Object) (differ bool, noHash bool, err error) {`
			`differ, err = operations.CheckIdenticalDownload(ctx, a, b)`
			`if err != nil {`
			`return true, true, fmt.Errorf("failed to download: %w", err)`
			`}`
			`return differ, false, nil`
			`}`

			`// check potential conflicts (to avoid renaming if already identical)`
			`func (b bisyncRun) checkconflicts(ctxCheck context.Context, filterCheck filter.Filter, fs1, fs2 fs.Fs) (bilib.Names, error) {`
			`matches := bilib.Names{}`
			`if filterCheck.HaveFilesFrom() {`
			`fs.Debugf(nil, "There are potential conflicts to check.")`

			`opt, close, checkopterr := check.GetCheckOpt(b.fs1, b.fs2)`
			`if checkopterr != nil {`
			`b.critical = true`
			`b.retryable = true`
			`fs.Debugf(nil, "GetCheckOpt error: %v", checkopterr)`
			`return matches, checkopterr`
			`}`
			`defer close()`

			`opt.Match = new(bytes.Buffer)`

			`opt = WhichCheck(ctxCheck, opt)`

			`fs.Infof(nil, "Checking potential conflicts...")`
			`check := operations.CheckFn(ctxCheck, opt)`
			`fs.Infof(nil, "Finished checking the potential conflicts. %s", check)`

			`//reset error count, because we don't want to count check errors as bisync errors`
			`accounting.Stats(ctxCheck).ResetErrors()`

			`//return the list of identical files to check against later`
			`if len(fmt.Sprint(opt.Match)) > 0 {`
			`matches = bilib.ToNames(strings.Split(fmt.Sprint(opt.Match), "\n"))`
			`}`
			`if matches.NotEmpty() {`
			`fs.Debugf(nil, "The following potential conflicts were determined to be identical. %v", matches)`
			`} else {`
			`fs.Debugf(nil, "None of the conflicts were determined to be identical.")`
			`}`

			`}`
			`return matches, nil`
			`}`

			`// WhichEqual is similar to WhichCheck, but checks a single object.`
			`// Returns true if the objects are equal, false if they differ or if we don't know`
			`func WhichEqual(ctx context.Context, src, dst fs.Object, Fsrc, Fdst fs.Fs) bool {`
			`opt, close, checkopterr := check.GetCheckOpt(Fsrc, Fdst)`
			`if checkopterr != nil {`
			`fs.Debugf(nil, "GetCheckOpt error: %v", checkopterr)`
			`}`
			`defer close()`

			`opt = WhichCheck(ctx, opt)`
			`differ, noHash, err := opt.Check(ctx, dst, src)`
			`if err != nil {`
			`fs.Errorf(src, "failed to check: %v", err)`
			`return false`
			`}`
			`if noHash {`
			`fs.Errorf(src, "failed to check as hash is missing")`
			`return false`
			`}`
			`return !differ`
			`}`
bisync: full support for comparing checksum, size, modtime - fixes #5679 fixes #5683 fixes #5684 fixes #5675 Before this change, bisync could only detect changes based on modtime, and would refuse to run if either path lacked modtime support. This made bisync unavailable for many of rclone's backends. Additionally, bisync did not account for the Fs's precision when comparing modtimes, meaning that they could only be reliably compared within the same side -- not against the opposite side. Size and checksum (even when available) were ignored completely for deltas. After this change, bisync now fully supports comparing based on any combination of size, modtime, and checksum, lifting the prior restriction on backends without modtime support. The comparison logic considers the backend's precision, hash types, and other features as appropriate. The comparison features optionally use a new --compare flag (which takes any combination of size,modtime,checksum) and even supports some combinations not otherwise supported in `sync` (like comparing all three at the same time.) By default (without the --compare flag), bisync inherits the same comparison options as `sync` (that is: size and modtime by default, unless modified with flags such as --checksum or --size-only.) If the --compare flag is set, it will override these defaults. If --compare includes checksum and both remotes support checksums but have no hash types in common with each other, checksums will be considered only for comparisons within the same side (to determine what has changed since the prior sync), but not for comparisons against the opposite side. If one side supports checksums and the other does not, checksums will only be considered on the side that supports them. When comparing with checksum and/or size without modtime, bisync cannot determine whether a file is newer or older -- only whether it is changed or unchanged. 
(If it is changed on both sides, bisync still does the standard equality-check to avoid declaring a sync conflict unless it absolutely has to.) Also included are some new flags to customize the checksum comparison behavior on backends where hashes are slow or unavailable. --no-slow-hash and --slow-hash-sync-only allow selectively ignoring checksums on backends such as local where they are slow. --download-hash allows computing them by downloading when (and only when) they're otherwise not available. Of course, this option probably won't be practical with large files, but may be a good option for syncing small-but-important files with maximum accuracy (for example, a source code repo on a crypt remote.) An additional advantage over methods like cryptcheck is that the original file is not required for comparison (for example, --download-hash can be used to bisync two different crypt remotes with different passwords.) Additionally, all of the above are now considered during the final --check-sync for much-improved accuracy (before this change, it only compared filenames!) Many other details are explained in the included docs. 2023-12-01 08:44:38 +08:00
			`// Replaces the standard Equal func with one that also considers checksum`
			`// Note that it also updates the modtime the same way as Sync`
			`func (b *bisyncRun) EqualFn(ctx context.Context) context.Context {`
			`ci := fs.GetConfig(ctx)`
			`ci.CheckSum = false // force checksum off so modtime is evaluated if needed`
			`// modtime and size settings should already be set correctly for Equal`
			`var equalFn operations.EqualFn = func(ctx context.Context, src fs.ObjectInfo, dst fs.Object) bool {`
			`fs.Debugf(src, "evaluating...")`
			`equal := false`
			`logger, _ := operations.GetLogger(ctx)`
			`// temporarily unset logger, we don't want Equal to duplicate it`
			`noop := func(ctx context.Context, sigil operations.Sigil, src, dst fs.DirEntry, err error) {`
			`fs.Debugf(src, "equal skipped")`
			`}`
			`ctxNoLogger := operations.WithLogger(ctx, noop)`
			`if operations.Equal(ctxNoLogger, src, dst) {`
			`whichHashType := func(f fs.Info) hash.Type {`
			`ht := getHashType(f.Name())`
			`if ht == hash.None && b.opt.Compare.SlowHashSyncOnly && !b.opt.Resync {`
			`ht = f.Hashes().GetOne()`
			`}`
			`return ht`
			`}`
			`srcHash, _ := src.Hash(ctx, whichHashType(src.Fs()))`
			`dstHash, _ := dst.Hash(ctx, whichHashType(dst.Fs()))`
			`srcHash, _ = tryDownloadHash(ctx, src, srcHash)`
			`dstHash, _ = tryDownloadHash(ctx, dst, dstHash)`
			`equal = !hashDiffers(srcHash, dstHash, whichHashType(src.Fs()), whichHashType(dst.Fs()), src.Size(), dst.Size())`
			`}`
			`if equal {`
			`logger(ctx, operations.Match, src, dst, nil)`
			`fs.Debugf(src, "EqualFn: files are equal")`
			`return true`
			`}`
			`logger(ctx, operations.Differ, src, dst, nil)`
			`fs.Debugf(src, "EqualFn: files are NOT equal")`
			`return false`
			`}`
			`return operations.WithEqualFn(ctx, equalFn)`
			`}`