rclone/fs/operations.go

1395 lines
35 KiB
Go
Raw Normal View History

// Generic operations on filesystems and objects
package fs
import (
"fmt"
"io"
"log"
"mime"
"path"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/pkg/errors"
"golang.org/x/text/unicode/norm"
)
2015-09-23 01:47:16 +08:00
// CalculateModifyWindow works out modify window for Fses passed in -
// sets Config.ModifyWindow
//
// This is the largest modify window of all the fses in use, and the
// user configured value.
//
// Nil entries are skipped. The scan stops at the first Fs whose
// precision is ModTimeNotSupported. It is safe to call with no
// arguments.
func CalculateModifyWindow(fs ...Fs) {
	for _, f := range fs {
		if f == nil {
			continue
		}
		precision := f.Precision()
		if precision > Config.ModifyWindow {
			Config.ModifyWindow = precision
		}
		if precision == ModTimeNotSupported {
			Debug(f, "Modify window not supported")
			return
		}
	}
	// Log against the first Fs when there is one. Indexing fs[0]
	// unconditionally would panic when no Fs was passed in.
	var first Fs
	if len(fs) > 0 {
		first = fs[0]
	}
	Debug(first, "Modify window is %s", Config.ModifyWindow)
}
// HashEquals reports whether the hashes src and dst should be treated
// as matching. If either one is the empty string the result is true;
// otherwise the two strings must be identical.
func HashEquals(src, dst string) bool {
	return src == "" || dst == "" || src == dst
}
// CheckHashes checks the two files to see if they have common
// known hash types and compares them
//
// Returns
//
// equal - which is equality of the hashes
//
// hash - the HashType. This is HashNone if either of the hashes were
// unset or a compatible hash couldn't be found.
//
// err - may return an error which will already have been logged
//
// If an error is returned it will return equal as false
func CheckHashes(src, dst Object) (equal bool, hash HashType, err error) {
common := src.Fs().Hashes().Overlap(dst.Fs().Hashes())
// Debug(nil, "Shared hashes: %v", common)
if common.Count() == 0 {
return true, HashNone, nil
}
hash = common.GetOne()
srcHash, err := src.Hash(hash)
if err != nil {
Stats.Error()
2016-06-18 16:32:14 +08:00
ErrorLog(src, "Failed to calculate src hash: %v", err)
return false, hash, err
}
if srcHash == "" {
return true, HashNone, nil
}
dstHash, err := dst.Hash(hash)
if err != nil {
Stats.Error()
2016-06-18 16:32:14 +08:00
ErrorLog(dst, "Failed to calculate dst hash: %v", err)
return false, hash, err
}
if dstHash == "" {
return true, HashNone, nil
}
return srcHash == dstHash, hash, nil
}
2015-09-23 01:47:16 +08:00
// Equal checks to see if the src and dst objects are equal by looking at
// size, mtime and hash
//
// If the src and dst size are different then it is considered to be
// not equal. If --size-only is in effect then this is the only check
2016-06-18 00:20:08 +08:00
// that is done. If --ignore-size is in effect then this check is
// skipped and the files are considered the same size.
//
// If the size is the same and the mtime is the same then it is
// considered to be equal. This check is skipped if using --checksum.
//
// If the size is the same and mtime is different, unreadable or
// --checksum is set and the hash is the same then the file is
// considered to be equal. In this case the mtime on the dst is
// updated if --checksum is not set.
//
// Otherwise the file is considered to be not equal including if there
// were errors reading info.
func Equal(src, dst Object) bool {
2016-06-18 00:20:08 +08:00
if !Config.IgnoreSize {
if src.Size() != dst.Size() {
Debug(src, "Sizes differ")
return false
}
}
if Config.SizeOnly {
Debug(src, "Sizes identical")
return true
}
var srcModTime time.Time
if !Config.CheckSum {
if Config.ModifyWindow == ModTimeNotSupported {
Debug(src, "Sizes identical")
return true
}
// Size the same so check the mtime
srcModTime = src.ModTime()
dstModTime := dst.ModTime()
dt := dstModTime.Sub(srcModTime)
ModifyWindow := Config.ModifyWindow
if dt >= ModifyWindow || dt <= -ModifyWindow {
Debug(src, "Modification times differ by %s: %v, %v", dt, srcModTime, dstModTime)
} else {
Debug(src, "Size and modification time the same (differ by %s, within tolerance %s)", dt, ModifyWindow)
return true
}
}
// mtime is unreadable or different but size is the same so
// check the hash
same, hash, _ := CheckHashes(src, dst)
if !same {
Debug(src, "Hash differ")
return false
}
if !Config.CheckSum {
// Size and hash the same but mtime different so update the
// mtime of the dst object here
err := dst.SetModTime(srcModTime)
if err == ErrorCantSetModTime {
Debug(src, "src and dst identical but can't set mod time without re-uploading")
return false
} else if err != nil {
Stats.Error()
2016-06-18 16:32:14 +08:00
ErrorLog(dst, "Failed to read set modification time: %v", err)
}
}
if hash == HashNone {
Debug(src, "Size of src and dst objects identical")
} else {
Debug(src, "Size and %v of src and dst objects identical", hash)
}
return true
}
2015-09-23 01:47:16 +08:00
// MimeType guesses the MIME type of o from the file extension of its
// remote path. If the guess does not contain a '/' (which includes
// the case where no type is known for the extension) then
// "application/octet-stream" is returned instead.
func MimeType(o ObjectInfo) string {
	ext := path.Ext(o.Remote())
	if guess := mime.TypeByExtension(ext); strings.ContainsRune(guess, '/') {
		return guess
	}
	return "application/octet-stream"
}
// removeFailedCopy is used to remove the destination of a failed copy.
//
// It returns true only if dst was non-nil and was successfully
// removed. A failure to remove is logged at debug level.
func removeFailedCopy(dst Object) bool {
	if dst == nil {
		return false
	}
	Debug(dst, "Removing failed copy")
	if err := dst.Remove(); err != nil {
		Debug(dst, "Failed to remove failed copy: %s", err)
		return false
	}
	return true
}
// Copy src object to dst or f if nil
//
// If dst is nil then the object must not exist already. If you do
// call Copy() with dst nil on a pre-existing file then some filing
// systems (eg Drive) may duplicate the file.
func Copy(f Fs, dst, src Object) {
maxTries := Config.LowLevelRetries
tries := 0
doUpdate := dst != nil
var err error
var actionTaken string
for {
// Try server side copy first - if has optional interface and
// is same underlying remote
actionTaken = "Copied (server side copy)"
if fCopy, ok := f.(Copier); ok && src.Fs().Name() == f.Name() {
var newDst Object
newDst, err = fCopy.Copy(src, src.Remote())
if err == nil {
dst = newDst
}
} else {
err = ErrorCantCopy
}
// If can't server side copy, do it manually
if err == ErrorCantCopy {
var in0 io.ReadCloser
in0, err = src.Open()
if err != nil {
err = errors.Wrap(err, "failed to open source object")
} else {
// On big files add a buffer
if src.Size() > 10<<20 {
in0, _ = newAsyncReader(in0, 4, 4<<20)
}
in := NewAccount(in0, src) // account the transfer
if doUpdate {
actionTaken = "Copied (updated existing)"
err = dst.Update(in, src)
} else {
actionTaken = "Copied (new)"
dst, err = f.Put(in, src)
}
closeErr := in.Close()
if err == nil {
err = closeErr
}
}
}
tries++
if tries >= maxTries {
break
}
// Retry if err returned a retry error
if IsRetryError(err) || ShouldRetry(err) {
Debug(src, "Received error: %v - low level retry %d/%d", err, tries, maxTries)
if removeFailedCopy(dst) {
// If we removed dst, then nil it out and note we are not updating
dst = nil
doUpdate = false
}
continue
}
// otherwise finish
break
}
if err != nil {
Stats.Error()
2016-06-18 16:32:14 +08:00
ErrorLog(src, "Failed to copy: %v", err)
removeFailedCopy(dst)
return
}
// Verify sizes are the same after transfer
2016-06-18 00:20:08 +08:00
if !Config.IgnoreSize && src.Size() != dst.Size() {
Stats.Error()
err = errors.Errorf("corrupted on transfer: sizes differ %d vs %d", src.Size(), dst.Size())
2016-06-18 16:32:14 +08:00
ErrorLog(dst, "%v", err)
removeFailedCopy(dst)
return
}
// Verify hashes are the same after transfer - ignoring blank hashes
// TODO(klauspost): This could be extended, so we always create a hash type matching
// the destination, and calculate it while sending.
common := src.Fs().Hashes().Overlap(dst.Fs().Hashes())
// Debug(src, "common hashes: %v", common)
if !Config.SizeOnly && common.Count() > 0 {
// Get common hash type
hashType := common.GetOne()
srcSum, err := src.Hash(hashType)
if err != nil {
Stats.Error()
2016-06-18 16:32:14 +08:00
ErrorLog(src, "Failed to read src hash: %v", err)
} else if srcSum != "" {
dstSum, err := dst.Hash(hashType)
if err != nil {
Stats.Error()
2016-06-18 16:32:14 +08:00
ErrorLog(dst, "Failed to read hash: %v", err)
} else if !HashEquals(srcSum, dstSum) {
Stats.Error()
err = errors.Errorf("corrupted on transfer: %v hash differ %q vs %q", hashType, srcSum, dstSum)
2016-06-18 16:32:14 +08:00
ErrorLog(dst, "%v", err)
removeFailedCopy(dst)
return
}
}
}
Debug(src, actionTaken)
}
// checkOne decides whether pair.src needs to be transferred to
// pair.dst and, if it does, sends the pair to out.
//
// A pair with no destination is always sent. Otherwise the decision
// depends on src.Storable(), Config.IgnoreExisting,
// Config.IgnoreTimes, Config.UpdateOlder and finally Equal.
func checkOne(pair ObjectPair, out ObjectPairChan) {
	src, dst := pair.src, pair.dst

	// Nothing at the destination - always transfer
	if dst == nil {
		Debug(src, "Couldn't find file - need to transfer")
		out <- pair
		return
	}

	// Source can't be stored so there is nothing to do
	if !src.Storable() {
		return
	}

	// Existing destination files are to be left alone
	if Config.IgnoreExisting {
		Debug(src, "Destination exists, skipping")
		return
	}

	// Upload regardless of the state of the destination
	if Config.IgnoreTimes {
		Debug(src, "Uploading unconditionally as --ignore-times is in use")
		out <- pair
		return
	}

	// Normal case: transfer only if the objects differ
	if !Config.UpdateOlder {
		if Equal(src, dst) {
			Debug(src, "Unchanged skipping")
			return
		}
		out <- pair
		return
	}

	// UpdateOlder is in effect: skip if dst is newer than src
	srcTime := src.ModTime()
	dstTime := dst.ModTime()
	delta := dstTime.Sub(srcTime)
	// Use the mutually agreed precision if there is one, otherwise
	// 1 second as a safe default for the resolution of the time a
	// file was uploaded.
	window := Config.ModifyWindow
	if window == ModTimeNotSupported {
		window = time.Second
	}
	if delta >= window {
		Debug(src, "Destination is newer than source, skipping")
		return
	} else if delta <= -window {
		Debug(src, "Destination is older than source, transferring")
	} else {
		if src.Size() == dst.Size() {
			Debug(src, "Destination mod time is within %v of source and sizes identical, skipping", window)
			return
		}
		Debug(src, "Destination mod time is within %v of source but sizes differ, transferring", window)
	}
	out <- pair
}
2015-09-23 01:47:16 +08:00
// PairChecker reads ObjectPairs from in and sends them to out if they
// need transferring. It runs until in is closed and calls wg.Done()
// when it returns.
//
// FIXME potentially doing lots of hashes at once
func PairChecker(in ObjectPairChan, out ObjectPairChan, wg *sync.WaitGroup) {
	defer wg.Done()
	for p := range in {
		source := p.src
		Stats.Checking(source.Remote())
		checkOne(p, out)
		Stats.DoneChecking(source.Remote())
	}
}
2015-09-23 01:47:16 +08:00
// PairCopier reads ObjectPairs from in and copies each source object
// to fdst, or just logs it if --dry-run is set. It runs until in is
// closed and calls wg.Done() when it returns.
func PairCopier(in ObjectPairChan, fdst Fs, wg *sync.WaitGroup) {
	defer wg.Done()
	for p := range in {
		source := p.src
		Stats.Transferring(source.Remote())
		if !Config.DryRun {
			Copy(fdst, p.dst, source)
		} else {
			Log(source, "Not copying as --dry-run")
		}
		Stats.DoneTransferring(source.Remote())
	}
}
2015-09-23 01:47:16 +08:00
// PairMover reads Objects on in and moves them if possible, or copies
// them if not
func PairMover(in ObjectPairChan, fdst Fs, wg *sync.WaitGroup) {
defer wg.Done()
// See if we have Move available
fdstMover, haveMover := fdst.(Mover)
for pair := range in {
src := pair.src
dst := pair.dst
Stats.Transferring(src.Remote())
if Config.DryRun {
Log(src, "Not moving as --dry-run")
} else if haveMover && src.Fs().Name() == fdst.Name() {
// Delete destination if it exists
if pair.dst != nil {
err := dst.Remove()
if err != nil {
Stats.Error()
2015-09-22 14:31:12 +08:00
ErrorLog(dst, "Couldn't delete: %v", err)
}
}
2015-09-22 14:31:12 +08:00
_, err := fdstMover.Move(src, src.Remote())
if err != nil {
Stats.Error()
ErrorLog(dst, "Couldn't move: %v", err)
} else {
Debug(src, "Moved")
}
} else {
Copy(fdst, pair.dst, src)
}
Stats.DoneTransferring(src.Remote())
}
}
// DeleteFile deletes a single file respecting --dry-run and accumulating stats and errors.
func DeleteFile(dst Object) (err error) {
if Config.DryRun {
Log(dst, "Not deleting as --dry-run")
} else {
Stats.Checking(dst.Remote())
err = dst.Remove()
Stats.DoneChecking(dst.Remote())
if err != nil {
Stats.Error()
2016-06-18 16:32:14 +08:00
ErrorLog(dst, "Couldn't delete: %v", err)
} else {
Debug(dst, "Deleted")
}
}
return err
}
2015-09-23 01:47:16 +08:00
// DeleteFiles removes all the files passed in the channel
func DeleteFiles(toBeDeleted ObjectsChan) error {
var wg sync.WaitGroup
wg.Add(Config.Transfers)
var errorCount int32
for i := 0; i < Config.Transfers; i++ {
go func() {
defer wg.Done()
2015-09-23 01:47:16 +08:00
for dst := range toBeDeleted {
err := DeleteFile(dst)
if err != nil {
atomic.AddInt32(&errorCount, 1)
}
}
}()
}
Log(nil, "Waiting for deletions to finish")
wg.Wait()
if errorCount > 0 {
return errors.Errorf("failed to delete %d files", errorCount)
}
return nil
}
// Read a Objects into add() for the given Fs.
2016-04-24 04:46:52 +08:00
// dir is the start directory, "" for root
// If includeAll is specified all files will be added,
// otherwise only files passing the filter will be added.
func readFilesFn(fs Fs, includeAll bool, dir string, add func(Object)) (err error) {
list := NewLister()
if !includeAll {
list.SetFilter(Config.Filter)
list.SetLevel(Config.MaxDepth)
}
2016-04-24 04:46:52 +08:00
list.Start(fs, dir)
for {
o, err := list.GetObject()
if err != nil {
return err
}
// Check if we are finished
if o == nil {
break
}
// Make sure we don't delete excluded files if not required
if includeAll || Config.Filter.IncludeObject(o) {
add(o)
} else {
Debug(o, "Excluded from sync (and deletion)")
}
}
return nil
}
// readFilesMap builds a map of Object.Remote to Object for the given Fs.
// dir is the start directory, "" for root
// If includeAll is specified all files will be added,
// otherwise only files passing the filter will be added.
//
// Duplicate remotes are logged and only the first one seen is kept.
// A warning is also logged when a new remote matches an earlier one
// after NFC normalisation and lower casing.
func readFilesMap(fs Fs, includeAll bool, dir string) (files map[string]Object, err error) {
	files = make(map[string]Object)
	seenNormalised := make(map[string]struct{})
	record := func(o Object) {
		remote := o.Remote()
		key := strings.ToLower(norm.NFC.String(remote))
		if _, duplicate := files[remote]; duplicate {
			Log(o, "Duplicate file detected")
		} else {
			files[remote] = o
			if _, clash := seenNormalised[key]; clash {
				Log(o, "Warning: File found with same name but different case on %v", o.Fs())
			}
		}
		seenNormalised[key] = struct{}{}
	}
	err = readFilesFn(fs, includeAll, dir, record)
	return files, err
}
// readFilesMaps runs readFilesMap on fdst and fsrc at the same time
2016-04-24 04:46:52 +08:00
// dir is the start directory, "" for root
func readFilesMaps(fdst Fs, fdstIncludeAll bool, fsrc Fs, fsrcIncludeAll bool, dir string) (dstFiles, srcFiles map[string]Object, err error) {
var wg sync.WaitGroup
var srcErr, dstErr error
list := func(fs Fs, includeAll bool, pMap *map[string]Object, pErr *error) {
defer wg.Done()
Log(fs, "Building file list")
files, listErr := readFilesMap(fs, includeAll, dir)
if listErr != nil {
ErrorLog(fs, "Error building file list: %v", listErr)
*pErr = listErr
} else {
Debug(fs, "Done building file list")
*pMap = files
}
}
wg.Add(2)
go list(fdst, fdstIncludeAll, &dstFiles, &srcErr)
go list(fsrc, fsrcIncludeAll, &srcFiles, &dstErr)
wg.Wait()
if srcErr != nil {
err = srcErr
}
if dstErr != nil {
err = dstErr
}
return dstFiles, srcFiles, err
}
2015-09-23 01:47:16 +08:00
// Same reports whether fdst and fsrc have both the same Name() and
// the same Root(), in which case they are taken to point to the same
// underlying Fs.
func Same(fdst, fsrc Fs) bool {
	if fdst.Name() != fsrc.Name() {
		return false
	}
	return fdst.Root() == fsrc.Root()
}
// syncCopyMove holds the parameters and working state of a single
// sync, copy or move run from fsrc to fdst.
type syncCopyMove struct {
	// parameters
	fdst   Fs     // destination Fs
	fsrc   Fs     // source Fs
	Delete bool   // delete destination files which aren't in the source
	DoMove bool   // move files instead of copying them
	dir    string // start directory, "" for root
	// internal state
	noTraverse     bool              // if set don't traverse the dst
	deleteBefore   bool              // set if we must delete objects before copying
	dstFiles       map[string]Object // dst files, only used if Delete
	srcFiles       map[string]Object // src files, only used if deleteBefore
	srcFilesChan   chan Object       // passes src objects
	srcFilesResult chan error        // error result of src listing
	dstFilesResult chan error        // error result of dst listing
	checkerWg      sync.WaitGroup    // wait for checkers
	toBeChecked    ObjectPairChan    // checkers channel
	copierWg       sync.WaitGroup    // wait for copiers
	toBeUploaded   ObjectPairChan    // copiers channel
}
// newSyncCopyMove creates the state for a sync/copy/move from fsrc
// to fdst starting at the root directory.
//
// Delete selects deletion of extra destination files and DoMove
// selects moving rather than copying. Config.NoTraverse is ignored
// (with a debug message) when Delete is set.
func newSyncCopyMove(fdst, fsrc Fs, Delete bool, DoMove bool) *syncCopyMove {
	noTraverse := Config.NoTraverse
	if noTraverse && Delete {
		Debug(fdst, "Ignoring --no-traverse with sync")
		noTraverse = false
	}
	return &syncCopyMove{
		fdst:           fdst,
		fsrc:           fsrc,
		Delete:         Delete,
		DoMove:         DoMove,
		dir:            "",
		noTraverse:     noTraverse,
		deleteBefore:   Delete && Config.DeleteBefore,
		srcFilesChan:   make(chan Object, Config.Checkers+Config.Transfers),
		srcFilesResult: make(chan error, 1),
		dstFilesResult: make(chan error, 1),
		toBeChecked:    make(ObjectPairChan, Config.Transfers),
		toBeUploaded:   make(ObjectPairChan, Config.Transfers),
	}
}
// readSrcUsingMap sends every object in the already populated
// s.srcFiles map to s.srcFilesChan and then closes the channel.
//
// It always reports a nil result on s.srcFilesResult.
func (s *syncCopyMove) readSrcUsingMap() {
	out := s.srcFilesChan
	for _, obj := range s.srcFiles {
		out <- obj
	}
	close(out)
	s.srcFilesResult <- nil
}
// readSrcUsingChan lists the (filtered) source files straight into
// s.srcFilesChan and then closes the channel.
//
// The result of the listing is sent to s.srcFilesResult.
func (s *syncCopyMove) readSrcUsingChan() {
	push := func(o Object) {
		s.srcFilesChan <- o
	}
	listErr := readFilesFn(s.fsrc, false, s.dir, push)
	close(s.srcFilesChan)
	s.srcFilesResult <- listErr
}
// readDstFiles reads the destination files into s.dstFiles. All
// files are included if Config.Filter.DeleteExcluded is set,
// otherwise only those passing the filter.
//
// The result of the listing is sent to s.dstFilesResult.
func (s *syncCopyMove) readDstFiles() {
	files, listErr := readFilesMap(s.fdst, Config.Filter.DeleteExcluded, s.dir)
	s.dstFiles = files
	s.dstFilesResult <- listErr
}
// deleteFiles deletes the files in the s.dstFiles map.
//
// If checkSrcMap is set then only those files which are not present
// in s.srcFiles are deleted. If checkSrcMap is clear then every file
// in s.dstFiles is deleted, on the assumption that any files found in
// the source have already been removed from s.dstFiles.
//
// Nothing is deleted, and ErrorNotDeleting is returned, if any errors
// have been counted in Stats.
func (s *syncCopyMove) deleteFiles(checkSrcMap bool) error {
	if Stats.Errored() {
		ErrorLog(s.fdst, "%v", ErrorNotDeleting)
		return ErrorNotDeleting
	}

	// Feed the spare files to DeleteFiles
	spare := make(ObjectsChan, Config.Transfers)
	go func() {
		defer close(spare)
		for remote, obj := range s.dstFiles {
			if checkSrcMap {
				if _, inSrc := s.srcFiles[remote]; inSrc {
					continue
				}
			}
			spare <- obj
		}
	}()
	return DeleteFiles(spare)
}
// startCheckers starts Config.Checkers background PairChecker
// goroutines reading from s.toBeChecked and writing to
// s.toBeUploaded.
func (s *syncCopyMove) startCheckers() {
	workers := Config.Checkers
	s.checkerWg.Add(workers)
	for started := 0; started < workers; started++ {
		go PairChecker(s.toBeChecked, s.toBeUploaded, &s.checkerWg)
	}
}
// stopCheckers closes the checkers' input channel and waits for all
// of the background checkers to finish.
func (s *syncCopyMove) stopCheckers() {
	pending := s.toBeChecked
	close(pending)
	Log(s.fdst, "Waiting for checks to finish")
	s.checkerWg.Wait()
}
// startTransfers starts Config.Transfers background goroutines
// reading from s.toBeUploaded. They run PairMover if s.DoMove is set
// and PairCopier otherwise.
func (s *syncCopyMove) startTransfers() {
	worker := PairCopier
	if s.DoMove {
		worker = PairMover
	}
	workers := Config.Transfers
	s.copierWg.Add(workers)
	for started := 0; started < workers; started++ {
		go worker(s.toBeUploaded, s.fdst, &s.copierWg)
	}
}
// stopTransfers closes the transfer input channel and waits for all
// of the background transfers to finish.
func (s *syncCopyMove) stopTransfers() {
	pending := s.toBeUploaded
	close(pending)
	Log(s.fdst, "Waiting for transfers to finish")
	s.copierWg.Wait()
}
// Syncs fsrc into fdst
//
// If Delete is true then it deletes any files in fdst that aren't in fsrc
//
// If DoMove is true then files will be moved instead of copied
2016-04-24 04:46:52 +08:00
//
// dir is the start directory, "" for root
func (s *syncCopyMove) run() error {
if Same(s.fdst, s.fsrc) {
ErrorLog(s.fdst, "Nothing to do as source and destination are the same")
return nil
}
err := Mkdir(s.fdst)
if err != nil {
return err
}
// Start reading dstFiles if required
if !s.noTraverse {
go s.readDstFiles()
}
// If s.deleteBefore then we need to read the whole source map first
if s.deleteBefore {
// Read source files into the map
s.srcFiles, err = readFilesMap(s.fsrc, false, s.dir)
if err != nil {
return err
}
// Pump the map into s.srcFilesChan
go s.readSrcUsingMap()
} else {
go s.readSrcUsingChan()
}
// Wait for dstfiles to finish reading if we were reading them
// and report any errors
if !s.noTraverse {
err = <-s.dstFilesResult
if err != nil {
return err
}
}
// Delete files first if required
// Have dstFiles and srcFiles complete at this point
if s.deleteBefore {
err = s.deleteFiles(true)
if err != nil {
return err
}
}
// Start background checking and transferring pipeline
s.startCheckers()
s.startTransfers()
// Do the transfers
for src := range s.srcFilesChan {
remote := src.Remote()
var dst Object
if s.noTraverse {
var err error
dst, err = s.fdst.NewObject(remote)
if err != nil {
dst = nil
if err != ErrorObjectNotFound {
Debug(src, "Error making NewObject: %v", err)
}
}
} else {
dst = s.dstFiles[remote]
// Remove file from s.dstFiles because it exists in srcFiles
delete(s.dstFiles, remote)
}
if dst != nil {
s.toBeChecked <- ObjectPair{src, dst}
} else {
// No need to check since doesn't exist
s.toBeUploaded <- ObjectPair{src, nil}
}
}
// Stop background checking and transferring pipeline
s.stopCheckers()
s.stopTransfers()
// Retrieve the delayed error from the source listing goroutine
err = <-s.srcFilesResult
if err != nil {
return err
}
// Delete files during or after
if s.Delete && (Config.DeleteDuring || Config.DeleteAfter) {
err = s.deleteFiles(false)
if err != nil {
return err
}
}
return nil
}
2015-09-23 01:47:16 +08:00
// Sync makes fdst match fsrc: files are copied from fsrc and the
// run is created with deletion of extra destination files enabled.
func Sync(fdst, fsrc Fs) error {
	const (
		deleteExtra = true
		moveFiles   = false
	)
	return newSyncCopyMove(fdst, fsrc, deleteExtra, moveFiles).run()
}
2015-09-23 01:47:16 +08:00
// CopyDir copies the files in fsrc into fdst without deleting
// anything from fdst.
func CopyDir(fdst, fsrc Fs) error {
	const (
		deleteExtra = false
		moveFiles   = false
	)
	return newSyncCopyMove(fdst, fsrc, deleteExtra, moveFiles).run()
}
// moveDir moves the files in fsrc into fdst file by file, without
// deleting anything else from fdst.
func moveDir(fdst, fsrc Fs) error {
	const (
		deleteExtra = false
		moveFiles   = true
	)
	return newSyncCopyMove(fdst, fsrc, deleteExtra, moveFiles).run()
}
2015-09-23 01:47:16 +08:00
// MoveDir moves fsrc into fdst
func MoveDir(fdst, fsrc Fs) error {
2015-09-23 01:47:16 +08:00
if Same(fdst, fsrc) {
ErrorLog(fdst, "Nothing to do as source and destination are the same")
return nil
}
// First attempt to use DirMover if exists, same Fs and no filters are active
if fdstDirMover, ok := fdst.(DirMover); ok && fsrc.Name() == fdst.Name() && Config.Filter.InActive() {
err := fdstDirMover.DirMove(fsrc)
Debug(fdst, "Using server side directory move")
switch err {
case ErrorCantDirMove, ErrorDirExists:
Debug(fdst, "Server side directory move failed - fallback to copy/delete: %v", err)
case nil:
Debug(fdst, "Server side directory move succeeded")
return nil
default:
Stats.Error()
ErrorLog(fdst, "Server side directory move failed: %v", err)
return err
}
}
// Now move the files
err := moveDir(fdst, fsrc)
if err != nil || Stats.Errored() {
ErrorLog(fdst, "Not deleting files as there were IO errors")
return err
}
// If no filters then purge
if Config.Filter.InActive() {
return Purge(fsrc)
}
// Otherwise remove any remaining files obeying filters
err = Delete(fsrc)
if err != nil {
return err
}
// and try to remove the directory if empty - ignoring error
_ = TryRmdir(fsrc)
return nil
}
// checkIdentical checks to see if dst and src are identical
//
// The sizes are compared first and then, unless Config.SizeOnly is
// set, the hashes using CheckHashes.
//
// it returns true if differences were found
func checkIdentical(dst, src Object) bool {
	Stats.Checking(src.Remote())
	defer Stats.DoneChecking(src.Remote())
	if src.Size() != dst.Size() {
		Stats.Error()
		ErrorLog(src, "Sizes differ")
		return true
	}
	if !Config.SizeOnly {
		same, hash, err := CheckHashes(src, dst)
		if err != nil {
			// CheckHashes will log and count errors
			return true
		}
		if !same {
			Stats.Error()
			// Report the hash type which was actually compared
			ErrorLog(src, "%v differ", hash)
			return true
		}
	}
	Debug(src, "OK")
	return false
}
func Check(fdst, fsrc Fs) error {
2016-04-24 04:46:52 +08:00
dstFiles, srcFiles, err := readFilesMaps(fdst, false, fsrc, false, "")
if err != nil {
return err
}
differences := int32(0)
// FIXME could do this as it goes along and make it use less
// memory.
// Move all the common files into commonFiles and delete then
// from srcFiles and dstFiles
commonFiles := make(map[string][]Object)
for remote, src := range srcFiles {
if dst, ok := dstFiles[remote]; ok {
commonFiles[remote] = []Object{dst, src}
delete(srcFiles, remote)
delete(dstFiles, remote)
}
}
Log(fdst, "%d files not in %v", len(dstFiles), fsrc)
for _, dst := range dstFiles {
Stats.Error()
ErrorLog(dst, "File not in %v", fsrc)
atomic.AddInt32(&differences, 1)
}
Log(fsrc, "%d files not in %s", len(srcFiles), fdst)
for _, src := range srcFiles {
Stats.Error()
ErrorLog(src, "File not in %v", fdst)
atomic.AddInt32(&differences, 1)
}
checks := make(chan []Object, Config.Transfers)
go func() {
for _, check := range commonFiles {
checks <- check
}
close(checks)
}()
var checkerWg sync.WaitGroup
checkerWg.Add(Config.Checkers)
for i := 0; i < Config.Checkers; i++ {
go func() {
defer checkerWg.Done()
for check := range checks {
if checkIdentical(check[0], check[1]) {
atomic.AddInt32(&differences, 1)
}
}
}()
}
Log(fdst, "Waiting for checks to finish")
checkerWg.Wait()
Log(fdst, "%d differences found", Stats.GetErrors())
if differences > 0 {
return errors.Errorf("%d differences found", differences)
}
return nil
}
2015-09-23 01:47:16 +08:00
// ListFn calls fn for every object in f which passes Config.Filter,
// listing to a depth of Config.MaxDepth.
//
// The listing is consumed by Config.Checkers goroutines so fn may be
// called concurrently and the objects may arrive out of order.
//
// NOTE(review): a listing error terminates the whole process via
// log.Fatal, so the returned error is always nil.
func ListFn(f Fs, fn func(Object)) error {
	list := NewLister().SetFilter(Config.Filter).SetLevel(Config.MaxDepth).Start(f, "")
	var wg sync.WaitGroup
	worker := func() {
		defer wg.Done()
		for {
			obj, err := list.GetObject()
			if err != nil {
				log.Fatal(err)
			}
			// a nil object marks the end of the listing
			if obj == nil {
				return
			}
			if Config.Filter.IncludeObject(obj) {
				fn(obj)
			}
		}
	}
	wg.Add(Config.Checkers)
	for i := 0; i < Config.Checkers; i++ {
		go worker()
	}
	wg.Wait()
	return nil
}
// outMutex serialises the output written by syncFprintf.
var outMutex sync.Mutex

// syncFprintf is a synchronized fmt.Fprintf: outMutex is held while
// writing so output from concurrent callers is not interleaved.
//
// Ignores errors from Fprintf
func syncFprintf(w io.Writer, format string, a ...interface{}) {
	outMutex.Lock()
	defer outMutex.Unlock()
	_, _ = fmt.Fprintf(w, format, a...)
}
// List writes one line per object in f to w giving its size and
// path. It obeys includes and excludes.
//
// Lists in parallel which may get them out of order
func List(f Fs, w io.Writer) error {
	printEntry := func(o Object) {
		syncFprintf(w, "%9d %s\n", o.Size(), o.Remote())
	}
	return ListFn(f, printEntry)
}
2015-09-23 01:47:16 +08:00
// ListLong writes one line per object in f to w giving its size,
// modification time (in local time) and path. It obeys includes and
// excludes.
//
// Lists in parallel which may get them out of order
func ListLong(f Fs, w io.Writer) error {
	const layout = "2006-01-02 15:04:05.000000000"
	printEntry := func(o Object) {
		// Reading the mod time is accounted for as a check
		Stats.Checking(o.Remote())
		modTime := o.ModTime()
		Stats.DoneChecking(o.Remote())
		syncFprintf(w, "%9d %s %s\n", o.Size(), modTime.Local().Format(layout), o.Remote())
	}
	return ListFn(f, printEntry)
}
2015-09-23 01:47:16 +08:00
// Md5sum writes the HashMD5 hash and path of each object in f to w
// using hashLister.
//
// Produces the same output as the md5sum command - obeys includes and
// excludes
//
// Lists in parallel which may get them out of order
func Md5sum(f Fs, w io.Writer) error {
	err := hashLister(HashMD5, f, w)
	return err
}
// Sha1sum writes the HashSHA1 hash and path of each object in f to w
// using hashLister.
//
// Obeys includes and excludes
//
// Lists in parallel which may get them out of order
func Sha1sum(f Fs, w io.Writer) error {
	err := hashLister(HashSHA1, f, w)
	return err
}
func hashLister(ht HashType, f Fs, w io.Writer) error {
return ListFn(f, func(o Object) {
Stats.Checking(o.Remote())
sum, err := o.Hash(ht)
Stats.DoneChecking(o.Remote())
if err == ErrHashUnsupported {
sum = "UNSUPPORTED"
} else if err != nil {
Debug(o, "Failed to read %v: %v", ht, err)
sum = "ERROR"
}
2016-01-17 21:56:00 +08:00
syncFprintf(w, "%*s %s\n", HashWidth[ht], sum, o.Remote())
})
}
// Count returns the number of objects in f and the sum of their
// sizes, together with any error from ListFn.
//
// Obeys includes and excludes
func Count(f Fs) (objects int64, size int64, err error) {
	// ListFn may call this concurrently so the totals are updated
	// atomically
	tally := func(o Object) {
		atomic.AddInt64(&objects, 1)
		atomic.AddInt64(&size, o.Size())
	}
	err = ListFn(f, tally)
	return objects, size, err
}
2015-09-23 01:47:16 +08:00
// ListDir lists the directories/buckets/containers in the Fs to the supplied writer
//
// The listing goes one level deep unless Config.MaxDepth is set to
// a positive value. Each line shows dir.Bytes, dir.When, dir.Count
// and dir.Name. A listing error is returned to the caller.
func ListDir(f Fs, w io.Writer) error {
	level := 1
	if Config.MaxDepth > 0 {
		level = Config.MaxDepth
	}
	list := NewLister().SetLevel(level).Start(f, "")
	for {
		dir, err := list.GetDir()
		if err != nil {
			// Hand the error back rather than exiting the process
			return err
		}
		// a nil dir marks the end of the listing
		if dir == nil {
			break
		}
		syncFprintf(w, "%12d %13s %9d %s\n", dir.Bytes, dir.When.Format("2006-01-02 15:04:05"), dir.Count, dir.Name)
	}
	return nil
}
2015-09-23 01:47:16 +08:00
// Mkdir makes the directory or container for f, or just logs that it
// would have if --dry-run is set. A failure is counted in Stats and
// returned.
func Mkdir(f Fs) error {
	if Config.DryRun {
		Log(f, "Not making directory as dry run is set")
		return nil
	}
	if err := f.Mkdir(); err != nil {
		Stats.Error()
		return err
	}
	return nil
}
// TryRmdir removes a container but not if not empty, or just logs
// that it would have if --dry-run is set. It doesn't count errors
// but may return one.
func TryRmdir(f Fs) error {
	if !Config.DryRun {
		return f.Rmdir()
	}
	Log(f, "Not deleting as dry run is set")
	return nil
}
// Rmdir removes a container but not if not empty. Unlike TryRmdir
// a failure is counted in Stats before being returned.
func Rmdir(f Fs) error {
	err := TryRmdir(f)
	if err != nil {
		Stats.Error()
	}
	return err
}
2015-09-23 01:47:16 +08:00
// Purge removes a container and all of its contents
//
// FIXME doesn't delete local directories
func Purge(f Fs) error {
doFallbackPurge := true
2014-07-26 01:19:49 +08:00
var err error
if purger, ok := f.(Purger); ok {
doFallbackPurge = false
2014-07-13 17:45:13 +08:00
if Config.DryRun {
Log(f, "Not purging as --dry-run set")
2014-07-13 17:45:13 +08:00
} else {
2014-07-26 01:19:49 +08:00
err = purger.Purge()
if err == ErrorCantPurge {
doFallbackPurge = true
}
}
}
if doFallbackPurge {
2014-07-26 01:19:49 +08:00
// DeleteFiles and Rmdir observe --dry-run
2016-04-24 04:46:52 +08:00
list := NewLister().Start(f, "")
err = DeleteFiles(listToChan(list))
if err != nil {
return err
}
2014-07-26 01:19:49 +08:00
err = Rmdir(f)
}
if err != nil {
Stats.Error()
return err
}
return nil
}
// Delete removes all the contents of a container. Unlike Purge, it
// obeys includes and excludes.
//
// The objects found by ListFn are fed to DeleteFiles running in the
// background. The listing error is returned if there was one,
// otherwise the result of DeleteFiles.
func Delete(f Fs) error {
	toDelete := make(ObjectsChan, Config.Transfers)
	deleteResult := make(chan error, 1)
	go func() {
		deleteResult <- DeleteFiles(toDelete)
	}()
	listErr := ListFn(f, func(o Object) {
		toDelete <- o
	})
	close(toDelete)
	// Always wait for the deletions to finish before returning
	if deleteErr := <-deleteResult; listErr == nil {
		return deleteErr
	}
	return listErr
}
// dedupeRename gives each object in objs a different name by
// inserting "-1", "-2", ... before the extension of remote.
//
// The Fs of the first object must implement Mover, otherwise the
// process is terminated with log.Fatalf. With --dry-run the new
// names are only logged. A failed rename is counted and logged and
// the remaining objects are still processed.
func dedupeRename(remote string, objs []Object) {
	f := objs[0].Fs()
	mover, canMove := f.(Mover)
	if !canMove {
		log.Fatalf("Fs %v doesn't support Move", f)
	}
	ext := path.Ext(remote)
	base := strings.TrimSuffix(remote, ext)
	for i, o := range objs {
		newName := fmt.Sprintf("%s-%d%s", base, i+1, ext)
		if Config.DryRun {
			Log(remote, "Not renaming to %q as --dry-run", newName)
			continue
		}
		newObj, err := mover.Move(o, newName)
		if err != nil {
			Stats.Error()
			ErrorLog(o, "Failed to rename: %v", err)
			continue
		}
		Log(newObj, "renamed from: %v", o)
	}
}
// dedupeDeleteAllButOne deletes every object in objs except the one
// at index keep. Errors from DeleteFile are ignored here.
func dedupeDeleteAllButOne(keep int, remote string, objs []Object) {
	for idx, obj := range objs {
		if idx != keep {
			_ = DeleteFile(obj)
		}
	}
	Log(remote, "Deleted %d extra copies", len(objs)-1)
}
// dedupeDeleteIdentical deletes all but one of identical (by hash) copies
//
// It returns the objects which remain. Objects whose hash can't be
// read, or is empty, can't be shown to be identical to anything, so
// they are never deleted here and are always returned.
func dedupeDeleteIdentical(remote string, objs []Object) []Object {
	// See how many of these duplicates are identical
	byHash := make(map[string][]Object, len(objs))
	var remaining []Object
	for _, o := range objs {
		md5sum, err := o.Hash(HashMD5)
		if err != nil || md5sum == "" {
			// No usable hash - keep it for the caller to deal with
			remaining = append(remaining, o)
			continue
		}
		byHash[md5sum] = append(byHash[md5sum], o)
	}

	// Delete identical duplicates, keeping the first of each group
	for md5sum, hashObjs := range byHash {
		if len(hashObjs) > 1 {
			Log(remote, "Deleting %d/%d identical duplicates (md5sum %q)", len(hashObjs)-1, len(hashObjs), md5sum)
			for _, o := range hashObjs[1:] {
				_ = DeleteFile(o)
			}
		}
		remaining = append(remaining, hashObjs[0])
	}

	return remaining
}
// dedupeInteractive prints the remaining duplicates of remote and
// asks the user whether to skip them, keep just one of them or
// rename them all to be different.
func dedupeInteractive(remote string, objs []Object) {
	fmt.Printf("%s: %d duplicates remain\n", remote, len(objs))
	for i, o := range objs {
		// Show the error in place of the hash if it can't be read
		md5sum, err := o.Hash(HashMD5)
		if err != nil {
			md5sum = err.Error()
		}
		fmt.Printf(" %d: %12d bytes, %s, md5sum %32s\n", i+1, o.Size(), o.ModTime().Format("2006-01-02 15:04:05.000000000"), md5sum)
	}
	options := []string{
		"sSkip and do nothing",
		"kKeep just one (choose which in next step)",
		"rRename all to be different (by changing file.jpg to file-1.jpg)",
	}
	switch choice := Command(options); choice {
	case 'k':
		// The user chooses with a 1 based number
		keep := ChooseNumber("Enter the number of the file to keep", 1, len(objs))
		dedupeDeleteAllButOne(keep-1, remote, objs)
	case 'r':
		dedupeRename(remote, objs)
	case 's':
		// skip - nothing to do
	}
}
// objectsSortedByModTime implements sort.Interface to order Objects
// by modification time, oldest first.
type objectsSortedByModTime []Object

// Len returns the number of objects.
func (s objectsSortedByModTime) Len() int {
	return len(s)
}

// Swap exchanges the objects at indexes i and j.
func (s objectsSortedByModTime) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

// Less reports whether object i was modified before object j.
func (s objectsSortedByModTime) Less(i, j int) bool {
	ti := s[i].ModTime()
	tj := s[j].ModTime()
	return ti.Before(tj)
}
// DeduplicateMode is how the dedupe command chooses what to do
type DeduplicateMode int

// Deduplicate modes
const (
	DeduplicateInteractive DeduplicateMode = iota // interactively ask the user
	DeduplicateSkip                               // skip all conflicts
	DeduplicateFirst                              // choose the first object
	DeduplicateNewest                             // choose the newest object
	DeduplicateOldest                             // choose the oldest object
	DeduplicateRename                             // rename the objects
)

// String returns the lower case name of the mode, or "unknown" if
// mode isn't one of the Deduplicate modes.
func (mode DeduplicateMode) String() string {
	names := [...]string{
		DeduplicateInteractive: "interactive",
		DeduplicateSkip:        "skip",
		DeduplicateFirst:       "first",
		DeduplicateNewest:      "newest",
		DeduplicateOldest:      "oldest",
		DeduplicateRename:      "rename",
	}
	if mode < 0 || int(mode) >= len(names) {
		return "unknown"
	}
	return names[mode]
}
// Deduplicate interactively finds duplicate files and offers to
// delete all but one or rename them to be different. Only useful with
// Google Drive which can have duplicate file names.
func Deduplicate(f Fs, mode DeduplicateMode) error {
Log(f, "Looking for duplicates using %v mode.", mode)
files := map[string][]Object{}
2016-04-24 04:46:52 +08:00
list := NewLister().Start(f, "")
for {
o, err := list.GetObject()
if err != nil {
return err
}
// Check if we are finished
if o == nil {
break
}
remote := o.Remote()
files[remote] = append(files[remote], o)
}
for remote, objs := range files {
if len(objs) > 1 {
Log(remote, "Found %d duplicates - deleting identical copies", len(objs))
objs = dedupeDeleteIdentical(remote, objs)
if len(objs) <= 1 {
Log(remote, "All duplicates removed")
continue
}
switch mode {
case DeduplicateInteractive:
dedupeInteractive(remote, objs)
case DeduplicateFirst:
dedupeDeleteAllButOne(0, remote, objs)
case DeduplicateNewest:
sort.Sort(objectsSortedByModTime(objs)) // sort oldest first
dedupeDeleteAllButOne(len(objs)-1, remote, objs)
case DeduplicateOldest:
sort.Sort(objectsSortedByModTime(objs)) // sort oldest first
dedupeDeleteAllButOne(0, remote, objs)
case DeduplicateRename:
dedupeRename(remote, objs)
case DeduplicateSkip:
// skip
default:
//skip
}
}
}
return nil
}
// listToChan will transfer all incoming objects to a new channel.
//
// Directory entries from the lister are skipped so only non-nil
// objects are ever sent. The channel is closed when the listing is
// finished.
//
// If an error occurs, the error will be logged, and it will close the
// channel.
//
// If the error was ErrorDirNotFound then it will be ignored
func listToChan(list *Lister) ObjectsChan {
	o := make(ObjectsChan, Config.Checkers)
	go func() {
		defer close(o)
		for {
			obj, dir, err := list.Get()
			if err != nil {
				if err != ErrorDirNotFound {
					Stats.Error()
					ErrorLog(nil, "Failed to list: %v", err)
				}
				return
			}
			// Both nil marks the end of the listing
			if dir == nil && obj == nil {
				return
			}
			// Skip directory entries. The test must be on obj, not
			// on the channel o, which is never nil.
			if obj == nil {
				continue
			}
			o <- obj
		}
	}()
	return o
}
// CleanUp removes the trash for the Fs by calling its CleanUp
// method. It returns an error if f doesn't implement CleanUpper and
// does nothing except log if --dry-run is set.
func CleanUp(f Fs) error {
	cleaner, supported := f.(CleanUpper)
	switch {
	case !supported:
		return errors.Errorf("%v doesn't support cleanup", f)
	case Config.DryRun:
		Log(f, "Not running cleanup as --dry-run set")
		return nil
	}
	return cleaner.CleanUp()
}