mirror of
https://github.com/rclone/rclone.git
synced 2024-11-22 12:36:38 +08:00
march: added flag to allow Unicode filenames to remain unique
If your filenames contain two near-identical Unicode characters, rclone will normalize these, making them identical. This flag gives you the ability to keep them unique. This might create unintended side effects, such as duplicating files that contain certain Unicode characters, when downloading them from certain cloud providers to a macOS filesystem. Fixes #4228
This commit is contained in:
parent
4006345cfb
commit
899c8e0697
|
@ -908,6 +908,20 @@ changed and won't need copying then you shouldn't use `--no-traverse`.
|
|||
|
||||
See [rclone copy](/commands/rclone_copy/) for an example of how to use it.
|
||||
|
||||
### --no-unicode-normalization ###
|
||||
|
||||
Don't normalize unicode characters in filenames during the sync routine.
|
||||
|
||||
Sometimes, an operating system will store filenames containing unicode
|
||||
parts in their decomposed form (particularly macOS). Some cloud storage
|
||||
systems will then recompose the unicode, resulting in duplicate files if
|
||||
the data is ever copied back to a local filesystem.
|
||||
|
||||
Using this flag will disable unicode normalization, treating each unicode
|
||||
character as unique. For example, by default é and é will be normalized
|
||||
into the same character. With `--no-unicode-normalization` they will be
|
||||
treated as unique characters.
|
||||
|
||||
### --no-update-modtime ###
|
||||
|
||||
When using this flag, rclone won't update modification times of remote
|
||||
|
|
|
@ -70,6 +70,7 @@ type ConfigInfo struct {
|
|||
IgnoreCaseSync bool
|
||||
NoTraverse bool
|
||||
NoCheckDest bool
|
||||
NoUnicodeNormalization bool
|
||||
NoUpdateModTime bool
|
||||
DataRateUnit string
|
||||
CompareDest string
|
||||
|
|
|
@ -75,6 +75,7 @@ func AddFlags(flagSet *pflag.FlagSet) {
|
|||
flags.BoolVarP(flagSet, &fs.Config.IgnoreCaseSync, "ignore-case-sync", "", fs.Config.IgnoreCaseSync, "Ignore case when synchronizing")
|
||||
flags.BoolVarP(flagSet, &fs.Config.NoTraverse, "no-traverse", "", fs.Config.NoTraverse, "Don't traverse destination file system on copy.")
|
||||
flags.BoolVarP(flagSet, &fs.Config.NoCheckDest, "no-check-dest", "", fs.Config.NoCheckDest, "Don't check the destination, copy regardless.")
|
||||
flags.BoolVarP(flagSet, &fs.Config.NoUnicodeNormalization, "no-unicode-normalization", "", fs.Config.NoUnicodeNormalization, "Don't normalize unicode characters in filenames.")
|
||||
flags.BoolVarP(flagSet, &fs.Config.NoUpdateModTime, "no-update-modtime", "", fs.Config.NoUpdateModTime, "Don't update destination mod-time if files identical.")
|
||||
flags.StringVarP(flagSet, &fs.Config.CompareDest, "compare-dest", "", fs.Config.CompareDest, "Include additional server-side path during comparison.")
|
||||
flags.StringVarP(flagSet, &fs.Config.CopyDest, "copy-dest", "", fs.Config.CopyDest, "Implies --compare-dest but also copies files from path into destination.")
|
||||
|
|
|
@ -22,15 +22,16 @@ import (
|
|||
// calling Callback for each match
|
||||
type March struct {
|
||||
// parameters
|
||||
Ctx context.Context // context for background goroutines
|
||||
Fdst fs.Fs // dest Fs
|
||||
Fsrc fs.Fs // source Fs
|
||||
Dir string // directory
|
||||
NoTraverse bool // don't traverse the destination
|
||||
SrcIncludeAll bool // don't include all files in the src
|
||||
DstIncludeAll bool // don't include all files in the destination
|
||||
Callback Marcher // object to call with results
|
||||
NoCheckDest bool // transfer all objects regardless without checking dst
|
||||
Ctx context.Context // context for background goroutines
|
||||
Fdst fs.Fs // dest Fs
|
||||
Fsrc fs.Fs // source Fs
|
||||
Dir string // directory
|
||||
NoTraverse bool // don't traverse the destination
|
||||
SrcIncludeAll bool // don't include all files in the src
|
||||
DstIncludeAll bool // don't include all files in the destination
|
||||
Callback Marcher // object to call with results
|
||||
NoCheckDest bool // transfer all objects regardless without checking dst
|
||||
NoUnicodeNormalization bool // don't normalize unicode characters in filenames
|
||||
// internal state
|
||||
srcListDir listDirFn // function to call to list a directory in the src
|
||||
dstListDir listDirFn // function to call to list a directory in the dst
|
||||
|
@ -55,7 +56,9 @@ func (m *March) init() {
|
|||
}
|
||||
// Now create the matching transform
|
||||
// ..normalise the UTF8 first
|
||||
m.transforms = append(m.transforms, norm.NFC.String)
|
||||
if !m.NoUnicodeNormalization {
|
||||
m.transforms = append(m.transforms, norm.NFC.String)
|
||||
}
|
||||
// ..if destination is caseInsensitive then make it lower case
|
||||
// case Insensitive | src | dst | lower case compare |
|
||||
// | No | No | No |
|
||||
|
|
|
@ -19,6 +19,7 @@ import (
|
|||
"github.com/rclone/rclone/fstest/mockobject"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// Some times used in the tests
|
||||
|
@ -313,6 +314,8 @@ func TestMatchListings(t *testing.T) {
|
|||
b = mockobject.Object("b")
|
||||
c = mockobject.Object("c")
|
||||
d = mockobject.Object("d")
|
||||
uE1 = mockobject.Object("é") // one of the unicode E characters
|
||||
uE2 = mockobject.Object("é") // a different unicode E character
|
||||
dirA = mockdir.New("A")
|
||||
dirb = mockdir.New("b")
|
||||
)
|
||||
|
@ -419,6 +422,28 @@ func TestMatchListings(t *testing.T) {
|
|||
},
|
||||
transforms: []matchTransformFn{strings.ToLower},
|
||||
},
|
||||
{
|
||||
what: "Unicode near-duplicate that becomes duplicate with normalization",
|
||||
input: fs.DirEntries{
|
||||
uE1, uE1,
|
||||
uE2, uE2,
|
||||
},
|
||||
matches: []matchPair{
|
||||
{uE1, uE1},
|
||||
},
|
||||
transforms: []matchTransformFn{norm.NFC.String},
|
||||
},
|
||||
{
|
||||
what: "Unicode near-duplicate with no normalization",
|
||||
input: fs.DirEntries{
|
||||
uE1, uE1,
|
||||
uE2, uE2,
|
||||
},
|
||||
matches: []matchPair{
|
||||
{uE1, uE1},
|
||||
{uE2, uE2},
|
||||
},
|
||||
},
|
||||
{
|
||||
what: "File and directory are not duplicates - srcOnly",
|
||||
input: fs.DirEntries{
|
||||
|
|
127
fs/sync/sync.go
127
fs/sync/sync.go
|
@ -30,42 +30,43 @@ type syncCopyMove struct {
|
|||
deleteEmptySrcDirs bool
|
||||
dir string
|
||||
// internal state
|
||||
ctx context.Context // internal context for controlling go-routines
|
||||
cancel func() // cancel the context
|
||||
noTraverse bool // if set don't traverse the dst
|
||||
noCheckDest bool // if set transfer all objects regardless without checking dst
|
||||
deletersWg sync.WaitGroup // for delete before go routine
|
||||
deleteFilesCh chan fs.Object // channel to receive deletes if delete before
|
||||
trackRenames bool // set if we should do server side renames
|
||||
trackRenamesStrategy trackRenamesStrategy // strategies used for tracking renames
|
||||
dstFilesMu sync.Mutex // protect dstFiles
|
||||
dstFiles map[string]fs.Object // dst files, always filled
|
||||
srcFiles map[string]fs.Object // src files, only used if deleteBefore
|
||||
srcFilesChan chan fs.Object // passes src objects
|
||||
srcFilesResult chan error // error result of src listing
|
||||
dstFilesResult chan error // error result of dst listing
|
||||
dstEmptyDirsMu sync.Mutex // protect dstEmptyDirs
|
||||
dstEmptyDirs map[string]fs.DirEntry // potentially empty directories
|
||||
srcEmptyDirsMu sync.Mutex // protect srcEmptyDirs
|
||||
srcEmptyDirs map[string]fs.DirEntry // potentially empty directories
|
||||
checkerWg sync.WaitGroup // wait for checkers
|
||||
toBeChecked *pipe // checkers channel
|
||||
transfersWg sync.WaitGroup // wait for transfers
|
||||
toBeUploaded *pipe // copiers channel
|
||||
errorMu sync.Mutex // Mutex covering the errors variables
|
||||
err error // normal error from copy process
|
||||
noRetryErr error // error with NoRetry set
|
||||
fatalErr error // fatal error
|
||||
commonHash hash.Type // common hash type between src and dst
|
||||
renameMapMu sync.Mutex // mutex to protect the below
|
||||
renameMap map[string][]fs.Object // dst files by hash - only used by trackRenames
|
||||
renamerWg sync.WaitGroup // wait for renamers
|
||||
toBeRenamed *pipe // renamers channel
|
||||
trackRenamesWg sync.WaitGroup // wg for background track renames
|
||||
trackRenamesCh chan fs.Object // objects are pumped in here
|
||||
renameCheck []fs.Object // accumulate files to check for rename here
|
||||
compareCopyDest fs.Fs // place to check for files to server side copy
|
||||
backupDir fs.Fs // place to store overwrites/deletes
|
||||
ctx context.Context // internal context for controlling go-routines
|
||||
cancel func() // cancel the context
|
||||
noTraverse bool // if set don't traverse the dst
|
||||
noCheckDest bool // if set transfer all objects regardless without checking dst
|
||||
noUnicodeNormalization bool // don't normalize unicode characters in filenames
|
||||
deletersWg sync.WaitGroup // for delete before go routine
|
||||
deleteFilesCh chan fs.Object // channel to receive deletes if delete before
|
||||
trackRenames bool // set if we should do server side renames
|
||||
trackRenamesStrategy trackRenamesStrategy // strategies used for tracking renames
|
||||
dstFilesMu sync.Mutex // protect dstFiles
|
||||
dstFiles map[string]fs.Object // dst files, always filled
|
||||
srcFiles map[string]fs.Object // src files, only used if deleteBefore
|
||||
srcFilesChan chan fs.Object // passes src objects
|
||||
srcFilesResult chan error // error result of src listing
|
||||
dstFilesResult chan error // error result of dst listing
|
||||
dstEmptyDirsMu sync.Mutex // protect dstEmptyDirs
|
||||
dstEmptyDirs map[string]fs.DirEntry // potentially empty directories
|
||||
srcEmptyDirsMu sync.Mutex // protect srcEmptyDirs
|
||||
srcEmptyDirs map[string]fs.DirEntry // potentially empty directories
|
||||
checkerWg sync.WaitGroup // wait for checkers
|
||||
toBeChecked *pipe // checkers channel
|
||||
transfersWg sync.WaitGroup // wait for transfers
|
||||
toBeUploaded *pipe // copiers channel
|
||||
errorMu sync.Mutex // Mutex covering the errors variables
|
||||
err error // normal error from copy process
|
||||
noRetryErr error // error with NoRetry set
|
||||
fatalErr error // fatal error
|
||||
commonHash hash.Type // common hash type between src and dst
|
||||
renameMapMu sync.Mutex // mutex to protect the below
|
||||
renameMap map[string][]fs.Object // dst files by hash - only used by trackRenames
|
||||
renamerWg sync.WaitGroup // wait for renamers
|
||||
toBeRenamed *pipe // renamers channel
|
||||
trackRenamesWg sync.WaitGroup // wg for background track renames
|
||||
trackRenamesCh chan fs.Object // objects are pumped in here
|
||||
renameCheck []fs.Object // accumulate files to check for rename here
|
||||
compareCopyDest fs.Fs // place to check for files to server side copy
|
||||
backupDir fs.Fs // place to store overwrites/deletes
|
||||
}
|
||||
|
||||
type trackRenamesStrategy byte
|
||||
|
@ -88,24 +89,25 @@ func newSyncCopyMove(ctx context.Context, fdst, fsrc fs.Fs, deleteMode fs.Delete
|
|||
return nil, fserrors.FatalError(fs.ErrorOverlapping)
|
||||
}
|
||||
s := &syncCopyMove{
|
||||
fdst: fdst,
|
||||
fsrc: fsrc,
|
||||
deleteMode: deleteMode,
|
||||
DoMove: DoMove,
|
||||
copyEmptySrcDirs: copyEmptySrcDirs,
|
||||
deleteEmptySrcDirs: deleteEmptySrcDirs,
|
||||
dir: "",
|
||||
srcFilesChan: make(chan fs.Object, fs.Config.Checkers+fs.Config.Transfers),
|
||||
srcFilesResult: make(chan error, 1),
|
||||
dstFilesResult: make(chan error, 1),
|
||||
dstEmptyDirs: make(map[string]fs.DirEntry),
|
||||
srcEmptyDirs: make(map[string]fs.DirEntry),
|
||||
noTraverse: fs.Config.NoTraverse,
|
||||
noCheckDest: fs.Config.NoCheckDest,
|
||||
deleteFilesCh: make(chan fs.Object, fs.Config.Checkers),
|
||||
trackRenames: fs.Config.TrackRenames,
|
||||
commonHash: fsrc.Hashes().Overlap(fdst.Hashes()).GetOne(),
|
||||
trackRenamesCh: make(chan fs.Object, fs.Config.Checkers),
|
||||
fdst: fdst,
|
||||
fsrc: fsrc,
|
||||
deleteMode: deleteMode,
|
||||
DoMove: DoMove,
|
||||
copyEmptySrcDirs: copyEmptySrcDirs,
|
||||
deleteEmptySrcDirs: deleteEmptySrcDirs,
|
||||
dir: "",
|
||||
srcFilesChan: make(chan fs.Object, fs.Config.Checkers+fs.Config.Transfers),
|
||||
srcFilesResult: make(chan error, 1),
|
||||
dstFilesResult: make(chan error, 1),
|
||||
dstEmptyDirs: make(map[string]fs.DirEntry),
|
||||
srcEmptyDirs: make(map[string]fs.DirEntry),
|
||||
noTraverse: fs.Config.NoTraverse,
|
||||
noCheckDest: fs.Config.NoCheckDest,
|
||||
noUnicodeNormalization: fs.Config.NoUnicodeNormalization,
|
||||
deleteFilesCh: make(chan fs.Object, fs.Config.Checkers),
|
||||
trackRenames: fs.Config.TrackRenames,
|
||||
commonHash: fsrc.Hashes().Overlap(fdst.Hashes()).GetOne(),
|
||||
trackRenamesCh: make(chan fs.Object, fs.Config.Checkers),
|
||||
}
|
||||
var err error
|
||||
s.toBeChecked, err = newPipe(fs.Config.OrderBy, accounting.Stats(ctx).SetCheckQueue, fs.Config.MaxBacklog)
|
||||
|
@ -782,14 +784,15 @@ func (s *syncCopyMove) run() error {
|
|||
|
||||
// set up a march over fdst and fsrc
|
||||
m := &march.March{
|
||||
Ctx: s.ctx,
|
||||
Fdst: s.fdst,
|
||||
Fsrc: s.fsrc,
|
||||
Dir: s.dir,
|
||||
NoTraverse: s.noTraverse,
|
||||
Callback: s,
|
||||
DstIncludeAll: filter.Active.Opt.DeleteExcluded,
|
||||
NoCheckDest: s.noCheckDest,
|
||||
Ctx: s.ctx,
|
||||
Fdst: s.fdst,
|
||||
Fsrc: s.fsrc,
|
||||
Dir: s.dir,
|
||||
NoTraverse: s.noTraverse,
|
||||
Callback: s,
|
||||
DstIncludeAll: filter.Active.Opt.DeleteExcluded,
|
||||
NoCheckDest: s.noCheckDest,
|
||||
NoUnicodeNormalization: s.noUnicodeNormalization,
|
||||
}
|
||||
s.processError(m.Run())
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user