diff --git a/cmd/cmount/mount.go b/cmd/cmount/mount.go index f6d86bdb5..d6d684d04 100644 --- a/cmd/cmount/mount.go +++ b/cmd/cmount/mount.go @@ -116,13 +116,6 @@ func mountOptions(VFS *vfs.VFS, device string, mountpoint string, opt *mountlib. for _, option := range opt.ExtraFlags { options = append(options, option) } - if runtime.GOOS == "darwin" { - if !findOption("modules=iconv", options) { - iconv := "modules=iconv,from_code=UTF-8,to_code=UTF-8-MAC" - options = append(options, "-o", iconv) - fs.Debugf(nil, "Adding \"-o %s\" for macOS", iconv) - } - } return options } diff --git a/cmd/mountlib/mount.md b/cmd/mountlib/mount.md index 6b3ccc620..0e38323b3 100644 --- a/cmd/mountlib/mount.md +++ b/cmd/mountlib/mount.md @@ -257,7 +257,12 @@ Mounting on macOS can be done either via [built-in NFS server](/commands/rclone_ FUSE driver utilizing a macOS kernel extension (kext). FUSE-T is an alternative FUSE system which "mounts" via an NFSv4 local server. -## NFS mount +##### Unicode Normalization + +It is highly recommended to keep the default of `--no-unicode-normalization=false` +for all `mount` and `serve` commands on macOS. For details, see [vfs-case-sensitivity](https://rclone.org/commands/rclone_mount/#vfs-case-sensitivity). + +#### NFS mount This method spins up an NFS server using [serve nfs](/commands/rclone_serve_nfs/) command and mounts it to the specified mountpoint. If you run this in background mode using |--daemon|, you will need to @@ -294,15 +299,6 @@ As per the [FUSE-T wiki](https://github.com/macos-fuse-t/fuse-t/wiki#caveats): This means that viewing files with various tools, notably macOS Finder, will cause rlcone to update the modification time of the file. This may make rclone upload a full new copy of the file. - -##### Unicode Normalization - -Rclone includes flags for unicode normalization with macFUSE that should be updated -for FUSE-T. See [this forum post](https://forum.rclone.org/t/some-unicode-forms-break-mount-on-macos-with-fuse-t/36403) -and [FUSE-T issue #16](https://github.com/macos-fuse-t/fuse-t/issues/16). The following -flag should be added to the `rclone mount` command. - - -o modules=iconv,from_code=UTF-8,to_code=UTF-8 ##### Read Only mounts diff --git a/fs/operations/check.go b/fs/operations/check.go index 2279b3fe6..b9fe36aab 100644 --- a/fs/operations/check.go +++ b/fs/operations/check.go @@ -380,10 +380,19 @@ func CheckDownload(ctx context.Context, opt *CheckOpt) error { // so that it matches behavior of Check (where it's handled by March) func ApplyTransforms(ctx context.Context, s string) string { ci := fs.GetConfig(ctx) - if !ci.NoUnicodeNormalization { + return ToNormal(s, !ci.NoUnicodeNormalization, ci.IgnoreCaseSync) +} + +// ToNormal normalizes case and unicode form and returns the transformed string. +// It is similar to ApplyTransforms but does not use a context. +// If normUnicode == true, s will be transformed to NFC. +// If normCase == true, s will be transformed to lowercase. +// If both are true, both transformations will be performed. +func ToNormal(s string, normUnicode, normCase bool) string { + if normUnicode { s = norm.NFC.String(s) } - if ci.IgnoreCaseSync { + if normCase { s = strings.ToLower(s) } return s diff --git a/vfs/dir.go b/vfs/dir.go index 5044ee855..47ffd8aaa 100644 --- a/vfs/dir.go +++ b/vfs/dir.go @@ -18,6 +18,7 @@ import ( "github.com/rclone/rclone/fs/operations" "github.com/rclone/rclone/fs/walk" "github.com/rclone/rclone/vfs/vfscommon" + "golang.org/x/text/unicode/norm" ) // Dir represents a directory entry @@ -466,7 +467,6 @@ func (d *Dir) AddVirtual(leaf string, size int64, isDir bool) { node = f } d.addObject(node) - } // delObject removes an object from the directory @@ -514,6 +514,35 @@ func (d *Dir) _readDir() error { return err } + if d.vfs.Opt.BlockNormDupes { // do this only if requested, as it will have a performance hit + ci := fs.GetConfig(context.TODO()) + + // sort entries such that NFD comes before NFC of same name + sort.Slice(entries, func(i, j int) bool { + if entries[i] != entries[j] && fs.DirEntryType(entries[i]) == fs.DirEntryType(entries[j]) && norm.NFC.String(entries[i].Remote()) == norm.NFC.String(entries[j].Remote()) { + if norm.NFD.IsNormalString(entries[i].Remote()) && !norm.NFD.IsNormalString(entries[j].Remote()) { + return true + } + } + return entries.Less(i, j) + }) + + // detect dupes, remove them from the list and log an error + normalizedNames := make(map[string]struct{}, entries.Len()) + filteredEntries := make(fs.DirEntries, 0) + for _, e := range entries { + normName := fmt.Sprintf("%s-%T", operations.ToNormal(e.Remote(), !ci.NoUnicodeNormalization, (ci.IgnoreCaseSync || d.vfs.Opt.CaseInsensitive)), e) // include type to track objects and dirs separately + _, found := normalizedNames[normName] + if found { + fs.Errorf(e.Remote(), "duplicate normalized names detected - skipping") + continue + } + normalizedNames[normName] = struct{}{} + filteredEntries = append(filteredEntries, e) + } + entries = filteredEntries + } + err = d._readDirFromEntries(entries, nil, time.Time{}) if err != nil { return err @@ -767,15 +796,18 @@ func (d *Dir) stat(leaf string) (Node, error) { } item, ok := d.items[leaf] - if !ok && d.vfs.Opt.CaseInsensitive { - leafLower := strings.ToLower(leaf) + ci := fs.GetConfig(context.TODO()) + normUnicode := !ci.NoUnicodeNormalization + normCase := ci.IgnoreCaseSync || d.vfs.Opt.CaseInsensitive + if !ok && (normUnicode || normCase) { + leafNormalized := operations.ToNormal(leaf, normUnicode, normCase) // this handles both case and unicode normalization for name, node := range d.items { - if strings.ToLower(name) == leafLower { + if operations.ToNormal(name, normUnicode, normCase) == leafNormalized { if ok { - // duplicate case insensitive match is an error - return nil, fmt.Errorf("duplicate filename %q detected with --vfs-case-insensitive set", leaf) + // duplicate normalized match is an error + return nil, fmt.Errorf("duplicate filename %q detected with case/unicode normalization settings", leaf) } - // found a case insensitive match + // found a normalized match ok = true item = node } diff --git a/vfs/vfs.md b/vfs/vfs.md index dba7afa0a..8d5350764 100644 --- a/vfs/vfs.md +++ b/vfs/vfs.md @@ -309,6 +309,28 @@ If the flag is not provided on the command line, then its default value depends on the operating system where rclone runs: "true" on Windows and macOS, "false" otherwise. If the flag is provided without a value, then it is "true". +The `--no-unicode-normalization` flag controls whether a similar "fixup" is +performed for filenames that differ but are [canonically +equivalent](https://en.wikipedia.org/wiki/Unicode_equivalence) with respect to +unicode. Unicode normalization can be particularly helpful for users of macOS, +which prefers form NFD instead of the NFC used by most other platforms. It is +therefore highly recommended to keep the default of `false` on macOS, to avoid +encoding compatibility issues. + +In the (probably unlikely) event that a directory has multiple duplicate +filenames after applying case and unicode normalization, the `--vfs-block-norm-dupes` +flag allows hiding these duplicates. This comes with a performance tradeoff, as +rclone will have to scan the entire directory for duplicates when listing a +directory. For this reason, it is recommended to leave this disabled if not +needed. However, macOS users may wish to consider using it, as otherwise, if a +remote directory contains both NFC and NFD versions of the same filename, an odd +situation will occur: both versions of the file will be visible in the mount, +and both will appear to be editable, however, editing either version will +actually result in only the NFD version getting edited under the hood. `--vfs-block- +norm-dupes` prevents this confusion by detecting this scenario, hiding the +duplicates, and logging an error, similar to how this is handled in `rclone +sync`. + ### VFS Disk Options This flag allows you to manually set the statistics about the filing system. diff --git a/vfs/vfs_case_test.go b/vfs/vfs_case_test.go index 5ba81a169..7641cf89f 100644 --- a/vfs/vfs_case_test.go +++ b/vfs/vfs_case_test.go @@ -5,10 +5,12 @@ import ( "os" "testing" + "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fstest" "github.com/rclone/rclone/vfs/vfscommon" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "golang.org/x/text/unicode/norm" ) func TestCaseSensitivity(t *testing.T) { @@ -160,3 +162,36 @@ func assertFileAbsentVFS(t *testing.T, vfs *VFS, name string) { assert.Error(t, err) assert.Equal(t, err, ENOENT) } + +func TestUnicodeNormalization(t *testing.T) { + r := fstest.NewRun(t) + + var ( + nfc = norm.NFC.String(norm.NFD.String("測試_Русский___ě_áñ")) + nfd = norm.NFD.String(nfc) + both = "normal name with no special characters.txt" + ) + + // Create test files + ctx := context.Background() + file1 := r.WriteObject(ctx, both, "data1", t1) + file2 := r.WriteObject(ctx, nfc, "data2", t2) + r.CheckRemoteItems(t, file1, file2) + + // Create VFS + opt := vfscommon.DefaultOpt + vfs := New(r.Fremote, &opt) + defer cleanupVFS(t, vfs) + + // assert that both files are found under NFD-normalized names + assertFileDataVFS(t, vfs, norm.NFD.String(both), "data1") + assertFileDataVFS(t, vfs, nfd, "data2") + + // change ci.NoUnicodeNormalization to true and verify that only file1 is found + ci := fs.GetConfig(ctx) // need to set the global config here as the *Dir methods don't take a ctx param + oldVal := ci.NoUnicodeNormalization + defer func() { fs.GetConfig(ctx).NoUnicodeNormalization = oldVal }() // restore the prior value after the test + ci.NoUnicodeNormalization = true + assertFileDataVFS(t, vfs, norm.NFD.String(both), "data1") + assertFileAbsentVFS(t, vfs, nfd) +} diff --git a/vfs/vfscommon/options.go b/vfs/vfscommon/options.go index bb5f3313a..f1e436847 100644 --- a/vfs/vfscommon/options.go +++ b/vfs/vfscommon/options.go @@ -30,6 +30,7 @@ type Options struct { CacheMinFreeSpace fs.SizeSuffix CachePollInterval time.Duration CaseInsensitive bool + BlockNormDupes bool WriteWait time.Duration // time to wait for in-sequence write ReadWait time.Duration // time to wait for in-sequence read WriteBack time.Duration // time to wait before writing back dirty files diff --git a/vfs/vfsflags/vfsflags.go b/vfs/vfsflags/vfsflags.go index 83b593d29..1dedd8918 100644 --- a/vfs/vfsflags/vfsflags.go +++ b/vfs/vfsflags/vfsflags.go @@ -35,6 +35,7 @@ func AddFlags(flagSet *pflag.FlagSet) { flags.FVarP(flagSet, DirPerms, "dir-perms", "", "Directory permissions", "VFS") flags.FVarP(flagSet, FilePerms, "file-perms", "", "File permissions", "VFS") flags.BoolVarP(flagSet, &Opt.CaseInsensitive, "vfs-case-insensitive", "", Opt.CaseInsensitive, "If a file name not found, find a case insensitive match", "VFS") + flags.BoolVarP(flagSet, &Opt.BlockNormDupes, "vfs-block-norm-dupes", "", Opt.BlockNormDupes, "If duplicate filenames exist in the same directory (after normalization), log an error and hide the duplicates (may have a performance cost)", "VFS") flags.DurationVarP(flagSet, &Opt.WriteWait, "vfs-write-wait", "", Opt.WriteWait, "Time to wait for in-sequence write before giving error", "VFS") flags.DurationVarP(flagSet, &Opt.ReadWait, "vfs-read-wait", "", Opt.ReadWait, "Time to wait for in-sequence read before seeking", "VFS") flags.DurationVarP(flagSet, &Opt.WriteBack, "vfs-write-back", "", Opt.WriteBack, "Time to writeback files after last use when using cache", "VFS")