encoder: add lib/encoder to handle character subsitution and quoting

This commit is contained in:
Fabian Möller 2018-11-02 13:12:31 +01:00 committed by Nick Craig-Wood
parent bca5d8009e
commit ef5e1909a0
4 changed files with 3531 additions and 0 deletions

633
lib/encoder/encoder.go Normal file
View File

@ -0,0 +1,633 @@
/*
Translate file names for usage on restrictive storage systems
The restricted set of characters are mapped to a unicode equivalent version
(most to their FULLWIDTH variant) to increase compatability with other
storage systems.
See: http://unicode-search.net/unicode-namesearch.pl?term=FULLWIDTH
Encoders will also quote reserved characters to differentiate between
the raw and encoded forms.
*/
package encoder
import (
"bytes"
"fmt"
"io"
"strconv"
"strings"
"unicode/utf8"
)
const (
// adding this to any printable ASCII character turns it into the
// FULLWIDTH variant
fullOffset = 0xFEE0
// the first rune of the SYMBOL FOR block for control characters
symbolOffset = '␀' // SYMBOL FOR NULL
// QuoteRune is the rune used for quoting reserved characters
QuoteRune = '' // SINGLE HIGH-REVERSED-9 QUOTATION MARK
// EncodeStandard contains the flags used for the Standard Encoder
EncodeStandard = EncodeZero | EncodeSlash | EncodeCtl | EncodeDel
// Standard defines the encoding that is used for paths in- and output by rclone.
//
// List of replaced characters:
// (0x00) -> '␀' // SYMBOL FOR NULL
// / (slash) -> '' // FULLWIDTH SOLIDUS
Standard = MultiEncoder(EncodeStandard)
)
// Possible flags for the MultiEncoder
const (
EncodeZero uint = 0 // NUL(0x00)
EncodeSlash uint = 1 << iota // /
EncodeWin // :?"*<>|
EncodeBackSlash // \
EncodeHashPercent // #%
EncodeDel // DEL(0x7F)
EncodeCtl // CTRL(0x01-0x1F)
EncodeLeftSpace // Leading SPACE
EncodeLeftTilde // Leading ~
EncodeRightSpace // Trailing SPACE
EncodeRightPeriod // Trailing .
EncodeInvalidUtf8 // Invalid UTF-8 bytes
)
// Encoder can transform names to and from the original and translated version.
type Encoder interface {
// Encode takes a raw name and substitutes any reserved characters and
// patterns in it
Encode(string) string
// Decode takes a name and undoes any substitutions made by Encode
Decode(string) string
// FromStandardPath takes a / separated path in Standard encoding
// and converts it to a / separated path in this encoding.
FromStandardPath(string) string
// FromStandardName takes name in Standard encoding and converts
// it in this encoding.
FromStandardName(string) string
// ToStandardPath takes a / separated path in this encoding
// and converts it to a / separated path in Standard encoding.
ToStandardPath(string) string
// ToStandardName takes name in this encoding and converts
// it in Standard encoding.
ToStandardName(string) string
}
// MultiEncoder is a configurable Encoder. The Encode* constants in this
// package can be combined using bitwise or (|) to enable handling of multiple
// character classes
type MultiEncoder uint
// Encode takes a raw name and substitutes any reserved characters and
// patterns in it
func (mask MultiEncoder) Encode(in string) string {
var (
encodeWin = uint(mask)&EncodeWin != 0
encodeSlash = uint(mask)&EncodeSlash != 0
encodeBackSlash = uint(mask)&EncodeBackSlash != 0
encodeHashPercent = uint(mask)&EncodeHashPercent != 0
encodeDel = uint(mask)&EncodeDel != 0
encodeCtl = uint(mask)&EncodeCtl != 0
encodeLeftSpace = uint(mask)&EncodeLeftSpace != 0
encodeLeftTilde = uint(mask)&EncodeLeftTilde != 0
encodeRightSpace = uint(mask)&EncodeRightSpace != 0
encodeRightPeriod = uint(mask)&EncodeRightPeriod != 0
encodeInvalidUnicode = uint(mask)&EncodeInvalidUtf8 != 0
)
// handle prefix only replacements
prefix := ""
if encodeLeftSpace && len(in) > 0 { // Leading SPACE
if in[0] == ' ' {
prefix, in = "␠", in[1:] // SYMBOL FOR SPACE
} else if r, l := utf8.DecodeRuneInString(in); r == '␠' { // SYMBOL FOR SPACE
prefix, in = string(QuoteRune)+"␠", in[l:] // SYMBOL FOR SPACE
}
}
if encodeLeftTilde && len(in) > 0 { // Leading ~
if in[0] == '~' {
prefix, in = string('~'+fullOffset), in[1:] // FULLWIDTH TILDE
} else if r, l := utf8.DecodeRuneInString(in); r == '~'+fullOffset {
prefix, in = string(QuoteRune)+string('~'+fullOffset), in[l:] // FULLWIDTH TILDE
}
}
// handle suffix only replacements
suffix := ""
if encodeRightSpace && len(in) > 0 { // Trailing SPACE
if in[len(in)-1] == ' ' {
suffix, in = "␠", in[:len(in)-1] // SYMBOL FOR SPACE
} else if r, l := utf8.DecodeLastRuneInString(in); r == '␠' {
suffix, in = string(QuoteRune)+"␠", in[:len(in)-l] // SYMBOL FOR SPACE
}
}
if encodeRightPeriod && len(in) > 0 { // Trailing .
if in[len(in)-1] == '.' {
suffix, in = "", in[:len(in)-1] // FULLWIDTH FULL STOP
} else if r, l := utf8.DecodeLastRuneInString(in); r == '' {
suffix, in = string(QuoteRune)+"", in[:len(in)-l] // FULLWIDTH FULL STOP
}
}
index := 0
if prefix == "" && suffix == "" {
// find the first rune which (most likely) needs to be replaced
index = strings.IndexFunc(in, func(r rune) bool {
switch r {
case 0, '␀', QuoteRune, utf8.RuneError:
return true
}
if encodeWin { // :?"*<>|
switch r {
case '*', '<', '>', '?', ':', '|', '"',
'', '', '', '', '', '', '':
return true
}
}
if encodeSlash { // /
switch r {
case '/',
'':
return true
}
}
if encodeBackSlash { // \
switch r {
case '\\',
'':
return true
}
}
if encodeHashPercent { // #%
switch r {
case '#', '%',
'', '':
return true
}
}
if encodeDel { // DEL(0x7F)
switch r {
case rune(0x7F), '␡':
return true
}
}
if encodeCtl { // CTRL(0x01-0x1F)
if r >= 1 && r <= 0x1F {
return true
} else if r > symbolOffset && r <= symbolOffset+0x1F {
return true
}
}
return false
})
}
// nothing to replace, return input
if index == -1 {
return in
}
var out bytes.Buffer
out.Grow(len(in) + len(prefix) + len(suffix))
out.WriteString(prefix)
// copy the clean part of the input and skip it
out.WriteString(in[:index])
in = in[index:]
for i, r := range in {
switch r {
case 0:
out.WriteRune(symbolOffset)
continue
case '␀', QuoteRune:
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
case utf8.RuneError:
if encodeInvalidUnicode {
// only encode invalid sequences and not utf8.RuneError
if i+3 > len(in) || in[i:i+3] != string(utf8.RuneError) {
_, l := utf8.DecodeRuneInString(in[i:])
appendQuotedBytes(&out, in[i:i+l])
continue
}
} else {
// append the real bytes instead of utf8.RuneError
_, l := utf8.DecodeRuneInString(in[i:])
out.WriteString(in[i : i+l])
continue
}
}
if encodeWin { // :?"*<>|
switch r {
case '*', '<', '>', '?', ':', '|', '"':
out.WriteRune(r + fullOffset)
continue
case '', '', '', '', '', '', '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if encodeSlash { // /
switch r {
case '/':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if encodeBackSlash { // \
switch r {
case '\\':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if encodeHashPercent { // #%
switch r {
case '#', '%':
out.WriteRune(r + fullOffset)
continue
case '', '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if encodeDel { // DEL(0x7F)
switch r {
case rune(0x7F):
out.WriteRune('␡') // SYMBOL FOR DELETE
continue
case '␡':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if encodeCtl { // CTRL(0x01-0x1F)
if r >= 1 && r <= 0x1F {
out.WriteRune('␀' + r) // SYMBOL FOR NULL
continue
} else if r > symbolOffset && r <= symbolOffset+0x1F {
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
out.WriteRune(r)
}
out.WriteString(suffix)
return out.String()
}
// Decode takes a name and undoes any substitutions made by Encode
func (mask MultiEncoder) Decode(in string) string {
var (
encodeWin = uint(mask)&EncodeWin != 0
encodeSlash = uint(mask)&EncodeSlash != 0
encodeBackSlash = uint(mask)&EncodeBackSlash != 0
encodeHashPercent = uint(mask)&EncodeHashPercent != 0
encodeDel = uint(mask)&EncodeDel != 0
encodeCtl = uint(mask)&EncodeCtl != 0
encodeLeftSpace = uint(mask)&EncodeLeftSpace != 0
encodeLeftTilde = uint(mask)&EncodeLeftTilde != 0
encodeRightSpace = uint(mask)&EncodeRightSpace != 0
encodeRightPeriod = uint(mask)&EncodeRightPeriod != 0
encodeInvalidUnicode = uint(mask)&EncodeInvalidUtf8 != 0
)
// handle prefix only replacements
prefix := ""
if r, l1 := utf8.DecodeRuneInString(in); encodeLeftSpace && r == '␠' { // SYMBOL FOR SPACE
prefix, in = " ", in[l1:]
} else if encodeLeftTilde && r == '' { // FULLWIDTH TILDE
prefix, in = "~", in[l1:]
} else if r == QuoteRune {
if r, l2 := utf8.DecodeRuneInString(in[l1:]); encodeLeftSpace && r == '␠' { // SYMBOL FOR SPACE
prefix, in = "␠", in[l1+l2:]
} else if encodeLeftTilde && r == '' { // FULLWIDTH TILDE
prefix, in = "", in[l1+l2:]
}
}
// handle suffix only replacements
suffix := ""
if r, l := utf8.DecodeLastRuneInString(in); encodeRightSpace && r == '␠' { // SYMBOL FOR SPACE
in = in[:len(in)-l]
if r, l2 := utf8.DecodeLastRuneInString(in); r == QuoteRune {
suffix, in = "␠", in[:len(in)-l2]
} else {
suffix = " "
}
} else if encodeRightPeriod && r == '' { // FULLWIDTH FULL STOP
in = in[:len(in)-l]
if r, l2 := utf8.DecodeLastRuneInString(in); r == QuoteRune {
suffix, in = "", in[:len(in)-l2]
} else {
suffix = "."
}
}
index := 0
if prefix == "" && suffix == "" {
// find the first rune which (most likely) needs to be replaced
index = strings.IndexFunc(in, func(r rune) bool {
switch r {
case '␀', QuoteRune:
return true
}
if encodeWin { // :?"*<>|
switch r {
case '', '', '', '', '', '', '':
return true
}
}
if encodeSlash { // /
switch r {
case '':
return true
}
}
if encodeBackSlash { // \
switch r {
case '':
return true
}
}
if encodeHashPercent { // #%
switch r {
case '', '':
return true
}
}
if encodeDel { // DEL(0x7F)
switch r {
case '␡':
return true
}
}
if encodeCtl { // CTRL(0x01-0x1F)
if r > symbolOffset && r <= symbolOffset+0x1F {
return true
}
}
return false
})
}
// nothing to replace, return input
if index == -1 {
return in
}
var out bytes.Buffer
out.Grow(len(in))
out.WriteString(prefix)
// copy the clean part of the input and skip it
out.WriteString(in[:index])
in = in[index:]
var unquote, unquoteNext, skipNext bool
for i, r := range in {
if skipNext {
skipNext = false
continue
}
unquote, unquoteNext = unquoteNext, false
switch r {
case '␀': // SYMBOL FOR NULL
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(0)
}
continue
case QuoteRune:
if unquote {
out.WriteRune(r)
} else {
unquoteNext = true
}
continue
}
if encodeWin { // :?"*<>|
switch r {
case '', '', '', '', '', '', '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if encodeSlash { // /
switch r {
case '': // FULLWIDTH SOLIDUS
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if encodeBackSlash { // \
switch r {
case '': // FULLWIDTH REVERSE SOLIDUS
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if encodeHashPercent { // #%
switch r {
case '', '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if encodeDel { // DEL(0x7F)
switch r {
case '␡': // SYMBOL FOR DELETE
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(0x7F)
}
continue
}
}
if encodeCtl { // CTRL(0x01-0x1F)
if r > symbolOffset && r <= symbolOffset+0x1F {
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - symbolOffset)
}
continue
}
}
if unquote {
if encodeInvalidUnicode {
skipNext = appendUnquotedByte(&out, in[i:])
if skipNext {
continue
}
}
out.WriteRune(QuoteRune)
}
switch r {
case utf8.RuneError:
// append the real bytes instead of utf8.RuneError
_, l := utf8.DecodeRuneInString(in[i:])
out.WriteString(in[i : i+l])
continue
}
out.WriteRune(r)
}
if unquoteNext {
out.WriteRune(QuoteRune)
}
out.WriteString(suffix)
return out.String()
}
// FromStandardPath takes a / separated path in Standard encoding
// and converts it to a / separated path in this encoding.
func (mask MultiEncoder) FromStandardPath(s string) string {
return FromStandardPath(mask, s)
}
// FromStandardName takes name in Standard encoding and converts
// it in this encoding.
func (mask MultiEncoder) FromStandardName(s string) string {
return FromStandardName(mask, s)
}
// ToStandardPath takes a / separated path in this encoding
// and converts it to a / separated path in Standard encoding.
func (mask MultiEncoder) ToStandardPath(s string) string {
return ToStandardPath(mask, s)
}
// ToStandardName takes name in this encoding and converts
// it in Standard encoding.
func (mask MultiEncoder) ToStandardName(s string) string {
return ToStandardName(mask, s)
}
func appendQuotedBytes(w io.Writer, s string) {
for _, b := range []byte(s) {
_, _ = fmt.Fprintf(w, string(QuoteRune)+"%02X", b)
}
}
func appendUnquotedByte(w io.Writer, s string) bool {
if len(s) < 2 {
return false
}
u, err := strconv.ParseUint(s[:2], 16, 8)
if err != nil {
return false
}
n, _ := w.Write([]byte{byte(u)})
return n == 1
}
type identity struct{}
func (identity) Encode(in string) string { return in }
func (identity) Decode(in string) string { return in }
func (i identity) FromStandardPath(s string) string {
return FromStandardPath(i, s)
}
func (i identity) FromStandardName(s string) string {
return FromStandardName(i, s)
}
func (i identity) ToStandardPath(s string) string {
return ToStandardPath(i, s)
}
func (i identity) ToStandardName(s string) string {
return ToStandardName(i, s)
}
// Identity returns a Encoder that always returns the input value
func Identity() Encoder {
return identity{}
}
// FromStandardPath takes a / separated path in Standard encoding
// and converts it to a / separated path in the given encoding.
func FromStandardPath(e Encoder, s string) string {
if e == Standard {
return s
}
parts := strings.Split(s, "/")
encoded := make([]string, len(parts))
changed := false
for i, p := range parts {
enc := FromStandardName(e, p)
changed = changed || enc != p
encoded[i] = enc
}
if !changed {
return s
}
return strings.Join(encoded, "/")
}
// FromStandardName takes name in Standard encoding and converts
// it in the given encoding.
func FromStandardName(e Encoder, s string) string {
if e == Standard {
return s
}
return e.Encode(Standard.Decode(s))
}
// ToStandardPath takes a / separated path in the given encoding
// and converts it to a / separated path in Standard encoding.
func ToStandardPath(e Encoder, s string) string {
if e == Standard {
return s
}
parts := strings.Split(s, "/")
encoded := make([]string, len(parts))
changed := false
for i, p := range parts {
dec := ToStandardName(e, p)
changed = changed || dec != p
encoded[i] = dec
}
if !changed {
return s
}
return strings.Join(encoded, "/")
}
// ToStandardName takes name in the given encoding and converts
// it in Standard encoding.
func ToStandardName(e Encoder, s string) string {
if e == Standard {
return s
}
return Standard.Encode(e.Decode(s))
}

File diff suppressed because it is too large Load Diff

262
lib/encoder/encoder_test.go Normal file
View File

@ -0,0 +1,262 @@
package encoder
import (
"regexp"
"strconv"
"strings"
"testing"
)
type testCase struct {
mask uint
in string
out string
}
func TestEncodeSingleMask(t *testing.T) {
for i, tc := range testCasesSingle {
e := MultiEncoder(tc.mask)
t.Run(strconv.FormatInt(int64(i), 10), func(t *testing.T) {
got := e.Encode(tc.in)
if got != tc.out {
t.Errorf("Encode(%q) want %q got %q", tc.in, tc.out, got)
}
got2 := e.Decode(got)
if got2 != tc.in {
t.Errorf("Decode(%q) want %q got %q", got, tc.in, got2)
}
})
}
}
func TestEncodeSingleMaskEdge(t *testing.T) {
for i, tc := range testCasesSingleEdge {
e := MultiEncoder(tc.mask)
t.Run(strconv.FormatInt(int64(i), 10), func(t *testing.T) {
got := e.Encode(tc.in)
if got != tc.out {
t.Errorf("Encode(%q) want %q got %q", tc.in, tc.out, got)
}
got2 := e.Decode(got)
if got2 != tc.in {
t.Errorf("Decode(%q) want %q got %q", got, tc.in, got2)
}
})
}
}
func TestEncodeInvalidUnicode(t *testing.T) {
for i, tc := range []testCase{
{
mask: EncodeInvalidUtf8,
in: "\xBF",
out: "BF",
}, {
mask: EncodeInvalidUtf8,
in: "\xBF\xFE",
out: "BFFE",
}, {
mask: EncodeInvalidUtf8,
in: "a\xBF\xFEb",
out: "aBFFEb",
}, {
mask: EncodeInvalidUtf8,
in: "a\xBFξ\xFEb",
out: "aBFξFEb",
}, {
mask: EncodeInvalidUtf8 | EncodeBackSlash,
in: "a\xBF\\\xFEb",
out: "aBFFEb",
}, {
mask: 0,
in: "\xBF",
out: "\xBF",
}, {
mask: 0,
in: "\xBF\xFE",
out: "\xBF\xFE",
}, {
mask: 0,
in: "a\xBF\xFEb",
out: "a\xBF\xFEb",
}, {
mask: 0,
in: "a\xBFξ\xFEb",
out: "a\xBFξ\xFEb",
}, {
mask: EncodeBackSlash,
in: "a\xBF\\\xFEb",
out: "a\xBF\xFEb",
},
} {
e := MultiEncoder(tc.mask)
t.Run(strconv.FormatInt(int64(i), 10), func(t *testing.T) {
got := e.Encode(tc.in)
if got != tc.out {
t.Errorf("Encode(%q) want %q got %q", tc.in, tc.out, got)
}
got2 := e.Decode(got)
if got2 != tc.in {
t.Errorf("Decode(%q) want %q got %q", got, tc.in, got2)
}
})
}
}
func TestDecodeHalf(t *testing.T) {
for i, tc := range []testCase{
{
mask: 0,
in: "",
out: "",
}, {
mask: 0,
in: "",
out: "",
}, {
mask: 0,
in: "a",
out: "a",
}, {
mask: EncodeInvalidUtf8,
in: "aBEg",
out: "aBEg",
}, {
mask: EncodeInvalidUtf8,
in: "aBEg",
out: "aBEg",
}, {
mask: EncodeInvalidUtf8 | EncodeBackSlash,
in: "aBEg",
out: "aB\\Eg",
},
} {
e := MultiEncoder(tc.mask)
t.Run(strconv.FormatInt(int64(i), 10), func(t *testing.T) {
got := e.Decode(tc.in)
if got != tc.out {
t.Errorf("Decode(%q) want %q got %q", tc.in, tc.out, got)
}
})
}
}
const oneDrive = MultiEncoder(
EncodeStandard |
EncodeWin |
EncodeBackSlash |
EncodeHashPercent |
EncodeDel |
EncodeCtl |
EncodeLeftTilde |
EncodeRightSpace |
EncodeRightPeriod)
var benchTests = []struct {
in string
out string
}{
{"", ""},
{"abc 123", "abc 123"},
{`\*<>?:|#%".~`, `.~`},
{`\*<>?:|#%".~/\*<>?:|#%".~`, `.~/.~`},
{" leading space", " leading space"},
{"~leading tilde", "leading tilde"},
{"trailing dot.", "trailing dot"},
{" leading space/ leading space/ leading space", " leading space/ leading space/ leading space"},
{"~leading tilde/~leading tilde/~leading tilde", "leading tilde/leading tilde/leading tilde"},
{"leading tilde/~leading tilde", "leading tilde/leading tilde"},
{"trailing dot./trailing dot./trailing dot.", "trailing dot/trailing dot/trailing dot"},
}
func benchReplace(b *testing.B, f func(string) string) {
for range make([]struct{}, b.N) {
for _, test := range benchTests {
got := f(test.in)
if got != test.out {
b.Errorf("Encode(%q) want %q got %q", test.in, test.out, got)
}
}
}
}
func benchRestore(b *testing.B, f func(string) string) {
for range make([]struct{}, b.N) {
for _, test := range benchTests {
got := f(test.out)
if got != test.in {
b.Errorf("Decode(%q) want %q got %q", got, test.in, got)
}
}
}
}
func BenchmarkOneDriveReplaceNew(b *testing.B) {
benchReplace(b, oneDrive.Encode)
}
func BenchmarkOneDriveReplaceOld(b *testing.B) {
benchReplace(b, replaceReservedChars)
}
func BenchmarkOneDriveRestoreNew(b *testing.B) {
benchRestore(b, oneDrive.Decode)
}
func BenchmarkOneDriveRestoreOld(b *testing.B) {
benchRestore(b, restoreReservedChars)
}
var (
charMap = map[rune]rune{
'\\': '', // FULLWIDTH REVERSE SOLIDUS
'*': '', // FULLWIDTH ASTERISK
'<': '', // FULLWIDTH LESS-THAN SIGN
'>': '', // FULLWIDTH GREATER-THAN SIGN
'?': '', // FULLWIDTH QUESTION MARK
':': '', // FULLWIDTH COLON
'|': '', // FULLWIDTH VERTICAL LINE
'#': '', // FULLWIDTH NUMBER SIGN
'%': '', // FULLWIDTH PERCENT SIGN
'"': '', // FULLWIDTH QUOTATION MARK - not on the list but seems to be reserved
'.': '', // FULLWIDTH FULL STOP
'~': '', // FULLWIDTH TILDE
' ': '␠', // SYMBOL FOR SPACE
}
invCharMap map[rune]rune
fixEndingInPeriod = regexp.MustCompile(`\.(/|$)`)
fixEndingWithSpace = regexp.MustCompile(` (/|$)`)
fixStartingWithTilde = regexp.MustCompile(`(/|^)~`)
)
func init() {
// Create inverse charMap
invCharMap = make(map[rune]rune, len(charMap))
for k, v := range charMap {
invCharMap[v] = k
}
}
// replaceReservedChars takes a path and substitutes any reserved
// characters in it
func replaceReservedChars(in string) string {
// Folder names can't end with a period '.'
in = fixEndingInPeriod.ReplaceAllString(in, string(charMap['.'])+"$1")
// OneDrive for Business file or folder names cannot begin with a tilde '~'
in = fixStartingWithTilde.ReplaceAllString(in, "$1"+string(charMap['~']))
// Apparently file names can't start with space either
in = fixEndingWithSpace.ReplaceAllString(in, string(charMap[' '])+"$1")
// Encode reserved characters
return strings.Map(func(c rune) rune {
if replacement, ok := charMap[c]; ok && c != '.' && c != '~' && c != ' ' {
return replacement
}
return c
}, in)
}
// restoreReservedChars takes a path and undoes any substitutions
// made by replaceReservedChars
func restoreReservedChars(in string) string {
return strings.Map(func(c rune) rune {
if replacement, ok := invCharMap[c]; ok {
return replacement
}
return c
}, in)
}

View File

@ -0,0 +1,419 @@
// +build go1.10
package main
import (
"fmt"
"log"
"math/rand"
"os"
"strconv"
"strings"
"github.com/ncw/rclone/lib/encoder"
)
const (
edgeLeft = iota
edgeRight
)
type mapping struct {
mask uint
src, dst []rune
}
type stringPair struct {
a, b string
}
const header = `// Code generated by ./internal/gen/main.go. DO NOT EDIT.
` + `//go:generate go run ./internal/gen/main.go
package encoder
`
var maskBits = []struct {
mask uint
name string
}{
{encoder.EncodeZero, "EncodeZero"},
{encoder.EncodeWin, "EncodeWin"},
{encoder.EncodeSlash, "EncodeSlash"},
{encoder.EncodeBackSlash, "EncodeBackSlash"},
{encoder.EncodeHashPercent, "EncodeHashPercent"},
{encoder.EncodeDel, "EncodeDel"},
{encoder.EncodeCtl, "EncodeCtl"},
{encoder.EncodeLeftSpace, "EncodeLeftSpace"},
{encoder.EncodeLeftTilde, "EncodeLeftTilde"},
{encoder.EncodeRightSpace, "EncodeRightSpace"},
{encoder.EncodeRightPeriod, "EncodeRightPeriod"},
{encoder.EncodeInvalidUtf8, "EncodeInvalidUtf8"},
}
var edges = []struct {
mask uint
name string
edge int
orig rune
replace rune
}{
{encoder.EncodeLeftSpace, "EncodeLeftSpace", edgeLeft, ' ', '␠'},
{encoder.EncodeLeftTilde, "EncodeLeftTilde", edgeLeft, '~', ''},
{encoder.EncodeRightSpace, "EncodeRightSpace", edgeRight, ' ', '␠'},
{encoder.EncodeRightPeriod, "EncodeRightPeriod", edgeRight, '.', ''},
}
var allMappings = []mapping{{
encoder.EncodeZero, []rune{
0,
}, []rune{
'␀',
}}, {
encoder.EncodeWin, []rune{
':', '?', '"', '*', '<', '>', '|',
}, []rune{
'', '', '', '', '', '', '',
}}, {
encoder.EncodeSlash, []rune{
'/',
}, []rune{
'',
}}, {
encoder.EncodeBackSlash, []rune{
'\\',
}, []rune{
'',
}}, {
encoder.EncodeHashPercent, []rune{
'#', '%',
}, []rune{
'', '',
}}, {
encoder.EncodeDel, []rune{
0x7F,
}, []rune{
'␡',
}}, {
encoder.EncodeCtl,
runeRange(0x01, 0x1F),
runeRange('␁', '␟'),
}}
var (
rng = rand.New(rand.NewSource(42))
printables = runeRange(0x20, 0x7E)
fullwidthPrintables = runeRange(0xFF00, 0xFF5E)
encodables = collectEncodables(allMappings)
encoded = collectEncoded(allMappings)
greek = runeRange(0x03B1, 0x03C9)
)
func main() {
fd, err := os.Create("encoder_cases_test.go")
fatal(err, "Unable to open encoder_cases_test.go:")
defer func() {
fatal(fd.Close(), "Failed to close encoder_cases_test.go:")
}()
fatalW(fd.WriteString(header))("Failed to write header:")
fatalW(fd.WriteString("var testCasesSingle = []testCase{\n\t"))("Write:")
_i := 0
i := func() (r int) {
r, _i = _i, _i+1
return
}
for _, m := range maskBits {
if len(getMapping(m.mask).src) == 0 {
continue
}
if _i != 0 {
fatalW(fd.WriteString(" "))("Write:")
}
in, out := buildTestString(
[]mapping{getMapping(m.mask)}, // pick
[]mapping{getMapping(0)}, // quote
printables, fullwidthPrintables, encodables, encoded, greek) // fill
fatalW(fmt.Fprintf(fd, `{ // %d
mask: %s,
in: %s,
out: %s,
},`, i(), m.name, strconv.Quote(in), strconv.Quote(out)))("Error writing test case:")
}
fatalW(fd.WriteString(`
}
var testCasesSingleEdge = []testCase{
`))("Write:")
_i = 0
for _, e := range edges {
if _i != 0 {
fatalW(fd.WriteString(" "))("Write:")
}
fatalW(fmt.Fprintf(fd, `{ // %d
mask: %s,
in: %s,
out: %s,
},`, i(), e.name, strconv.Quote(string(e.orig)), strconv.Quote(string(e.replace))))("Error writing test case:")
for _, m := range maskBits {
if len(getMapping(m.mask).src) == 0 {
continue
}
pairs := buildEdgeTestString(
e.edge, e.orig, e.replace,
[]mapping{getMapping(0), getMapping(m.mask)}, // quote
printables, fullwidthPrintables, encodables, encoded, greek) // fill
for _, p := range pairs {
fatalW(fmt.Fprintf(fd, ` { // %d
mask: %s | %s,
in: %s,
out: %s,
},`, i(), m.name, e.name, strconv.Quote(p.a), strconv.Quote(p.b)))("Error writing test case:")
}
}
}
fatalW(fmt.Fprintf(fd, ` { // %d
mask: EncodeLeftSpace,
in: " ",
out: "␠ ",
}, { // %d
mask: EncodeLeftTilde,
in: "~~",
out: "~",
}, { // %d
mask: EncodeRightSpace,
in: " ",
out: " ␠",
}, { // %d
mask: EncodeRightPeriod,
in: "..",
out: ".",
}, { // %d
mask: EncodeLeftSpace | EncodeRightPeriod,
in: " .",
out: "␠.",
}, { // %d
mask: EncodeLeftSpace | EncodeRightSpace,
in: " ",
out: "␠",
}, { // %d
mask: EncodeLeftSpace | EncodeRightSpace,
in: " ",
out: "␠␠",
}, { // %d
mask: EncodeLeftSpace | EncodeRightSpace,
in: " ",
out: "␠ ␠",
},
}
`, i(), i(), i(), i(), i(), i(), i(), i()))("Error writing test case:")
}
func fatal(err error, s ...interface{}) {
if err != nil {
log.Fatalln(append(s, err))
}
}
func fatalW(_ int, err error) func(...interface{}) {
if err != nil {
return func(s ...interface{}) {
log.Fatalln(append(s, err))
}
}
return func(s ...interface{}) {}
}
// construct a slice containing the runes between (l)ow (inclusive) and (h)igh (inclusive)
func runeRange(l, h rune) []rune {
if h < l {
panic("invalid range")
}
out := make([]rune, h-l+1)
for i := range out {
out[i] = l + rune(i)
}
return out
}
func getMapping(mask uint) mapping {
for _, m := range allMappings {
if m.mask == mask {
return m
}
}
return mapping{}
}
func collectEncodables(m []mapping) (out []rune) {
for _, s := range m {
for _, r := range s.src {
out = append(out, r)
}
}
return
}
func collectEncoded(m []mapping) (out []rune) {
for _, s := range m {
for _, r := range s.dst {
out = append(out, r)
}
}
return
}
func buildTestString(mappings, testMappings []mapping, fill ...[]rune) (string, string) {
combinedMappings := append(mappings, testMappings...)
var (
rIn []rune
rOut []rune
)
for _, m := range mappings {
if len(m.src) == 0 || len(m.src) != len(m.dst) {
panic("invalid length")
}
rIn = append(rIn, m.src...)
rOut = append(rOut, m.dst...)
}
inL := len(rIn)
testL := inL * 3
if testL < 30 {
testL = 30
}
rIn = append(rIn, make([]rune, testL-inL)...)
rOut = append(rOut, make([]rune, testL-inL)...)
quoteOut := make([]bool, testL)
set := func(i int, in, out rune, quote bool) {
rIn[i] = in
rOut[i] = out
quoteOut[i] = quote
}
for i, r := range rOut[:inL] {
set(inL+i, r, r, true)
}
outer:
for pos := inL * 2; pos < testL; pos++ {
m := pos % len(fill)
i := rng.Intn(len(fill[m]))
r := fill[m][i]
for _, m := range combinedMappings {
if pSrc := runePos(r, m.src); pSrc != -1 {
set(pos, r, m.dst[pSrc], false)
continue outer
} else if pDst := runePos(r, m.dst); pDst != -1 {
set(pos, r, r, true)
continue outer
}
}
set(pos, r, r, false)
}
rng.Shuffle(testL, func(i, j int) {
rIn[i], rIn[j] = rIn[j], rIn[i]
rOut[i], rOut[j] = rOut[j], rOut[i]
quoteOut[i], quoteOut[j] = quoteOut[j], quoteOut[i]
})
var bOut strings.Builder
bOut.Grow(testL)
for i, r := range rOut {
if quoteOut[i] {
bOut.WriteRune(encoder.QuoteRune)
}
bOut.WriteRune(r)
}
return string(rIn), bOut.String()
}
func buildEdgeTestString(edge int, orig, replace rune, testMappings []mapping, fill ...[]rune) (out []stringPair) {
testL := 30
rIn := make([]rune, testL)
rOut := make([]rune, testL)
quoteOut := make([]bool, testL)
set := func(i int, in, out rune, quote bool) {
rIn[i] = in
rOut[i] = out
quoteOut[i] = quote
}
outer:
for pos := 0; pos < testL; pos++ {
m := pos % len(fill)
i := rng.Intn(len(fill[m]))
r := fill[m][i]
for _, m := range testMappings {
if pSrc := runePos(r, m.src); pSrc != -1 {
set(pos, r, m.dst[pSrc], false)
continue outer
} else if pDst := runePos(r, m.dst); pDst != -1 {
set(pos, r, r, true)
continue outer
}
}
set(pos, r, r, false)
}
rng.Shuffle(testL, func(i, j int) {
rIn[i], rIn[j] = rIn[j], rIn[i]
rOut[i], rOut[j] = rOut[j], rOut[i]
quoteOut[i], quoteOut[j] = quoteOut[j], quoteOut[i]
})
set(10, orig, orig, false)
out = append(out, stringPair{string(rIn), quotedToString(rOut, quoteOut)})
for _, i := range []int{0, 1, testL - 2, testL - 1} {
for _, j := range []int{1, testL - 2, testL - 1} {
if j < i {
continue
}
rIn := append([]rune{}, rIn...)
rOut := append([]rune{}, rOut...)
quoteOut := append([]bool{}, quoteOut...)
for _, in := range []rune{orig, replace} {
expect, quote := in, false
if i == 0 && edge == edgeLeft ||
i == testL-1 && edge == edgeRight {
expect, quote = replace, in == replace
}
rIn[i], rOut[i], quoteOut[i] = in, expect, quote
if i != j {
for _, in := range []rune{orig, replace} {
expect, quote = in, false
if j == testL-1 && edge == edgeRight {
expect, quote = replace, in == replace
}
rIn[j], rOut[j], quoteOut[j] = in, expect, quote
}
}
out = append(out, stringPair{string(rIn), quotedToString(rOut, quoteOut)})
}
}
}
return
}
func runePos(r rune, s []rune) int {
for i, c := range s {
if c == r {
return i
}
}
return -1
}
// quotedToString returns a string for the chars slice where a encoder.QuoteRune is
// inserted before a char[i] when quoted[i] is true.
func quotedToString(chars []rune, quoted []bool) string {
var out strings.Builder
out.Grow(len(chars))
for i, r := range chars {
if quoted[i] {
out.WriteRune(encoder.QuoteRune)
}
out.WriteRune(r)
}
return out.String()
}