rclone/lib/encoder/encoder.go

1232 lines
28 KiB
Go
Raw Normal View History

// Package encoder provides functionality to translate file names
// for usage on restrictive storage systems.
//
// The restricted set of characters are mapped to a unicode equivalent version
// (most to their FULLWIDTH variant) to increase compatibility with other
// storage systems.
// See: http://unicode-search.net/unicode-namesearch.pl?term=FULLWIDTH
//
// Encoders will also quote reserved characters to differentiate between
// the raw and encoded forms.
package encoder
import (
"bytes"
"fmt"
"io"
"sort"
"strconv"
"strings"
"unicode/utf8"
)
const (
// adding this to any printable ASCII character turns it into the
// FULLWIDTH variant
fullOffset = 0xFEE0
// the first rune of the SYMBOL FOR block for control characters
symbolOffset = '␀' // SYMBOL FOR NULL
// QuoteRune is the rune used for quoting reserved characters
QuoteRune = '' // SINGLE HIGH-REVERSED-9 QUOTATION MARK
)
// NB keep the tests in fstests/fstests/fstests.go FsEncoding up to date with this
// NB keep the aliases up to date below also
// Possible flags for the MultiEncoder
const (
EncodeZero MultiEncoder = 0 // NUL(0x00)
EncodeSlash MultiEncoder = 1 << iota // /
EncodeLtGt // <>
EncodeDoubleQuote // "
EncodeSingleQuote // '
EncodeBackQuote // `
EncodeDollar // $
EncodeColon // :
EncodeQuestion // ?
EncodeAsterisk // *
EncodePipe // |
EncodeHash // #
EncodePercent // %
EncodeBackSlash // \
EncodeCrLf // CR(0x0D), LF(0x0A)
EncodeDel // DEL(0x7F)
EncodeCtl // CTRL(0x01-0x1F)
EncodeLeftSpace // Leading SPACE
EncodeLeftPeriod // Leading .
EncodeLeftTilde // Leading ~
EncodeLeftCrLfHtVt // Leading CR LF HT VT
EncodeRightSpace // Trailing SPACE
EncodeRightPeriod // Trailing .
EncodeRightCrLfHtVt // Trailing CR LF HT VT
EncodeInvalidUtf8 // Invalid UTF-8 bytes
EncodeDot // . and .. names
EncodeSquareBracket // []
2022-04-21 19:02:27 +08:00
EncodeSemicolon // ;
// Synthetic
EncodeWin = EncodeColon | EncodeQuestion | EncodeDoubleQuote | EncodeAsterisk | EncodeLtGt | EncodePipe // :?"*<>|
EncodeHashPercent = EncodeHash | EncodePercent // #%
)
// Has returns true if flag is contained in mask
func (mask MultiEncoder) Has(flag MultiEncoder) bool {
return mask&flag != 0
}
// Encoder can transform names to and from the original and translated version.
type Encoder interface {
// Encode takes a raw name and substitutes any reserved characters and
// patterns in it
Encode(string) string
// Decode takes a name and undoes any substitutions made by Encode
Decode(string) string
// FromStandardPath takes a / separated path in Standard encoding
// and converts it to a / separated path in this encoding.
FromStandardPath(string) string
// FromStandardName takes name in Standard encoding and converts
// it in this encoding.
FromStandardName(string) string
// ToStandardPath takes a / separated path in this encoding
// and converts it to a / separated path in Standard encoding.
ToStandardPath(string) string
// ToStandardName takes name in this encoding and converts
// it in Standard encoding.
ToStandardName(string) string
}
// MultiEncoder is a configurable Encoder. The Encode* constants in this
// package can be combined using bitwise or (|) to enable handling of multiple
// character classes
type MultiEncoder uint
// Aliases maps encodings to names and vice versa
var (
encodingToName = map[MultiEncoder]string{}
nameToEncoding = map[string]MultiEncoder{}
)
// alias adds an alias for MultiEncoder.String() and MultiEncoder.Set()
func alias(name string, mask MultiEncoder) {
nameToEncoding[name] = mask
// don't overwrite existing reverse translations
if _, ok := encodingToName[mask]; !ok {
encodingToName[mask] = name
}
}
func init() {
alias("None", EncodeZero)
alias("Slash", EncodeSlash)
alias("LtGt", EncodeLtGt)
alias("SquareBracket", EncodeSquareBracket)
2022-04-21 19:02:27 +08:00
alias("Semicolon", EncodeSemicolon)
alias("DoubleQuote", EncodeDoubleQuote)
alias("SingleQuote", EncodeSingleQuote)
alias("BackQuote", EncodeBackQuote)
alias("Dollar", EncodeDollar)
alias("Colon", EncodeColon)
alias("Question", EncodeQuestion)
alias("Asterisk", EncodeAsterisk)
alias("Pipe", EncodePipe)
alias("Hash", EncodeHash)
alias("Percent", EncodePercent)
alias("BackSlash", EncodeBackSlash)
alias("CrLf", EncodeCrLf)
alias("Del", EncodeDel)
alias("Ctl", EncodeCtl)
alias("LeftSpace", EncodeLeftSpace)
alias("LeftPeriod", EncodeLeftPeriod)
alias("LeftTilde", EncodeLeftTilde)
alias("LeftCrLfHtVt", EncodeLeftCrLfHtVt)
alias("RightSpace", EncodeRightSpace)
alias("RightPeriod", EncodeRightPeriod)
alias("RightCrLfHtVt", EncodeRightCrLfHtVt)
alias("InvalidUtf8", EncodeInvalidUtf8)
alias("Dot", EncodeDot)
}
// validStrings returns all the valid MultiEncoder strings
func validStrings() string {
var out []string
for k := range nameToEncoding {
out = append(out, k)
}
sort.Strings(out)
return strings.Join(out, ", ")
}
// String converts the MultiEncoder into text
func (mask MultiEncoder) String() string {
// See if there is an exact translation - if so return that
if name, ok := encodingToName[mask]; ok {
return name
}
var out []string
// Otherwise decompose bit by bit
for bit := MultiEncoder(1); bit != 0; bit *= 2 {
if (mask & bit) != 0 {
if name, ok := encodingToName[bit]; ok {
out = append(out, name)
} else {
out = append(out, fmt.Sprintf("0x%X", uint(bit)))
}
}
}
return strings.Join(out, ",")
}
// Set converts a string into a MultiEncoder
func (mask *MultiEncoder) Set(in string) error {
var out MultiEncoder
parts := strings.Split(in, ",")
for _, part := range parts {
part = strings.TrimSpace(part)
if bits, ok := nameToEncoding[part]; ok {
out |= bits
} else {
i, err := strconv.ParseInt(part, 0, 64)
if err != nil {
return fmt.Errorf("bad encoding %q: possible values are: %s", part, validStrings())
}
out |= MultiEncoder(i)
}
}
*mask = out
return nil
}
// Type returns a textual type of the MultiEncoder to satisfy the pflag.Value interface
func (mask MultiEncoder) Type() string {
return "Encoding"
}
// Scan implements the fmt.Scanner interface
func (mask *MultiEncoder) Scan(s fmt.ScanState, ch rune) error {
token, err := s.Token(true, nil)
if err != nil {
return err
}
return mask.Set(string(token))
}
// Encode takes a raw name and substitutes any reserved characters and
// patterns in it
func (mask MultiEncoder) Encode(in string) string {
if in == "" {
return ""
}
if mask.Has(EncodeDot) {
switch in {
case ".":
return ""
case "..":
return ""
case "":
return string(QuoteRune) + ""
case "":
return string(QuoteRune) + "" + string(QuoteRune) + ""
}
}
// handle prefix only replacements
prefix := ""
if mask.Has(EncodeLeftSpace) { // Leading SPACE
if in[0] == ' ' {
prefix, in = "␠", in[1:] // SYMBOL FOR SPACE
} else if r, l := utf8.DecodeRuneInString(in); r == '␠' { // SYMBOL FOR SPACE
prefix, in = string(QuoteRune)+"␠", in[l:] // SYMBOL FOR SPACE
}
}
2019-09-30 23:41:48 +08:00
if mask.Has(EncodeLeftPeriod) && prefix == "" { // Leading PERIOD
if in[0] == '.' {
prefix, in = "", in[1:] // FULLWIDTH FULL STOP
} else if r, l := utf8.DecodeRuneInString(in); r == '' { // FULLWIDTH FULL STOP
prefix, in = string(QuoteRune)+"", in[l:] // FULLWIDTH FULL STOP
}
}
if mask.Has(EncodeLeftTilde) && prefix == "" { // Leading ~
if in[0] == '~' {
prefix, in = string('~'+fullOffset), in[1:] // FULLWIDTH TILDE
} else if r, l := utf8.DecodeRuneInString(in); r == '~'+fullOffset {
prefix, in = string(QuoteRune)+string('~'+fullOffset), in[l:] // FULLWIDTH TILDE
}
}
if mask.Has(EncodeLeftCrLfHtVt) && prefix == "" { // Leading CR LF HT VT
switch c := in[0]; c {
case '\t', '\n', '\v', '\r':
prefix, in = string('␀'+rune(c)), in[1:] // SYMBOL FOR NULL
default:
switch r, l := utf8.DecodeRuneInString(in); r {
case '␀' + '\t', '␀' + '\n', '␀' + '\v', '␀' + '\r':
prefix, in = string(QuoteRune)+string(r), in[l:]
}
}
}
// handle suffix only replacements
suffix := ""
if in != "" {
if mask.Has(EncodeRightSpace) { // Trailing SPACE
if in[len(in)-1] == ' ' {
suffix, in = "␠", in[:len(in)-1] // SYMBOL FOR SPACE
} else if r, l := utf8.DecodeLastRuneInString(in); r == '␠' {
suffix, in = string(QuoteRune)+"␠", in[:len(in)-l] // SYMBOL FOR SPACE
}
}
if mask.Has(EncodeRightPeriod) && suffix == "" { // Trailing .
if in[len(in)-1] == '.' {
suffix, in = "", in[:len(in)-1] // FULLWIDTH FULL STOP
} else if r, l := utf8.DecodeLastRuneInString(in); r == '' {
suffix, in = string(QuoteRune)+"", in[:len(in)-l] // FULLWIDTH FULL STOP
}
}
if mask.Has(EncodeRightCrLfHtVt) && suffix == "" { // Trailing .
switch c := in[len(in)-1]; c {
case '\t', '\n', '\v', '\r':
suffix, in = string('␀'+rune(c)), in[:len(in)-1] // FULLWIDTH FULL STOP
default:
switch r, l := utf8.DecodeLastRuneInString(in); r {
case '␀' + '\t', '␀' + '\n', '␀' + '\v', '␀' + '\r':
suffix, in = string(QuoteRune)+string(r), in[:len(in)-l]
}
}
}
}
index := 0
if prefix == "" && suffix == "" {
// find the first rune which (most likely) needs to be replaced
index = strings.IndexFunc(in, func(r rune) bool {
switch r {
case 0, '␀', QuoteRune, utf8.RuneError:
return true
}
if mask.Has(EncodeAsterisk) { // *
switch r {
case '*',
'':
return true
}
}
if mask.Has(EncodeLtGt) { // <>
switch r {
case '<', '>',
'', '':
return true
}
}
if mask.Has(EncodeSquareBracket) { // []
switch r {
case '[', ']',
'', '':
return true
}
}
2022-04-21 19:02:27 +08:00
if mask.Has(EncodeSemicolon) { // ;
switch r {
case ';', '':
return true
}
}
if mask.Has(EncodeQuestion) { // ?
switch r {
case '?',
'':
return true
}
}
if mask.Has(EncodeColon) { // :
switch r {
case ':',
'':
return true
}
}
if mask.Has(EncodePipe) { // |
switch r {
case '|',
'':
return true
}
}
if mask.Has(EncodeDoubleQuote) { // "
switch r {
case '"',
'':
return true
}
}
if mask.Has(EncodeSingleQuote) { // '
switch r {
case '\'',
'':
return true
}
}
if mask.Has(EncodeBackQuote) { // `
switch r {
case '`',
'':
return true
}
}
if mask.Has(EncodeDollar) { // $
switch r {
case '$',
'':
return true
}
}
if mask.Has(EncodeSlash) { // /
switch r {
case '/',
'':
return true
}
}
if mask.Has(EncodeBackSlash) { // \
switch r {
case '\\',
'':
return true
}
}
if mask.Has(EncodeCrLf) { // CR LF
2019-09-21 06:04:31 +08:00
switch r {
case rune(0x0D), rune(0x0A),
'␍', '␊':
return true
}
}
if mask.Has(EncodeHash) { // #
switch r {
case '#',
'':
return true
}
}
if mask.Has(EncodePercent) { // %
switch r {
case '%',
'':
return true
}
}
if mask.Has(EncodeDel) { // DEL(0x7F)
switch r {
case rune(0x7F), '␡':
return true
}
}
if mask.Has(EncodeCtl) { // CTRL(0x01-0x1F)
if r >= 1 && r <= 0x1F {
return true
} else if r > symbolOffset && r <= symbolOffset+0x1F {
return true
}
}
return false
})
}
// nothing to replace, return input
if index == -1 {
return in
}
var out bytes.Buffer
out.Grow(len(in) + len(prefix) + len(suffix))
out.WriteString(prefix)
// copy the clean part of the input and skip it
out.WriteString(in[:index])
in = in[index:]
for i, r := range in {
switch r {
case 0:
out.WriteRune(symbolOffset)
continue
case '␀', QuoteRune:
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
case utf8.RuneError:
if mask.Has(EncodeInvalidUtf8) {
// only encode invalid sequences and not utf8.RuneError
if i+3 > len(in) || in[i:i+3] != string(utf8.RuneError) {
_, l := utf8.DecodeRuneInString(in[i:])
appendQuotedBytes(&out, in[i:i+l])
continue
}
} else {
// append the real bytes instead of utf8.RuneError
_, l := utf8.DecodeRuneInString(in[i:])
out.WriteString(in[i : i+l])
continue
}
}
if mask.Has(EncodeAsterisk) { // *
switch r {
case '*':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeLtGt) { // <>
switch r {
case '<', '>':
out.WriteRune(r + fullOffset)
continue
case '', '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeSquareBracket) { // []
switch r {
case '[', ']':
out.WriteRune(r + fullOffset)
continue
case '', '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
2022-04-21 19:02:27 +08:00
if mask.Has(EncodeSemicolon) { // ;
switch r {
case ';':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeQuestion) { // ?
switch r {
case '?':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeColon) { // :
switch r {
case ':':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodePipe) { // |
switch r {
case '|':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeDoubleQuote) { // "
switch r {
case '"':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeSingleQuote) { // '
switch r {
case '\'':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeBackQuote) { // `
switch r {
case '`':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeDollar) { // $
switch r {
case '$':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeSlash) { // /
switch r {
case '/':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeBackSlash) { // \
switch r {
case '\\':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeCrLf) { // CR LF
2019-09-21 06:04:31 +08:00
switch r {
case rune(0x0D), rune(0x0A):
out.WriteRune(r + symbolOffset)
continue
case '␍', '␊':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeHash) { // #
switch r {
case '#':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodePercent) { // %
switch r {
case '%':
out.WriteRune(r + fullOffset)
continue
case '':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeDel) { // DEL(0x7F)
switch r {
case rune(0x7F):
out.WriteRune('␡') // SYMBOL FOR DELETE
continue
case '␡':
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
if mask.Has(EncodeCtl) { // CTRL(0x01-0x1F)
if r >= 1 && r <= 0x1F {
out.WriteRune('␀' + r) // SYMBOL FOR NULL
continue
} else if r > symbolOffset && r <= symbolOffset+0x1F {
out.WriteRune(QuoteRune)
out.WriteRune(r)
continue
}
}
out.WriteRune(r)
}
out.WriteString(suffix)
return out.String()
}
// Decode takes a name and undoes any substitutions made by Encode
func (mask MultiEncoder) Decode(in string) string {
if mask.Has(EncodeDot) {
switch in {
case "":
return "."
case "":
return ".."
case string(QuoteRune) + "":
return ""
case string(QuoteRune) + "" + string(QuoteRune) + "":
return ""
}
}
// handle prefix only replacements
prefix := ""
if r, l1 := utf8.DecodeRuneInString(in); mask.Has(EncodeLeftSpace) && r == '␠' { // SYMBOL FOR SPACE
prefix, in = " ", in[l1:]
2019-09-30 23:41:48 +08:00
} else if mask.Has(EncodeLeftPeriod) && r == '' { // FULLWIDTH FULL STOP
prefix, in = ".", in[l1:]
} else if mask.Has(EncodeLeftTilde) && r == '' { // FULLWIDTH TILDE
prefix, in = "~", in[l1:]
} else if mask.Has(EncodeLeftCrLfHtVt) && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
prefix, in = string(r-'␀'), in[l1:]
} else if r == QuoteRune {
if r, l2 := utf8.DecodeRuneInString(in[l1:]); mask.Has(EncodeLeftSpace) && r == '␠' { // SYMBOL FOR SPACE
prefix, in = "␠", in[l1+l2:]
2019-09-30 23:41:48 +08:00
} else if mask.Has(EncodeLeftPeriod) && r == '' { // FULLWIDTH FULL STOP
prefix, in = "", in[l1+l2:]
} else if mask.Has(EncodeLeftTilde) && r == '' { // FULLWIDTH TILDE
prefix, in = "", in[l1+l2:]
} else if mask.Has(EncodeLeftCrLfHtVt) && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
prefix, in = string(r), in[l1+l2:]
}
}
// handle suffix only replacements
suffix := ""
if r, l := utf8.DecodeLastRuneInString(in); mask.Has(EncodeRightSpace) && r == '␠' { // SYMBOL FOR SPACE
in = in[:len(in)-l]
if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
suffix, in = "␠", in[:len(in)-l2]
} else {
suffix = " "
}
} else if mask.Has(EncodeRightPeriod) && r == '' { // FULLWIDTH FULL STOP
in = in[:len(in)-l]
if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
suffix, in = "", in[:len(in)-l2]
} else {
suffix = "."
}
} else if mask.Has(EncodeRightCrLfHtVt) && (r == '␀'+'\t' || r == '␀'+'\n' || r == '␀'+'\v' || r == '␀'+'\r') {
in = in[:len(in)-l]
if q, l2 := utf8.DecodeLastRuneInString(in); q == QuoteRune {
suffix, in = string(r), in[:len(in)-l2]
} else {
suffix = string(r - '␀')
}
}
index := 0
if prefix == "" && suffix == "" {
// find the first rune which (most likely) needs to be replaced
index = strings.IndexFunc(in, func(r rune) bool {
switch r {
case '␀', QuoteRune:
return true
}
if mask.Has(EncodeAsterisk) { // *
switch r {
case '':
return true
}
}
if mask.Has(EncodeLtGt) { // <>
switch r {
case '', '':
return true
}
}
if mask.Has(EncodeSquareBracket) { // []
switch r {
case '', '':
return true
}
}
2022-04-21 19:02:27 +08:00
if mask.Has(EncodeSemicolon) { // ;
switch r {
case '':
return true
}
}
if mask.Has(EncodeQuestion) { // ?
switch r {
case '':
return true
}
}
if mask.Has(EncodeColon) { // :
switch r {
case '':
return true
}
}
if mask.Has(EncodePipe) { // |
switch r {
case '':
return true
}
}
if mask.Has(EncodeDoubleQuote) { // "
switch r {
case '':
return true
}
}
if mask.Has(EncodeSingleQuote) { // '
switch r {
case '':
return true
}
}
if mask.Has(EncodeBackQuote) { // `
switch r {
case '':
return true
}
}
if mask.Has(EncodeDollar) { // $
switch r {
case '':
return true
}
}
if mask.Has(EncodeSlash) { // /
switch r {
case '':
return true
}
}
if mask.Has(EncodeBackSlash) { // \
switch r {
case '':
return true
}
}
if mask.Has(EncodeCrLf) { // CR LF
2019-09-21 06:04:31 +08:00
switch r {
case '␍', '␊':
return true
}
}
if mask.Has(EncodeHash) { // #
switch r {
case '':
return true
}
}
if mask.Has(EncodePercent) { // %
switch r {
case '':
return true
}
}
if mask.Has(EncodeDel) { // DEL(0x7F)
switch r {
case '␡':
return true
}
}
if mask.Has(EncodeCtl) { // CTRL(0x01-0x1F)
if r > symbolOffset && r <= symbolOffset+0x1F {
return true
}
}
return false
})
}
// nothing to replace, return input
if index == -1 {
return in
}
var out bytes.Buffer
out.Grow(len(in))
out.WriteString(prefix)
// copy the clean part of the input and skip it
out.WriteString(in[:index])
in = in[index:]
var unquote, unquoteNext, skipNext bool
for i, r := range in {
if skipNext {
skipNext = false
continue
}
unquote, unquoteNext = unquoteNext, false
switch r {
case '␀': // SYMBOL FOR NULL
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(0)
}
continue
case QuoteRune:
if unquote {
out.WriteRune(r)
} else {
unquoteNext = true
}
continue
}
if mask.Has(EncodeAsterisk) { // *
switch r {
case '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeLtGt) { // <>
switch r {
case '', '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeSquareBracket) { // []
switch r {
case '', '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
2022-04-21 19:02:27 +08:00
if mask.Has(EncodeSemicolon) { // ;
switch r {
case '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeQuestion) { // ?
switch r {
case '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeColon) { // :
switch r {
case '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodePipe) { // |
switch r {
case '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeDoubleQuote) { // "
switch r {
case '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeSingleQuote) { // '
switch r {
case '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeBackQuote) { // `
switch r {
case '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeDollar) { // $
switch r {
case '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeSlash) { // /
switch r {
case '': // FULLWIDTH SOLIDUS
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeBackSlash) { // \
switch r {
case '': // FULLWIDTH REVERSE SOLIDUS
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeCrLf) { // CR LF
2019-09-21 06:04:31 +08:00
switch r {
case '␍', '␊':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - symbolOffset)
}
continue
}
}
if mask.Has(EncodeHash) { // %
switch r {
case '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodePercent) { // %
switch r {
case '':
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - fullOffset)
}
continue
}
}
if mask.Has(EncodeDel) { // DEL(0x7F)
switch r {
case '␡': // SYMBOL FOR DELETE
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(0x7F)
}
continue
}
}
if mask.Has(EncodeCtl) { // CTRL(0x01-0x1F)
if r > symbolOffset && r <= symbolOffset+0x1F {
if unquote {
out.WriteRune(r)
} else {
out.WriteRune(r - symbolOffset)
}
continue
}
}
if unquote {
if mask.Has(EncodeInvalidUtf8) {
skipNext = appendUnquotedByte(&out, in[i:])
if skipNext {
continue
}
}
out.WriteRune(QuoteRune)
}
switch r {
case utf8.RuneError:
// append the real bytes instead of utf8.RuneError
_, l := utf8.DecodeRuneInString(in[i:])
out.WriteString(in[i : i+l])
continue
}
out.WriteRune(r)
}
if unquoteNext {
out.WriteRune(QuoteRune)
}
out.WriteString(suffix)
return out.String()
}
// FromStandardPath takes a / separated path in Standard encoding
// and converts it to a / separated path in this encoding.
func (mask MultiEncoder) FromStandardPath(s string) string {
return FromStandardPath(mask, s)
}
// FromStandardName takes name in Standard encoding and converts
// it in this encoding.
func (mask MultiEncoder) FromStandardName(s string) string {
return FromStandardName(mask, s)
}
// ToStandardPath takes a / separated path in this encoding
// and converts it to a / separated path in Standard encoding.
func (mask MultiEncoder) ToStandardPath(s string) string {
return ToStandardPath(mask, s)
}
// ToStandardName takes name in this encoding and converts
// it in Standard encoding.
func (mask MultiEncoder) ToStandardName(s string) string {
return ToStandardName(mask, s)
}
func appendQuotedBytes(w io.Writer, s string) {
for _, b := range []byte(s) {
_, _ = fmt.Fprintf(w, string(QuoteRune)+"%02X", b)
}
}
func appendUnquotedByte(w io.Writer, s string) bool {
if len(s) < 2 {
return false
}
u, err := strconv.ParseUint(s[:2], 16, 8)
if err != nil {
return false
}
n, _ := w.Write([]byte{byte(u)})
return n == 1
}
type identity struct{}
func (identity) Encode(in string) string { return in }
func (identity) Decode(in string) string { return in }
func (i identity) FromStandardPath(s string) string {
return FromStandardPath(i, s)
}
func (i identity) FromStandardName(s string) string {
return FromStandardName(i, s)
}
func (i identity) ToStandardPath(s string) string {
return ToStandardPath(i, s)
}
func (i identity) ToStandardName(s string) string {
return ToStandardName(i, s)
}
// Identity returns an Encoder that always returns the input value
func Identity() Encoder {
return identity{}
}
// FromStandardPath takes a / separated path in Standard encoding
// and converts it to a / separated path in the given encoding.
func FromStandardPath(e Encoder, s string) string {
if e == Standard {
return s
}
parts := strings.Split(s, "/")
encoded := make([]string, len(parts))
changed := false
for i, p := range parts {
enc := FromStandardName(e, p)
changed = changed || enc != p
encoded[i] = enc
}
if !changed {
return s
}
return strings.Join(encoded, "/")
}
// FromStandardName takes name in Standard encoding and converts
// it in the given encoding.
func FromStandardName(e Encoder, s string) string {
if e == Standard {
return s
}
return e.Encode(Standard.Decode(s))
}
// ToStandardPath takes a / separated path in the given encoding
// and converts it to a / separated path in Standard encoding.
func ToStandardPath(e Encoder, s string) string {
if e == Standard {
return s
}
parts := strings.Split(s, "/")
encoded := make([]string, len(parts))
changed := false
for i, p := range parts {
dec := ToStandardName(e, p)
changed = changed || dec != p
encoded[i] = dec
}
if !changed {
return s
}
return strings.Join(encoded, "/")
}
// ToStandardName takes name in the given encoding and converts
// it in Standard encoding.
func ToStandardName(e Encoder, s string) string {
if e == Standard {
return s
}
return Standard.Encode(e.Decode(s))
}