From 960150bb034dc9a549ee7289b1a4eb4abafeb30a Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Sat, 25 Feb 2023 19:34:27 -0500 Subject: [PATCH] caddyfile: Implement heredoc support (#5385) --- caddyconfig/caddyfile/dispenser.go | 58 ++--- caddyconfig/caddyfile/lexer.go | 226 ++++++++++++++---- caddyconfig/caddyfile/lexer_test.go | 125 +++++++++- .../caddyfile/testdata/import_args0.txt | 2 +- .../caddyfile/testdata/import_args1.txt | 2 +- .../integration/caddyfile_adapt/heredoc.txt | 50 ++++ .../caddyfile_adapt/import_args_snippet.txt | 2 +- .../caddyfile_adapt/reverse_proxy_options.txt | 4 +- .../integration/testdata/import_respond.txt | 2 +- modules/caddyhttp/celmatcher.go | 6 +- .../reverseproxy/fastcgi/caddyfile.go | 42 +--- .../reverseproxy/forwardauth/caddyfile.go | 13 +- 12 files changed, 397 insertions(+), 135 deletions(-) create mode 100644 caddytest/integration/caddyfile_adapt/heredoc.txt diff --git a/caddyconfig/caddyfile/dispenser.go b/caddyconfig/caddyfile/dispenser.go index 91bd9a502..0daed3c44 100644 --- a/caddyconfig/caddyfile/dispenser.go +++ b/caddyconfig/caddyfile/dispenser.go @@ -20,7 +20,6 @@ import ( "io" "log" "strconv" - "strings" ) // Dispenser is a type that dispenses tokens, similarly to a lexer, @@ -101,12 +100,12 @@ func (d *Dispenser) nextOnSameLine() bool { d.cursor++ return true } - if d.cursor >= len(d.tokens) { + if d.cursor >= len(d.tokens)-1 { return false } - if d.cursor < len(d.tokens)-1 && - d.tokens[d.cursor].File == d.tokens[d.cursor+1].File && - d.tokens[d.cursor].Line+d.numLineBreaks(d.cursor) == d.tokens[d.cursor+1].Line { + curr := d.tokens[d.cursor] + next := d.tokens[d.cursor+1] + if curr.File == next.File && curr.Line+curr.NumLineBreaks() == next.Line { d.cursor++ return true } @@ -122,12 +121,12 @@ func (d *Dispenser) NextLine() bool { d.cursor++ return true } - if d.cursor >= len(d.tokens) { + if d.cursor >= len(d.tokens)-1 { return false } - if d.cursor < len(d.tokens)-1 && - (d.tokens[d.cursor].File != d.tokens[d.cursor+1].File || - d.tokens[d.cursor].Line+d.numLineBreaks(d.cursor) < d.tokens[d.cursor+1].Line) { + curr := d.tokens[d.cursor] + next := d.tokens[d.cursor+1] + if curr.File != next.File || curr.Line+curr.NumLineBreaks() < next.Line { d.cursor++ return true } @@ -203,14 +202,17 @@ func (d *Dispenser) Val() string { } // ValRaw gets the raw text of the current token (including quotes). +// If the token was a heredoc, then the delimiter is not included, +// because that is not relevant to any unmarshaling logic at this time. // If there is no token loaded, it returns empty string. func (d *Dispenser) ValRaw() string { if d.cursor < 0 || d.cursor >= len(d.tokens) { return "" } quote := d.tokens[d.cursor].wasQuoted - if quote > 0 { - return string(quote) + d.tokens[d.cursor].Text + string(quote) // string literal + if quote > 0 && quote != '<' { + // string literal + return string(quote) + d.tokens[d.cursor].Text + string(quote) } return d.tokens[d.cursor].Text } @@ -438,14 +440,14 @@ func (d *Dispenser) Delete() []Token { return d.tokens } -// numLineBreaks counts how many line breaks are in the token -// value given by the token index tknIdx. It returns 0 if the -// token does not exist or there are no line breaks. -func (d *Dispenser) numLineBreaks(tknIdx int) int { - if tknIdx < 0 || tknIdx >= len(d.tokens) { - return 0 +// DeleteN is the same as Delete, but can delete many tokens at once. +// If there aren't N tokens available to delete, none are deleted. +func (d *Dispenser) DeleteN(amount int) []Token { + if amount > 0 && d.cursor >= (amount-1) && d.cursor <= len(d.tokens)-1 { + d.tokens = append(d.tokens[:d.cursor-(amount-1)], d.tokens[d.cursor+1:]...) + d.cursor -= amount } - return strings.Count(d.tokens[tknIdx].Text, "\n") + return d.tokens } // isNewLine determines whether the current token is on a different @@ -468,18 +470,10 @@ func (d *Dispenser) isNewLine() bool { return true } - // The previous token may contain line breaks if - // it was quoted and spanned multiple lines. e.g: - // - // dir "foo - // bar - // baz" - prevLineBreaks := d.numLineBreaks(d.cursor - 1) - // If the previous token (incl line breaks) ends // on a line earlier than the current token, // then the current token is on a new line - return prev.Line+prevLineBreaks < curr.Line + return prev.Line+prev.NumLineBreaks() < curr.Line } // isNextOnNewLine determines whether the current token is on a different @@ -502,16 +496,8 @@ func (d *Dispenser) isNextOnNewLine() bool { return true } - // The current token may contain line breaks if - // it was quoted and spanned multiple lines. e.g: - // - // dir "foo - // bar - // baz" - currLineBreaks := d.numLineBreaks(d.cursor) - // If the current token (incl line breaks) ends // on a line earlier than the next token, // then the next token is on a new line - return curr.Line+currLineBreaks < next.Line + return curr.Line+curr.NumLineBreaks() < next.Line } diff --git a/caddyconfig/caddyfile/lexer.go b/caddyconfig/caddyfile/lexer.go index 09b04c17e..ba8b87987 100644 --- a/caddyconfig/caddyfile/lexer.go +++ b/caddyconfig/caddyfile/lexer.go @@ -17,7 +17,10 @@ package caddyfile import ( "bufio" "bytes" + "fmt" "io" + "regexp" + "strings" "unicode" ) @@ -35,30 +38,39 @@ type ( // Token represents a single parsable unit. Token struct { - File string - origFile string - Line int - Text string - wasQuoted rune // enclosing quote character, if any - snippetName string + File string + origFile string + Line int + Text string + wasQuoted rune // enclosing quote character, if any + heredocMarker string + snippetName string } ) -// originalFile gets original filename before import modification. -func (t Token) originalFile() string { - if t.origFile != "" { - return t.origFile +// Tokenize takes bytes as input and lexes it into +// a list of tokens that can be parsed as a Caddyfile. +// Also takes a filename to fill the token's File as +// the source of the tokens, which is important to +// determine relative paths for `import` directives. +func Tokenize(input []byte, filename string) ([]Token, error) { + l := lexer{} + if err := l.load(bytes.NewReader(input)); err != nil { + return nil, err } - return t.File -} - -// updateFile updates the token's source filename for error display -// and remembers the original filename. Used during "import" processing. -func (t *Token) updateFile(file string) { - if t.origFile == "" { - t.origFile = t.File + var tokens []Token + for { + found, err := l.next() + if err != nil { + return nil, err + } + if !found { + break + } + l.token.File = filename + tokens = append(tokens, l.token) } - t.File = file + return tokens, nil } // load prepares the lexer to scan an input for tokens. @@ -92,28 +104,93 @@ func (l *lexer) load(input io.Reader) error { // may be escaped. The rest of the line is skipped // if a "#" character is read in. Returns true if // a token was loaded; false otherwise. -func (l *lexer) next() bool { +func (l *lexer) next() (bool, error) { var val []rune - var comment, quoted, btQuoted, escaped bool + var comment, quoted, btQuoted, inHeredoc, heredocEscaped, escaped bool + var heredocMarker string makeToken := func(quoted rune) bool { l.token.Text = string(val) l.token.wasQuoted = quoted + l.token.heredocMarker = heredocMarker return true } for { + // Read a character in; if err then if we had + // read some characters, make a token. If we + // reached EOF, then no more tokens to read. + // If no EOF, then we had a problem. ch, _, err := l.reader.ReadRune() if err != nil { if len(val) > 0 { - return makeToken(0) + if inHeredoc { + return false, fmt.Errorf("incomplete heredoc <<%s on line #%d, expected ending marker %s", heredocMarker, l.line+l.skippedLines, heredocMarker) + } + + return makeToken(0), nil } if err == io.EOF { - return false + return false, nil } - panic(err) + return false, err } + // detect whether we have the start of a heredoc + if !inHeredoc && !heredocEscaped && len(val) > 1 && string(val[:2]) == "<<" { + if ch == '<' { + return false, fmt.Errorf("too many '<' for heredoc on line #%d; only use two, for example < len(heredocMarker) && heredocMarker == string(val[len(val)-len(heredocMarker):]) { + // set the final value + val, err = l.finalizeHeredoc(val, heredocMarker) + if err != nil { + return false, err + } + + // set the line counter, and make the token + l.line += l.skippedLines + l.skippedLines = 0 + return makeToken('<'), nil + } + + // stay in the heredoc until we find the ending marker + continue + } + + // track whether we found an escape '\' for the next + // iteration to be contextually aware if !escaped && !btQuoted && ch == '\\' { escaped = true continue @@ -128,26 +205,29 @@ func (l *lexer) next() bool { } escaped = false } else { - if quoted && ch == '"' { - return makeToken('"') - } - if btQuoted && ch == '`' { - return makeToken('`') + if (quoted && ch == '"') || (btQuoted && ch == '`') { + return makeToken(ch), nil } } + // allow quoted text to wrap continue on multiple lines if ch == '\n' { l.line += 1 + l.skippedLines l.skippedLines = 0 } + // collect this character as part of the quoted token val = append(val, ch) continue } if unicode.IsSpace(ch) { + // ignore CR altogether, we only actually care about LF (\n) if ch == '\r' { continue } + // end of the line if ch == '\n' { + // newlines can be escaped to chain arguments + // onto multiple lines; else, increment the line count if escaped { l.skippedLines++ escaped = false @@ -155,14 +235,18 @@ func (l *lexer) next() bool { l.line += 1 + l.skippedLines l.skippedLines = 0 } + // comments (#) are single-line only comment = false } + // any kind of space means we're at the end of this token if len(val) > 0 { - return makeToken(0) + return makeToken(0), nil } continue } + // comments must be at the start of a token, + // in other words, preceded by space or newline if ch == '#' && len(val) == 0 { comment = true } @@ -183,7 +267,12 @@ func (l *lexer) next() bool { } if escaped { - val = append(val, '\\') + // allow escaping the first < to skip the heredoc syntax + if ch == '<' { + heredocEscaped = true + } else { + val = append(val, '\\') + } escaped = false } @@ -191,24 +280,71 @@ func (l *lexer) next() bool { } } -// Tokenize takes bytes as input and lexes it into -// a list of tokens that can be parsed as a Caddyfile. -// Also takes a filename to fill the token's File as -// the source of the tokens, which is important to -// determine relative paths for `import` directives. -func Tokenize(input []byte, filename string) ([]Token, error) { - l := lexer{} - if err := l.load(bytes.NewReader(input)); err != nil { - return nil, err +// finalizeHeredoc takes the runes read as the heredoc text and the marker, +// and processes the text to strip leading whitespace, returning the final +// value without the leading whitespace. +func (l *lexer) finalizeHeredoc(val []rune, marker string) ([]rune, error) { + // find the last newline of the heredoc, which is where the contents end + lastNewline := strings.LastIndex(string(val), "\n") + + // collapse the content, then split into separate lines + lines := strings.Split(string(val[:lastNewline+1]), "\n") + + // figure out how much whitespace we need to strip from the front of every line + // by getting the string that precedes the marker, on the last line + paddingToStrip := string(val[lastNewline+1 : len(val)-len(marker)]) + + // iterate over each line and strip the whitespace from the front + var out string + for lineNum, lineText := range lines[:len(lines)-1] { + // find an exact match for the padding + index := strings.Index(lineText, paddingToStrip) + + // if the padding doesn't match exactly at the start then we can't safely strip + if index != 0 { + return nil, fmt.Errorf("mismatched leading whitespace in heredoc <<%s on line #%d [%s], expected whitespace [%s] to match the closing marker", marker, l.line+lineNum+1, lineText, paddingToStrip) + } + + // strip, then append the line, with the newline, to the output. + // also removes all "\r" because Windows. + out += strings.ReplaceAll(lineText[len(paddingToStrip):]+"\n", "\r", "") } - var tokens []Token - for l.next() { - l.token.File = filename - tokens = append(tokens, l.token) + + // return the final value + return []rune(out), nil +} + +// originalFile gets original filename before import modification. +func (t Token) originalFile() string { + if t.origFile != "" { + return t.origFile } - return tokens, nil + return t.File +} + +// updateFile updates the token's source filename for error display +// and remembers the original filename. Used during "import" processing. +func (t *Token) updateFile(file string) { + if t.origFile == "" { + t.origFile = t.File + } + t.File = file } func (t Token) Quoted() bool { return t.wasQuoted > 0 } + +// NumLineBreaks counts how many line breaks are in the token text. +func (t Token) NumLineBreaks() int { + lineBreaks := strings.Count(t.Text, "\n") + if t.wasQuoted == '<' { + // heredocs have an extra linebreak because the opening + // delimiter is on its own line and is not included in + // the token Text itself + lineBreaks++ + } + return lineBreaks +} + +var heredocMarkerRegexp = regexp.MustCompile("^[A-Za-z0-9_-]+$") diff --git a/caddyconfig/caddyfile/lexer_test.go b/caddyconfig/caddyfile/lexer_test.go index 30ee0f632..3c7e157ea 100644 --- a/caddyconfig/caddyfile/lexer_test.go +++ b/caddyconfig/caddyfile/lexer_test.go @@ -18,13 +18,13 @@ import ( "testing" ) -type lexerTestCase struct { - input []byte - expected []Token -} - func TestLexer(t *testing.T) { - testCases := []lexerTestCase{ + testCases := []struct { + input []byte + expected []Token + expectErr bool + errorMessage string + }{ { input: []byte(`host:123`), expected: []Token{ @@ -249,10 +249,123 @@ func TestLexer(t *testing.T) { {Line: 1, Text: `quotes`}, }, }, + { + input: []byte(`heredoc < + Foo + Foo + + EOF 200 +} +---------- +{ + "apps": { + "http": { + "servers": { + "srv0": { + "listen": [ + ":443" + ], + "routes": [ + { + "match": [ + { + "host": [ + "example.com" + ] + } + ], + "handle": [ + { + "handler": "subroute", + "routes": [ + { + "handle": [ + { + "body": "\u003chtml\u003e\n \u003chead\u003e\u003ctitle\u003eFoo\u003c/title\u003e\n \u003cbody\u003eFoo\u003c/body\u003e\n\u003c/html\u003e\n", + "handler": "static_response", + "status_code": 200 + } + ] + } + ] + } + ], + "terminal": true + } + ] + } + } + } + } +} \ No newline at end of file diff --git a/caddytest/integration/caddyfile_adapt/import_args_snippet.txt b/caddytest/integration/caddyfile_adapt/import_args_snippet.txt index 9fce9abe0..10d9f4cfc 100644 --- a/caddytest/integration/caddyfile_adapt/import_args_snippet.txt +++ b/caddytest/integration/caddyfile_adapt/import_args_snippet.txt @@ -1,6 +1,6 @@ (logging) { log { - output file /var/log/caddy/{args.0}.access.log + output file /var/log/caddy/{args[0]}.access.log } } diff --git a/caddytest/integration/caddyfile_adapt/reverse_proxy_options.txt b/caddytest/integration/caddyfile_adapt/reverse_proxy_options.txt index b22333a4c..f6420ca01 100644 --- a/caddytest/integration/caddyfile_adapt/reverse_proxy_options.txt +++ b/caddytest/integration/caddyfile_adapt/reverse_proxy_options.txt @@ -6,7 +6,7 @@ https://example.com { method GET rewrite /rewritten?uri={uri} - buffer_requests + request_buffers 4KB transport http { read_buffer 10MB @@ -54,7 +54,6 @@ https://example.com { { "handle": [ { - "buffer_requests": true, "handler": "reverse_proxy", "headers": { "request": { @@ -68,6 +67,7 @@ https://example.com { } } }, + "request_buffers": 4000, "rewrite": { "method": "GET", "uri": "/rewritten?uri={http.request.uri}" diff --git a/caddytest/integration/testdata/import_respond.txt b/caddytest/integration/testdata/import_respond.txt index 0907fbeb7..45130885c 100644 --- a/caddytest/integration/testdata/import_respond.txt +++ b/caddytest/integration/testdata/import_respond.txt @@ -1 +1 @@ -respond "'I am {args.0}', hears {args.1}" \ No newline at end of file +respond "'I am {args[0]}', hears {args[1]}" \ No newline at end of file diff --git a/modules/caddyhttp/celmatcher.go b/modules/caddyhttp/celmatcher.go index 60ca00b6f..c4635e2a1 100644 --- a/modules/caddyhttp/celmatcher.go +++ b/modules/caddyhttp/celmatcher.go @@ -191,15 +191,17 @@ func (m MatchExpression) caddyPlaceholderFunc(lhs, rhs ref.Val) ref.Val { celReq, ok := lhs.(celHTTPRequest) if !ok { return types.NewErr( - "invalid request of type '%v' to "+placeholderFuncName+"(request, placeholderVarName)", + "invalid request of type '%v' to %s(request, placeholderVarName)", lhs.Type(), + placeholderFuncName, ) } phStr, ok := rhs.(types.String) if !ok { return types.NewErr( - "invalid placeholder variable name of type '%v' to "+placeholderFuncName+"(request, placeholderVarName)", + "invalid placeholder variable name of type '%v' to %s(request, placeholderVarName)", rhs.Type(), + placeholderFuncName, ) } diff --git a/modules/caddyhttp/reverseproxy/fastcgi/caddyfile.go b/modules/caddyhttp/reverseproxy/fastcgi/caddyfile.go index 799050e97..4687e6865 100644 --- a/modules/caddyhttp/reverseproxy/fastcgi/caddyfile.go +++ b/modules/caddyhttp/reverseproxy/fastcgi/caddyfile.go @@ -217,25 +217,18 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error return nil, dispenser.ArgErr() } fcgiTransport.Root = dispenser.Val() - dispenser.Delete() - dispenser.Delete() + dispenser.DeleteN(2) case "split": extensions = dispenser.RemainingArgs() - dispenser.Delete() - for range extensions { - dispenser.Delete() - } + dispenser.DeleteN(len(extensions) + 1) if len(extensions) == 0 { return nil, dispenser.ArgErr() } case "env": args := dispenser.RemainingArgs() - dispenser.Delete() - for range args { - dispenser.Delete() - } + dispenser.DeleteN(len(args) + 1) if len(args) != 2 { return nil, dispenser.ArgErr() } @@ -246,10 +239,7 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error case "index": args := dispenser.RemainingArgs() - dispenser.Delete() - for range args { - dispenser.Delete() - } + dispenser.DeleteN(len(args) + 1) if len(args) != 1 { return nil, dispenser.ArgErr() } @@ -257,10 +247,7 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error case "try_files": args := dispenser.RemainingArgs() - dispenser.Delete() - for range args { - dispenser.Delete() - } + dispenser.DeleteN(len(args) + 1) if len(args) < 1 { return nil, dispenser.ArgErr() } @@ -268,10 +255,7 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error case "resolve_root_symlink": args := dispenser.RemainingArgs() - dispenser.Delete() - for range args { - dispenser.Delete() - } + dispenser.DeleteN(len(args) + 1) fcgiTransport.ResolveRootSymlink = true case "dial_timeout": @@ -283,8 +267,7 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error return nil, dispenser.Errf("bad timeout value %s: %v", dispenser.Val(), err) } fcgiTransport.DialTimeout = caddy.Duration(dur) - dispenser.Delete() - dispenser.Delete() + dispenser.DeleteN(2) case "read_timeout": if !dispenser.NextArg() { @@ -295,8 +278,7 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error return nil, dispenser.Errf("bad timeout value %s: %v", dispenser.Val(), err) } fcgiTransport.ReadTimeout = caddy.Duration(dur) - dispenser.Delete() - dispenser.Delete() + dispenser.DeleteN(2) case "write_timeout": if !dispenser.NextArg() { @@ -307,15 +289,11 @@ func parsePHPFastCGI(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error return nil, dispenser.Errf("bad timeout value %s: %v", dispenser.Val(), err) } fcgiTransport.WriteTimeout = caddy.Duration(dur) - dispenser.Delete() - dispenser.Delete() + dispenser.DeleteN(2) case "capture_stderr": args := dispenser.RemainingArgs() - dispenser.Delete() - for range args { - dispenser.Delete() - } + dispenser.DeleteN(len(args) + 1) fcgiTransport.CaptureStderr = true } } diff --git a/modules/caddyhttp/reverseproxy/forwardauth/caddyfile.go b/modules/caddyhttp/reverseproxy/forwardauth/caddyfile.go index cecc00013..dbd2a2906 100644 --- a/modules/caddyhttp/reverseproxy/forwardauth/caddyfile.go +++ b/modules/caddyhttp/reverseproxy/forwardauth/caddyfile.go @@ -129,8 +129,7 @@ func parseCaddyfile(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error) return nil, dispenser.ArgErr() } rpHandler.Rewrite.URI = dispenser.Val() - dispenser.Delete() - dispenser.Delete() + dispenser.DeleteN(2) case "copy_headers": args := dispenser.RemainingArgs() @@ -140,13 +139,11 @@ func parseCaddyfile(h httpcaddyfile.Helper) ([]httpcaddyfile.ConfigValue, error) args = append(args, dispenser.Val()) } - dispenser.Delete() // directive name + // directive name + args + dispenser.DeleteN(len(args) + 1) if hadBlock { - dispenser.Delete() // opening brace - dispenser.Delete() // closing brace - } - for range args { - dispenser.Delete() + // opening & closing brace + dispenser.DeleteN(2) } for _, headerField := range args {