From ec3d3a481bc1605a7935c212c73dc6afe4b60f96 Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Fri, 2 Jul 2021 23:11:03 +0200 Subject: [PATCH] Support "$(cmd)" command substitution without line splitting This adds a hack to the parser. Given a command echo "x$()y z" we virtually insert double quotes before and after the command substitution, so the command internally looks like echo "x"$()"y z" This hack allows to reuse the existing logic for handling (recursive) command substitutions. This makes the quoting syntax more complex; external highlighters should consider adding this if possible. The upside (more Bash compatibility) seems worth it. Closes #159 --- src/common.cpp | 25 ++++++++++++++++---- src/common.h | 12 ++++++---- src/expand.cpp | 51 ++++++++++++++++++++++++++++++++++++++-- src/fish_tests.cpp | 13 ++++++++-- src/highlight.cpp | 3 +++ src/parse_util.cpp | 43 ++++++++++++++++++++++++++------- src/parse_util.h | 2 +- src/tokenizer.cpp | 36 ++++++++++++++++++++++------ tests/checks/cmdsub.fish | 44 ++++++++++++++++++++++++++++++++++ 9 files changed, 201 insertions(+), 28 deletions(-) create mode 100644 tests/checks/cmdsub.fish diff --git a/src/common.cpp b/src/common.cpp index 94157d907..e4e284848 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -501,9 +501,7 @@ void append_format(wcstring &str, const wchar_t *format, ...) { va_end(va); } -wchar_t *quote_end(const wchar_t *pos) { - wchar_t c = *pos; - +wchar_t *quote_end(const wchar_t *pos, wchar_t quote) { while (true) { pos++; @@ -513,7 +511,10 @@ wchar_t *quote_end(const wchar_t *pos) { pos++; if (!*pos) return nullptr; } else { - if (*pos == c) { + if (*pos == quote || + // Command substitutions also end a double quoted string. This is how we + // support command substitutions inside double quotes. + (quote == L'"' && *pos == L'$' && *(pos + 1) == L'(')) { return const_cast(pos); } } @@ -854,6 +855,22 @@ static bool unescape_string_var(const wchar_t *in, wcstring *out) { return true; } +wcstring escape_string_for_double_quotes(wcstring in) { + // We need to escape backslashes, double quotes, and dollars only. + wcstring result = std::move(in); + size_t idx = result.size(); + while (idx--) { + switch (result[idx]) { + case L'\\': + case L'$': + case L'"': + result.insert(idx, 1, L'\\'); + break; + } + } + return result; +} + /// Escape a string in a fashion suitable for using in fish script. Store the result in out_str. static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring &out, escape_flags_t flags) { diff --git a/src/common.h b/src/common.h index 6912e75eb..2aa148b1d 100644 --- a/src/common.h +++ b/src/common.h @@ -441,11 +441,11 @@ std::unique_ptr make_unique(Args &&...args) { } #endif -/// This functions returns the end of the quoted substring beginning at \c in. The type of quoting -/// character is detemrined by examining \c in. Returns 0 on error. +/// This functions returns the end of the quoted substring beginning at \c pos. Returns 0 on error. /// -/// \param in the position of the opening quote. -wchar_t *quote_end(const wchar_t *pos); +/// \param pos the position of the opening quote. +/// \param quote the quote to use, usually pointed to by \c pos. +wchar_t *quote_end(const wchar_t *pos, wchar_t quote); /// This function should be called after calling `setlocale()` to perform fish specific locale /// initialization. @@ -473,6 +473,10 @@ wcstring escape_string(const wchar_t *in, escape_flags_t flags, wcstring escape_string(const wcstring &in, escape_flags_t flags, escape_string_style_t style = STRING_STYLE_SCRIPT); +/// Escape a string so that it may be inserted into a double-quoted string. +/// This permits ownership transfer. +wcstring escape_string_for_double_quotes(wcstring in); + /// \return a string representation suitable for debugging (not for presenting to the user). This /// replaces non-ASCII characters with either tokens like or <\xfdd7>. No other escapes /// are made (i.e. this is a lossy escape). diff --git a/src/expand.cpp b/src/expand.cpp index 10a3d88c4..b33f4131e 100644 --- a/src/expand.cpp +++ b/src/expand.cpp @@ -620,8 +620,9 @@ static expand_result_t expand_cmdsubst(wcstring input, const operation_context_t size_t paren_end = 0; wcstring subcmd; + bool is_quoted = false; switch (parse_util_locate_cmdsubst_range(input, &cursor, &subcmd, &paren_begin, &paren_end, - false)) { + false, &is_quoted)) { case -1: { append_syntax_error(errors, SOURCE_LOCATION_UNKNOWN, L"Mismatched parenthesis"); return expand_result_t::make_error(STATUS_EXPAND_ERROR); @@ -640,6 +641,8 @@ static expand_result_t expand_cmdsubst(wcstring input, const operation_context_t } } + bool have_dollar = paren_begin > 0 && input.at(paren_begin - 1) == L'$'; + wcstring_list_t sub_res; int subshell_status = exec_subshell_for_expand(subcmd, *ctx.parser, ctx.job_group, sub_res); if (subshell_status != 0) { @@ -702,12 +705,56 @@ static expand_result_t expand_cmdsubst(wcstring input, const operation_context_t // Recursively call ourselves to expand any remaining command substitutions. The result of this // recursive call using the tail of the string is inserted into the tail_expand array list completion_receiver_t tail_expand_recv = out->subreceiver(); - expand_cmdsubst(input.substr(tail_begin), ctx, &tail_expand_recv, + wcstring tail = input.substr(tail_begin); + // A command substitution inside double quotes magically closes the quoted string. + // Reopen the quotes just after the command substitution. + if (is_quoted) { + tail.insert(0, L"\""); + } + expand_cmdsubst(std::move(tail), ctx, &tail_expand_recv, errors); // TODO: offset error locations completion_list_t tail_expand = tail_expand_recv.take(); // Combine the result of the current command substitution with the result of the recursive tail // expansion. + + if (is_quoted) { + // Awkwardly reconstruct the command output. + size_t approx_size = 0; + for (const wcstring &sub_item : sub_res) { + approx_size += sub_item.size() + 1; + } + wcstring sub_res_joined; + sub_res_joined.reserve(approx_size); + for (size_t i = 0; i < sub_res.size(); i++) { + sub_res_joined.append(escape_string_for_double_quotes(std::move(sub_res.at(i)))); + sub_res_joined.push_back(L'\n'); + } + // Mimic POSIX shells by stripping all trailing newlines. + if (!sub_res_joined.empty()) { + size_t i; + for (i = sub_res_joined.size(); i > 0; i--) { + if (sub_res_joined[i - 1] != L'\n') break; + } + sub_res_joined.erase(i); + } + // Instead of performing cartesian product expansion, we directly insert the command + // substitution output into the current expansion results. + for (const completion_t &tail_item : tail_expand) { + wcstring whole_item; + whole_item.reserve(paren_begin + 1 + sub_res_joined.size() + 1 + + tail_item.completion.size()); + whole_item.append(input, 0, paren_begin - have_dollar); + whole_item.append(sub_res_joined); + whole_item.append(tail_item.completion.substr(const_strlen(L"\""))); + if (!out->add(std::move(whole_item))) { + return append_overflow_error(errors); + } + } + + return expand_result_t::ok; + } + for (const wcstring &sub_item : sub_res) { wcstring sub_item2 = escape_string(sub_item, ESCAPE_ALL); for (const completion_t &tail_item : tail_expand) { diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 2b46986f2..805509a2f 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -5103,8 +5103,7 @@ static void test_error_messages() { {L"echo foo\"$\"bar", ERROR_NO_VAR_NAME}, {L"echo \"foo\"$\"bar\"", ERROR_NO_VAR_NAME}, {L"echo foo $ bar", ERROR_NO_VAR_NAME}, - {L"echo foo$(foo)bar", ERROR_BAD_VAR_SUBCOMMAND1}, - {L"echo \"foo$(foo)bar\"", ERROR_BAD_VAR_SUBCOMMAND1}}; + {L"echo foo$(foo)bar", ERROR_BAD_VAR_SUBCOMMAND1}}; parse_error_list_t errors; for (const auto &test : error_tests) { @@ -5194,6 +5193,16 @@ static void test_highlighting() { {L"|", highlight_role_t::statement_terminator}, {L"cat", highlight_role_t::command}, }); + highlight_tests.push_back({ + {L"true", highlight_role_t::command}, + {L"\"before", highlight_role_t::quote}, + {L"$(", highlight_role_t::operat}, + {L"true", highlight_role_t::command}, + {L"param1", highlight_role_t::param}, + {L")", highlight_role_t::operat}, + {L"after\"", highlight_role_t::quote}, + {L"param2", highlight_role_t::param}, + }); // Redirections substitutions. highlight_tests.push_back({ diff --git a/src/highlight.cpp b/src/highlight.cpp index 0a38a5cec..85f7c81aa 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -494,6 +494,9 @@ static size_t color_variable(const wchar_t *in, size_t in_len, wchar_t next = in[idx + 1]; if (next == L'$' || valid_var_name_char(next)) { colors[idx] = highlight_role_t::operat; + } else if (next == L'(') { + colors[idx] = highlight_role_t::operat; + return idx + 1; } else { colors[idx] = highlight_role_t::error; } diff --git a/src/parse_util.cpp b/src/parse_util.cpp index 668fe4c0b..827ad79a7 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -106,12 +106,13 @@ size_t parse_util_get_offset(const wcstring &str, int line, long line_offset) { static int parse_util_locate_brackets_of_type(const wchar_t *in, wchar_t **begin, wchar_t **end, bool allow_incomplete, wchar_t open_type, - wchar_t close_type) { + wchar_t close_type, bool *is_quoted = nullptr) { // open_type is typically ( or [, and close type is the corresponding value. wchar_t *pos; bool escaped = false; bool syntax_error = false; int paran_count = 0; + std::vector quoted_cmdsubs; wchar_t *paran_begin = nullptr, *paran_end = nullptr; @@ -120,8 +121,14 @@ static int parse_util_locate_brackets_of_type(const wchar_t *in, wchar_t **begin for (pos = const_cast(in); *pos; pos++) { if (!escaped) { if (std::wcschr(L"'\"", *pos)) { - wchar_t *q_end = quote_end(pos); + wchar_t *q_end = quote_end(pos, *pos); if (q_end && *q_end) { + if (open_type == L'(' && *q_end == L'$') { + quoted_cmdsubs.push_back(paran_count); + // We want to report if the outermost comand substitution between + // paran_begin..paran_end is quoted. + if (paran_count == 0 && is_quoted) *is_quoted = true; + } pos = q_end; } else { break; @@ -145,6 +152,25 @@ static int parse_util_locate_brackets_of_type(const wchar_t *in, wchar_t **begin syntax_error = true; break; } + + // Check if the ) did complete a quoted command substituion. + if (open_type == L'(' && !quoted_cmdsubs.empty() && + quoted_cmdsubs.back() == paran_count) { + quoted_cmdsubs.pop_back(); + // Quoted command substitutions temporarily close double quotes. + // In "foo$(bar)baz$(qux)" + // We are here ^ + // After the ) in a quoted command substitution, we need to act as if + // there was an invisible double quote. + wchar_t *q_end = quote_end(pos, L'"'); + if (q_end && *q_end) { // Found a valid closing quote. + // Stop at $(qux), which is another quoted command substitution. + if (*q_end == L'$') quoted_cmdsubs.push_back(paran_count); + pos = q_end; + } else { + break; + } + } } } } @@ -185,7 +211,8 @@ int parse_util_locate_slice(const wchar_t *in, wchar_t **begin, wchar_t **end, static int parse_util_locate_brackets_range(const wcstring &str, size_t *inout_cursor_offset, wcstring *out_contents, size_t *out_start, size_t *out_end, bool accept_incomplete, - wchar_t open_type, wchar_t close_type) { + wchar_t open_type, wchar_t close_type, + bool *out_is_quoted) { // Clear the return values. if (out_contents != nullptr) out_contents->clear(); *out_start = 0; @@ -201,7 +228,7 @@ static int parse_util_locate_brackets_range(const wcstring &str, size_t *inout_c wchar_t *bracket_range_begin = nullptr, *bracket_range_end = nullptr; int ret = parse_util_locate_brackets_of_type(valid_range_start, &bracket_range_begin, &bracket_range_end, accept_incomplete, open_type, - close_type); + close_type, out_is_quoted); if (ret <= 0) { return ret; } @@ -231,9 +258,9 @@ static int parse_util_locate_brackets_range(const wcstring &str, size_t *inout_c int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset, wcstring *out_contents, size_t *out_start, size_t *out_end, - bool accept_incomplete) { + bool accept_incomplete, bool *out_is_quoted) { return parse_util_locate_brackets_range(str, inout_cursor_offset, out_contents, out_start, - out_end, accept_incomplete, L'(', L')'); + out_end, accept_incomplete, L'(', L')', out_is_quoted); } void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wchar_t **a, @@ -465,7 +492,7 @@ static wchar_t get_quote(const wcstring &cmd_str, size_t len) { i++; } else { if (cmd[i] == L'\'' || cmd[i] == L'\"') { - const wchar_t *end = quote_end(&cmd[i]); + const wchar_t *end = quote_end(&cmd[i], cmd[i]); // std::fwprintf( stderr, L"Jump %d\n", end-cmd ); if ((end == nullptr) || (!*end) || (end > cmd + len)) { res = cmd[i]; @@ -1046,7 +1073,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen wchar_t next_char = idx + 1 < unesc_size ? unesc.at(idx + 1) : L'\0'; if (next_char != VARIABLE_EXPAND && next_char != VARIABLE_EXPAND_SINGLE && - !valid_var_name_char(next_char)) { + next_char != '(' && !valid_var_name_char(next_char)) { err = 1; if (out_errors) { // We have something like $$$^.... Back up until we reach the first $. diff --git a/src/parse_util.h b/src/parse_util.h index 0ef2c5ef0..3ee3a67d4 100644 --- a/src/parse_util.h +++ b/src/parse_util.h @@ -31,7 +31,7 @@ int parse_util_locate_slice(const wchar_t *in, wchar_t **begin, wchar_t **end, /// \return -1 on syntax error, 0 if no subshells exist and 1 on success int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset, wcstring *out_contents, size_t *out_start, size_t *out_end, - bool accept_incomplete); + bool accept_incomplete, bool *out_is_quoted = nullptr); /// Find the beginning and end of the command substitution under the cursor. If no subshell is /// found, the entire string is returned. If the current command substitution is not ended, i.e. the diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index b19547ec6..d9c179d6f 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -145,10 +145,24 @@ tok_t tokenizer_t::read_string() { std::vector paran_offsets; std::vector brace_offsets; std::vector expecting; + std::vector quoted_cmdsubs; int slice_offset = 0; const wchar_t *const buff_start = this->token_cursor; bool is_first = true; + auto process_opening_quote = [&](wchar_t quote) -> const wchar_t * { + const wchar_t *end = quote_end(this->token_cursor, quote); + if (end) { + if (*end == L'$') quoted_cmdsubs.push_back(paran_offsets.size()); + this->token_cursor = end; + return nullptr; + } else { + const wchar_t *error_loc = this->token_cursor; + this->token_cursor += std::wcslen(this->token_cursor); + return error_loc; + } + }; + while (true) { wchar_t c = *this->token_cursor; #if false @@ -195,6 +209,19 @@ tok_t tokenizer_t::read_string() { mode &= ~(tok_modes::subshell); } expecting.pop_back(); + // Check if the ) did complete a quoted command substituion. + if (!quoted_cmdsubs.empty() && quoted_cmdsubs.back() == paran_offsets.size()) { + quoted_cmdsubs.pop_back(); + // Quoted command substitutions temporarily close double quotes, after ), + // we need to act as if there was an invisible double quote. + if (const wchar_t *error_loc = process_opening_quote(L'"')) { + if (!this->accept_unfinished) { + return this->call_error(tokenizer_error_t::unterminated_quote, buff_start, + error_loc); + } + break; + } + } } else if (c == L'}') { if (!expecting.empty() && expecting.back() == L')') { return this->call_error(tokenizer_error_t::expected_pclose_found_bclose, @@ -225,13 +252,8 @@ tok_t tokenizer_t::read_string() { else if (c == L']' && ((mode & tok_modes::array_brackets) == tok_modes::array_brackets)) { mode &= ~(tok_modes::array_brackets); } else if (c == L'\'' || c == L'"') { - const wchar_t *end = quote_end(this->token_cursor); - if (end) { - this->token_cursor = end; - } else { - const wchar_t *error_loc = this->token_cursor; - this->token_cursor += std::wcslen(this->token_cursor); - if ((!this->accept_unfinished)) { + if (const wchar_t *error_loc = process_opening_quote(c)) { + if (!this->accept_unfinished) { return this->call_error(tokenizer_error_t::unterminated_quote, buff_start, error_loc); } diff --git a/tests/checks/cmdsub.fish b/tests/checks/cmdsub.fish new file mode 100644 index 000000000..bfc9f60af --- /dev/null +++ b/tests/checks/cmdsub.fish @@ -0,0 +1,44 @@ +#RUN: %fish %s + +# Command substitution inside double quotes strips trailing newline. +echo "a$(echo b)c" +# CHECK: abc + +# Nesting +echo "$(echo "$(echo a)")" +# CHECK: a +echo "$(echo $(echo b))" +# CHECK: b + +echo "$(echo multiple).$(echo command).$(echo substitutions)" +# CHECK: multiple.command.substitutions + +test -n "$()" || echo "empty list is interpolated to empty string" +# CHECK: empty list is interpolated to empty string + +# Variables in command substitution output are not expanded. +echo "$(echo \~ \$HOME)" +# CHECK: ~ $HOME + +echo "$(printf %s 'quoted command substitution multiline output +line 2 +line 3 +')" +# CHECK: quoted command substitution multiline output +# CHECK: line 2 +# CHECK: line 3 + +echo trim any newlines "$(echo \n\n\n)" after cmdsub +#CHECK: trim any newlines after cmdsub + +echo i{1, (echo 2), "$(echo 3)"} +# CHECK: i1 i2 i3 + +echo "$(echo index\nrange\nexpansion)[2]" +#CHECK: range + +echo "$(echo '"')" +#CHECK: " + +echo "$(echo $(echo 1) ())" +#CHECK: 1