Support "$(cmd)" command substitution without line splitting

This adds a hack to the parser. Given a command

	echo "x$()y z"

we virtually insert double quotes before and after the command
substitution, so the command internally looks like

	echo "x"$()"y z"

This hack allows to reuse the existing logic for handling (recursive)
command substitutions.

This makes the quoting syntax more complex; external highlighters
should consider adding this if possible.

The upside (more Bash compatibility) seems worth it.

Closes #159
This commit is contained in:
Johannes Altmanninger 2021-07-02 23:11:03 +02:00
parent 4437a0d02a
commit ec3d3a481b
9 changed files with 201 additions and 28 deletions

View File

@ -501,9 +501,7 @@ void append_format(wcstring &str, const wchar_t *format, ...) {
va_end(va);
}
wchar_t *quote_end(const wchar_t *pos) {
wchar_t c = *pos;
wchar_t *quote_end(const wchar_t *pos, wchar_t quote) {
while (true) {
pos++;
@ -513,7 +511,10 @@ wchar_t *quote_end(const wchar_t *pos) {
pos++;
if (!*pos) return nullptr;
} else {
if (*pos == c) {
if (*pos == quote ||
// Command substitutions also end a double quoted string. This is how we
// support command substitutions inside double quotes.
(quote == L'"' && *pos == L'$' && *(pos + 1) == L'(')) {
return const_cast<wchar_t *>(pos);
}
}
@ -854,6 +855,22 @@ static bool unescape_string_var(const wchar_t *in, wcstring *out) {
return true;
}
wcstring escape_string_for_double_quotes(wcstring in) {
// We need to escape backslashes, double quotes, and dollars only.
wcstring result = std::move(in);
size_t idx = result.size();
while (idx--) {
switch (result[idx]) {
case L'\\':
case L'$':
case L'"':
result.insert(idx, 1, L'\\');
break;
}
}
return result;
}
/// Escape a string in a fashion suitable for using in fish script. Store the result in out_str.
static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring &out,
escape_flags_t flags) {

View File

@ -441,11 +441,11 @@ std::unique_ptr<T> make_unique(Args &&...args) {
}
#endif
/// This functions returns the end of the quoted substring beginning at \c in. The type of quoting
/// character is detemrined by examining \c in. Returns 0 on error.
/// This functions returns the end of the quoted substring beginning at \c pos. Returns 0 on error.
///
/// \param in the position of the opening quote.
wchar_t *quote_end(const wchar_t *pos);
/// \param pos the position of the opening quote.
/// \param quote the quote to use, usually pointed to by \c pos.
wchar_t *quote_end(const wchar_t *pos, wchar_t quote);
/// This function should be called after calling `setlocale()` to perform fish specific locale
/// initialization.
@ -473,6 +473,10 @@ wcstring escape_string(const wchar_t *in, escape_flags_t flags,
wcstring escape_string(const wcstring &in, escape_flags_t flags,
escape_string_style_t style = STRING_STYLE_SCRIPT);
/// Escape a string so that it may be inserted into a double-quoted string.
/// This permits ownership transfer.
wcstring escape_string_for_double_quotes(wcstring in);
/// \return a string representation suitable for debugging (not for presenting to the user). This
/// replaces non-ASCII characters with either tokens like <BRACE_SEP> or <\xfdd7>. No other escapes
/// are made (i.e. this is a lossy escape).

View File

@ -620,8 +620,9 @@ static expand_result_t expand_cmdsubst(wcstring input, const operation_context_t
size_t paren_end = 0;
wcstring subcmd;
bool is_quoted = false;
switch (parse_util_locate_cmdsubst_range(input, &cursor, &subcmd, &paren_begin, &paren_end,
false)) {
false, &is_quoted)) {
case -1: {
append_syntax_error(errors, SOURCE_LOCATION_UNKNOWN, L"Mismatched parenthesis");
return expand_result_t::make_error(STATUS_EXPAND_ERROR);
@ -640,6 +641,8 @@ static expand_result_t expand_cmdsubst(wcstring input, const operation_context_t
}
}
bool have_dollar = paren_begin > 0 && input.at(paren_begin - 1) == L'$';
wcstring_list_t sub_res;
int subshell_status = exec_subshell_for_expand(subcmd, *ctx.parser, ctx.job_group, sub_res);
if (subshell_status != 0) {
@ -702,12 +705,56 @@ static expand_result_t expand_cmdsubst(wcstring input, const operation_context_t
// Recursively call ourselves to expand any remaining command substitutions. The result of this
// recursive call using the tail of the string is inserted into the tail_expand array list
completion_receiver_t tail_expand_recv = out->subreceiver();
expand_cmdsubst(input.substr(tail_begin), ctx, &tail_expand_recv,
wcstring tail = input.substr(tail_begin);
// A command substitution inside double quotes magically closes the quoted string.
// Reopen the quotes just after the command substitution.
if (is_quoted) {
tail.insert(0, L"\"");
}
expand_cmdsubst(std::move(tail), ctx, &tail_expand_recv,
errors); // TODO: offset error locations
completion_list_t tail_expand = tail_expand_recv.take();
// Combine the result of the current command substitution with the result of the recursive tail
// expansion.
if (is_quoted) {
// Awkwardly reconstruct the command output.
size_t approx_size = 0;
for (const wcstring &sub_item : sub_res) {
approx_size += sub_item.size() + 1;
}
wcstring sub_res_joined;
sub_res_joined.reserve(approx_size);
for (size_t i = 0; i < sub_res.size(); i++) {
sub_res_joined.append(escape_string_for_double_quotes(std::move(sub_res.at(i))));
sub_res_joined.push_back(L'\n');
}
// Mimic POSIX shells by stripping all trailing newlines.
if (!sub_res_joined.empty()) {
size_t i;
for (i = sub_res_joined.size(); i > 0; i--) {
if (sub_res_joined[i - 1] != L'\n') break;
}
sub_res_joined.erase(i);
}
// Instead of performing cartesian product expansion, we directly insert the command
// substitution output into the current expansion results.
for (const completion_t &tail_item : tail_expand) {
wcstring whole_item;
whole_item.reserve(paren_begin + 1 + sub_res_joined.size() + 1 +
tail_item.completion.size());
whole_item.append(input, 0, paren_begin - have_dollar);
whole_item.append(sub_res_joined);
whole_item.append(tail_item.completion.substr(const_strlen(L"\"")));
if (!out->add(std::move(whole_item))) {
return append_overflow_error(errors);
}
}
return expand_result_t::ok;
}
for (const wcstring &sub_item : sub_res) {
wcstring sub_item2 = escape_string(sub_item, ESCAPE_ALL);
for (const completion_t &tail_item : tail_expand) {

View File

@ -5103,8 +5103,7 @@ static void test_error_messages() {
{L"echo foo\"$\"bar", ERROR_NO_VAR_NAME},
{L"echo \"foo\"$\"bar\"", ERROR_NO_VAR_NAME},
{L"echo foo $ bar", ERROR_NO_VAR_NAME},
{L"echo foo$(foo)bar", ERROR_BAD_VAR_SUBCOMMAND1},
{L"echo \"foo$(foo)bar\"", ERROR_BAD_VAR_SUBCOMMAND1}};
{L"echo foo$(foo)bar", ERROR_BAD_VAR_SUBCOMMAND1}};
parse_error_list_t errors;
for (const auto &test : error_tests) {
@ -5194,6 +5193,16 @@ static void test_highlighting() {
{L"|", highlight_role_t::statement_terminator},
{L"cat", highlight_role_t::command},
});
highlight_tests.push_back({
{L"true", highlight_role_t::command},
{L"\"before", highlight_role_t::quote},
{L"$(", highlight_role_t::operat},
{L"true", highlight_role_t::command},
{L"param1", highlight_role_t::param},
{L")", highlight_role_t::operat},
{L"after\"", highlight_role_t::quote},
{L"param2", highlight_role_t::param},
});
// Redirections substitutions.
highlight_tests.push_back({

View File

@ -494,6 +494,9 @@ static size_t color_variable(const wchar_t *in, size_t in_len,
wchar_t next = in[idx + 1];
if (next == L'$' || valid_var_name_char(next)) {
colors[idx] = highlight_role_t::operat;
} else if (next == L'(') {
colors[idx] = highlight_role_t::operat;
return idx + 1;
} else {
colors[idx] = highlight_role_t::error;
}

View File

@ -106,12 +106,13 @@ size_t parse_util_get_offset(const wcstring &str, int line, long line_offset) {
static int parse_util_locate_brackets_of_type(const wchar_t *in, wchar_t **begin, wchar_t **end,
bool allow_incomplete, wchar_t open_type,
wchar_t close_type) {
wchar_t close_type, bool *is_quoted = nullptr) {
// open_type is typically ( or [, and close type is the corresponding value.
wchar_t *pos;
bool escaped = false;
bool syntax_error = false;
int paran_count = 0;
std::vector<int> quoted_cmdsubs;
wchar_t *paran_begin = nullptr, *paran_end = nullptr;
@ -120,8 +121,14 @@ static int parse_util_locate_brackets_of_type(const wchar_t *in, wchar_t **begin
for (pos = const_cast<wchar_t *>(in); *pos; pos++) {
if (!escaped) {
if (std::wcschr(L"'\"", *pos)) {
wchar_t *q_end = quote_end(pos);
wchar_t *q_end = quote_end(pos, *pos);
if (q_end && *q_end) {
if (open_type == L'(' && *q_end == L'$') {
quoted_cmdsubs.push_back(paran_count);
// We want to report if the outermost comand substitution between
// paran_begin..paran_end is quoted.
if (paran_count == 0 && is_quoted) *is_quoted = true;
}
pos = q_end;
} else {
break;
@ -145,6 +152,25 @@ static int parse_util_locate_brackets_of_type(const wchar_t *in, wchar_t **begin
syntax_error = true;
break;
}
// Check if the ) did complete a quoted command substituion.
if (open_type == L'(' && !quoted_cmdsubs.empty() &&
quoted_cmdsubs.back() == paran_count) {
quoted_cmdsubs.pop_back();
// Quoted command substitutions temporarily close double quotes.
// In "foo$(bar)baz$(qux)"
// We are here ^
// After the ) in a quoted command substitution, we need to act as if
// there was an invisible double quote.
wchar_t *q_end = quote_end(pos, L'"');
if (q_end && *q_end) { // Found a valid closing quote.
// Stop at $(qux), which is another quoted command substitution.
if (*q_end == L'$') quoted_cmdsubs.push_back(paran_count);
pos = q_end;
} else {
break;
}
}
}
}
}
@ -185,7 +211,8 @@ int parse_util_locate_slice(const wchar_t *in, wchar_t **begin, wchar_t **end,
static int parse_util_locate_brackets_range(const wcstring &str, size_t *inout_cursor_offset,
wcstring *out_contents, size_t *out_start,
size_t *out_end, bool accept_incomplete,
wchar_t open_type, wchar_t close_type) {
wchar_t open_type, wchar_t close_type,
bool *out_is_quoted) {
// Clear the return values.
if (out_contents != nullptr) out_contents->clear();
*out_start = 0;
@ -201,7 +228,7 @@ static int parse_util_locate_brackets_range(const wcstring &str, size_t *inout_c
wchar_t *bracket_range_begin = nullptr, *bracket_range_end = nullptr;
int ret = parse_util_locate_brackets_of_type(valid_range_start, &bracket_range_begin,
&bracket_range_end, accept_incomplete, open_type,
close_type);
close_type, out_is_quoted);
if (ret <= 0) {
return ret;
}
@ -231,9 +258,9 @@ static int parse_util_locate_brackets_range(const wcstring &str, size_t *inout_c
int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset,
wcstring *out_contents, size_t *out_start, size_t *out_end,
bool accept_incomplete) {
bool accept_incomplete, bool *out_is_quoted) {
return parse_util_locate_brackets_range(str, inout_cursor_offset, out_contents, out_start,
out_end, accept_incomplete, L'(', L')');
out_end, accept_incomplete, L'(', L')', out_is_quoted);
}
void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wchar_t **a,
@ -465,7 +492,7 @@ static wchar_t get_quote(const wcstring &cmd_str, size_t len) {
i++;
} else {
if (cmd[i] == L'\'' || cmd[i] == L'\"') {
const wchar_t *end = quote_end(&cmd[i]);
const wchar_t *end = quote_end(&cmd[i], cmd[i]);
// std::fwprintf( stderr, L"Jump %d\n", end-cmd );
if ((end == nullptr) || (!*end) || (end > cmd + len)) {
res = cmd[i];
@ -1046,7 +1073,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen
wchar_t next_char = idx + 1 < unesc_size ? unesc.at(idx + 1) : L'\0';
if (next_char != VARIABLE_EXPAND && next_char != VARIABLE_EXPAND_SINGLE &&
!valid_var_name_char(next_char)) {
next_char != '(' && !valid_var_name_char(next_char)) {
err = 1;
if (out_errors) {
// We have something like $$$^.... Back up until we reach the first $.

View File

@ -31,7 +31,7 @@ int parse_util_locate_slice(const wchar_t *in, wchar_t **begin, wchar_t **end,
/// \return -1 on syntax error, 0 if no subshells exist and 1 on success
int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset,
wcstring *out_contents, size_t *out_start, size_t *out_end,
bool accept_incomplete);
bool accept_incomplete, bool *out_is_quoted = nullptr);
/// Find the beginning and end of the command substitution under the cursor. If no subshell is
/// found, the entire string is returned. If the current command substitution is not ended, i.e. the

View File

@ -145,10 +145,24 @@ tok_t tokenizer_t::read_string() {
std::vector<int> paran_offsets;
std::vector<int> brace_offsets;
std::vector<char> expecting;
std::vector<size_t> quoted_cmdsubs;
int slice_offset = 0;
const wchar_t *const buff_start = this->token_cursor;
bool is_first = true;
auto process_opening_quote = [&](wchar_t quote) -> const wchar_t * {
const wchar_t *end = quote_end(this->token_cursor, quote);
if (end) {
if (*end == L'$') quoted_cmdsubs.push_back(paran_offsets.size());
this->token_cursor = end;
return nullptr;
} else {
const wchar_t *error_loc = this->token_cursor;
this->token_cursor += std::wcslen(this->token_cursor);
return error_loc;
}
};
while (true) {
wchar_t c = *this->token_cursor;
#if false
@ -195,6 +209,19 @@ tok_t tokenizer_t::read_string() {
mode &= ~(tok_modes::subshell);
}
expecting.pop_back();
// Check if the ) did complete a quoted command substituion.
if (!quoted_cmdsubs.empty() && quoted_cmdsubs.back() == paran_offsets.size()) {
quoted_cmdsubs.pop_back();
// Quoted command substitutions temporarily close double quotes, after ),
// we need to act as if there was an invisible double quote.
if (const wchar_t *error_loc = process_opening_quote(L'"')) {
if (!this->accept_unfinished) {
return this->call_error(tokenizer_error_t::unterminated_quote, buff_start,
error_loc);
}
break;
}
}
} else if (c == L'}') {
if (!expecting.empty() && expecting.back() == L')') {
return this->call_error(tokenizer_error_t::expected_pclose_found_bclose,
@ -225,13 +252,8 @@ tok_t tokenizer_t::read_string() {
else if (c == L']' && ((mode & tok_modes::array_brackets) == tok_modes::array_brackets)) {
mode &= ~(tok_modes::array_brackets);
} else if (c == L'\'' || c == L'"') {
const wchar_t *end = quote_end(this->token_cursor);
if (end) {
this->token_cursor = end;
} else {
const wchar_t *error_loc = this->token_cursor;
this->token_cursor += std::wcslen(this->token_cursor);
if ((!this->accept_unfinished)) {
if (const wchar_t *error_loc = process_opening_quote(c)) {
if (!this->accept_unfinished) {
return this->call_error(tokenizer_error_t::unterminated_quote, buff_start,
error_loc);
}

44
tests/checks/cmdsub.fish Normal file
View File

@ -0,0 +1,44 @@
#RUN: %fish %s
# Command substitution inside double quotes strips trailing newline.
echo "a$(echo b)c"
# CHECK: abc
# Nesting
echo "$(echo "$(echo a)")"
# CHECK: a
echo "$(echo $(echo b))"
# CHECK: b
echo "$(echo multiple).$(echo command).$(echo substitutions)"
# CHECK: multiple.command.substitutions
test -n "$()" || echo "empty list is interpolated to empty string"
# CHECK: empty list is interpolated to empty string
# Variables in command substitution output are not expanded.
echo "$(echo \~ \$HOME)"
# CHECK: ~ $HOME
echo "$(printf %s 'quoted command substitution multiline output
line 2
line 3
')"
# CHECK: quoted command substitution multiline output
# CHECK: line 2
# CHECK: line 3
echo trim any newlines "$(echo \n\n\n)" after cmdsub
#CHECK: trim any newlines after cmdsub
echo i{1, (echo 2), "$(echo 3)"}
# CHECK: i1 i2 i3
echo "$(echo index\nrange\nexpansion)[2]"
#CHECK: range
echo "$(echo '"')"
#CHECK: "
echo "$(echo $(echo 1) ())"
#CHECK: 1