From afd20b8e1abf330c7d4ffbd7db2d7b23864e46b8 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 27 Oct 2019 15:44:08 -0700 Subject: [PATCH] Correctly report the range of tokenizer errors This enables proper syntax highlighting of tokenizer errors. --- src/fish_tests.cpp | 5 +++++ src/parse_tree.cpp | 4 ++-- src/tokenizer.cpp | 21 +++++++++++---------- src/tokenizer.h | 2 +- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 4476c6019..3b0526a53 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -4623,6 +4623,11 @@ static void test_highlighting() { {L"true", highlight_role_t::command}, }); + highlight_tests.push_back({ + {L"false", highlight_role_t::command}, + {L"|&", highlight_role_t::error}, + }); + auto &vars = parser_t::principal_parser().vars(); // Verify variables and wildcards in commands using /bin/cat. vars.set(L"VARIABLE_IN_COMMAND", ENV_LOCAL, {L"a"}); diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp index e23b6a177..269b36d60 100644 --- a/src/parse_tree.cpp +++ b/src/parse_tree.cpp @@ -83,7 +83,7 @@ wcstring parse_error_t::describe_with_prefix(const wcstring &src, const wcstring assert(line_end >= line_start); assert(source_start >= line_start); - // Don't include the caret and line if we're interactive this is the first line, because + // Don't include the caret and line if we're interactive and this is the first line, because // then it's obvious. bool interactive_skip_caret = is_interactive && source_start == 0; if (interactive_skip_caret) { @@ -637,7 +637,7 @@ void parse_ll_t::parse_error_at_location(size_t source_start, size_t source_leng err.source_start = source_start; err.source_length = source_length; - this->errors.push_back(err); + this->errors.push_back(std::move(err)); } } diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 1320c9698..39d54fc81 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -60,7 +60,7 @@ static bool caret_redirs() { return !feature_test(features_t::stderr_nocaret); } /// Return an error token and mark that we no longer have a next token. tok_t tokenizer_t::call_error(tokenizer_error_t error_type, const wchar_t *token_start, - const wchar_t *error_loc) { + const wchar_t *error_loc, maybe_t token_length) { assert(error_type != tokenizer_error_t::none && "tokenizer_error_t::none passed to call_error"); assert(error_loc >= token_start && "Invalid error location"); assert(this->buff >= token_start && "Invalid buff location"); @@ -70,7 +70,8 @@ tok_t tokenizer_t::call_error(tokenizer_error_t error_type, const wchar_t *token tok_t result{token_type_t::error}; result.error = error_type; result.offset = token_start - this->start; - result.length = this->buff - token_start; + // If we are passed a token_length, then use it; otherwise infer it from the buffer. + result.length = token_length ? *token_length : this->buff - token_start; result.error_offset = error_loc - token_start; return result; } @@ -174,12 +175,12 @@ tok_t tokenizer_t::read_string() { } else if (c == L')') { if (expecting.size() > 0 && expecting.back() == L'}') { return this->call_error(tokenizer_error_t::expected_bclose_found_pclose, - this->start, this->buff); + this->start, this->buff, 1); } switch (paran_offsets.size()) { case 0: return this->call_error(tokenizer_error_t::closing_unopened_subshell, - this->start, this->buff); + this->start, this->buff, 1); case 1: mode &= ~(tok_modes::subshell); default: @@ -189,7 +190,7 @@ tok_t tokenizer_t::read_string() { } else if (c == L'}') { if (expecting.size() > 0 && expecting.back() == L')') { return this->call_error(tokenizer_error_t::expected_pclose_found_bclose, - this->start, this->buff); + this->start, this->buff, 1); } switch (brace_offsets.size()) { case 0: @@ -248,7 +249,7 @@ tok_t tokenizer_t::read_string() { if ((!this->accept_unfinished) && (mode != tok_modes::regular_text)) { if (mode & tok_modes::char_escape) { return this->call_error(tokenizer_error_t::unterminated_escape, buff_start, - this->buff - 1); + this->buff - 1, 1); } else if (mode & tok_modes::array_brackets) { return this->call_error(tokenizer_error_t::unterminated_slice, buff_start, this->start + slice_offset); @@ -575,7 +576,7 @@ maybe_t tokenizer_t::next() { } else if (this->buff[1] == L'&') { // |& is a bashism; in fish it's &|. return this->call_error(tokenizer_error_t::invalid_pipe_ampersand, this->buff, - this->buff); + this->buff, 2); } else { auto pipe = pipe_or_redir_t::from_string(buff); assert(pipe.has_value() && pipe->is_pipe && @@ -594,8 +595,8 @@ maybe_t tokenizer_t::next() { // redirection is an error! auto redir_or_pipe = pipe_or_redir_t::from_string(this->buff); if (!redir_or_pipe || redir_or_pipe->fd < 0) { - return this->call_error(tokenizer_error_t::invalid_redirect, this->buff, - this->buff); + return this->call_error(tokenizer_error_t::invalid_redirect, this->buff, this->buff, + redir_or_pipe ? redir_or_pipe->consumed : 0); } result.emplace(redir_or_pipe->token_type()); result->offset = start_pos; @@ -617,7 +618,7 @@ maybe_t tokenizer_t::next() { // tokenizer error. if (redir_or_pipe->is_pipe && redir_or_pipe->fd == 0) { return this->call_error(tokenizer_error_t::invalid_pipe, error_location, - error_location); + error_location, redir_or_pipe->consumed); } result.emplace(redir_or_pipe->token_type()); result->offset = start_pos; diff --git a/src/tokenizer.h b/src/tokenizer.h index 571d20c01..1e97b893c 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -108,7 +108,7 @@ class tokenizer_t { bool continue_line_after_comment{false}; tok_t call_error(tokenizer_error_t error_type, const wchar_t *token_start, - const wchar_t *error_loc); + const wchar_t *error_loc, maybe_t token_length = {}); tok_t read_string(); public: