Correctly report the range of tokenizer errors

This enables proper syntax highlighting of tokenizer errors.
ridiculousfish 2019-10-27 15:44:08 -07:00
parent 0bfd897ee0
commit afd20b8e1a
4 changed files with 19 additions and 13 deletions
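
Summary of the change: call_error previously always computed an error token's length as the distance from the token's start to the tokenizer's current buffer position, which misreports the range whenever the error is detected before the token has been fully consumed (for example the two-character sequence |&). call_error now accepts an optional explicit token length, and the call sites below pass one wherever the true extent is known. A minimal sketch of the token fields involved; the field names are taken from the diffs below, but the struct itself is a simplified stand-in for the real tok_t:

// Simplified stand-in for the error-token fields this commit affects.
// Only the field names are real; everything else is illustrative.
#include <cstddef>

struct tok_sketch_t {
    size_t offset;        // token start, relative to the start of the input
    size_t length;        // token extent; now set explicitly for error tokens
    size_t error_offset;  // where, within the token, the error begins
};

// For the input L"false |&", the "|&" error token should come out as
// {offset = 6, length = 2, error_offset = 0}, covering both characters.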

@@ -4623,6 +4623,11 @@ static void test_highlighting() {
         {L"true", highlight_role_t::command},
     });
 
+    highlight_tests.push_back({
+        {L"false", highlight_role_t::command},
+        {L"|&", highlight_role_t::error},
+    });
+
     auto &vars = parser_t::principal_parser().vars();
     // Verify variables and wildcards in commands using /bin/cat.
     vars.set(L"VARIABLE_IN_COMMAND", ENV_LOCAL, {L"a"});
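
This test depends on the tokenizer reporting the full two-character range for |& (a bashism; as the tokenizer diff below notes, fish spells it &|). A hedged sketch of the expectation, assuming a per-character role array like the one the highlighter fills in; none of these names belong to fish's real test harness:

// Hypothetical check: paint the reported error span onto a per-character
// role array and verify that both characters of "|&" are covered.
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

enum class role_sketch_t { normal, error };

int main() {
    std::wstring text = L"false |&";
    std::vector<role_sketch_t> roles(text.size(), role_sketch_t::normal);
    size_t offset = 6, length = 2;  // the span the fixed tokenizer reports
    for (size_t i = offset; i < offset + length; i++) roles[i] = role_sketch_t::error;
    assert(roles[6] == role_sketch_t::error && roles[7] == role_sketch_t::error);
    return 0;
}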

@@ -83,7 +83,7 @@ wcstring parse_error_t::describe_with_prefix(const wcstring &src, const wcstring
     assert(line_end >= line_start);
     assert(source_start >= line_start);
 
-    // Don't include the caret and line if we're interactive this is the first line, because
+    // Don't include the caret and line if we're interactive and this is the first line, because
     // then it's obvious.
     bool interactive_skip_caret = is_interactive && source_start == 0;
     if (interactive_skip_caret) {
@@ -637,7 +637,7 @@ void parse_ll_t::parse_error_at_location(size_t source_start, size_t source_leng
         err.source_start = source_start;
         err.source_length = source_length;
-        this->errors.push_back(err);
+        this->errors.push_back(std::move(err));
     }
 }
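
Besides the comment fix, the second hunk switches push_back(err) to push_back(std::move(err)), which moves the error, message string and all, into the vector instead of copying it. A minimal illustration of the pattern; parse_error_sketch_t is an assumed stand-in for the real parse_error_t:

// Moving a just-built error into the container avoids copying its message.
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

struct parse_error_sketch_t {
    std::wstring text;        // error message
    size_t source_start{0};   // start of the offending source range
    size_t source_length{0};  // length of the offending source range
};

void record_error(std::vector<parse_error_sketch_t> &errors, size_t start, size_t length) {
    parse_error_sketch_t err;
    err.text = L"example message";
    err.source_start = start;
    err.source_length = length;
    errors.push_back(std::move(err));  // transfers the string buffer, no copy
}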

@@ -60,7 +60,7 @@ static bool caret_redirs() { return !feature_test(features_t::stderr_nocaret); }
 
 /// Return an error token and mark that we no longer have a next token.
 tok_t tokenizer_t::call_error(tokenizer_error_t error_type, const wchar_t *token_start,
-                              const wchar_t *error_loc) {
+                              const wchar_t *error_loc, maybe_t<size_t> token_length) {
     assert(error_type != tokenizer_error_t::none && "tokenizer_error_t::none passed to call_error");
     assert(error_loc >= token_start && "Invalid error location");
     assert(this->buff >= token_start && "Invalid buff location");
@@ -70,7 +70,8 @@ tok_t tokenizer_t::call_error(tokenizer_error_t error_type, const wchar_t *token
     tok_t result{token_type_t::error};
     result.error = error_type;
     result.offset = token_start - this->start;
-    result.length = this->buff - token_start;
+    // If we are passed a token_length, then use it; otherwise infer it from the buffer.
+    result.length = token_length ? *token_length : this->buff - token_start;
     result.error_offset = error_loc - token_start;
     return result;
 }
@@ -174,12 +175,12 @@ tok_t tokenizer_t::read_string() {
         } else if (c == L')') {
             if (expecting.size() > 0 && expecting.back() == L'}') {
                 return this->call_error(tokenizer_error_t::expected_bclose_found_pclose,
-                                        this->start, this->buff);
+                                        this->start, this->buff, 1);
             }
             switch (paran_offsets.size()) {
                 case 0:
                     return this->call_error(tokenizer_error_t::closing_unopened_subshell,
-                                            this->start, this->buff);
+                                            this->start, this->buff, 1);
                 case 1:
                     mode &= ~(tok_modes::subshell);
                 default:
@@ -189,7 +190,7 @@ tok_t tokenizer_t::read_string() {
         } else if (c == L'}') {
             if (expecting.size() > 0 && expecting.back() == L')') {
                 return this->call_error(tokenizer_error_t::expected_pclose_found_bclose,
-                                        this->start, this->buff);
+                                        this->start, this->buff, 1);
             }
             switch (brace_offsets.size()) {
                 case 0:
@@ -248,7 +249,7 @@ tok_t tokenizer_t::read_string() {
     if ((!this->accept_unfinished) && (mode != tok_modes::regular_text)) {
         if (mode & tok_modes::char_escape) {
             return this->call_error(tokenizer_error_t::unterminated_escape, buff_start,
-                                    this->buff - 1);
+                                    this->buff - 1, 1);
         } else if (mode & tok_modes::array_brackets) {
             return this->call_error(tokenizer_error_t::unterminated_slice, buff_start,
                                     this->start + slice_offset);
@@ -575,7 +576,7 @@ maybe_t<tok_t> tokenizer_t::next() {
         } else if (this->buff[1] == L'&') {
             // |& is a bashism; in fish it's &|.
             return this->call_error(tokenizer_error_t::invalid_pipe_ampersand, this->buff,
-                                    this->buff);
+                                    this->buff, 2);
         } else {
             auto pipe = pipe_or_redir_t::from_string(buff);
             assert(pipe.has_value() && pipe->is_pipe &&
@@ -594,8 +595,8 @@ maybe_t<tok_t> tokenizer_t::next() {
             // redirection is an error!
             auto redir_or_pipe = pipe_or_redir_t::from_string(this->buff);
             if (!redir_or_pipe || redir_or_pipe->fd < 0) {
-                return this->call_error(tokenizer_error_t::invalid_redirect, this->buff,
-                                        this->buff);
+                return this->call_error(tokenizer_error_t::invalid_redirect, this->buff, this->buff,
+                                        redir_or_pipe ? redir_or_pipe->consumed : 0);
             }
             result.emplace(redir_or_pipe->token_type());
             result->offset = start_pos;
@@ -617,7 +618,7 @@ maybe_t<tok_t> tokenizer_t::next() {
             // tokenizer error.
             if (redir_or_pipe->is_pipe && redir_or_pipe->fd == 0) {
                 return this->call_error(tokenizer_error_t::invalid_pipe, error_location,
-                                        error_location);
+                                        error_location, redir_or_pipe->consumed);
             }
             result.emplace(redir_or_pipe->token_type());
             result->offset = start_pos;
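
Across these call sites the tokenizer now passes an explicit length wherever it knows the token's true extent: 1 for a single offending character, 2 for |&, and redir_or_pipe->consumed for a malformed redirection or pipe; callers that omit the argument keep the old buffer-distance inference. A stand-alone sketch of the patched length computation, with std::optional standing in for fish's maybe_t<size_t>:

// Stand-alone reconstruction of the patched length logic in call_error.
#include <cassert>
#include <cstddef>
#include <optional>

size_t error_token_length(std::optional<size_t> token_length,
                          const wchar_t *token_start, const wchar_t *buff) {
    // If we are passed a token_length, use it; otherwise infer it from the buffer.
    return token_length ? *token_length : static_cast<size_t>(buff - token_start);
}

int main() {
    const wchar_t *input = L"false |&";
    const wchar_t *pipe = input + 6;  // points at '|'
    // Explicit length: the |& call site passes 2 even though the tokenizer
    // has only advanced one character past the token start.
    assert(error_token_length(2, pipe, pipe + 1) == 2);
    // No explicit length: fall back to the buffer distance, as before.
    assert(error_token_length(std::nullopt, pipe, pipe + 1) == 1);
    return 0;
}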

@@ -108,7 +108,7 @@ class tokenizer_t {
     bool continue_line_after_comment{false};
 
     tok_t call_error(tokenizer_error_t error_type, const wchar_t *token_start,
-                     const wchar_t *error_loc);
+                     const wchar_t *error_loc, maybe_t<size_t> token_length = {});
     tok_t read_string();
 
    public:
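
The declaration gives token_length a default of an empty maybe_t, so every existing caller compiles unchanged and keeps the inferred length; only call sites that know the token's true extent need to pass one. A minimal sketch of this defaulted-optional-parameter pattern, again with std::optional as a stand-in:

// Defaulted optional parameter, as in the call_error declaration above.
#include <cstddef>
#include <optional>

size_t token_length_or(size_t inferred, std::optional<size_t> token_length = {}) {
    return token_length ? *token_length : inferred;
}

// token_length_or(5)    == 5: unchanged callers keep the inferred length.
// token_length_or(5, 2) == 2: new callers state the token's true extent.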