From afd20b8e1abf330c7d4ffbd7db2d7b23864e46b8 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sun, 27 Oct 2019 15:44:08 -0700
Subject: [PATCH] Correctly report the range of tokenizer errors

This enables proper syntax highlighting of tokenizer errors.
---
 src/fish_tests.cpp |  5 +++++
 src/parse_tree.cpp |  4 ++--
 src/tokenizer.cpp  | 21 +++++++++++----------
 src/tokenizer.h    |  2 +-
 4 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index 4476c6019..3b0526a53 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -4623,6 +4623,11 @@ static void test_highlighting() {
         {L"true", highlight_role_t::command},
     });
 
+    highlight_tests.push_back({
+        {L"false", highlight_role_t::command},
+        {L"|&", highlight_role_t::error},
+    });
+
     auto &vars = parser_t::principal_parser().vars();
     // Verify variables and wildcards in commands using /bin/cat.
     vars.set(L"VARIABLE_IN_COMMAND", ENV_LOCAL, {L"a"});
diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp
index e23b6a177..269b36d60 100644
--- a/src/parse_tree.cpp
+++ b/src/parse_tree.cpp
@@ -83,7 +83,7 @@ wcstring parse_error_t::describe_with_prefix(const wcstring &src, const wcstring
     assert(line_end >= line_start);
     assert(source_start >= line_start);
 
-    // Don't include the caret and line if we're interactive this is the first line, because
+    // Don't include the caret and line if we're interactive and this is the first line, because
     // then it's obvious.
     bool interactive_skip_caret = is_interactive && source_start == 0;
     if (interactive_skip_caret) {
@@ -637,7 +637,7 @@ void parse_ll_t::parse_error_at_location(size_t source_start, size_t source_leng
 
         err.source_start = source_start;
         err.source_length = source_length;
-        this->errors.push_back(err);
+        this->errors.push_back(std::move(err));
     }
 }
 
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 1320c9698..39d54fc81 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -60,7 +60,7 @@ static bool caret_redirs() { return !feature_test(features_t::stderr_nocaret); }
 
 /// Return an error token and mark that we no longer have a next token.
 tok_t tokenizer_t::call_error(tokenizer_error_t error_type, const wchar_t *token_start,
-                              const wchar_t *error_loc) {
+                              const wchar_t *error_loc, maybe_t<size_t> token_length) {
     assert(error_type != tokenizer_error_t::none && "tokenizer_error_t::none passed to call_error");
     assert(error_loc >= token_start && "Invalid error location");
     assert(this->buff >= token_start && "Invalid buff location");
@@ -70,7 +70,8 @@ tok_t tokenizer_t::call_error(tokenizer_error_t error_type, const wchar_t *token
     tok_t result{token_type_t::error};
     result.error = error_type;
     result.offset = token_start - this->start;
-    result.length = this->buff - token_start;
+    // If we are passed a token_length, then use it; otherwise infer it from the buffer.
+    result.length = token_length ? *token_length : this->buff - token_start;
     result.error_offset = error_loc - token_start;
     return result;
 }
@@ -174,12 +175,12 @@ tok_t tokenizer_t::read_string() {
         } else if (c == L')') {
             if (expecting.size() > 0 && expecting.back() == L'}') {
                 return this->call_error(tokenizer_error_t::expected_bclose_found_pclose,
-                                        this->start, this->buff);
+                                        this->start, this->buff, 1);
             }
             switch (paran_offsets.size()) {
                 case 0:
                     return this->call_error(tokenizer_error_t::closing_unopened_subshell,
-                                            this->start, this->buff);
+                                            this->start, this->buff, 1);
                 case 1:
                     mode &= ~(tok_modes::subshell);
                 default:
@@ -189,7 +190,7 @@ tok_t tokenizer_t::read_string() {
         } else if (c == L'}') {
             if (expecting.size() > 0 && expecting.back() == L')') {
                 return this->call_error(tokenizer_error_t::expected_pclose_found_bclose,
-                                        this->start, this->buff);
+                                        this->start, this->buff, 1);
             }
             switch (brace_offsets.size()) {
                 case 0:
@@ -248,7 +249,7 @@ tok_t tokenizer_t::read_string() {
     if ((!this->accept_unfinished) && (mode != tok_modes::regular_text)) {
         if (mode & tok_modes::char_escape) {
             return this->call_error(tokenizer_error_t::unterminated_escape, buff_start,
-                                    this->buff - 1);
+                                    this->buff - 1, 1);
         } else if (mode & tok_modes::array_brackets) {
             return this->call_error(tokenizer_error_t::unterminated_slice, buff_start,
                                     this->start + slice_offset);
@@ -575,7 +576,7 @@ maybe_t<tok_t> tokenizer_t::next() {
             } else if (this->buff[1] == L'&') {
                 // |& is a bashism; in fish it's &|.
                 return this->call_error(tokenizer_error_t::invalid_pipe_ampersand, this->buff,
-                                        this->buff);
+                                        this->buff, 2);
             } else {
                 auto pipe = pipe_or_redir_t::from_string(buff);
                 assert(pipe.has_value() && pipe->is_pipe &&
@@ -594,8 +595,8 @@ maybe_t<tok_t> tokenizer_t::next() {
             // redirection is an error!
             auto redir_or_pipe = pipe_or_redir_t::from_string(this->buff);
             if (!redir_or_pipe || redir_or_pipe->fd < 0) {
-                return this->call_error(tokenizer_error_t::invalid_redirect, this->buff,
-                                        this->buff);
+                return this->call_error(tokenizer_error_t::invalid_redirect, this->buff, this->buff,
+                                        redir_or_pipe ? redir_or_pipe->consumed : 0);
             }
             result.emplace(redir_or_pipe->token_type());
             result->offset = start_pos;
@@ -617,7 +618,7 @@ maybe_t<tok_t> tokenizer_t::next() {
                 // tokenizer error.
                 if (redir_or_pipe->is_pipe && redir_or_pipe->fd == 0) {
                     return this->call_error(tokenizer_error_t::invalid_pipe, error_location,
-                                            error_location);
+                                            error_location, redir_or_pipe->consumed);
                 }
                 result.emplace(redir_or_pipe->token_type());
                 result->offset = start_pos;
diff --git a/src/tokenizer.h b/src/tokenizer.h
index 571d20c01..1e97b893c 100644
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@@ -108,7 +108,7 @@ class tokenizer_t {
     bool continue_line_after_comment{false};
 
     tok_t call_error(tokenizer_error_t error_type, const wchar_t *token_start,
-                     const wchar_t *error_loc);
+                     const wchar_t *error_loc, maybe_t<size_t> token_length = {});
     tok_t read_string();
 
    public: