From 0f62161b2bb7c68b61a7711cd8ff41fcd1039b48 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Mon, 19 Feb 2018 16:31:39 -0800
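Subject: [PATCH] Clean up tokenizer error handling.

Remove the TOK_CALL_ERROR macro and call tokenizer_t::call_error()
directly. call_error() now checks squash_errors itself and chooses the
error message from the error type, so callers no longer pass a message.
Replace the catch-all TOK_OTHER error with TOK_INVALID_REDIRECT and
TOK_INVALID_PIPE.
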
---
 src/parse_tree.cpp |  3 ++-
 src/tokenizer.cpp  | 58 ++++++++++++++++++++++++++++------------------
 src/tokenizer.h    |  6 ++---
 3 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp
index 88dcd54b4..bb36a927e 100644
--- a/src/parse_tree.cpp
+++ b/src/parse_tree.cpp
@@ -730,7 +730,8 @@ void parse_ll_t::report_tokenizer_error(const tok_t &tok) {
             parse_error_code = parse_error_tokenizer_unterminated_escape;
             break;
         }
-        case TOK_OTHER:
+        case TOK_INVALID_REDIRECT:
+        case TOK_INVALID_PIPE:
         default: {
             parse_error_code = parse_error_tokenizer_other;
             break;
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 2de682a16..ffce52d9c 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -16,12 +16,6 @@
 #include "tokenizer.h"
 #include "wutil.h"  // IWYU pragma: keep
 
-// Wow what a hack.
-#define TOK_CALL_ERROR(t, e, x, where)                               \
-    do {                                                             \
-        (t)->call_error((e), where, (t)->squash_errors ? L"" : (x)); \
-    } while (0)
-
 /// Error string for unexpected end of string.
 #define QUOTE_ERROR _(L"Unexpected end of string, quotes are not balanced")
 
@@ -41,13 +35,38 @@
 #define PIPE_ERROR _(L"Cannot use stdin (fd 0) as pipe output")
 
 /// Set the latest tokens string to be the specified error message.
-void tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *where,
-                             const wchar_t *error_message) {
+void tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *where) {
+    assert(error_type != TOK_ERROR_NONE && "TOK_ERROR_NONE passed to call_error");
     this->last_type = TOK_ERROR;
     this->error = error_type;
-    this->global_error_offset = where ? where - this->start : 0;
-    this->last_token = error_message;
     this->has_next = false;
+    this->global_error_offset = where ? where - this->start : 0;  // 0 if where is null.
+    if (this->squash_errors) {  // Squashed errors get an empty message.
+        this->last_token.clear();
+    } else {  // Otherwise store the message for this error type.
+        switch (error_type) {
+            case TOK_UNTERMINATED_QUOTE:
+                this->last_token = QUOTE_ERROR;
+                break;
+            case TOK_UNTERMINATED_SUBSHELL:
+                this->last_token = PARAN_ERROR;
+                break;
+            case TOK_UNTERMINATED_SLICE:
+                this->last_token = SQUARE_BRACKET_ERROR;
+                break;
+            case TOK_UNTERMINATED_ESCAPE:
+                this->last_token = UNTERMINATED_ESCAPE_ERROR;
+                break;
+            case TOK_INVALID_REDIRECT:
+                this->last_token = REDIRECT_ERROR;
+                break;
+            case TOK_INVALID_PIPE:
+                this->last_token = PIPE_ERROR;
+                break;
+            default:  // Any other error type has no message and trips the assert.
+                assert(0 && "Unknown error type");
+        }
+    }
 }
 
 tokenizer_t::tokenizer_t(const wchar_t *start, tok_flags_t flags) : buff(start), start(start) {
@@ -151,8 +170,7 @@ void tokenizer_t::read_string() {
                 this->buff++;
                 if (*this->buff == L'\0') {
                     if ((!this->accept_unfinished)) {
-                        TOK_CALL_ERROR(this, TOK_UNTERMINATED_ESCAPE, UNTERMINATED_ESCAPE_ERROR,
-                                       error_location);
+                        this->call_error(TOK_UNTERMINATED_ESCAPE, error_location);
                         return;
                     }
                     // Since we are about to increment tok->buff, decrement it first so the
@@ -191,8 +209,7 @@ void tokenizer_t::read_string() {
                                 this->buff += wcslen(this->buff);
 
                                 if (!this->accept_unfinished) {
-                                    TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR,
-                                                   error_loc);
+                                    this->call_error(TOK_UNTERMINATED_QUOTE, error_loc);
                                     return;
                                 }
                                 do_loop = 0;
@@ -221,8 +238,7 @@ void tokenizer_t::read_string() {
                                 const wchar_t *error_loc = this->buff;
                                 this->buff += wcslen(this->buff);
                                 if ((!this->accept_unfinished)) {
-                                    TOK_CALL_ERROR(this, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR,
-                                                   error_loc);
+                                    this->call_error(TOK_UNTERMINATED_QUOTE, error_loc);
                                     return;
                                 }
                                 do_loop = 0;
@@ -298,14 +314,12 @@ void tokenizer_t::read_string() {
                     offset_of_open_paran = paran_offsets[paran_count - 1];
                 }
 
-                TOK_CALL_ERROR(this, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR,
-                               this->start + offset_of_open_paran);
+                this->call_error(TOK_UNTERMINATED_SUBSHELL, this->start + offset_of_open_paran);
                 break;
             }
             case mode_array_brackets:
             case mode_array_brackets_and_subshell: {
-                TOK_CALL_ERROR(this, TOK_UNTERMINATED_SLICE, SQUARE_BRACKET_ERROR,
-                               this->start + offset_of_bracket);
+                this->call_error(TOK_UNTERMINATED_SLICE, this->start + offset_of_bracket);
                 break;
             }
             default: {
@@ -551,7 +565,7 @@ bool tokenizer_t::tok_next() {
             int fd = -1;
             size_t consumed = read_redirection_or_fd_pipe(this->buff, &mode, &fd);
             if (consumed == 0 || fd < 0) {
-                TOK_CALL_ERROR(this, TOK_OTHER, REDIRECT_ERROR, this->buff);
+                this->call_error(TOK_INVALID_REDIRECT, this->buff);
             } else {
                 this->buff += consumed;
                 this->last_type = mode;
@@ -574,7 +588,7 @@ bool tokenizer_t::tok_next() {
                 // that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer
                 // error.
                 if (mode == TOK_PIPE && fd == 0) {
-                    TOK_CALL_ERROR(this, TOK_OTHER, PIPE_ERROR, error_location);
+                    this->call_error(TOK_INVALID_PIPE, error_location);
                 } else {
                     this->buff += consumed;
                     this->last_type = mode;
diff --git a/src/tokenizer.h b/src/tokenizer.h
index 9d755f7e5..c90d4c9fc 100644
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@@ -30,7 +30,8 @@ enum tokenizer_error {
     TOK_UNTERMINATED_SUBSHELL,
     TOK_UNTERMINATED_SLICE,
     TOK_UNTERMINATED_ESCAPE,
-    TOK_OTHER
+    TOK_INVALID_REDIRECT,
+    TOK_INVALID_PIPE
 };
 
 /// Flag telling the tokenizer to accept incomplete parameters, i.e. parameters with mismatching
@@ -101,8 +102,7 @@ class tokenizer_t {
     /// Whether to continue the previous line after the comment.
     bool continue_line_after_comment{false};
 
-    void call_error(enum tokenizer_error error_type, const wchar_t *where,
-                    const wchar_t *error_message);
+    void call_error(enum tokenizer_error error_type, const wchar_t *where);
     void read_string();
     bool tok_next();