Correct escaping behavior in new tokenizer code

2024-11-23 10:43:32 +08:00 · 2018-03-11 17:10:16 -05:00 · 2018-03-11 17:10:16 -05:00 · df89d71237
commit df89d71237
parent f508a1f274
1 changed file with 54 additions and 54 deletions
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -154,6 +154,10 @@ tok_t tokenizer_t::read_string() {
        tok_mode mode_begin = mode;
 #endif
        if (c == L'\0') {
            break;
        }
        // Make sure this character isn't being escaped before anything else
        if ((mode & tok_mode::char_escape) == tok_mode::char_escape) {
            mode &= ~(tok_mode::char_escape);
@@ -163,69 +167,65 @@ tok_t tokenizer_t::read_string() {
            // Early exit optimization in case the character is just a letter,
            // which has no special meaning to the tokenizer, i.e. the same mode continues.
        }
        // This check has to be after the char_escape check above
        else if (c == L'\0') {
            break;
        }
        // Now proceed with the evaluation of the token, first checking to see if the token
        // has been explicitly ignored (escaped).
        else if (c == L'\\') {
-                mode |= tok_mode::char_escape;
+            mode |= tok_mode::char_escape;
        }
        else if (c == L'(') {
            paran_offsets.push_back(this->buff - this->start);
            mode |= tok_mode::subshell;
        }
        else if (c == L')') {
            switch (paran_offsets.size()) {
                case 0:
                    return this->call_error(TOK_CLOSING_UNOPENED_SUBSHELL, buff_start, this->buff);
                case 1:
                    mode &= ~(tok_mode::subshell);
                default:
                    paran_offsets.pop_back();
            }
-            else if (c == L'(') {
+        }
-                paran_offsets.push_back(this->buff - this->start);
+        else if (c == L'[') {
-                mode |= tok_mode::subshell;
+            if (this->buff != buff_start) {
-            }
+                if ((mode & tok_mode::array_brackets) == tok_mode::array_brackets) {
-            else if (c == L')') {
+                    // Nested brackets should not overwrite the existing slice_offset
-                switch (paran_offsets.size()) {
+                    //mqudsi: TOK_ILLEGAL_SLICE is the right error here, but the shell
-                    case 0:
+                    //prints an error message with the caret pointing at token_start,
-                        return this->call_error(TOK_CLOSING_UNOPENED_SUBSHELL, buff_start, this->buff);
+                    //not err_loc, making the TOK_ILLEGAL_SLICE message misleading.
-                    case 1:
+                    // return call_error(TOK_ILLEGAL_SLICE, buff_start, this->buff);
-                        mode &= ~(tok_mode::subshell);
+                    return call_error(TOK_UNTERMINATED_SLICE, buff_start, this->buff);
                    default:
                        paran_offsets.pop_back();
                }
                slice_offset = this->buff - this->start;
                mode |= tok_mode::array_brackets;
            }
-            else if (c == L'[') {
+            else {
-                if (this->buff != buff_start) {
+                // This is actually allowed so the test operator `[` can be used as the head of a command
-                    if ((mode & tok_mode::array_brackets) == tok_mode::array_brackets) {
+            }
-                        // Nested brackets should not overwrite the existing slice_offset
+        }
-                        //mqudsi: TOK_ILLEGAL_SLICE is the right error here, but the shell
+        // Only exit bracket mode if we are in bracket mode.
-                        //prints an error message with the caret pointing at token_start,
+        // Reason: `]` can be a parameter, e.g. last parameter to `[` test alias.
-                        //not err_loc, making the TOK_ILLEGAL_SLICE message misleading.
+        // e.g. echo $argv[([ $x -eq $y ])] # must not end bracket mode on first bracket
-                        // return call_error(TOK_ILLEGAL_SLICE, buff_start, this->buff);
+        else if (c == L']' && ((mode & tok_mode::array_brackets) == tok_mode::array_brackets)) {
-                        return call_error(TOK_UNTERMINATED_SLICE, buff_start, this->buff);
+            mode &= ~(tok_mode::array_brackets);
-                    }
+        }
-                    slice_offset = this->buff - this->start;
+        else if (c == L'\'' || c == L'"') {
-                    mode |= tok_mode::array_brackets;
+            const wchar_t *end = quote_end(this->buff);
            if (end) {
                this->buff = end;
            } else {
                const wchar_t *error_loc = this->buff;
                this->buff += wcslen(this->buff);
                if ((!this->accept_unfinished)) {
                    return this->call_error(TOK_UNTERMINATED_QUOTE, buff_start, error_loc);
                }
                else {
                    // This is actually allowed so the test operator `[` can be used as the head of a command
                }
            }
            // Only exit bracket mode if we are in bracket mode.
            // Reason: `]` can be a parameter, e.g. last parameter to `[` test alias.
            // e.g. echo $argv[([ $x -eq $y ])] # must not end bracket mode on first bracket
            else if (c == L']' && ((mode & tok_mode::array_brackets) == tok_mode::array_brackets)) {
                mode &= ~(tok_mode::array_brackets);
            }
            else if (c == L'\'' || c == L'"') {
                const wchar_t *end = quote_end(this->buff);
                if (end) {
                    this->buff = end;
                } else {
                    const wchar_t *error_loc = this->buff;
                    this->buff += wcslen(this->buff);
                    if ((!this->accept_unfinished)) {
                        return this->call_error(TOK_UNTERMINATED_QUOTE, buff_start, error_loc);
                    }
                    break;
                }
            }
            else if (mode == tok_mode::regular_text && !tok_is_string_character(c, is_first)) {
                break;
            }
        }
        else if (mode == tok_mode::regular_text && !tok_is_string_character(c, is_first)) {
            break;
        }
 #if false
        if (mode != mode_begin) {
@@ -244,7 +244,7 @@ tok_t tokenizer_t::read_string() {
        tok_t error;
        if ((mode & tok_mode::char_escape) == tok_mode::char_escape) {
            error = this->call_error(TOK_UNTERMINATED_ESCAPE, buff_start,
-                    this->buff);
+                    this->buff - 1);
        }
        else if ((mode & tok_mode::array_brackets) == tok_mode::array_brackets) {
            error = this->call_error(TOK_UNTERMINATED_SLICE, buff_start,