diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp index 0f877431e..a284b346d 100644 --- a/src/parse_tree.cpp +++ b/src/parse_tree.cpp @@ -374,8 +374,8 @@ class parse_ll_t { void parse_error_unexpected_token(const wchar_t *expected, parse_token_t token); void parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *format, ...); - void parse_error_at_location(size_t location, parse_error_code_t code, const wchar_t *format, - ...); + void parse_error_at_location(size_t source_start, size_t source_length, size_t error_location, + parse_error_code_t code, const wchar_t *format, ...); void parse_error_failed_production(struct parse_stack_element_t &elem, parse_token_t token); void parse_error_unbalancing_token(parse_token_t token); @@ -608,7 +608,8 @@ void parse_ll_t::parse_error(parse_token_t token, parse_error_code_t code, const } } -void parse_ll_t::parse_error_at_location(size_t source_location, parse_error_code_t code, +void parse_ll_t::parse_error_at_location(size_t source_start, size_t source_length, + size_t error_location, parse_error_code_t code, const wchar_t *fmt, ...) { this->fatal_errored = true; if (this->should_generate_error_messages) { @@ -621,8 +622,8 @@ void parse_ll_t::parse_error_at_location(size_t source_location, parse_error_cod err.code = code; va_end(va); - err.source_start = source_location; - err.source_length = 0; + err.source_start = source_start; + err.source_length = source_length; this->errors.push_back(err); } } @@ -733,8 +734,10 @@ void parse_ll_t::report_tokenizer_error(const tokenizer_t &tokenizer, const tok_ break; } } - this->parse_error_at_location(tok.offset + tok.error_offset, parse_error_code, L"%ls", - tokenizer.text_of(tok).c_str()); + + this->parse_error_at_location(tok.offset, tok.length, tok.offset + tok.error_offset, + parse_error_code, L"%ls", + error_message_for_code(tok.error).c_str()); } void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token) { @@ -811,8 +814,9 @@ bool parse_ll_t::report_error_for_unclosed_block() { } if (cursor->source_start != NODE_OFFSET_INVALID) { const wcstring node_desc = block_type_user_presentable_description(block_node->type); - this->parse_error_at_location(cursor->source_start, parse_error_generic, - L"Missing end to balance this %ls", node_desc.c_str()); + this->parse_error_at_location(cursor->source_start, 0, cursor->source_start, + parse_error_generic, L"Missing end to balance this %ls", + node_desc.c_str()); reported_error = true; } return reported_error; @@ -1098,8 +1102,6 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags, if (parse_flags & parse_flag_show_blank_lines) tok_options |= TOK_SHOW_BLANK_LINES; - if (errors == NULL) tok_options |= TOK_SQUASH_ERRORS; - tokenizer_t tok(str.c_str(), tok_options); // We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our diff --git a/src/parse_util.cpp b/src/parse_util.cpp index 9c8b681bc..24007a988 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -371,7 +371,7 @@ void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar const wcstring buffcpy = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin); - tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); + tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED); tok_t token; while (tok.next(&token)) { size_t tok_begin = token.offset; @@ -474,7 +474,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_ size_t prev_pos = 0; wchar_t last_quote = L'\0'; - tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); + tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED); tok_t token; while (tok.next(&token)) { if (token.offset > pos) break; diff --git a/src/reader.cpp b/src/reader.cpp index 04b26704d..7f99a1148 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -2316,7 +2316,7 @@ static wchar_t unescaped_quote(const wcstring &str, size_t pos) { /// Returns true if the last token is a comment. static bool text_ends_in_comment(const wcstring &text) { - tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS); + tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS); tok_t token; while (tok.next(&token)) { ; // pass diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index fdcf052c7..a292de542 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -34,6 +34,26 @@ /// Error string for when trying to pipe from fd 0. #define PIPE_ERROR _(L"Cannot use stdin (fd 0) as pipe output") +wcstring error_message_for_code(tokenizer_error err) { + switch (err) { + case TOK_UNTERMINATED_QUOTE: + return QUOTE_ERROR; + case TOK_UNTERMINATED_SUBSHELL: + return PARAN_ERROR; + case TOK_UNTERMINATED_SLICE: + return SQUARE_BRACKET_ERROR; + case TOK_UNTERMINATED_ESCAPE: + return UNTERMINATED_ESCAPE_ERROR; + case TOK_INVALID_REDIRECT: + return REDIRECT_ERROR; + case TOK_INVALID_PIPE: + return PIPE_ERROR; + default: + assert(0 && "Unknown error type"); + return {}; + } +} + /// Return an error token and mark that we no longer have a next token. tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *token_start, const wchar_t *error_loc) { @@ -49,30 +69,6 @@ tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *to result.offset = token_start - this->start; result.length = this->buff - token_start; result.error_offset = error_loc - token_start; - if (!this->squash_errors) { - switch (error_type) { - case TOK_UNTERMINATED_QUOTE: - result.error_text = QUOTE_ERROR; - break; - case TOK_UNTERMINATED_SUBSHELL: - result.error_text = PARAN_ERROR; - break; - case TOK_UNTERMINATED_SLICE: - result.error_text = SQUARE_BRACKET_ERROR; - break; - case TOK_UNTERMINATED_ESCAPE: - result.error_text = UNTERMINATED_ESCAPE_ERROR; - break; - case TOK_INVALID_REDIRECT: - result.error_text = REDIRECT_ERROR; - break; - case TOK_INVALID_PIPE: - result.error_text = PIPE_ERROR; - break; - default: - assert(0 && "Unknown error type"); - } - } return result; } @@ -81,7 +77,6 @@ tokenizer_t::tokenizer_t(const wchar_t *start, tok_flags_t flags) : buff(start), this->accept_unfinished = static_cast(flags & TOK_ACCEPT_UNFINISHED); this->show_comments = static_cast(flags & TOK_SHOW_COMMENTS); - this->squash_errors = static_cast(flags & TOK_SQUASH_ERRORS); this->show_blank_lines = static_cast(flags & TOK_SHOW_BLANK_LINES); } @@ -590,7 +585,7 @@ maybe_t tokenizer_t::tok_next() { } wcstring tok_first(const wcstring &str) { - tokenizer_t t(str.c_str(), TOK_SQUASH_ERRORS); + tokenizer_t t(str.c_str(), 0); tok_t token; if (t.next(&token) && token.type == TOK_STRING) { return t.text_of(token); diff --git a/src/tokenizer.h b/src/tokenizer.h index 72e5ab5a6..0c5226ead 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -46,13 +46,9 @@ enum class redirection_type_t { /// Flag telling the tokenizer not to remove comments. Useful for syntax highlighting. #define TOK_SHOW_COMMENTS 2 -/// Flag telling the tokenizer to not generate error messages, which we need to do when tokenizing -/// off of the main thread (since wgettext is not thread safe). -#define TOK_SQUASH_ERRORS 4 - /// Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon. This flag tells /// the tokenizer to return each of them as a separate END. -#define TOK_SHOW_BLANK_LINES 8 +#define TOK_SHOW_BLANK_LINES 4 typedef unsigned int tok_flags_t; @@ -70,11 +66,10 @@ struct tok_t { // If an error, this is the error code. enum tokenizer_error error { TOK_ERROR_NONE }; + // If an error, this is the offset of the error within the token. A value of 0 means it occurred // at 'offset'. size_t error_offset{size_t(-1)}; - // If there is an error, the text of the error; otherwise empty. - wcstring error_text{}; tok_t() = default; }; @@ -97,8 +92,6 @@ class tokenizer_t { bool show_comments{false}; /// Whether all blank lines are returned. bool show_blank_lines{false}; - /// Whether we are squashing errors. - bool squash_errors{false}; /// Whether to continue the previous line after the comment. bool continue_line_after_comment{false}; @@ -145,6 +138,9 @@ int fd_redirected_by_pipe(const wcstring &str); /// Helper function to return oflags (as in open(2)) for a redirection type. int oflags_for_redirection_type(redirection_type_t type); +/// Returns an error message for an error code. +wcstring error_message_for_code(tokenizer_error err); + enum move_word_style_t { move_word_style_punctuation, // stop at punctuation move_word_style_path_components, // stops at path components