mirror of
https://github.com/fish-shell/fish-shell.git
synced 2025-03-27 14:45:13 +08:00
Rationalize how the parser reports tokenizer errors
Remove the unnecessary TOK_SQUASH_ERRORS flag and correctly report errors generated from the tokenizer.
This commit is contained in:
parent
0950c35eb2
commit
c4d903ff98
@ -374,8 +374,8 @@ class parse_ll_t {
|
|||||||
|
|
||||||
void parse_error_unexpected_token(const wchar_t *expected, parse_token_t token);
|
void parse_error_unexpected_token(const wchar_t *expected, parse_token_t token);
|
||||||
void parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *format, ...);
|
void parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *format, ...);
|
||||||
void parse_error_at_location(size_t location, parse_error_code_t code, const wchar_t *format,
|
void parse_error_at_location(size_t source_start, size_t source_length, size_t error_location,
|
||||||
...);
|
parse_error_code_t code, const wchar_t *format, ...);
|
||||||
void parse_error_failed_production(struct parse_stack_element_t &elem, parse_token_t token);
|
void parse_error_failed_production(struct parse_stack_element_t &elem, parse_token_t token);
|
||||||
void parse_error_unbalancing_token(parse_token_t token);
|
void parse_error_unbalancing_token(parse_token_t token);
|
||||||
|
|
||||||
@ -608,7 +608,8 @@ void parse_ll_t::parse_error(parse_token_t token, parse_error_code_t code, const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_ll_t::parse_error_at_location(size_t source_location, parse_error_code_t code,
|
void parse_ll_t::parse_error_at_location(size_t source_start, size_t source_length,
|
||||||
|
size_t error_location, parse_error_code_t code,
|
||||||
const wchar_t *fmt, ...) {
|
const wchar_t *fmt, ...) {
|
||||||
this->fatal_errored = true;
|
this->fatal_errored = true;
|
||||||
if (this->should_generate_error_messages) {
|
if (this->should_generate_error_messages) {
|
||||||
@ -621,8 +622,8 @@ void parse_ll_t::parse_error_at_location(size_t source_location, parse_error_cod
|
|||||||
err.code = code;
|
err.code = code;
|
||||||
va_end(va);
|
va_end(va);
|
||||||
|
|
||||||
err.source_start = source_location;
|
err.source_start = source_start;
|
||||||
err.source_length = 0;
|
err.source_length = source_length;
|
||||||
this->errors.push_back(err);
|
this->errors.push_back(err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -733,8 +734,10 @@ void parse_ll_t::report_tokenizer_error(const tokenizer_t &tokenizer, const tok_
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this->parse_error_at_location(tok.offset + tok.error_offset, parse_error_code, L"%ls",
|
|
||||||
tokenizer.text_of(tok).c_str());
|
this->parse_error_at_location(tok.offset, tok.length, tok.offset + tok.error_offset,
|
||||||
|
parse_error_code, L"%ls",
|
||||||
|
error_message_for_code(tok.error).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token) {
|
void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token) {
|
||||||
@ -811,8 +814,9 @@ bool parse_ll_t::report_error_for_unclosed_block() {
|
|||||||
}
|
}
|
||||||
if (cursor->source_start != NODE_OFFSET_INVALID) {
|
if (cursor->source_start != NODE_OFFSET_INVALID) {
|
||||||
const wcstring node_desc = block_type_user_presentable_description(block_node->type);
|
const wcstring node_desc = block_type_user_presentable_description(block_node->type);
|
||||||
this->parse_error_at_location(cursor->source_start, parse_error_generic,
|
this->parse_error_at_location(cursor->source_start, 0, cursor->source_start,
|
||||||
L"Missing end to balance this %ls", node_desc.c_str());
|
parse_error_generic, L"Missing end to balance this %ls",
|
||||||
|
node_desc.c_str());
|
||||||
reported_error = true;
|
reported_error = true;
|
||||||
}
|
}
|
||||||
return reported_error;
|
return reported_error;
|
||||||
@ -1098,8 +1102,6 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
|
|||||||
|
|
||||||
if (parse_flags & parse_flag_show_blank_lines) tok_options |= TOK_SHOW_BLANK_LINES;
|
if (parse_flags & parse_flag_show_blank_lines) tok_options |= TOK_SHOW_BLANK_LINES;
|
||||||
|
|
||||||
if (errors == NULL) tok_options |= TOK_SQUASH_ERRORS;
|
|
||||||
|
|
||||||
tokenizer_t tok(str.c_str(), tok_options);
|
tokenizer_t tok(str.c_str(), tok_options);
|
||||||
|
|
||||||
// We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our
|
// We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our
|
||||||
|
@ -371,7 +371,7 @@ void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar
|
|||||||
|
|
||||||
const wcstring buffcpy = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin);
|
const wcstring buffcpy = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin);
|
||||||
|
|
||||||
tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED);
|
||||||
tok_t token;
|
tok_t token;
|
||||||
while (tok.next(&token)) {
|
while (tok.next(&token)) {
|
||||||
size_t tok_begin = token.offset;
|
size_t tok_begin = token.offset;
|
||||||
@ -474,7 +474,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_
|
|||||||
size_t prev_pos = 0;
|
size_t prev_pos = 0;
|
||||||
wchar_t last_quote = L'\0';
|
wchar_t last_quote = L'\0';
|
||||||
|
|
||||||
tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED);
|
||||||
tok_t token;
|
tok_t token;
|
||||||
while (tok.next(&token)) {
|
while (tok.next(&token)) {
|
||||||
if (token.offset > pos) break;
|
if (token.offset > pos) break;
|
||||||
|
@ -2316,7 +2316,7 @@ static wchar_t unescaped_quote(const wcstring &str, size_t pos) {
|
|||||||
|
|
||||||
/// Returns true if the last token is a comment.
|
/// Returns true if the last token is a comment.
|
||||||
static bool text_ends_in_comment(const wcstring &text) {
|
static bool text_ends_in_comment(const wcstring &text) {
|
||||||
tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
|
tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS);
|
||||||
tok_t token;
|
tok_t token;
|
||||||
while (tok.next(&token)) {
|
while (tok.next(&token)) {
|
||||||
; // pass
|
; // pass
|
||||||
|
@ -34,6 +34,26 @@
|
|||||||
/// Error string for when trying to pipe from fd 0.
|
/// Error string for when trying to pipe from fd 0.
|
||||||
#define PIPE_ERROR _(L"Cannot use stdin (fd 0) as pipe output")
|
#define PIPE_ERROR _(L"Cannot use stdin (fd 0) as pipe output")
|
||||||
|
|
||||||
|
/// Returns the error message corresponding to a tokenizer error code.
/// Each handled code maps to one of the *_ERROR message macros. Any other code trips the assert in
/// the default case; the `return {}` after it yields an empty string if the assert does not abort
/// (e.g. when assertions are compiled out).
wcstring error_message_for_code(tokenizer_error err) {
|
||||||
|
switch (err) {
|
||||||
|
case TOK_UNTERMINATED_QUOTE:
|
||||||
|
return QUOTE_ERROR;
|
||||||
|
case TOK_UNTERMINATED_SUBSHELL:
|
||||||
|
return PARAN_ERROR;
|
||||||
|
case TOK_UNTERMINATED_SLICE:
|
||||||
|
return SQUARE_BRACKET_ERROR;
|
||||||
|
case TOK_UNTERMINATED_ESCAPE:
|
||||||
|
return UNTERMINATED_ESCAPE_ERROR;
|
||||||
|
case TOK_INVALID_REDIRECT:
|
||||||
|
return REDIRECT_ERROR;
|
||||||
|
case TOK_INVALID_PIPE:
|
||||||
|
return PIPE_ERROR;
|
||||||
|
default:
|
||||||
|
assert(0 && "Unknown error type");
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Return an error token and mark that we no longer have a next token.
|
/// Return an error token and mark that we no longer have a next token.
|
||||||
tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *token_start,
|
tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *token_start,
|
||||||
const wchar_t *error_loc) {
|
const wchar_t *error_loc) {
|
||||||
@ -49,30 +69,6 @@ tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *to
|
|||||||
result.offset = token_start - this->start;
|
result.offset = token_start - this->start;
|
||||||
result.length = this->buff - token_start;
|
result.length = this->buff - token_start;
|
||||||
result.error_offset = error_loc - token_start;
|
result.error_offset = error_loc - token_start;
|
||||||
if (!this->squash_errors) {
|
|
||||||
switch (error_type) {
|
|
||||||
case TOK_UNTERMINATED_QUOTE:
|
|
||||||
result.error_text = QUOTE_ERROR;
|
|
||||||
break;
|
|
||||||
case TOK_UNTERMINATED_SUBSHELL:
|
|
||||||
result.error_text = PARAN_ERROR;
|
|
||||||
break;
|
|
||||||
case TOK_UNTERMINATED_SLICE:
|
|
||||||
result.error_text = SQUARE_BRACKET_ERROR;
|
|
||||||
break;
|
|
||||||
case TOK_UNTERMINATED_ESCAPE:
|
|
||||||
result.error_text = UNTERMINATED_ESCAPE_ERROR;
|
|
||||||
break;
|
|
||||||
case TOK_INVALID_REDIRECT:
|
|
||||||
result.error_text = REDIRECT_ERROR;
|
|
||||||
break;
|
|
||||||
case TOK_INVALID_PIPE:
|
|
||||||
result.error_text = PIPE_ERROR;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(0 && "Unknown error type");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -81,7 +77,6 @@ tokenizer_t::tokenizer_t(const wchar_t *start, tok_flags_t flags) : buff(start),
|
|||||||
|
|
||||||
this->accept_unfinished = static_cast<bool>(flags & TOK_ACCEPT_UNFINISHED);
|
this->accept_unfinished = static_cast<bool>(flags & TOK_ACCEPT_UNFINISHED);
|
||||||
this->show_comments = static_cast<bool>(flags & TOK_SHOW_COMMENTS);
|
this->show_comments = static_cast<bool>(flags & TOK_SHOW_COMMENTS);
|
||||||
this->squash_errors = static_cast<bool>(flags & TOK_SQUASH_ERRORS);
|
|
||||||
this->show_blank_lines = static_cast<bool>(flags & TOK_SHOW_BLANK_LINES);
|
this->show_blank_lines = static_cast<bool>(flags & TOK_SHOW_BLANK_LINES);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -590,7 +585,7 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
wcstring tok_first(const wcstring &str) {
|
wcstring tok_first(const wcstring &str) {
|
||||||
tokenizer_t t(str.c_str(), TOK_SQUASH_ERRORS);
|
tokenizer_t t(str.c_str(), 0);
|
||||||
tok_t token;
|
tok_t token;
|
||||||
if (t.next(&token) && token.type == TOK_STRING) {
|
if (t.next(&token) && token.type == TOK_STRING) {
|
||||||
return t.text_of(token);
|
return t.text_of(token);
|
||||||
|
@ -46,13 +46,9 @@ enum class redirection_type_t {
|
|||||||
/// Flag telling the tokenizer not to remove comments. Useful for syntax highlighting.
|
/// Flag telling the tokenizer not to remove comments. Useful for syntax highlighting.
|
||||||
#define TOK_SHOW_COMMENTS 2
|
#define TOK_SHOW_COMMENTS 2
|
||||||
|
|
||||||
/// Flag telling the tokenizer to not generate error messages, which we need to do when tokenizing
|
|
||||||
/// off of the main thread (since wgettext is not thread safe).
|
|
||||||
#define TOK_SQUASH_ERRORS 4
|
|
||||||
|
|
||||||
/// Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon. This flag tells
|
/// Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon. This flag tells
|
||||||
/// the tokenizer to return each of them as a separate END.
|
/// the tokenizer to return each of them as a separate END.
|
||||||
#define TOK_SHOW_BLANK_LINES 8
|
#define TOK_SHOW_BLANK_LINES 4
|
||||||
|
|
||||||
typedef unsigned int tok_flags_t;
|
typedef unsigned int tok_flags_t;
|
||||||
|
|
||||||
@ -70,11 +66,10 @@ struct tok_t {
|
|||||||
|
|
||||||
// If an error, this is the error code.
|
// If an error, this is the error code.
|
||||||
enum tokenizer_error error { TOK_ERROR_NONE };
|
enum tokenizer_error error { TOK_ERROR_NONE };
|
||||||
|
|
||||||
// If an error, this is the offset of the error within the token. A value of 0 means it occurred
|
// If an error, this is the offset of the error within the token. A value of 0 means it occurred
|
||||||
// at 'offset'.
|
// at 'offset'.
|
||||||
size_t error_offset{size_t(-1)};
|
size_t error_offset{size_t(-1)};
|
||||||
// If there is an error, the text of the error; otherwise empty.
|
|
||||||
wcstring error_text{};
|
|
||||||
|
|
||||||
tok_t() = default;
|
tok_t() = default;
|
||||||
};
|
};
|
||||||
@ -97,8 +92,6 @@ class tokenizer_t {
|
|||||||
bool show_comments{false};
|
bool show_comments{false};
|
||||||
/// Whether all blank lines are returned.
|
/// Whether all blank lines are returned.
|
||||||
bool show_blank_lines{false};
|
bool show_blank_lines{false};
|
||||||
/// Whether we are squashing errors.
|
|
||||||
bool squash_errors{false};
|
|
||||||
/// Whether to continue the previous line after the comment.
|
/// Whether to continue the previous line after the comment.
|
||||||
bool continue_line_after_comment{false};
|
bool continue_line_after_comment{false};
|
||||||
|
|
||||||
@ -145,6 +138,9 @@ int fd_redirected_by_pipe(const wcstring &str);
|
|||||||
/// Helper function to return oflags (as in open(2)) for a redirection type.
|
/// Helper function to return oflags (as in open(2)) for a redirection type.
|
||||||
int oflags_for_redirection_type(redirection_type_t type);
|
int oflags_for_redirection_type(redirection_type_t type);
|
||||||
|
|
||||||
|
/// Returns an error message for an error code.
|
||||||
|
wcstring error_message_for_code(tokenizer_error err);
|
||||||
|
|
||||||
enum move_word_style_t {
|
enum move_word_style_t {
|
||||||
move_word_style_punctuation, // stop at punctuation
|
move_word_style_punctuation, // stop at punctuation
|
||||||
move_word_style_path_components, // stops at path components
|
move_word_style_path_components, // stops at path components
|
||||||
|
Loading…
x
Reference in New Issue
Block a user