Reduce copying in parse_util_detect_errors

Allow parse_util_detect_errors to accept an already-parsed ast. This
eliminates a copy of the source, which is helpful when executing large
scripts.
This commit is contained in:
ridiculousfish 2020-07-12 13:55:51 -07:00
parent dfeec433d8
commit 487de1e6c3
9 changed files with 144 additions and 134 deletions

View File

@ -287,8 +287,7 @@ int builtin_complete(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
if (condition && std::wcslen(condition)) {
const wcstring condition_string = condition;
parse_error_list_t errors;
if (parse_util_detect_errors(condition_string, &errors,
false /* do not accept incomplete */)) {
if (parse_util_detect_errors(condition_string, &errors)) {
streams.err.append_format(L"%ls: Condition '%ls' contained a syntax error", cmd,
condition);
for (const auto &error : errors) {

View File

@ -1001,117 +1001,119 @@ static parser_test_error_bits_t detect_argument_errors(const wcstring &src) {
static void test_parser() {
say(L"Testing parser");
auto parser = parser_t::principal_parser().shared();
auto detect_errors = [](const wcstring &s) {
return parse_util_detect_errors(s, nullptr, true /* accept incomplete */);
};
say(L"Testing block nesting");
if (!parse_util_detect_errors(L"if; end")) {
if (!detect_errors(L"if; end")) {
err(L"Incomplete if statement undetected");
}
if (!parse_util_detect_errors(L"if test; echo")) {
if (!detect_errors(L"if test; echo")) {
err(L"Missing end undetected");
}
if (!parse_util_detect_errors(L"if test; end; end")) {
if (!detect_errors(L"if test; end; end")) {
err(L"Unbalanced end undetected");
}
say(L"Testing detection of invalid use of builtin commands");
if (!parse_util_detect_errors(L"case foo")) {
if (!detect_errors(L"case foo")) {
err(L"'case' command outside of block context undetected");
}
if (!parse_util_detect_errors(L"switch ggg; if true; case foo;end;end")) {
if (!detect_errors(L"switch ggg; if true; case foo;end;end")) {
err(L"'case' command outside of switch block context undetected");
}
if (!parse_util_detect_errors(L"else")) {
if (!detect_errors(L"else")) {
err(L"'else' command outside of conditional block context undetected");
}
if (!parse_util_detect_errors(L"else if")) {
if (!detect_errors(L"else if")) {
err(L"'else if' command outside of conditional block context undetected");
}
if (!parse_util_detect_errors(L"if false; else if; end")) {
if (!detect_errors(L"if false; else if; end")) {
err(L"'else if' missing command undetected");
}
if (!parse_util_detect_errors(L"break")) {
if (!detect_errors(L"break")) {
err(L"'break' command outside of loop block context undetected");
}
if (parse_util_detect_errors(L"break --help")) {
if (detect_errors(L"break --help")) {
err(L"'break --help' incorrectly marked as error");
}
if (!parse_util_detect_errors(L"while false ; function foo ; break ; end ; end ")) {
if (!detect_errors(L"while false ; function foo ; break ; end ; end ")) {
err(L"'break' command inside function allowed to break from loop outside it");
}
if (!parse_util_detect_errors(L"exec ls|less") || !parse_util_detect_errors(L"echo|return")) {
if (!detect_errors(L"exec ls|less") || !detect_errors(L"echo|return")) {
err(L"Invalid pipe command undetected");
}
if (parse_util_detect_errors(L"for i in foo ; switch $i ; case blah ; break; end; end ")) {
if (detect_errors(L"for i in foo ; switch $i ; case blah ; break; end; end ")) {
err(L"'break' command inside switch falsely reported as error");
}
if (parse_util_detect_errors(L"or cat | cat") || parse_util_detect_errors(L"and cat | cat")) {
if (detect_errors(L"or cat | cat") || detect_errors(L"and cat | cat")) {
err(L"boolean command at beginning of pipeline falsely reported as error");
}
if (!parse_util_detect_errors(L"cat | and cat")) {
if (!detect_errors(L"cat | and cat")) {
err(L"'and' command in pipeline not reported as error");
}
if (!parse_util_detect_errors(L"cat | or cat")) {
if (!detect_errors(L"cat | or cat")) {
err(L"'or' command in pipeline not reported as error");
}
if (!parse_util_detect_errors(L"cat | exec") || !parse_util_detect_errors(L"exec | cat")) {
if (!detect_errors(L"cat | exec") || !detect_errors(L"exec | cat")) {
err(L"'exec' command in pipeline not reported as error");
}
if (!parse_util_detect_errors(L"begin ; end arg")) {
if (!detect_errors(L"begin ; end arg")) {
err(L"argument to 'end' not reported as error");
}
if (!parse_util_detect_errors(L"switch foo ; end arg")) {
if (!detect_errors(L"switch foo ; end arg")) {
err(L"argument to 'end' not reported as error");
}
if (!parse_util_detect_errors(L"if true; else if false ; end arg")) {
if (!detect_errors(L"if true; else if false ; end arg")) {
err(L"argument to 'end' not reported as error");
}
if (!parse_util_detect_errors(L"if true; else ; end arg")) {
if (!detect_errors(L"if true; else ; end arg")) {
err(L"argument to 'end' not reported as error");
}
if (parse_util_detect_errors(L"begin ; end 2> /dev/null")) {
if (detect_errors(L"begin ; end 2> /dev/null")) {
err(L"redirection after 'end' wrongly reported as error");
}
if (parse_util_detect_errors(L"true | ") != PARSER_TEST_INCOMPLETE) {
if (detect_errors(L"true | ") != PARSER_TEST_INCOMPLETE) {
err(L"unterminated pipe not reported properly");
}
if (parse_util_detect_errors(L"echo (\nfoo\n bar") != PARSER_TEST_INCOMPLETE) {
if (detect_errors(L"echo (\nfoo\n bar") != PARSER_TEST_INCOMPLETE) {
err(L"unterminated multiline subshell not reported properly");
}
if (parse_util_detect_errors(L"begin ; true ; end | ") != PARSER_TEST_INCOMPLETE) {
if (detect_errors(L"begin ; true ; end | ") != PARSER_TEST_INCOMPLETE) {
err(L"unterminated pipe not reported properly");
}
if (parse_util_detect_errors(L" | true ") != PARSER_TEST_ERROR) {
if (detect_errors(L" | true ") != PARSER_TEST_ERROR) {
err(L"leading pipe not reported properly");
}
if (parse_util_detect_errors(L"true | # comment") != PARSER_TEST_INCOMPLETE) {
if (detect_errors(L"true | # comment") != PARSER_TEST_INCOMPLETE) {
err(L"comment after pipe not reported as incomplete");
}
if (parse_util_detect_errors(L"true | # comment \n false ")) {
if (detect_errors(L"true | # comment \n false ")) {
err(L"comment and newline after pipe wrongly reported as error");
}
if (parse_util_detect_errors(L"true | ; false ") != PARSER_TEST_ERROR) {
if (detect_errors(L"true | ; false ") != PARSER_TEST_ERROR) {
err(L"semicolon after pipe not detected as error");
}
@ -1149,59 +1151,59 @@ static void test_parser() {
err(L"Bad escape in nested command substitution not reported as error");
}
if (!parse_util_detect_errors(L"false & ; and cat")) {
if (!detect_errors(L"false & ; and cat")) {
err(L"'and' command after background not reported as error");
}
if (!parse_util_detect_errors(L"true & ; or cat")) {
if (!detect_errors(L"true & ; or cat")) {
err(L"'or' command after background not reported as error");
}
if (parse_util_detect_errors(L"true & ; not cat")) {
if (detect_errors(L"true & ; not cat")) {
err(L"'not' command after background falsely reported as error");
}
if (!parse_util_detect_errors(L"if true & ; end")) {
if (!detect_errors(L"if true & ; end")) {
err(L"backgrounded 'if' conditional not reported as error");
}
if (!parse_util_detect_errors(L"if false; else if true & ; end")) {
if (!detect_errors(L"if false; else if true & ; end")) {
err(L"backgrounded 'else if' conditional not reported as error");
}
if (!parse_util_detect_errors(L"while true & ; end")) {
if (!detect_errors(L"while true & ; end")) {
err(L"backgrounded 'while' conditional not reported as error");
}
if (!parse_util_detect_errors(L"true | || false")) {
if (!detect_errors(L"true | || false")) {
err(L"bogus boolean statement error not detected on line %d", __LINE__);
}
if (!parse_util_detect_errors(L"|| false")) {
if (!detect_errors(L"|| false")) {
err(L"bogus boolean statement error not detected on line %d", __LINE__);
}
if (!parse_util_detect_errors(L"&& false")) {
if (!detect_errors(L"&& false")) {
err(L"bogus boolean statement error not detected on line %d", __LINE__);
}
if (!parse_util_detect_errors(L"true ; && false")) {
if (!detect_errors(L"true ; && false")) {
err(L"bogus boolean statement error not detected on line %d", __LINE__);
}
if (!parse_util_detect_errors(L"true ; || false")) {
if (!detect_errors(L"true ; || false")) {
err(L"bogus boolean statement error not detected on line %d", __LINE__);
}
if (!parse_util_detect_errors(L"true || && false")) {
if (!detect_errors(L"true || && false")) {
err(L"bogus boolean statement error not detected on line %d", __LINE__);
}
if (!parse_util_detect_errors(L"true && || false")) {
if (!detect_errors(L"true && || false")) {
err(L"bogus boolean statement error not detected on line %d", __LINE__);
}
if (!parse_util_detect_errors(L"true && && false")) {
if (!detect_errors(L"true && && false")) {
err(L"bogus boolean statement error not detected on line %d", __LINE__);
}
@ -1209,6 +1211,7 @@ static void test_parser() {
// Ensure that we don't crash on infinite self recursion and mutual recursion. These must use
// the principal parser because we cannot yet execute jobs on other parsers.
auto parser = parser_t::principal_parser().shared();
say(L"Testing recursion detection");
parser->eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t());
#if 0
@ -4708,7 +4711,7 @@ static void test_error_messages() {
parse_error_list_t errors;
for (const auto &test : error_tests) {
errors.clear();
parse_util_detect_errors(test.src, &errors, false /* allow_incomplete */);
parse_util_detect_errors(test.src, &errors);
do_test(!errors.empty());
if (!errors.empty()) {
do_test1(string_matches_format(errors.at(0).text, test.error_text_format), test.src);

View File

@ -1100,7 +1100,7 @@ static bool should_import_bash_history_line(const wcstring &line) {
// In doing this test do not allow incomplete strings. Hence the "false" argument.
parse_error_list_t errors;
parse_util_detect_errors(line, &errors, false);
parse_util_detect_errors(line, &errors);
if (!errors.empty()) return false;
// The following are Very naive tests!

View File

@ -202,12 +202,12 @@ wcstring parse_token_t::user_presentable_description() const {
return token_type_user_presentable_description(type, keyword);
}
parsed_source_t::parsed_source_t(wcstring s, ast::ast_t &&ast)
parsed_source_t::parsed_source_t(wcstring &&s, ast::ast_t &&ast)
: src(std::move(s)), ast(std::move(ast)) {}
parsed_source_t::~parsed_source_t() = default;
parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags,
parsed_source_ref_t parse_source(wcstring &&src, parse_tree_flags_t flags,
parse_error_list_t *errors) {
using namespace ast;
ast_t ast = ast_t::parse(src, flags, errors);

View File

@ -64,7 +64,7 @@ struct parsed_source_t {
wcstring src;
ast::ast_t ast;
parsed_source_t(wcstring s, ast::ast_t &&ast);
parsed_source_t(wcstring &&s, ast::ast_t &&ast);
~parsed_source_t();
parsed_source_t(const parsed_source_t &) = delete;
@ -76,7 +76,7 @@ struct parsed_source_t {
/// Return a shared pointer to parsed_source_t, or null on failure.
/// If parse_flag_continue_after_error is not set, this will return null on any error.
using parsed_source_ref_t = std::shared_ptr<const parsed_source_t>;
parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags,
parsed_source_ref_t parse_source(wcstring &&src, parse_tree_flags_t flags,
parse_error_list_t *errors);
/// Error message for improper use of the exec builtin.

View File

@ -722,6 +722,7 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
/// Append a syntax error to the given error list.
static bool append_syntax_error(parse_error_list_t *errors, size_t source_location,
const wchar_t *fmt, ...) {
if (!errors) return true;
parse_error_t error;
error.source_start = source_location;
error.source_length = 0;
@ -732,7 +733,7 @@ static bool append_syntax_error(parse_error_list_t *errors, size_t source_locati
error.text = vformat_string(fmt, va);
va_end(va);
errors->push_back(error);
errors->push_back(std::move(error));
return true;
}
@ -944,8 +945,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen
working_copy.replace(cmd_sub_start, cmd_sub_len, wcstring(1, INTERNAL_SEPARATOR));
parse_error_list_t subst_errors;
err |= parse_util_detect_errors(subst, &subst_errors,
false /* do not accept incomplete */);
err |= parse_util_detect_errors(subst, &subst_errors);
// Our command substitution produced error offsets relative to its source. Tweak the
// offsets of the errors in the command substitution to account for both its offset
@ -1203,12 +1203,9 @@ static bool detect_errors_in_block_redirection_list(
return false;
}
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
parse_error_list_t *out_errors,
bool allow_incomplete,
parsed_source_ref_t *out_pstree) {
parse_error_list_t parse_errors;
parser_test_error_bits_t parse_util_detect_errors(const ast::ast_t &ast, const wcstring &buff_src,
parse_error_list_t *out_errors) {
using namespace ast;
parser_test_error_bits_t res = 0;
// Whether we encountered a parse error.
@ -1222,6 +1219,65 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
// detecting job_continuations that have source for pipes but not the statement.
bool has_unclosed_pipe = false;
// Expand all commands.
// Verify 'or' and 'and' not used inside pipelines.
// Verify pipes via parser_is_pipe_forbidden.
// Verify return only within a function.
// Verify no variable expansions.
wcstring storage;
for (const node_t &node : ast) {
if (const job_continuation_t *jc = node.try_as<job_continuation_t>()) {
// Somewhat clumsy way of checking for a statement without source in a pipeline.
// See if our pipe has source but our statement does not.
if (!jc->pipe.unsourced && !jc->statement.try_source_range().has_value()) {
has_unclosed_pipe = true;
}
} else if (const argument_t *arg = node.try_as<argument_t>()) {
const wcstring &arg_src = arg->source(buff_src, &storage);
res |= parse_util_detect_errors_in_argument(*arg, arg_src, out_errors);
} else if (const ast::job_t *job = node.try_as<ast::job_t>()) {
// Disallow background in the following cases:
//
// foo & ; and bar
// foo & ; or bar
// if foo & ; end
// while foo & ; end
// If it's not a background job, nothing to do.
if (job->bg) {
errored |= detect_errors_in_backgrounded_job(*job, out_errors);
}
} else if (const ast::decorated_statement_t *stmt =
node.try_as<decorated_statement_t>()) {
errored |=
detect_errors_in_decorated_statement(buff_src, *stmt, &storage, out_errors);
} else if (const auto *block = node.try_as<block_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (block->end.unsourced) has_unclosed_block = true;
errored |=
detect_errors_in_block_redirection_list(block->args_or_redirs, out_errors);
} else if (const auto *ifs = node.try_as<if_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (ifs->end.unsourced) has_unclosed_block = true;
errored |= detect_errors_in_block_redirection_list(ifs->args_or_redirs, out_errors);
} else if (const auto *switchs = node.try_as<switch_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (switchs->end.unsourced) has_unclosed_block = true;
errored |=
detect_errors_in_block_redirection_list(switchs->args_or_redirs, out_errors);
}
}
if (errored) res |= PARSER_TEST_ERROR;
if (has_unclosed_block || has_unclosed_pipe) res |= PARSER_TEST_INCOMPLETE;
return res;
}
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
parse_error_list_t *out_errors,
bool allow_incomplete) {
// Whether there's an unclosed quote or subshell, and therefore unfinished. This is only set if
// allow_incomplete is set.
bool has_unclosed_quote_or_subshell = false;
@ -1231,6 +1287,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
// Parse the input string into an ast. Some errors are detected here.
using namespace ast;
parse_error_list_t parse_errors;
auto ast = ast_t::parse(buff_src, parse_flags, &parse_errors);
if (allow_incomplete) {
// Issue #1238: If the only error was unterminated quote, then consider this to have parsed
@ -1253,75 +1310,14 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
return PARSER_TEST_INCOMPLETE;
}
errored = !parse_errors.empty();
// Expand all commands.
// Verify 'or' and 'and' not used inside pipelines.
// Verify pipes via parser_is_pipe_forbidden.
// Verify return only within a function.
// Verify no variable expansions.
wcstring storage;
if (!errored) {
for (const node_t &node : ast) {
if (const job_continuation_t *jc = node.try_as<job_continuation_t>()) {
// Somewhat clumsy way of checking for a statement without source in a pipeline.
// See if our pipe has source but our statement does not.
if (!jc->pipe.unsourced && !jc->statement.try_source_range().has_value()) {
has_unclosed_pipe = true;
}
} else if (const argument_t *arg = node.try_as<argument_t>()) {
const wcstring &arg_src = arg->source(buff_src, &storage);
res |= parse_util_detect_errors_in_argument(*arg, arg_src, &parse_errors);
} else if (const ast::job_t *job = node.try_as<ast::job_t>()) {
// Disallow background in the following cases:
//
// foo & ; and bar
// foo & ; or bar
// if foo & ; end
// while foo & ; end
// If it's not a background job, nothing to do.
if (job->bg) {
errored |= detect_errors_in_backgrounded_job(*job, &parse_errors);
}
} else if (const ast::decorated_statement_t *stmt =
node.try_as<decorated_statement_t>()) {
errored |=
detect_errors_in_decorated_statement(buff_src, *stmt, &storage, &parse_errors);
} else if (const auto *block = node.try_as<block_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (block->end.unsourced) has_unclosed_block = true;
errored |=
detect_errors_in_block_redirection_list(block->args_or_redirs, &parse_errors);
} else if (const auto *ifs = node.try_as<if_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (ifs->end.unsourced) has_unclosed_block = true;
errored |=
detect_errors_in_block_redirection_list(ifs->args_or_redirs, &parse_errors);
} else if (const auto *switchs = node.try_as<switch_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (switchs->end.unsourced) has_unclosed_block = true;
errored |=
detect_errors_in_block_redirection_list(switchs->args_or_redirs, &parse_errors);
}
}
// Early parse error, stop here.
if (!parse_errors.empty()) {
if (out_errors) vec_append(*out_errors, std::move(parse_errors));
return PARSER_TEST_ERROR;
}
if (errored) res |= PARSER_TEST_ERROR;
if (has_unclosed_block || has_unclosed_quote_or_subshell || has_unclosed_pipe)
res |= PARSER_TEST_INCOMPLETE;
if (out_errors != nullptr) {
*out_errors = std::move(parse_errors);
}
// \return the ast to our caller if requested.
if (out_pstree != nullptr) {
*out_pstree = std::make_shared<parsed_source_t>(buff_src, std::move(ast));
}
return res;
// Defer to the tree-walking version.
return parse_util_detect_errors(ast, buff_src, out_errors);
}
maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_list_src,

View File

@ -133,8 +133,12 @@ std::vector<int> parse_util_compute_indents(const wcstring &src);
/// error. If out_pstree is not NULL, the resulting tree is returned by reference.
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
parse_error_list_t *out_errors = nullptr,
bool allow_incomplete = true,
parsed_source_ref_t *out_pstree = nullptr);
bool allow_incomplete = false);
/// Like parse_util_detect_errors but accepts an already-parsed ast.
/// The top of the ast is assumed to be a job list.
parser_test_error_bits_t parse_util_detect_errors(const ast::ast_t &ast, const wcstring &buff_src,
parse_error_list_t *out_errors);
/// Detect errors in the specified string when parsed as an argument list. Returns the text of an
/// error, or none if no error occurred.

View File

@ -631,7 +631,7 @@ eval_res_t parser_t::eval(const wcstring &cmd, const io_chain_t &io,
const job_group_ref_t &job_group, enum block_type_t block_type) {
// Parse the source into a tree, if we can.
parse_error_list_t error_list;
if (parsed_source_ref_t ps = parse_source(cmd, parse_flag_none, &error_list)) {
if (parsed_source_ref_t ps = parse_source(wcstring{cmd}, parse_flag_none, &error_list)) {
return this->eval(ps, io, job_group, block_type);
} else {
// Get a backtrace. This includes the message.

View File

@ -3887,10 +3887,18 @@ static int read_ni(parser_t &parser, int fd, const io_chain_t &io) {
str.erase(0, 1);
}
// Parse into an ast and detect errors.
parse_error_list_t errors;
parsed_source_ref_t pstree;
if (!parse_util_detect_errors(str, &errors, false /* do not accept incomplete */, &pstree)) {
parser.eval(pstree, io);
auto ast = ast::ast_t::parse(str, parse_flag_none, &errors);
bool errored = ast.errored();
if (!errored) {
errored = parse_util_detect_errors(ast, str, &errors);
}
if (!errored) {
// Construct a parsed source ref.
// Be careful to transfer ownership, this could be a very large string.
parsed_source_ref_t ps = std::make_shared<parsed_source_t>(std::move(str), std::move(ast));
parser.eval(ps, io);
return 0;
} else {
wcstring sb;