diff --git a/CMakeLists.txt b/CMakeLists.txt index 162ae60cb..eba9b553f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ SET(FISH_SRCS src/parse_execution.cpp src/parse_productions.cpp src/parse_tree.cpp src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp src/postfork.cpp src/proc.cpp src/reader.cpp src/sanity.cpp src/screen.cpp - src/signal.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp + src/signal.cpp src/tnode.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp src/wutil.cpp ) diff --git a/Makefile.in b/Makefile.in index 6ba7e14a3..d34d9f286 100644 --- a/Makefile.in +++ b/Makefile.in @@ -124,8 +124,8 @@ FISH_OBJS := obj/autoload.o obj/builtin.o obj/builtin_bg.o obj/builtin_bind.o ob obj/iothread.o obj/kill.o obj/output.o obj/pager.o obj/parse_execution.o \ obj/parse_productions.o obj/parse_tree.o obj/parse_util.o obj/parser.o \ obj/parser_keywords.o obj/path.o obj/postfork.o obj/proc.o obj/reader.o \ - obj/sanity.o obj/screen.o obj/signal.o obj/tokenizer.o obj/utf8.o obj/util.o \ - obj/wcstringutil.o obj/wgetopt.o obj/wildcard.o obj/wutil.o + obj/sanity.o obj/screen.o obj/signal.o obj/tokenizer.o obj/tnode.o obj/utf8.o \ + obj/util.o obj/wcstringutil.o obj/wgetopt.o obj/wildcard.o obj/wutil.o FISH_INDENT_OBJS := obj/fish_indent.o obj/print_help.o $(FISH_OBJS) diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 9c365ccbc..d0156b45c 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -67,6 +67,9 @@ /* End PBXAggregateTarget section */ /* Begin PBXBuildFile section */ + 4F2D55CF2013ECDD00822920 /* tnode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4F2D55CE2013ECDD00822920 /* tnode.cpp */; }; + 4F2D55D02013ECDD00822920 /* tnode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4F2D55CE2013ECDD00822920 /* tnode.cpp */; }; + 4F2D55D12013ED0100822920 /* tnode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
4F2D55CE2013ECDD00822920 /* tnode.cpp */; }; 63A2C0E91CC60F3B00973404 /* pcre2_find_bracket.c in Sources */ = {isa = PBXBuildFile; fileRef = 63A2C0E81CC5F9FB00973404 /* pcre2_find_bracket.c */; }; 9C7A55271DCD651F0049C25D /* fallback.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853E13B3ACEE0099B651 /* fallback.cpp */; }; 9C7A552F1DCD65820049C25D /* util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855E13B3ACEE0099B651 /* util.cpp */; }; @@ -690,6 +693,8 @@ /* Begin PBXFileReference section */ 4E142D731B56B5D7008783C8 /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = config.h; path = ../osx/config.h; sourceTree = ""; }; + 4F2D55CD2013ECDD00822920 /* tnode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tnode.h; sourceTree = ""; }; + 4F2D55CE2013ECDD00822920 /* tnode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tnode.cpp; sourceTree = ""; }; 63A2C0E81CC5F9FB00973404 /* pcre2_find_bracket.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pcre2_find_bracket.c; sourceTree = ""; }; 9C7A55721DCD71330049C25D /* fish_key_reader */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_key_reader; sourceTree = BUILT_PRODUCTS_DIR; }; 9C7A557C1DCD717C0049C25D /* fish_key_reader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fish_key_reader.cpp; sourceTree = ""; }; @@ -703,6 +708,7 @@ D025C02815D1FEA100B9DB63 /* functions */ = {isa = PBXFileReference; lastKnownFileType = folder; name = functions; path = share/functions; sourceTree = ""; }; D025C02915D1FEA100B9DB63 /* tools */ = {isa = PBXFileReference; lastKnownFileType = folder; name = tools; path = share/tools; sourceTree = ""; }; D02960E51FBD726100CA3985 /* builtin_wait.cpp */ = {isa = PBXFileReference; 
fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_wait.cpp; sourceTree = ""; }; + D0301C1D2002B90500B1F463 /* parse_grammar.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parse_grammar.h; sourceTree = ""; }; D031890915E36D9800D9CC39 /* base */ = {isa = PBXFileReference; lastKnownFileType = text; path = base; sourceTree = BUILT_PRODUCTS_DIR; }; D03238891849D1980032CF2C /* pager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pager.cpp; sourceTree = ""; }; D032388A1849D1980032CF2C /* pager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pager.h; sourceTree = ""; }; @@ -1232,6 +1238,7 @@ D03238891849D1980032CF2C /* pager.cpp */, D0A0851B13B3ACEE0099B651 /* parse_util.h */, D0A0855213B3ACEE0099B651 /* parse_util.cpp */, + D0301C1D2002B90500B1F463 /* parse_grammar.h */, D0A0851C13B3ACEE0099B651 /* parser_keywords.h */, D0A0855313B3ACEE0099B651 /* parser_keywords.cpp */, D0A0851D13B3ACEE0099B651 /* parser.h */, @@ -1254,6 +1261,8 @@ D0A0855C13B3ACEE0099B651 /* signal.cpp */, D0A0852513B3ACEE0099B651 /* tokenizer.h */, D0A0855D13B3ACEE0099B651 /* tokenizer.cpp */, + 4F2D55CD2013ECDD00822920 /* tnode.h */, + 4F2D55CE2013ECDD00822920 /* tnode.cpp */, D0C9733A18DE5451002D7C81 /* utf8.h */, D0C9733718DE5449002D7C81 /* utf8.cpp */, D0A0852613B3ACEE0099B651 /* util.h */, @@ -1753,6 +1762,7 @@ 9C7A55491DCD71330049C25D /* postfork.cpp in Sources */, 9C7A554A1DCD71330049C25D /* screen.cpp in Sources */, 9C7A554B1DCD71330049C25D /* signal.cpp in Sources */, + 4F2D55D12013ED0100822920 /* tnode.cpp in Sources */, 9C7A554C1DCD71330049C25D /* utf8.cpp in Sources */, 9C7A554E1DCD71330049C25D /* function.cpp in Sources */, 9C7A554F1DCD71330049C25D /* complete.cpp in Sources */, @@ -1972,6 +1982,7 @@ D05F59B41F041AE4003EE978 /* builtin_contains.cpp in Sources */, D030FC081A4A38F300F7ADA0 /* pager.cpp in Sources */, D030FC091A4A38F300F7ADA0 /* 
parse_util.cpp in Sources */, + 4F2D55D02013ECDD00822920 /* tnode.cpp in Sources */, D0D02AD9159864A6008E62BD /* parser_keywords.cpp in Sources */, D02960E71FBD726200CA3985 /* builtin_wait.cpp in Sources */, D05F59A51F041AE4003EE978 /* builtin_fg.cpp in Sources */, @@ -2055,6 +2066,7 @@ D0D02A6E15983838008E62BD /* kill.cpp in Sources */, D0D02A6F1598383E008E62BD /* parser.cpp in Sources */, D05F59771F041AE4003EE978 /* builtin_string.cpp in Sources */, + 4F2D55CF2013ECDD00822920 /* tnode.cpp in Sources */, D05F597A1F041AE4003EE978 /* builtin_status.cpp in Sources */, D0D02A8F15983D8F008E62BD /* parser_keywords.cpp in Sources */, D0D02A7015983842008E62BD /* proc.cpp in Sources */, diff --git a/src/complete.cpp b/src/complete.cpp index 8e7fa63d7..fbb4e4799 100644 --- a/src/complete.cpp +++ b/src/complete.cpp @@ -34,11 +34,11 @@ #include "function.h" #include "iothread.h" #include "parse_constants.h" -#include "parse_tree.h" #include "parse_util.h" #include "parser.h" #include "path.h" #include "proc.h" +#include "tnode.h" #include "util.h" #include "wildcard.h" #include "wutil.h" // IWYU pragma: keep @@ -159,19 +159,19 @@ class completion_entry_t { /// Set of all completion entries. 
namespace std { - template<> - struct hash { - size_t operator()(const completion_entry_t &c) const { - std::hash hasher; - return hasher((wcstring) c.cmd); - } - }; - template <> - struct equal_to { - bool operator()(const completion_entry_t &c1, const completion_entry_t &c2) const { - return c1.cmd == c2.cmd; - } - }; +template <> +struct hash { + size_t operator()(const completion_entry_t &c) const { + std::hash hasher; + return hasher((wcstring)c.cmd); + } +}; +template <> +struct equal_to { + bool operator()(const completion_entry_t &c1, const completion_entry_t &c2) const { + return c1.cmd == c2.cmd; + } +}; } typedef std::unordered_set completion_entry_set_t; static completion_entry_set_t completion_set; @@ -1281,10 +1281,9 @@ void complete(const wcstring &cmd_with_subcmds, std::vector *out_c if (!done) { parse_node_tree_t tree; - parse_tree_from_string(cmd, - parse_flag_continue_after_error | - parse_flag_accept_incomplete_tokens | - parse_flag_include_comments, + parse_tree_from_string(cmd, parse_flag_continue_after_error | + parse_flag_accept_incomplete_tokens | + parse_flag_include_comments, &tree, NULL); // Find the plain statement to operate on. The cursor may be past it (#1261), so backtrack @@ -1294,10 +1293,10 @@ void complete(const wcstring &cmd_with_subcmds, std::vector *out_c while (position_in_statement > 0 && cmd.at(position_in_statement - 1) == L' ') { position_in_statement--; } - const parse_node_t *plain_statement = tree.find_node_matching_source_location( - symbol_plain_statement, position_in_statement, NULL); - - if (plain_statement == NULL) { + auto plain_statement = + tnode_t::find_node_matching_source_location( + &tree, position_in_statement, nullptr); + if (!plain_statement) { // Not part of a plain statement. This could be e.g. a for loop header, case expression, // etc. Do generic file completions (issue #1309). If we had to backtrack, it means // there was whitespace; don't do an autosuggestion in that case. 
Also don't do it if we @@ -1327,18 +1326,17 @@ void complete(const wcstring &cmd_with_subcmds, std::vector *out_c } completer.complete_param_expand(current_token, do_file); } else { - assert(plain_statement->has_source() && - plain_statement->type == symbol_plain_statement); + assert(plain_statement && plain_statement.has_source()); // Get the command node. - const parse_node_t *cmd_node = - tree.get_child(*plain_statement, 0, parse_token_type_string); + tnode_t cmd_node = plain_statement.child<0>(); + assert(cmd_node && cmd_node.has_source() && "Expected command node to be valid"); // Get the actual command string. - if (cmd_node) current_command = cmd_node->get_source(cmd); + current_command = cmd_node.get_source(cmd); // Check the decoration. - switch (tree.decoration_for_plain_statement(*plain_statement)) { + switch (get_decoration(plain_statement)) { case parse_statement_decoration_none: { use_command = true; use_function = true; @@ -1363,21 +1361,20 @@ void complete(const wcstring &cmd_with_subcmds, std::vector *out_c } } - if (cmd_node && cmd_node->location_in_or_at_end_of_source_range(pos)) { + if (cmd_node.location_in_or_at_end_of_source_range(pos)) { // Complete command filename. completer.complete_cmd(current_token, use_function, use_builtin, use_command, use_implicit_cd); } else { // Get all the arguments. - const parse_node_tree_t::parse_node_list_t all_arguments = - tree.find_nodes(*plain_statement, symbol_argument); + auto all_arguments = plain_statement.descendants(); // See whether we are in an argument. We may also be in a redirection, or nothing at // all. 
size_t matching_arg_index = -1; for (size_t i = 0; i < all_arguments.size(); i++) { - const parse_node_t *node = all_arguments.at(i); - if (node->location_in_or_at_end_of_source_range(position_in_statement)) { + tnode_t arg = all_arguments.at(i); + if (arg.location_in_or_at_end_of_source_range(position_in_statement)) { matching_arg_index = i; break; } @@ -1387,14 +1384,14 @@ void complete(const wcstring &cmd_with_subcmds, std::vector *out_c wcstring current_argument, previous_argument; if (matching_arg_index != (size_t)(-1)) { const wcstring matching_arg = - all_arguments.at(matching_arg_index)->get_source(cmd); + all_arguments.at(matching_arg_index).get_source(cmd); // If the cursor is in whitespace, then the "current" argument is empty and the // previous argument is the matching one. But if the cursor was in or at the end // of the argument, then the current argument is the matching one, and the // previous argument is the one before it. bool cursor_in_whitespace = - !plain_statement->location_in_or_at_end_of_source_range(pos); + !plain_statement.location_in_or_at_end_of_source_range(pos); if (cursor_in_whitespace) { current_argument = L""; previous_argument = matching_arg; @@ -1402,13 +1399,13 @@ void complete(const wcstring &cmd_with_subcmds, std::vector *out_c current_argument = matching_arg; if (matching_arg_index > 0) { previous_argument = - all_arguments.at(matching_arg_index - 1)->get_source(cmd); + all_arguments.at(matching_arg_index - 1).get_source(cmd); } } // Check to see if we have a preceding double-dash. for (size_t i = 0; i < matching_arg_index; i++) { - if (all_arguments.at(i)->get_source(cmd) == L"--") { + if (all_arguments.at(i).get_source(cmd) == L"--") { had_ddash = true; break; } @@ -1418,9 +1415,10 @@ void complete(const wcstring &cmd_with_subcmds, std::vector *out_c // If we are not in an argument, we may be in a redirection. 
bool in_redirection = false; if (matching_arg_index == (size_t)(-1)) { - const parse_node_t *redirection = tree.find_node_matching_source_location( - symbol_redirection, position_in_statement, plain_statement); - in_redirection = (redirection != NULL); + if (tnode_t::find_node_matching_source_location( + &tree, position_in_statement, plain_statement)) { + in_redirection = true; + } } bool do_file = false, handle_as_special_cd = false; @@ -1452,8 +1450,9 @@ void complete(const wcstring &cmd_with_subcmds, std::vector *out_c assert(wrap_chain.at(i) == current_command_unescape); } else if (!(flags & COMPLETION_REQUEST_AUTOSUGGESTION)) { wcstring faux_cmdline = cmd; - faux_cmdline.replace(cmd_node->source_start, - cmd_node->source_length, wrap_chain.at(i)); + faux_cmdline.replace(cmd_node.source_range()->start, + cmd_node.source_range()->length, + wrap_chain.at(i)); transient_cmd = make_unique( faux_cmdline); } diff --git a/src/fish_indent.cpp b/src/fish_indent.cpp index bc33a9e72..801b835ca 100644 --- a/src/fish_indent.cpp +++ b/src/fish_indent.cpp @@ -38,8 +38,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include "highlight.h" #include "output.h" #include "parse_constants.h" -#include "parse_tree.h" #include "print_help.h" +#include "tnode.h" #include "wutil.h" // IWYU pragma: keep #define SPACES_PER_INDENT 4 @@ -132,12 +132,11 @@ static void prettify_node_recursive(const wcstring &source, const parse_node_tre if (node.has_comments()) // handle comments, which come before the text { - const parse_node_tree_t::parse_node_list_t comment_nodes = - (tree.comment_nodes_for_node(node)); - for (size_t i = 0; i < comment_nodes.size(); i++) { - const parse_node_t &comment_node = *comment_nodes.at(i); + auto comment_nodes = tree.comment_nodes_for_node(node); + for (const auto &comment : comment_nodes) { append_whitespace(node_indent, do_indent, *has_new_line, out_result); - out_result->append(source, comment_node.source_start, 
comment_node.source_length); + auto source_range = comment.source_range(); + out_result->append(source, source_range->start, source_range->length); } } diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index b965f6f27..808e07ba7 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -63,6 +63,7 @@ #include "reader.h" #include "screen.h" #include "signal.h" +#include "tnode.h" #include "tokenizer.h" #include "utf8.h" #include "util.h" @@ -601,9 +602,9 @@ static parser_test_error_bits_t detect_argument_errors(const wcstring &src) { } assert(!tree.empty()); //!OCLINT(multiple unary operator) - const parse_node_t *first_arg = tree.next_node_in_node_list(tree.at(0), symbol_argument, NULL); - assert(first_arg != NULL); - return parse_util_detect_errors_in_argument(*first_arg, first_arg->get_source(src)); + tnode_t arg_list{&tree, &tree.at(0)}; + auto first_arg = arg_list.next_in_list(); + return parse_util_detect_errors_in_argument(first_arg, first_arg.get_source(src)); } /// Test the parser. 
@@ -2314,8 +2315,8 @@ static void test_completion_insertions() { TEST_1_COMPLETION(L"'foo^", L"bar", COMPLETE_REPLACES_TOKEN, false, L"bar ^"); } -static void perform_one_autosuggestion_cd_test(const wcstring &command, - const wcstring &expected, long line) { +static void perform_one_autosuggestion_cd_test(const wcstring &command, const wcstring &expected, + long line) { std::vector comps; complete(command, &comps, COMPLETION_REQUEST_AUTOSUGGESTION); @@ -2350,8 +2351,8 @@ static void perform_one_autosuggestion_cd_test(const wcstring &command, } } -static void perform_one_completion_cd_test(const wcstring &command, - const wcstring &expected, long line) { +static void perform_one_completion_cd_test(const wcstring &command, const wcstring &expected, + long line) { std::vector comps; complete(command, &comps, COMPLETION_REQUEST_DEFAULT); @@ -2375,10 +2376,10 @@ static void perform_one_completion_cd_test(const wcstring &command, const completion_t &suggestion = comps.at(0); if (suggestion.completion != expected) { - fwprintf( - stderr, - L"line %ld: complete() for cd tab completion returned the wrong expected string for command %ls\n", - line, command.c_str()); + fwprintf(stderr, + L"line %ld: complete() for cd tab completion returned the wrong expected " + L"string for command %ls\n", + line, command.c_str()); fwprintf(stderr, L" actual: %ls\n", suggestion.completion.c_str()); fwprintf(stderr, L"expected: %ls\n", expected.c_str()); do_test_from(suggestion.completion == expected, line); @@ -2418,7 +2419,6 @@ static void test_autosuggest_suggest_special() { } const wcstring wd = L"test/autosuggest_test"; - const env_vars_snapshot_t &vars = env_vars_snapshot_t::current(); perform_one_autosuggestion_cd_test(L"cd test/autosuggest_test/0", L"foobar/", __LINE__); perform_one_autosuggestion_cd_test(L"cd \"test/autosuggest_test/0", L"foobar/", __LINE__); @@ -2660,9 +2660,9 @@ static void test_universal_callbacks() { uvars2.sync(callbacks); // Change uvars1. 
- uvars1.set(L"alpha", {L"2"}, false); // changes value - uvars1.set(L"beta", {L"1"}, true); // changes export - uvars1.remove(L"delta"); // erases value + uvars1.set(L"alpha", {L"2"}, false); // changes value + uvars1.set(L"beta", {L"1"}, true); // changes export + uvars1.remove(L"delta"); // erases value uvars1.set(L"epsilon", {L"1"}, false); // changes nothing uvars1.sync(callbacks); @@ -3394,31 +3394,47 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o } // Get the statement. Should only have one. - const parse_node_tree_t::parse_node_list_t stmt_nodes = - tree.find_nodes(tree.at(0), symbol_plain_statement); - if (stmt_nodes.size() != 1) { - say(L"Unexpected number of statements (%lu) found in '%ls'", stmt_nodes.size(), - src.c_str()); + tnode_t job_list{&tree, &tree.at(0)}; + auto stmts = job_list.descendants(); + if (stmts.size() != 1) { + say(L"Unexpected number of statements (%lu) found in '%ls'", stmts.size(), src.c_str()); return false; } - const parse_node_t &stmt = *stmt_nodes.at(0); + tnode_t stmt = stmts.at(0); - // Return its decoration. - *out_deco = tree.decoration_for_plain_statement(stmt); - - // Return its command. - tree.command_for_plain_statement(stmt, src, out_cmd); + // Return its decoration and command. + *out_deco = get_decoration(stmt); + *out_cmd = *command_for_plain_statement(stmt, src); // Return arguments separated by spaces. - const parse_node_tree_t::parse_node_list_t arg_nodes = tree.find_nodes(stmt, symbol_argument); - for (size_t i = 0; i < arg_nodes.size(); i++) { - if (i > 0) out_joined_args->push_back(L' '); - out_joined_args->append(arg_nodes.at(i)->get_source(src)); + bool first = true; + for (auto arg_node : stmt.descendants()) { + if (!first) out_joined_args->push_back(L' '); + out_joined_args->append(arg_node.get_source(src)); + first = false; } return true; } +// Verify that 'function -h' and 'function --help' are plain statements but 'function --foo' is +// not (issue #1240). 
+template +static void check_function_help(const wchar_t *src) { + parse_node_tree_t tree; + if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL)) { + err(L"Failed to parse '%ls'", src); + } + + tnode_t node{&tree, &tree.at(0)}; + auto node_list = node.descendants(); + if (node_list.size() == 0) { + err(L"Failed to find node of type '%ls'", token_type_description(Type::token)); + } else if (node_list.size() > 1) { + err(L"Found too many nodes of type '%ls'", token_type_description(Type::token)); + } +} + // Test the LL2 (two token lookahead) nature of the parser by exercising the special builtin and // command handling. In particular, 'command foo' should be a decorated statement 'foo' but 'command // -help' should be an undecorated statement 'command' with argument '--help', and NOT attempt to @@ -3463,31 +3479,10 @@ static void test_new_parser_ll2(void) { tests[i].src.c_str(), (int)tests[i].deco, (int)deco, (long)__LINE__); } - // Verify that 'function -h' and 'function --help' are plain statements but 'function --foo' is - // not (issue #1240). 
- const struct { - wcstring src; - parse_token_type_t type; - } tests2[] = { - {L"function -h", symbol_plain_statement}, - {L"function --help", symbol_plain_statement}, - {L"function --foo ; end", symbol_function_header}, - {L"function foo ; end", symbol_function_header}, - }; - for (size_t i = 0; i < sizeof tests2 / sizeof *tests2; i++) { - parse_node_tree_t tree; - if (!parse_tree_from_string(tests2[i].src, parse_flag_none, &tree, NULL)) { - err(L"Failed to parse '%ls'", tests2[i].src.c_str()); - } - - const parse_node_tree_t::parse_node_list_t node_list = - tree.find_nodes(tree.at(0), tests2[i].type); - if (node_list.size() == 0) { - err(L"Failed to find node of type '%ls'", token_type_description(tests2[i].type)); - } else if (node_list.size() > 1) { - err(L"Found too many nodes of type '%ls'", token_type_description(tests2[i].type)); - } - } + check_function_help(L"function -h"); + check_function_help(L"function --help"); + check_function_help(L"function --foo; end"); + check_function_help(L"function foo; end"); } static void test_new_parser_ad_hoc() { @@ -3504,9 +3499,8 @@ static void test_new_parser_ad_hoc() { // Expect three case_item_lists: one for each case, and a terminal one. The bug was that we'd // try to run a command 'case'. - const parse_node_t &root = parse_tree.at(0); - const parse_node_tree_t::parse_node_list_t node_list = - parse_tree.find_nodes(root, symbol_case_item_list); + tnode_t root{&parse_tree, &parse_tree.at(0)}; + auto node_list = root.descendants(); if (node_list.size() != 3) { err(L"Expected 3 case item nodes, found %lu", node_list.size()); } @@ -4289,7 +4283,7 @@ static void test_illegal_command_exit_code(void) { void test_maybe() { say(L"Testing maybe_t"); - do_test(! bool(maybe_t())); + do_test(!bool(maybe_t())); maybe_t m(3); do_test(m.has_value()); do_test(m.value() == 3); @@ -4308,7 +4302,7 @@ void test_maybe() { do_test(maybe_t() == none()); do_test(!maybe_t(none()).has_value()); m = none(); - do_test(! 
bool(m)); + do_test(!bool(m)); maybe_t m2("abc"); do_test(!m2.missing_or_empty()); diff --git a/src/highlight.cpp b/src/highlight.cpp index 0d0b08e0a..e419230ad 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -27,13 +27,15 @@ #include "history.h" #include "output.h" #include "parse_constants.h" -#include "parse_tree.h" #include "parse_util.h" #include "path.h" +#include "tnode.h" #include "tokenizer.h" #include "wildcard.h" #include "wutil.h" // IWYU pragma: keep +namespace g = grammar; + #define CURSOR_POSITION_INVALID ((size_t)(-1)) /// Number of elements in the highlight_var array. @@ -240,16 +242,14 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d // Given a plain statement node in a parse tree, get the command and return it, expanded // appropriately for commands. If we succeed, return true. -bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, - const parse_node_t &plain_statement, wcstring *out_cmd) { - assert(plain_statement.type == symbol_plain_statement); - +static bool plain_statement_get_expanded_command(const wcstring &src, + tnode_t stmt, + wcstring *out_cmd) { // Get the command. Try expanding it. If we cannot, it's an error. - wcstring cmd; - if (tree.command_for_plain_statement(plain_statement, src, &cmd) && - expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) { + maybe_t cmd = command_for_plain_statement(stmt, src); + if (cmd && expand_one(*cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) { // Success, return the expanded string by reference. - *out_cmd = std::move(cmd); + *out_cmd = std::move(*cmd); return true; } return false; @@ -313,9 +313,9 @@ static bool has_expand_reserved(const wcstring &str) { } // Parse a command line. Return by reference the last command, and the last argument to that command -// (as a copied node), if any. This is used by autosuggestions. +// (as a string), if any. 
This is used by autosuggestions. static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expanded_command, - parse_node_t *out_last_arg) { + wcstring *out_last_arg) { // Parse the buffer. parse_node_tree_t parse_tree; parse_tree_from_string(buff, @@ -323,15 +323,12 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand &parse_tree, NULL); // Find the last statement. - const parse_node_t *last_statement = - parse_tree.find_last_node_of_type(symbol_plain_statement, NULL); - if (last_statement != NULL && plain_statement_get_expanded_command( - buff, parse_tree, *last_statement, out_expanded_command)) { + auto last_statement = parse_tree.find_last_node(); + if (last_statement && + plain_statement_get_expanded_command(buff, last_statement, out_expanded_command)) { // Find the last argument. If we don't get one, return an invalid node. - const parse_node_t *last_arg = - parse_tree.find_last_node_of_type(symbol_argument, last_statement); - if (last_arg != NULL) { - *out_last_arg = *last_arg; + if (auto last_arg = parse_tree.find_last_node(last_statement)) { + *out_last_arg = last_arg.get_source(buff); } return true; } @@ -347,20 +344,18 @@ bool autosuggest_validate_from_history(const history_item_t &item, // Parse the string. wcstring parsed_command; - parse_node_t last_arg_node(token_type_invalid); - if (!autosuggest_parse_command(item.str(), &parsed_command, &last_arg_node)) return false; + wcstring cd_dir; + if (!autosuggest_parse_command(item.str(), &parsed_command, &cd_dir)) return false; - if (parsed_command == L"cd" && last_arg_node.type == symbol_argument && - last_arg_node.has_source()) { + if (parsed_command == L"cd" && !cd_dir.empty()) { // We can possibly handle this specially. 
- wcstring dir = last_arg_node.get_source(item.str()); - if (expand_one(dir, EXPAND_SKIP_CMDSUBST)) { + if (expand_one(cd_dir, EXPAND_SKIP_CMDSUBST)) { handled = true; bool is_help = - string_prefixes_string(dir, L"--help") || string_prefixes_string(dir, L"-h"); + string_prefixes_string(cd_dir, L"--help") || string_prefixes_string(cd_dir, L"-h"); if (!is_help) { wcstring path; - env_var_t dir_var(L"n/a", dir); + env_var_t dir_var(L"n/a", cd_dir); bool can_cd = path_get_cdpath(dir_var, &path, working_directory.c_str(), vars); if (can_cd && !paths_are_same_file(working_directory, path)) { suggestionOK = true; @@ -675,18 +670,21 @@ class highlighter_t { // The parse tree of the buff. parse_node_tree_t parse_tree; // Color an argument. - void color_argument(const parse_node_t &node); + void color_argument(tnode_t node); // Color a redirection. - void color_redirection(const parse_node_t &node); - // Color the arguments of the given node. - void color_arguments(const parse_node_t &list_node); + void color_redirection(tnode_t node); + // Color a list of arguments. If cmd_is_cd is true, then the arguments are for 'cd'; detect + // invalid directories. + void color_arguments(const std::vector> &args, bool cmd_is_cd = false); // Color the redirections of the given node. - void color_redirections(const parse_node_t &list_node); + void color_redirections(tnode_t list); // Color all the children of the command with the given type. void color_children(const parse_node_t &parent, parse_token_type_t type, highlight_spec_t color); // Colors the source range of a node with a given color. void color_node(const parse_node_t &node, highlight_spec_t color); + // return whether a plain statement is 'cd'. + bool is_cd(tnode_t stmt) const; public: // Constructor @@ -721,13 +719,14 @@ void highlighter_t::color_node(const parse_node_t &node, highlight_spec_t color) } // node does not necessarily have type symbol_argument here. 
-void highlighter_t::color_argument(const parse_node_t &node) { - if (!node.has_source()) return; +void highlighter_t::color_argument(tnode_t node) { + auto source_range = node.source_range(); + if (!source_range) return; const wcstring arg_str = node.get_source(this->buff); // Get an iterator to the colors associated with the argument. - const size_t arg_start = node.source_start; + const size_t arg_start = source_range->start; const color_array_t::iterator arg_colors = color_array.begin() + arg_start; // Color this argument without concern for command substitutions. @@ -798,70 +797,58 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node return result; } -// Color all of the arguments of the given command. -void highlighter_t::color_arguments(const parse_node_t &list_node) { - // Hack: determine whether the parent is the cd command, so we can show errors for - // non-directories. +bool highlighter_t::is_cd(tnode_t stmt) const { bool cmd_is_cd = false; - if (this->io_ok) { - const parse_node_t *parent = this->parse_tree.get_parent(list_node, symbol_plain_statement); - if (parent != NULL) { - wcstring cmd_str; - if (plain_statement_get_expanded_command(this->buff, this->parse_tree, *parent, - &cmd_str)) { - cmd_is_cd = (cmd_str == L"cd"); - } + if (this->io_ok && stmt.has_source()) { + wcstring cmd_str; + if (plain_statement_get_expanded_command(this->buff, stmt, &cmd_str)) { + cmd_is_cd = (cmd_str == L"cd"); } } + return cmd_is_cd; +} +// Color all of the arguments of the given node list, which should be argument_list or +// argument_or_redirection_list. +void highlighter_t::color_arguments(const std::vector> &args, bool cmd_is_cd) { // Find all the arguments of this list. 
- const parse_node_tree_t::parse_node_list_t nodes = - this->parse_tree.find_nodes(list_node, symbol_argument); - - for (size_t i = 0; i < nodes.size(); i++) { - const parse_node_t *child = nodes.at(i); - assert(child != NULL && child->type == symbol_argument); - this->color_argument(*child); + for (tnode_t arg : args) { + this->color_argument(arg.child<0>()); if (cmd_is_cd) { // Mark this as an error if it's not 'help' and not a valid cd path. - wcstring param = child->get_source(this->buff); + wcstring param = arg.get_source(this->buff); if (expand_one(param, EXPAND_SKIP_CMDSUBST)) { bool is_help = string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h"); if (!is_help && this->io_ok && !is_potential_cd_path(param, working_directory, PATH_EXPAND_TILDE)) { - this->color_node(*child, highlight_spec_error); + this->color_node(arg, highlight_spec_error); } } } } } -void highlighter_t::color_redirection(const parse_node_t &redirection_node) { - assert(redirection_node.type == symbol_redirection); +void highlighter_t::color_redirection(tnode_t redirection_node) { if (!redirection_node.has_source()) return; - const parse_node_t *redirection_primitive = - this->parse_tree.get_child(redirection_node, 0, parse_token_type_redirection); // like 2> - const parse_node_t *redirection_target = this->parse_tree.get_child( - redirection_node, 1, parse_token_type_string); // like &1 or file path + tnode_t redir_prim = redirection_node.child<0>(); // like 2> + tnode_t redir_target = redirection_node.child<1>(); // like &1 or file path - if (redirection_primitive != NULL) { + if (redir_prim) { wcstring target; const enum token_type redirect_type = - this->parse_tree.type_for_redirection(redirection_node, this->buff, NULL, &target); + redirection_type(redirection_node, this->buff, nullptr, &target); // We may get a TOK_NONE redirection type, e.g. if the redirection is invalid. auto hl = redirect_type == TOK_NONE ? 
highlight_spec_error : highlight_spec_redirection; - this->color_node(*redirection_primitive, hl); + this->color_node(redir_prim, hl); // Check if the argument contains a command substitution. If so, highlight it as a param // even though it's a command redirection, and don't try to do any other validation. if (parse_util_locate_cmdsubst(target.c_str(), NULL, NULL, true) != 0) { - if (redirection_target != NULL) { - this->color_argument(*redirection_target); - } + this->color_argument(redir_target); } else { // No command substitution, so we can highlight the target file or fd. For example, // disallow redirections into a non-existent directory. @@ -950,20 +937,18 @@ void highlighter_t::color_redirection(const parse_node_t &redirection_node) { } } - if (redirection_target != NULL) { + if (redir_target) { auto hl = target_is_valid ? highlight_spec_redirection : highlight_spec_error; - this->color_node(*redirection_target, hl); + this->color_node(redir_target, hl); } } } } /// Color all of the redirections of the given command. -void highlighter_t::color_redirections(const parse_node_t &list_node) { - const parse_node_tree_t::parse_node_list_t nodes = - this->parse_tree.find_nodes(list_node, symbol_redirection); - for (size_t i = 0; i < nodes.size(); i++) { - this->color_redirection(*nodes.at(i)); +void highlighter_t::color_redirections(tnode_t list) { + for (const auto &node : list.descendants()) { + this->color_redirection(node); } } @@ -1043,10 +1028,7 @@ const highlighter_t::color_array_t &highlighter_t::highlight() { #endif // Walk the node tree. - for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); - ++iter) { - const parse_node_t &node = *iter; - + for (const parse_node_t &node : parse_tree) { switch (node.type) { // Color direct string descendants, e.g. 'for' and 'in'. 
case symbol_while_header: @@ -1062,27 +1044,23 @@ const highlighter_t::color_array_t &highlighter_t::highlight() { break; } case symbol_switch_statement: { - const parse_node_t *literal_switch = - this->parse_tree.get_child(node, 0, parse_token_type_string); - const parse_node_t *switch_arg = - this->parse_tree.get_child(node, 1, symbol_argument); - this->color_node(*literal_switch, highlight_spec_command); - this->color_node(*switch_arg, highlight_spec_param); + tnode_t switchn(&parse_tree, &node); + auto literal_switch = switchn.child<0>(); + auto switch_arg = switchn.child<1>(); + this->color_node(literal_switch, highlight_spec_command); + this->color_node(switch_arg, highlight_spec_param); break; } case symbol_for_header: { + tnode_t fhead(&parse_tree, &node); // Color the 'for' and 'in' as commands. - const parse_node_t *literal_for_node = - this->parse_tree.get_child(node, 0, parse_token_type_string); - const parse_node_t *literal_in_node = - this->parse_tree.get_child(node, 2, parse_token_type_string); - this->color_node(*literal_for_node, highlight_spec_command); - this->color_node(*literal_in_node, highlight_spec_command); + auto literal_for = fhead.child<0>(); + auto literal_in = fhead.child<2>(); + this->color_node(literal_for, highlight_spec_command); + this->color_node(literal_in, highlight_spec_command); // Color the variable name as a parameter. - const parse_node_t *var_name_node = - this->parse_tree.get_child(node, 1, parse_token_type_string); - this->color_argument(*var_name_node); + this->color_argument(fhead.child<1>()); break; } case parse_token_type_pipe: @@ -1093,14 +1071,14 @@ const highlighter_t::color_array_t &highlighter_t::highlight() { break; } case symbol_plain_statement: { + tnode_t stmt(&parse_tree, &node); // Get the decoration from the parent. 
- enum parse_statement_decoration_t decoration = - parse_tree.decoration_for_plain_statement(node); + enum parse_statement_decoration_t decoration = get_decoration(stmt); // Color the command. - const parse_node_t *cmd_node = - parse_tree.get_child(node, 0, parse_token_type_string); - if (cmd_node == NULL || !cmd_node->has_source()) { + tnode_t cmd_node = stmt.child<0>(); + maybe_t cmd = cmd_node.get_source(buff); + if (!cmd) { break; // not much as we can do without a node that has source text } @@ -1110,25 +1088,31 @@ const highlighter_t::color_array_t &highlighter_t::highlight() { is_valid_cmd = true; } else { // Check to see if the command is valid. - wcstring cmd(buff, cmd_node->source_start, cmd_node->source_length); - // Try expanding it. If we cannot, it's an error. bool expanded = expand_one( - cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); - if (expanded && !has_expand_reserved(cmd)) { - is_valid_cmd = command_is_valid(cmd, decoration, working_directory, vars); + *cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); + if (expanded && !has_expand_reserved(*cmd)) { + is_valid_cmd = command_is_valid(*cmd, decoration, working_directory, vars); } } this->color_node(*cmd_node, is_valid_cmd ? highlight_spec_command : highlight_spec_error); break; } - case symbol_arguments_or_redirections_list: + // Only work on root lists, so that we don't re-color child lists. + case symbol_arguments_or_redirections_list: { + tnode_t list(&parse_tree, &node); + if (argument_list_is_root(list)) { + bool cmd_is_cd = is_cd(list.try_get_parent()); + this->color_arguments(list.descendants(), cmd_is_cd); + this->color_redirections(list); + } + break; + } case symbol_argument_list: { - // Only work on root lists, so that we don't re-color child lists. 
- if (parse_tree.argument_list_is_root(node)) { - this->color_arguments(node); - this->color_redirections(node); + tnode_t list(&parse_tree, &node); + if (argument_list_is_root(list)) { + this->color_arguments(list.descendants()); } break; } diff --git a/src/history.cpp b/src/history.cpp index 9dd70968c..8023a496a 100644 --- a/src/history.cpp +++ b/src/history.cpp @@ -36,12 +36,12 @@ #include "iothread.h" #include "lru.h" #include "parse_constants.h" -#include "parse_tree.h" #include "parse_util.h" #include "path.h" #include "reader.h" +#include "tnode.h" #include "wildcard.h" // IWYU pragma: keep -#include "wutil.h" // IWYU pragma: keep +#include "wutil.h" // IWYU pragma: keep // Our history format is intended to be valid YAML. Here it is: // @@ -1892,11 +1892,9 @@ void history_t::add_pending_with_file_detection(const wcstring &str) { bool impending_exit = false; parse_node_tree_t tree; parse_tree_from_string(str, parse_flag_none, &tree, NULL); - size_t count = tree.size(); path_list_t potential_paths; - for (size_t i = 0; i < count; i++) { - const parse_node_t &node = tree.at(i); + for (const parse_node_t &node : tree) { if (!node.has_source()) { continue; } @@ -1911,15 +1909,15 @@ void history_t::add_pending_with_file_detection(const wcstring &str) { // Hack hack hack - if the command is likely to trigger an exit, then don't do // background file detection, because we won't be able to write it to our history file // before we exit. 
- if (tree.decoration_for_plain_statement(node) == parse_statement_decoration_exec) { + if (get_decoration({&tree, &node}) == parse_statement_decoration_exec) { impending_exit = true; } - wcstring command; - tree.command_for_plain_statement(node, str, &command); - unescape_string_in_place(&command, UNESCAPE_DEFAULT); - if (command == L"exit" || command == L"reboot") { - impending_exit = true; + if (maybe_t command = command_for_plain_statement({&tree, &node}, str)) { + unescape_string_in_place(&*command, UNESCAPE_DEFAULT); + if (*command == L"exit" || *command == L"reboot") { + impending_exit = true; + } } } } diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp index c3c552816..620873cce 100644 --- a/src/parse_execution.cpp +++ b/src/parse_execution.cpp @@ -36,17 +36,19 @@ #include "maybe.h" #include "parse_constants.h" #include "parse_execution.h" -#include "parse_tree.h" #include "parse_util.h" #include "parser.h" #include "path.h" #include "proc.h" #include "reader.h" +#include "tnode.h" #include "tokenizer.h" #include "util.h" #include "wildcard.h" #include "wutil.h" +namespace g = grammar; + /// These are the specific statement types that support redirections. 
static bool specific_statement_type_is_redirectable_block(const parse_node_t &node) { return node.type == symbol_block_statement || node.type == symbol_if_statement || @@ -64,13 +66,9 @@ static wcstring profiling_cmd_name_for_redirectable_block(const parse_node_t &no const size_t src_start = node.source_start; size_t src_len = node.source_length; - const parse_node_tree_t::parse_node_list_t statement_terminator_nodes = - tree.find_nodes(node, parse_token_type_end, 1); - if (!statement_terminator_nodes.empty()) { - const parse_node_t *term = statement_terminator_nodes.at(0); - assert(term->source_start >= src_start); - src_len = term->source_start - src_start; - } + auto term = tree.find_child(node); + assert(term.has_source() && term.source_range()->start >= src_start); + src_len = term.source_range()->start - src_start; wcstring result = wcstring(src, src_start, src_len); result.append(L"..."); @@ -87,12 +85,6 @@ wcstring parse_execution_context_t::get_source(const parse_node_t &node) const { return node.get_source(pstree->src); } -const parse_node_t *parse_execution_context_t::get_child(const parse_node_t &parent, - node_offset_t which, - parse_token_type_t expected_type) const { - return this->tree().get_child(parent, which, expected_type); -} - node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) const { // Get the offset of a node via pointer arithmetic, very hackish. const parse_node_t *addr = &node; @@ -104,9 +96,8 @@ node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) co return offset; } -const parse_node_t *parse_execution_context_t::infinite_recursive_statement_in_job_list( - const parse_node_t &job_list, wcstring *out_func_name) const { - assert(job_list.type == symbol_job_list); +tnode_t parse_execution_context_t::infinite_recursive_statement_in_job_list( + tnode_t job_list, wcstring *out_func_name) const { // This is a bit fragile. 
It is a test to see if we are inside of function call, but not inside // a block in that function call. If, in the future, the rules for what block scopes are pushed // on function invocation changes, then this check will break. @@ -114,74 +105,58 @@ const parse_node_t *parse_execution_context_t::infinite_recursive_statement_in_j bool is_within_function_call = (current && parent && current->type() == TOP && parent->type() == FUNCTION_CALL); if (!is_within_function_call) { - return NULL; + return {}; } // Check to see which function call is forbidden. if (parser->forbidden_function.empty()) { - return NULL; + return {}; } const wcstring &forbidden_function_name = parser->forbidden_function.back(); // Get the first job in the job list. - const parse_node_t *first_job = tree().next_node_in_node_list(job_list, symbol_job, NULL); - if (first_job == NULL) { - return NULL; + auto first_job = job_list.next_in_list(); + if (!first_job) { + return {}; } // Here's the statement node we find that's infinite recursive. - const parse_node_t *infinite_recursive_statement = NULL; + tnode_t infinite_recursive_statement; - // Get the list of statements. - const parse_node_tree_t::parse_node_list_t statements = - tree().specific_statements_for_job(*first_job); - - // Find all the decorated statements. We are interested in statements with no decoration (i.e. - // not command, not builtin) whose command expands to the forbidden function. - for (size_t i = 0; i < statements.size(); i++) { - // We only care about decorated statements, not while statements, etc. - const parse_node_t &statement = *statements.at(i); - if (statement.type != symbol_decorated_statement) { - continue; - } - - const parse_node_t &plain_statement = tree().find_child(statement, symbol_plain_statement); - if (tree().decoration_for_plain_statement(plain_statement) != - parse_statement_decoration_none) { - // This statement has a decoration like 'builtin' or 'command', and therefore is not - // infinite recursion. 
In particular this is what enables 'wrapper functions'. - continue; - } - - // Ok, this is an undecorated plain statement. Get and expand its command. - wcstring cmd; - tree().command_for_plain_statement(plain_statement, pstree->src, &cmd); - - if (expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES, NULL) && - cmd == forbidden_function_name) { - // This is it. - infinite_recursive_statement = &statement; - if (out_func_name != NULL) { - *out_func_name = forbidden_function_name; + // Get the list of plain statements. + // Ignore statements with decorations like 'builtin' or 'command', since those + // are not infinite recursion. In particular that is what enables 'wrapper functions'. + tnode_t statement = first_job.child<0>(); + tnode_t continuation = first_job.child<1>(); + while (statement) { + tnode_t plain_statement = + statement.try_get_child() + .try_get_child(); + if (plain_statement) { + maybe_t cmd = command_for_plain_statement(plain_statement, pstree->src); + if (cmd && expand_one(*cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES, NULL) && + cmd == forbidden_function_name) { + // This is it. + infinite_recursive_statement = plain_statement; + if (out_func_name != NULL) { + *out_func_name = forbidden_function_name; + } + break; } - break; } + statement = continuation.next_in_list(); } - assert(infinite_recursive_statement == NULL || - infinite_recursive_statement->type == symbol_decorated_statement); return infinite_recursive_statement; } enum process_type_t parse_execution_context_t::process_type_for_command( - const parse_node_t &plain_statement, const wcstring &cmd) const { - assert(plain_statement.type == symbol_plain_statement); + tnode_t statement, const wcstring &cmd) const { enum process_type_t process_type = EXTERNAL; // Determine the process type, which depends on the statement decoration (command, builtin, // etc). 
- enum parse_statement_decoration_t decoration = - tree().decoration_for_plain_statement(plain_statement); + enum parse_statement_decoration_t decoration = get_decoration(statement); if (decoration == parse_statement_decoration_exec) { // Always exec. @@ -224,21 +199,17 @@ parse_execution_context_t::cancellation_reason(const block_t *block) const { } /// Return whether the job contains a single statement, of block type, with no redirections. -bool parse_execution_context_t::job_is_simple_block(const parse_node_t &job_node) const { - assert(job_node.type == symbol_job); - +bool parse_execution_context_t::job_is_simple_block(tnode_t job_node) const { // Must have one statement. - const parse_node_t &statement = *get_child(job_node, 0, symbol_statement); - const parse_node_t &specific_statement = *get_child(statement, 0); + tnode_t statement = job_node.child<0>(); + const parse_node_t &specific_statement = statement.get_child_node<0>(); if (!specific_statement_type_is_redirectable_block(specific_statement)) { // Not an appropriate block type. return false; } // Must be no pipes. - const parse_node_t &continuation = *get_child(job_node, 1, symbol_job_continuation); - if (continuation.child_count > 0) { - // Multiple statements in this job, so there's pipes involved. + if (job_node.child<1>().try_get_child()) { return false; } @@ -256,20 +227,18 @@ bool parse_execution_context_t::job_is_simple_block(const parse_node_t &job_node } parse_execution_result_t parse_execution_context_t::run_if_statement( - const parse_node_t &statement) { - assert(statement.type == symbol_if_statement); - + tnode_t statement) { // Push an if block. if_block_t *ib = parser->push_block(); - ib->node_offset = this->get_offset(statement); + ib->node_offset = this->get_offset(*statement); parse_execution_result_t result = parse_execution_success; // We have a sequence of if clauses, with a final else, resulting in a single job list that we // execute. 
- const parse_node_t *job_list_to_execute = NULL; - const parse_node_t *if_clause = get_child(statement, 0, symbol_if_clause); - const parse_node_t *else_clause = get_child(statement, 1, symbol_else_clause); + tnode_t job_list_to_execute; + tnode_t if_clause = statement.child<0>(); + tnode_t else_clause = statement.child<1>(); for (;;) { if (should_cancel_execution(ib)) { result = parse_execution_cancelled; @@ -277,10 +246,8 @@ parse_execution_result_t parse_execution_context_t::run_if_statement( } // An if condition has a job and a "tail" of andor jobs, e.g. "foo ; and bar; or baz". - assert(if_clause != NULL && else_clause != NULL); - const parse_node_t &condition_head = *get_child(*if_clause, 1, symbol_job); - const parse_node_t &condition_boolean_tail = - *get_child(*if_clause, 3, symbol_andor_job_list); + tnode_t condition_head = if_clause.child<1>(); + tnode_t condition_boolean_tail = if_clause.child<3>(); // Check the condition and the tail. We treat parse_execution_errored here as failure, in // accordance with historic behavior. @@ -293,32 +260,33 @@ parse_execution_result_t parse_execution_context_t::run_if_statement( if (take_branch) { // Condition succeeded. - job_list_to_execute = get_child(*if_clause, 4, symbol_job_list); + job_list_to_execute = if_clause.child<4>(); break; - } else if (else_clause->child_count == 0) { + } + auto else_cont = else_clause.try_get_child(); + if (!else_cont) { // 'if' condition failed, no else clause, return 0, we're done. - job_list_to_execute = NULL; proc_set_last_status(STATUS_CMD_OK); break; } else { // We have an 'else continuation' (either else-if or else). - const parse_node_t &else_cont = *get_child(*else_clause, 1, symbol_else_continuation); - const parse_node_t *maybe_if_clause = get_child(else_cont, 0); - if (maybe_if_clause && maybe_if_clause->type == symbol_if_clause) { + if (auto maybe_if_clause = else_cont.try_get_child()) { // it's an 'else if', go to the next one. 
if_clause = maybe_if_clause; - else_clause = get_child(else_cont, 1, symbol_else_clause); + else_clause = else_cont.try_get_child(); + assert(else_clause && "Expected to have an else clause"); } else { // It's the final 'else', we're done. - job_list_to_execute = get_child(else_cont, 1, symbol_job_list); + job_list_to_execute = else_cont.try_get_child(); + assert(job_list_to_execute && "Should have a job list"); break; } } } // Execute any job list we got. - if (job_list_to_execute != NULL) { - run_job_list(*job_list_to_execute, ib); + if (job_list_to_execute) { + run_job_list(job_list_to_execute, ib); } else { // No job list means no sucessful conditions, so return 0 (issue #1443). proc_set_last_status(STATUS_CMD_OK); @@ -337,10 +305,7 @@ parse_execution_result_t parse_execution_context_t::run_if_statement( } parse_execution_result_t parse_execution_context_t::run_begin_statement( - const parse_node_t &header, const parse_node_t &contents) { - assert(header.type == symbol_begin_header); - assert(contents.type == symbol_job_list); - + tnode_t header, tnode_t contents) { // Basic begin/end block. Push a scope block, run jobs, pop it scope_block_t *sb = parser->push_block(BEGIN); parse_execution_result_t ret = run_job_list(contents, sb); @@ -351,13 +316,12 @@ parse_execution_result_t parse_execution_context_t::run_begin_statement( // Define a function. parse_execution_result_t parse_execution_context_t::run_function_statement( - const parse_node_t &header, const parse_node_t &block_end_command) { - assert(header.type == symbol_function_header); - assert(block_end_command.type == symbol_end_command); - + tnode_t header, tnode_t block_end_command) { // Get arguments. 
- wcstring_list_t argument_list; - parse_execution_result_t result = this->determine_arguments(header, &argument_list, failglob); + wcstring_list_t arguments; + argument_node_list_t arg_nodes = header.descendants(); + parse_execution_result_t result = + this->expand_arguments_from_nodes(arg_nodes, &arguments, failglob); if (result != parse_execution_success) { return result; @@ -366,9 +330,10 @@ parse_execution_result_t parse_execution_context_t::run_function_statement( // The function definition extends from the end of the header to the function end. It's not // just the range of the contents because that loses comments - see issue #1710. assert(block_end_command.has_source()); - size_t contents_start = header.source_start + header.source_length; - size_t contents_end = - block_end_command.source_start; // 1 past the last character in the function definition + auto header_range = header.source_range(); + size_t contents_start = header_range->start + header_range->length; + size_t contents_end = block_end_command.source_range() + ->start; // 1 past the last character in the function definition assert(contents_end >= contents_start); // Swallow whitespace at both ends. 
@@ -384,8 +349,7 @@ parse_execution_result_t parse_execution_context_t::run_function_statement( wcstring(pstree->src, contents_start, contents_end - contents_start); int definition_line_offset = this->line_offset_of_character_at_offset(contents_start); io_streams_t streams(0); // no limit on the amount of output from builtin_function() - int err = - builtin_function(*parser, streams, argument_list, contents_str, definition_line_offset); + int err = builtin_function(*parser, streams, arguments, contents_str, definition_line_offset); proc_set_last_status(err); if (!streams.err.empty()) { @@ -397,42 +361,24 @@ parse_execution_result_t parse_execution_context_t::run_function_statement( } parse_execution_result_t parse_execution_context_t::run_block_statement( - const parse_node_t &statement) { - assert(statement.type == symbol_block_statement); - - const parse_node_t &block_header = - *get_child(statement, 0, symbol_block_header); // block header - const parse_node_t &header = - *get_child(block_header, 0); // specific header type (e.g. 
for loop) - const parse_node_t &contents = *get_child(statement, 1, symbol_job_list); // block contents + tnode_t statement) { + tnode_t bheader = statement.child<0>(); + tnode_t contents = statement.child<1>(); parse_execution_result_t ret = parse_execution_success; - switch (header.type) { - case symbol_for_header: { - ret = run_for_statement(header, contents); - break; - } - case symbol_while_header: { - ret = run_while_statement(header, contents); - break; - } - case symbol_function_header: { - const parse_node_t &function_end = *get_child( - statement, 2, symbol_end_command); // the 'end' associated with the block - ret = run_function_statement(header, function_end); - break; - } - case symbol_begin_header: { - ret = run_begin_statement(header, contents); - break; - } - default: { - debug(0, L"Unexpected block header: %ls\n", header.describe().c_str()); - PARSER_DIE(); - break; - } + if (auto header = bheader.try_get_child()) { + ret = run_for_statement(header, contents); + } else if (auto header = bheader.try_get_child()) { + ret = run_while_statement(header, contents); + } else if (auto header = bheader.try_get_child()) { + tnode_t func_end = statement.child<2>(); + ret = run_function_statement(header, func_end); + } else if (auto header = bheader.try_get_child()) { + ret = run_begin_statement(header, contents); + } else { + debug(0, L"Unexpected block header: %ls\n", bheader.node()->describe().c_str()); + PARSER_DIE(); } - return ret; } @@ -446,13 +392,10 @@ bool parse_execution_context_t::is_function_context() const { } parse_execution_result_t parse_execution_context_t::run_for_statement( - const parse_node_t &header, const parse_node_t &block_contents) { - assert(header.type == symbol_for_header); - assert(block_contents.type == symbol_job_list); - + tnode_t header, tnode_t block_contents) { // Get the variable name: `for var_name in ...`. We expand the variable name. It better result // in just one. 
- const parse_node_t &var_name_node = *get_child(header, 1, parse_token_type_string); + tnode_t var_name_node = header.child<1>(); wcstring for_var_name = get_source(var_name_node); if (!expand_one(for_var_name, 0, NULL)) { report_error(var_name_node, FAILED_EXPANSION_VARIABLE_NAME_ERR_MSG, for_var_name.c_str()); @@ -460,8 +403,9 @@ parse_execution_result_t parse_execution_context_t::run_for_statement( } // Get the contents to iterate over. - wcstring_list_t argument_sequence; - parse_execution_result_t ret = this->determine_arguments(header, &argument_sequence, nullglob); + wcstring_list_t arguments; + parse_execution_result_t ret = this->expand_arguments_from_nodes( + get_argument_nodes(header.child<3>()), &arguments, nullglob); if (ret != parse_execution_success) { return ret; } @@ -480,14 +424,12 @@ parse_execution_result_t parse_execution_context_t::run_for_statement( for_block_t *fb = parser->push_block(); // Now drive the for loop. - const size_t arg_count = argument_sequence.size(); - for (size_t i = 0; i < arg_count; i++) { + for (const wcstring &val : arguments) { if (should_cancel_execution(fb)) { ret = parse_execution_cancelled; break; } - const wcstring &val = argument_sequence.at(i); int retval = env_set_one(for_var_name, ENV_DEFAULT | ENV_USER, val); assert(retval == ENV_OK && "for loop variable should have been successfully set"); (void)retval; @@ -512,21 +454,19 @@ parse_execution_result_t parse_execution_context_t::run_for_statement( } parse_execution_result_t parse_execution_context_t::run_switch_statement( - const parse_node_t &statement) { - assert(statement.type == symbol_switch_statement); - + tnode_t statement) { parse_execution_result_t result = parse_execution_success; // Get the switch variable. 
- const parse_node_t &switch_value_node = *get_child(statement, 1, symbol_argument); - const wcstring switch_value = get_source(switch_value_node); + tnode_t switch_value_n = statement.child<1>(); + const wcstring switch_value = get_source(switch_value_n); // Expand it. We need to offset any errors by the position of the string. std::vector switch_values_expanded; parse_error_list_t errors; int expand_ret = expand_string(switch_value, &switch_values_expanded, EXPAND_NO_DESCRIPTIONS, &errors); - parse_error_offset_source_start(&errors, switch_value_node.source_start); + parse_error_offset_source_start(&errors, switch_value_n.source_range()->start); switch (expand_ret) { case EXPAND_ERROR: { @@ -534,7 +474,7 @@ parse_execution_result_t parse_execution_context_t::run_switch_statement( break; } case EXPAND_WILDCARD_NO_MATCH: { - result = report_unmatched_wildcard_error(switch_value_node); + result = report_unmatched_wildcard_error(switch_value_n); break; } case EXPAND_WILDCARD_MATCH: @@ -549,7 +489,7 @@ parse_execution_result_t parse_execution_context_t::run_switch_statement( if (result == parse_execution_success && switch_values_expanded.size() != 1) { result = - report_error(switch_value_node, _(L"switch: Expected exactly one argument, got %lu\n"), + report_error(switch_value_n, _(L"switch: Expected exactly one argument, got %lu\n"), switch_values_expanded.size()); } @@ -562,37 +502,23 @@ parse_execution_result_t parse_execution_context_t::run_switch_statement( switch_block_t *sb = parser->push_block(); // Expand case statements. - const parse_node_t *case_item_list = get_child(statement, 3, symbol_case_item_list); - - // Loop while we don't have a match but do have more of the list. 
- const parse_node_t *matching_case_item = NULL; - while (matching_case_item == NULL && case_item_list != NULL) { + tnode_t case_item_list = statement.child<3>(); + tnode_t matching_case_item{}; + while (auto case_item = case_item_list.next_in_list()) { if (should_cancel_execution(sb)) { result = parse_execution_cancelled; break; } - // Get the next item and the remainder of the list. - const parse_node_t *case_item = - tree().next_node_in_node_list(*case_item_list, symbol_case_item, &case_item_list); - if (case_item == NULL) { - // No more items. - break; - } - - // Pull out the argument list. - const parse_node_t &arg_list = *get_child(*case_item, 1, symbol_argument_list); - // Expand arguments. A case item list may have a wildcard that fails to expand to // anything. We also report case errors, but don't stop execution; i.e. a case item that // contains an unexpandable process will report and then fail to match. + auto arg_nodes = get_argument_nodes(case_item.child<1>()); wcstring_list_t case_args; parse_execution_result_t case_result = - this->determine_arguments(arg_list, &case_args, failglob); + this->expand_arguments_from_nodes(arg_nodes, &case_args, failglob); if (case_result == parse_execution_success) { - for (size_t i = 0; i < case_args.size(); i++) { - const wcstring &arg = case_args.at(i); - + for (const wcstring &arg : case_args) { // Unescape wildcards so they can be expanded again. wcstring unescaped_arg = parse_util_unescape_wildcards(arg); bool match = wildcard_match(switch_value_expanded, unescaped_arg); @@ -604,12 +530,14 @@ parse_execution_result_t parse_execution_context_t::run_switch_statement( } } } + if (matching_case_item) break; } - if (result == parse_execution_success && matching_case_item != NULL) { + if (matching_case_item) { // Success, evaluate the job list. 
- const parse_node_t *job_list = get_child(*matching_case_item, 3, symbol_job_list); - result = this->run_job_list(*job_list, sb); + assert(result == parse_execution_success && "Expected success"); + auto job_list = matching_case_item.child<3>(); + result = this->run_job_list(job_list, sb); } parser->pop_block(sb); @@ -617,10 +545,7 @@ parse_execution_result_t parse_execution_context_t::run_switch_statement( } parse_execution_result_t parse_execution_context_t::run_while_statement( - const parse_node_t &header, const parse_node_t &block_contents) { - assert(header.type == symbol_while_header); - assert(block_contents.type == symbol_job_list); - + tnode_t header, tnode_t contents) { // Push a while block. while_block_t *wb = parser->push_block(); wb->node_offset = this->get_offset(header); @@ -628,8 +553,8 @@ parse_execution_result_t parse_execution_context_t::run_while_statement( parse_execution_result_t ret = parse_execution_success; // The conditions of the while loop. - const parse_node_t &condition_head = *get_child(header, 1, symbol_job); - const parse_node_t &condition_boolean_tail = *get_child(header, 3, symbol_andor_job_list); + tnode_t condition_head = header.child<1>(); + tnode_t condition_boolean_tail = header.child<3>(); // Run while the condition is true. for (;;) { @@ -651,7 +576,7 @@ parse_execution_result_t parse_execution_context_t::run_while_statement( } // The block ought to go inside the loop (see issue #1212). - this->run_job_list(block_contents, wb); + this->run_job_list(contents, wb); if (this->cancellation_reason(wb) == execution_cancellation_loop_control) { // Handle break or continue. @@ -749,8 +674,7 @@ static wcstring reconstruct_orig_str(wcstring tokenized_str) { /// Handle the case of command not found. 
parse_execution_result_t parse_execution_context_t::handle_command_not_found( - const wcstring &cmd_str, const parse_node_t &statement_node, int err_code) { - assert(statement_node.type == symbol_plain_statement); + const wcstring &cmd_str, tnode_t statement, int err_code) { // We couldn't find the specified command. This is a non-fatal error. We want to set the exit // status to 127, which is the standard number used by other shells like bash and zsh. @@ -763,22 +687,20 @@ parse_execution_result_t parse_execution_context_t::handle_command_not_found( const wcstring name_str = wcstring(cmd, equals_ptr - cmd); // variable name, up to the = const wcstring val_str = wcstring(equals_ptr + 1); // variable value, past the = - const parse_node_tree_t::parse_node_list_t args = - tree().find_nodes(statement_node, symbol_argument, 1); - + auto args = statement.descendants(1); if (!args.empty()) { - const wcstring argument = get_source(*args.at(0)); + const wcstring argument = get_source(args.at(0)); wcstring ellipsis_str = wcstring(1, ellipsis_char); if (ellipsis_str == L"$") ellipsis_str = L"..."; // Looks like a command. - this->report_error(statement_node, ERROR_BAD_EQUALS_IN_COMMAND5, argument.c_str(), + this->report_error(statement, ERROR_BAD_EQUALS_IN_COMMAND5, argument.c_str(), name_str.c_str(), val_str.c_str(), argument.c_str(), ellipsis_str.c_str()); } else { wcstring assigned_val = reconstruct_orig_str(val_str); - this->report_error(statement_node, ERROR_BAD_COMMAND_ASSIGN_ERR_MSG, name_str.c_str(), + this->report_error(statement, ERROR_BAD_COMMAND_ASSIGN_ERR_MSG, name_str.c_str(), assigned_val.c_str()); } } else if (wcschr(cmd, L'$') || wcschr(cmd, VARIABLE_EXPAND_SINGLE) || @@ -787,17 +709,17 @@ parse_execution_result_t parse_execution_context_t::handle_command_not_found( _(L"Variables may not be used as commands. 
In fish, " L"please define a function or use 'eval %ls'."); wcstring eval_cmd = reconstruct_orig_str(cmd_str); - this->report_error(statement_node, msg, eval_cmd.c_str()); + this->report_error(statement, msg, eval_cmd.c_str()); } else if (err_code != ENOENT) { - this->report_error(statement_node, _(L"The file '%ls' is not executable by this user"), - cmd); + this->report_error(statement, _(L"The file '%ls' is not executable by this user"), cmd); } else { // Handle unrecognized commands with standard command not found handler that can make better // error messages. wcstring_list_t event_args; { + auto args = get_argument_nodes(statement.child<1>()); parse_execution_result_t arg_result = - this->determine_arguments(statement_node, &event_args, failglob); + this->expand_arguments_from_nodes(args, &event_args, failglob); if (arg_result != parse_execution_success) { return arg_result; @@ -809,7 +731,7 @@ parse_execution_result_t parse_execution_context_t::handle_command_not_found( event_fire_generic(L"fish_command_not_found", &event_args); // Here we want to report an error (so it shows a backtrace), but with no text. - this->report_error(statement_node, L""); + this->report_error(statement, L""); } // Set the last proc status appropriately. @@ -820,18 +742,15 @@ parse_execution_result_t parse_execution_context_t::handle_command_not_found( /// Creates a 'normal' (non-block) process. parse_execution_result_t parse_execution_context_t::populate_plain_process( - job_t *job, process_t *proc, const parse_node_t &statement) { + job_t *job, process_t *proc, tnode_t statement) { assert(job != NULL); assert(proc != NULL); - assert(statement.type == symbol_plain_statement); // We may decide that a command should be an implicit cd. bool use_implicit_cd = false; // Get the command. We expect to always get it here. 
- wcstring cmd; - bool got_cmd = tree().command_for_plain_statement(statement, pstree->src, &cmd); - assert(got_cmd); + wcstring cmd = *command_for_plain_statement(statement, pstree->src); // Expand it as a command. Return an error on failure. bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES, NULL); @@ -861,12 +780,10 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process( const int no_cmd_err_code = errno; // If the specified command does not exist, and is undecorated, try using an implicit cd. - if (!has_command && - tree().decoration_for_plain_statement(statement) == parse_statement_decoration_none) { + if (!has_command && get_decoration(statement) == parse_statement_decoration_none) { // Implicit cd requires an empty argument and redirection list. - const parse_node_t *args = - get_child(statement, 1, symbol_arguments_or_redirections_list); - if (args->child_count == 0) { + tnode_t args = statement.child<1>(); + if (!args.try_get_child()) { // Ok, no arguments or redirections; check to see if the first argument is a // directory. wcstring implicit_cd_path; @@ -894,15 +811,16 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process( } else { const globspec_t glob_behavior = (cmd == L"set" || cmd == L"count") ? nullglob : failglob; // Form the list of arguments. The command is the first argument. + argument_node_list_t arg_nodes = statement.descendants(); parse_execution_result_t arg_result = - this->determine_arguments(statement, &argument_list, glob_behavior); + this->expand_arguments_from_nodes(arg_nodes, &argument_list, glob_behavior); if (arg_result != parse_execution_success) { return arg_result; } argument_list.insert(argument_list.begin(), cmd); // The set of IO redirections that we construct for the process. 
- if (!this->determine_io_chain(statement, &process_io_chain)) { + if (!this->determine_io_chain(statement.child<1>(), &process_io_chain)) { return parse_execution_errored; } @@ -920,17 +838,14 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process( // Determine the list of arguments, expanding stuff. Reports any errors caused by expansion. If we // have a wildcard that could not be expanded, report the error and continue. -parse_execution_result_t parse_execution_context_t::determine_arguments( - const parse_node_t &parent, wcstring_list_t *out_arguments, globspec_t glob_behavior) { +parse_execution_result_t parse_execution_context_t::expand_arguments_from_nodes( + const argument_node_list_t &argument_nodes, wcstring_list_t *out_arguments, + globspec_t glob_behavior) { // Get all argument nodes underneath the statement. We guess we'll have that many arguments (but // may have more or fewer, if there are wildcards involved). - const parse_node_tree_t::parse_node_list_t argument_nodes = - tree().find_nodes(parent, symbol_argument); out_arguments->reserve(out_arguments->size() + argument_nodes.size()); std::vector arg_expanded; - for (size_t i = 0; i < argument_nodes.size(); i++) { - const parse_node_t &arg_node = *argument_nodes.at(i); - + for (const auto arg_node : argument_nodes) { // Expect all arguments to have source. 
assert(arg_node.has_source()); const wcstring arg_str = arg_node.get_source(pstree->src); @@ -939,7 +854,7 @@ parse_execution_result_t parse_execution_context_t::determine_arguments( parse_error_list_t errors; arg_expanded.clear(); int expand_ret = expand_string(arg_str, &arg_expanded, EXPAND_NO_DESCRIPTIONS, &errors); - parse_error_offset_source_start(&errors, arg_node.source_start); + parse_error_offset_source_start(&errors, arg_node.source_range()->start); switch (expand_ret) { case EXPAND_ERROR: { this->report_errors(errors); @@ -974,26 +889,18 @@ parse_execution_result_t parse_execution_context_t::determine_arguments( return parse_execution_success; } -bool parse_execution_context_t::determine_io_chain(const parse_node_t &statement_node, +bool parse_execution_context_t::determine_io_chain(tnode_t node, io_chain_t *out_chain) { io_chain_t result; bool errored = false; - // We are called with a statement of varying types. We require that the statement have an - // arguments_or_redirections_list child. - const parse_node_t &args_and_redirections_list = - tree().find_child(statement_node, symbol_arguments_or_redirections_list); - // Get all redirection nodes underneath the statement. - const parse_node_tree_t::parse_node_list_t redirect_nodes = - tree().find_nodes(args_and_redirections_list, symbol_redirection); - for (size_t i = 0; i < redirect_nodes.size(); i++) { - const parse_node_t &redirect_node = *redirect_nodes.at(i); - + auto redirect_nodes = node.descendants(); + for (tnode_t redirect_node : redirect_nodes) { int source_fd = -1; // source fd wcstring target; // file path or target fd enum token_type redirect_type = - tree().type_for_redirection(redirect_node, pstree->src, &source_fd, &target); + redirection_type(redirect_node, pstree->src, &source_fd, &target); // PCA: I can't justify this EXPAND_SKIP_VARIABLES flag. It was like this when I got here. bool target_expanded = expand_one(target, no_exec ? 
EXPAND_SKIP_VARIABLES : 0, NULL); @@ -1053,11 +960,10 @@ bool parse_execution_context_t::determine_io_chain(const parse_node_t &statement } parse_execution_result_t parse_execution_context_t::populate_boolean_process( - job_t *job, process_t *proc, const parse_node_t &bool_statement) { + job_t *job, process_t *proc, tnode_t bool_statement) { // Handle a boolean statement. bool skip_job = false; - assert(bool_statement.type == symbol_boolean_statement); - switch (parse_node_tree_t::statement_boolean_type(bool_statement)) { + switch (bool_statement_type(bool_statement)) { case parse_bool_and: { // AND. Skip if the last job failed. skip_job = (proc_get_last_status() != 0); @@ -1078,56 +984,62 @@ parse_execution_result_t parse_execution_context_t::populate_boolean_process( if (skip_job) { return parse_execution_skipped; } - const parse_node_t &subject = *tree().get_child(bool_statement, 1, symbol_statement); - return this->populate_job_process(job, proc, subject); + return this->populate_job_process(job, proc, + bool_statement.require_get_child()); } -parse_execution_result_t parse_execution_context_t::populate_block_process( - job_t *job, process_t *proc, const parse_node_t &statement_node) { +template +parse_execution_result_t parse_execution_context_t::populate_block_process(job_t *job, + process_t *proc, + tnode_t node) { // We handle block statements by creating INTERNAL_BLOCK_NODE, that will bounce back to us when // it's time to execute them. UNUSED(job); - assert(statement_node.type == symbol_block_statement || - statement_node.type == symbol_if_statement || - statement_node.type == symbol_switch_statement); + static_assert(Type::token == symbol_block_statement || Type::token == symbol_if_statement || + Type::token == symbol_switch_statement, + "Invalid block process"); // The set of IO redirections that we construct for the process. + // TODO: fix this ugly find_child. 
+ auto arguments = node.template find_child(); io_chain_t process_io_chain; - bool errored = !this->determine_io_chain(statement_node, &process_io_chain); + bool errored = !this->determine_io_chain(arguments, &process_io_chain); if (errored) return parse_execution_errored; proc->type = INTERNAL_BLOCK_NODE; - proc->internal_block_node = this->get_offset(statement_node); + proc->internal_block_node = this->get_offset(node); proc->set_io_chain(process_io_chain); return parse_execution_success; } -// Returns a process_t allocated with new. It's the caller's responsibility to delete it (!). parse_execution_result_t parse_execution_context_t::populate_job_process( - job_t *job, process_t *proc, const parse_node_t &statement_node) { - assert(statement_node.type == symbol_statement); - assert(statement_node.child_count == 1); - + job_t *job, process_t *proc, tnode_t statement) { // Get the "specific statement" which is boolean / block / if / switch / decorated. - const parse_node_t &specific_statement = *get_child(statement_node, 0); + const parse_node_t &specific_statement = statement.get_child_node<0>(); parse_execution_result_t result = parse_execution_success; switch (specific_statement.type) { case symbol_boolean_statement: { - result = this->populate_boolean_process(job, proc, specific_statement); + result = this->populate_boolean_process(job, proc, {&tree(), &specific_statement}); break; } case symbol_block_statement: - case symbol_if_statement: - case symbol_switch_statement: { - result = this->populate_block_process(job, proc, specific_statement); + result = this->populate_block_process( + job, proc, tnode_t(&tree(), &specific_statement)); + break; + case symbol_if_statement: + result = this->populate_block_process( + job, proc, tnode_t(&tree(), &specific_statement)); + break; + case symbol_switch_statement: + result = this->populate_block_process( + job, proc, tnode_t(&tree(), &specific_statement)); break; - } case symbol_decorated_statement: { // Get the plain 
statement. It will pull out the decoration itself. - const parse_node_t &plain_statement = - tree().find_child(specific_statement, symbol_plain_statement); + tnode_t dec_stat{&tree(), &specific_statement}; + auto plain_statement = dec_stat.find_child(); result = this->populate_plain_process(job, proc, plain_statement); break; } @@ -1143,52 +1055,49 @@ parse_execution_result_t parse_execution_context_t::populate_job_process( } parse_execution_result_t parse_execution_context_t::populate_job_from_job_node( - job_t *j, const parse_node_t &job_node, const block_t *associated_block) { + job_t *j, tnode_t job_node, const block_t *associated_block) { UNUSED(associated_block); - assert(job_node.type == symbol_job); // Tell the job what its command is. j->set_command(get_source(job_node)); // We are going to construct process_t structures for every statement in the job. Get the first // statement. - const parse_node_t *statement_node = get_child(job_node, 0, symbol_statement); - assert(statement_node != NULL); + tnode_t statement = job_node.child<0>(); + assert(statement); parse_execution_result_t result = parse_execution_success; // Create processes. Each one may fail. process_list_t processes; processes.emplace_back(new process_t()); - result = this->populate_job_process(j, processes.back().get(), *statement_node); + result = this->populate_job_process(j, processes.back().get(), statement); // Construct process_ts for job continuations (pipelines), by walking the list until we hit the // terminal (empty) job continuation. 
- const parse_node_t *job_cont = get_child(job_node, 1, symbol_job_continuation); - assert(job_cont != NULL); - while (result == parse_execution_success && job_cont->child_count > 0) { - assert(job_cont->type == symbol_job_continuation); + tnode_t job_cont = job_node.child<1>(); + assert(job_cont); + while (auto pipe = job_cont.try_get_child()) { + if (result != parse_execution_success) { + break; + } + tnode_t statement = job_cont.require_get_child(); // Handle the pipe, whose fd may not be the obvious stdout. - const parse_node_t &pipe_node = *get_child(*job_cont, 0, parse_token_type_pipe); - int pipe_write_fd = fd_redirected_by_pipe(get_source(pipe_node)); + int pipe_write_fd = fd_redirected_by_pipe(get_source(pipe)); if (pipe_write_fd == -1) { - result = report_error(pipe_node, ILLEGAL_FD_ERR_MSG, get_source(pipe_node).c_str()); + result = report_error(pipe, ILLEGAL_FD_ERR_MSG, get_source(pipe).c_str()); break; } processes.back()->pipe_write_fd = pipe_write_fd; - // Get the statement node and make a process from it. - const parse_node_t *statement_node = get_child(*job_cont, 1, symbol_statement); - assert(statement_node != NULL); - // Store the new process (and maybe with an error). processes.emplace_back(new process_t()); - result = this->populate_job_process(j, processes.back().get(), *statement_node); + result = this->populate_job_process(j, processes.back().get(), statement); // Get the next continuation. 
- job_cont = get_child(*job_cont, 2, symbol_job_continuation); - assert(job_cont != NULL); + job_cont = job_cont.require_get_child(); + assert(job_cont); } // Inform our processes of who is first and last @@ -1204,7 +1113,7 @@ parse_execution_result_t parse_execution_context_t::populate_job_from_job_node( return result; } -parse_execution_result_t parse_execution_context_t::run_1_job(const parse_node_t &job_node, +parse_execution_result_t parse_execution_context_t::run_1_job(tnode_t job_node, const block_t *associated_block) { if (should_cancel_execution(associated_block)) { return parse_execution_cancelled; @@ -1238,20 +1147,20 @@ parse_execution_result_t parse_execution_context_t::run_1_job(const parse_node_t if (job_is_simple_block(job_node)) { parse_execution_result_t result = parse_execution_success; - const parse_node_t &statement = *get_child(job_node, 0, symbol_statement); - const parse_node_t &specific_statement = *get_child(statement, 0); + tnode_t statement = job_node.child<0>(); + const parse_node_t &specific_statement = statement.get_child_node<0>(); assert(specific_statement_type_is_redirectable_block(specific_statement)); switch (specific_statement.type) { case symbol_block_statement: { - result = this->run_block_statement(specific_statement); + result = this->run_block_statement({&tree(), &specific_statement}); break; } case symbol_if_statement: { - result = this->run_if_statement(specific_statement); + result = this->run_if_statement({&tree(), &specific_statement}); break; } case symbol_switch_statement: { - result = this->run_switch_statement(specific_statement); + result = this->run_switch_statement({&tree(), &specific_statement}); break; } default: { @@ -1283,7 +1192,7 @@ parse_execution_result_t parse_execution_context_t::run_1_job(const parse_node_t (job_control_mode == JOB_CONTROL_ALL) || ((job_control_mode == JOB_CONTROL_INTERACTIVE) && shell_is_interactive())); - job->set_flag(JOB_FOREGROUND, !tree().job_should_be_backgrounded(job_node)); + 
job->set_flag(JOB_FOREGROUND, !job_node_is_background(job_node)); job->set_flag(JOB_TERMINAL, job->get_flag(JOB_CONTROL) && !is_event); @@ -1346,21 +1255,16 @@ parse_execution_result_t parse_execution_context_t::run_1_job(const parse_node_t return parse_execution_success; } -parse_execution_result_t parse_execution_context_t::run_job_list(const parse_node_t &job_list_node, +template +parse_execution_result_t parse_execution_context_t::run_job_list(tnode_t job_list, const block_t *associated_block) { - assert(job_list_node.type == symbol_job_list || job_list_node.type == symbol_andor_job_list); + static_assert(Type::token == symbol_job_list || Type::token == symbol_andor_job_list, + "Not a job list"); parse_execution_result_t result = parse_execution_success; - const parse_node_t *job_list = &job_list_node; - while (job_list != NULL && !should_cancel_execution(associated_block)) { - assert(job_list->type == symbol_job_list || job_list_node.type == symbol_andor_job_list); - - // Try pulling out a job. - const parse_node_t *job = tree().next_node_in_node_list(*job_list, symbol_job, &job_list); - - if (job != NULL) { - result = this->run_1_job(*job, associated_block); - } + while (tnode_t job = job_list.template next_in_list()) { + if (should_cancel_execution(associated_block)) break; + result = this->run_1_job(job, associated_block); } // Returns the last job executed. @@ -1388,30 +1292,31 @@ parse_execution_result_t parse_execution_context_t::eval_node_at_offset( // the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE // execution (which does block statements, but never job lists). 
assert(offset == 0); + tnode_t job_list{&tree(), &node}; wcstring func_name; - const parse_node_t *infinite_recursive_node = - this->infinite_recursive_statement_in_job_list(node, &func_name); - if (infinite_recursive_node != NULL) { + auto infinite_recursive_node = + this->infinite_recursive_statement_in_job_list(job_list, &func_name); + if (infinite_recursive_node) { // We have an infinite recursion. - this->report_error(*infinite_recursive_node, INFINITE_FUNC_RECURSION_ERR_MSG, + this->report_error(infinite_recursive_node, INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str()); status = parse_execution_errored; } else { // No infinite recursion. - status = this->run_job_list(node, associated_block); + status = this->run_job_list(job_list, associated_block); } break; } case symbol_block_statement: { - status = this->run_block_statement(node); + status = this->run_block_statement({&tree(), &node}); break; } case symbol_if_statement: { - status = this->run_if_statement(node); + status = this->run_if_statement({&tree(), &node}); break; } case symbol_switch_statement: { - status = this->run_switch_statement(node); + status = this->run_switch_statement({&tree(), &node}); break; } default: { diff --git a/src/parse_execution.h b/src/parse_execution.h index f188524f8..b1553eddc 100644 --- a/src/parse_execution.h +++ b/src/parse_execution.h @@ -65,60 +65,62 @@ class parse_execution_context_t { /// Command not found support. 
parse_execution_result_t handle_command_not_found(const wcstring &cmd, - const parse_node_t &statement_node, + tnode_t statement, int err_code); // Utilities wcstring get_source(const parse_node_t &node) const; - const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, - parse_token_type_t expected_type = token_type_invalid) const; node_offset_t get_offset(const parse_node_t &node) const; - const parse_node_t *infinite_recursive_statement_in_job_list(const parse_node_t &job_list, - wcstring *out_func_name) const; + tnode_t infinite_recursive_statement_in_job_list( + tnode_t job_list, wcstring *out_func_name) const; bool is_function_context() const; /// Indicates whether a job is a simple block (one block, no redirections). - bool job_is_simple_block(const parse_node_t &node) const; + bool job_is_simple_block(tnode_t job) const; - enum process_type_t process_type_for_command(const parse_node_t &plain_statement, + enum process_type_t process_type_for_command(tnode_t statement, const wcstring &cmd) const; // These create process_t structures from statements. parse_execution_result_t populate_job_process(job_t *job, process_t *proc, - const parse_node_t &statement_node); - parse_execution_result_t populate_boolean_process(job_t *job, process_t *proc, - const parse_node_t &bool_statement); + tnode_t statement); + parse_execution_result_t populate_boolean_process( + job_t *job, process_t *proc, tnode_t bool_statement); parse_execution_result_t populate_plain_process(job_t *job, process_t *proc, - const parse_node_t &statement); + tnode_t statement); + + template parse_execution_result_t populate_block_process(job_t *job, process_t *proc, - const parse_node_t &statement_node); + tnode_t statement_node); // These encapsulate the actual logic of various (block) statements. 
- parse_execution_result_t run_block_statement(const parse_node_t &statement); - parse_execution_result_t run_for_statement(const parse_node_t &header, - const parse_node_t &contents); - parse_execution_result_t run_if_statement(const parse_node_t &statement); - parse_execution_result_t run_switch_statement(const parse_node_t &statement); - parse_execution_result_t run_while_statement(const parse_node_t &header, - const parse_node_t &contents); - parse_execution_result_t run_function_statement(const parse_node_t &header, - const parse_node_t &block_end_command); - parse_execution_result_t run_begin_statement(const parse_node_t &header, - const parse_node_t &contents); + parse_execution_result_t run_block_statement(tnode_t statement); + parse_execution_result_t run_for_statement(tnode_t header, + tnode_t contents); + parse_execution_result_t run_if_statement(tnode_t statement); + parse_execution_result_t run_switch_statement(tnode_t statement); + parse_execution_result_t run_while_statement(tnode_t statement, + tnode_t contents); + parse_execution_result_t run_function_statement(tnode_t header, + tnode_t block_end); + parse_execution_result_t run_begin_statement(tnode_t header, + tnode_t contents); enum globspec_t { failglob, nullglob }; - parse_execution_result_t determine_arguments(const parse_node_t &parent, - wcstring_list_t *out_arguments, - globspec_t glob_behavior); + using argument_node_list_t = std::vector>; + parse_execution_result_t expand_arguments_from_nodes(const argument_node_list_t &argument_nodes, + wcstring_list_t *out_arguments, + globspec_t glob_behavior); // Determines the IO chain. Returns true on success, false on error. 
- bool determine_io_chain(const parse_node_t &statement, io_chain_t *out_chain); + bool determine_io_chain(tnode_t node, + io_chain_t *out_chain); - parse_execution_result_t run_1_job(const parse_node_t &job_node, - const block_t *associated_block); - parse_execution_result_t run_job_list(const parse_node_t &job_list_node, + parse_execution_result_t run_1_job(tnode_t job, const block_t *associated_block); + template + parse_execution_result_t run_job_list(tnode_t job_list_node, const block_t *associated_block); - parse_execution_result_t populate_job_from_job_node(job_t *j, const parse_node_t &job_node, + parse_execution_result_t populate_job_from_job_node(job_t *j, tnode_t job_node, const block_t *associated_block); // Returns the line number of the node at the given index, indexed from 0. Not const since it diff --git a/src/parse_grammar.h b/src/parse_grammar.h new file mode 100644 index 000000000..972b22f1a --- /dev/null +++ b/src/parse_grammar.h @@ -0,0 +1,353 @@ +// Programmatic representation of fish grammar +#ifndef FISH_PARSE_GRAMMAR_H +#define FISH_PARSE_GRAMMAR_H + +#include +#include +#include +#include "parse_constants.h" +#include "tokenizer.h" + +struct parse_token_t; +typedef uint8_t parse_node_tag_t; + +using parse_node_tag_t = uint8_t; +struct parse_token_t; +namespace grammar { + +using production_element_t = uint8_t; + +// Define primitive types. +template +struct primitive { + using type_tuple = std::tuple<>; + static constexpr parse_token_type_t token = Token; + static constexpr production_element_t element() { return Token; } +}; + +using tok_end = primitive; +using tok_string = primitive; +using tok_pipe = primitive; +using tok_background = primitive; +using tok_redirection = primitive; + +// Define keyword types. 
+template +struct keyword { + using type_tuple = std::tuple<>; + static constexpr parse_token_type_t token = parse_token_type_string; + static constexpr production_element_t element() { + // Convert a parse_keyword_t enum to a production_element_t enum. + return Keyword + LAST_TOKEN_OR_SYMBOL + 1; + } +}; + +// Define special types. +// Comments are not emitted as part of productions, but specially by the parser. +struct comment { + using type_tuple = std::tuple<>; + static constexpr parse_token_type_t token = parse_special_type_comment; +}; + +// Forward declare all the symbol types. +#define ELEM(T) struct T; +#include "parse_grammar_elements.inc" + +// A production is a sequence of production elements. +// +1 to hold the terminating token_type_invalid +template +using production_t = std::array; + +// This is an ugly hack to avoid ODR violations +// Given some type, return a pointer to its production. +template +const production_element_t *production_for() { + static constexpr auto prod = T::production; + return prod.data(); +} + +// Get some production element. +template +constexpr production_element_t element() { + return T::element(); +} + +// Template goo. +namespace detail { +template +struct tuple_contains; + +template +struct tuple_contains> : std::false_type {}; + +template +struct tuple_contains> : tuple_contains> {}; + +template +struct tuple_contains> : std::true_type {}; + +struct void_type { + using type = void; +}; + +// Support for checking whether the index N is valid for T::type_tuple. +template +static constexpr bool index_valid() { + return N < std::tuple_size::value; +} + +// Get the Nth type of T::type_tuple. +template +using tuple_element = std::tuple_element; + +// Get the Nth type of T::type_tuple, or void if N is out of bounds. +template +using tuple_element_or_void = + typename std::conditional(), tuple_element, void_type>::type::type; + +// Make a tuple by mapping the Nth item of a list of 'seq's. 
+template +struct tuple_nther { + // A tuple of the Nth types of tuples (or voids). + using type = std::tuple...>; +}; + +// Given a list of Options, each one a seq, check to see if any of them contain type Desired at +// index Index. +template +inline constexpr bool type_possible() { + using nths = typename tuple_nther::type; + return tuple_contains::value; +} +} // namespace detail + +// Partial specialization hack. +#define ELEM(T) \ + template <> \ + constexpr production_element_t element() { \ + return symbol_##T; \ + } +#include "parse_grammar_elements.inc" + +// Empty produces nothing. +struct empty { + using type_tuple = std::tuple<>; + static constexpr production_t<0> production = {{token_type_invalid}}; + static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, + parse_node_tag_t *) { + return production_for(); + } +}; + +// Sequence represents a list of (at least two) productions. +template +struct seq { + static constexpr production_t<1 + sizeof...(Ts)> production = { + {element(), element()..., token_type_invalid}}; + + using type_tuple = std::tuple; + + template + static constexpr bool type_possible() { + using element_t = detail::tuple_element_or_void; + return std::is_same::value; + } + + static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, + parse_node_tag_t *) { + return production_for(); + } +}; + +template +using produces_sequence = seq; + +// Ergonomic way to create a production for a single element. +template +using single = seq; + +template +using produces_single = single; + +// Alternative represents a choice. +struct alternative {}; + +// Following are the grammar productions. +#define BODY(T) static constexpr parse_token_type_t token = symbol_##T; + +#define DEF(T) struct T : public + +#define DEF_ALT(T) struct T : public alternative +#define ALT_BODY(T, ...) 
\ + BODY(T) \ + using type_tuple = std::tuple<>; \ + template \ + static constexpr bool type_possible() { \ + return detail::type_possible(); \ + } \ + static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, \ + parse_node_tag_t *); + +// A job_list is a list of jobs, separated by semicolons or newlines +DEF_ALT(job_list) { + using normal = seq; + using empty_line = seq; + using empty = grammar::empty; + ALT_BODY(job_list, normal, empty_line, empty); +}; + +// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases +// like if statements, where we require a command). To represent "non-empty", we require a +// statement, followed by a possibly empty job_continuation, and then optionally a background +// specifier '&' +DEF(job) produces_sequence{BODY(job)}; + +DEF_ALT(job_continuation) { + using piped = seq; + using empty = grammar::empty; + ALT_BODY(job_continuation, piped, empty); +}; + +// A statement is a normal command, or an if / while / and etc +DEF_ALT(statement) { + using boolean = single; + using block = single; + using ifs = single; + using switchs = single; + using decorated = single; + ALT_BODY(statement, boolean, block, ifs, switchs, decorated); +}; + +// A block is a conditional, loop, or begin/end +DEF(if_statement) +produces_sequence{ + BODY(if_statement)}; + +DEF(if_clause) +produces_sequence, job, tok_end, andor_job_list, job_list>{ + BODY(if_clause)}; + +DEF_ALT(else_clause) { + using empty = grammar::empty; + using else_cont = seq, else_continuation>; + ALT_BODY(else_clause, empty, else_cont); +}; + +DEF_ALT(else_continuation) { + using else_if = seq; + using else_only = seq; + ALT_BODY(else_continuation, else_if, else_only); +}; + +DEF(switch_statement) +produces_sequence, argument, tok_end, case_item_list, end_command, + arguments_or_redirections_list>{BODY(switch_statement)}; + +DEF_ALT(case_item_list) { + using empty = grammar::empty; + using case_items = seq; + using 
blank_line = seq; + ALT_BODY(case_item_list, empty, case_items, blank_line); +}; + +DEF(case_item) produces_sequence, argument_list, tok_end, job_list> { + BODY(case_item); +}; + +DEF(block_statement) +produces_sequence{ + BODY(block_statement)}; + +DEF_ALT(block_header) { + using forh = single; + using whileh = single; + using funch = single; + using beginh = single; + ALT_BODY(block_header, forh, whileh, funch, beginh); +}; + +DEF(for_header) +produces_sequence, tok_string, keyword, argument_list, + tok_end> { + BODY(for_header); +}; + +DEF(while_header) +produces_sequence, job, tok_end, andor_job_list>{BODY(while_header)}; + +DEF(begin_header) produces_single>{BODY(begin_header)}; + +// Functions take arguments, and require at least one (the name). No redirections allowed. +DEF(function_header) +produces_sequence, argument, argument_list, tok_end>{ + BODY(function_header)}; + +// A boolean statement is AND or OR or NOT +DEF_ALT(boolean_statement) { + using ands = seq, statement>; + using ors = seq, statement>; + using nots = seq, statement>; + ALT_BODY(boolean_statement, ands, ors, nots); +}; + +// An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean +// statement. 
+DEF_ALT(andor_job_list) { + using empty = grammar::empty; + using andor_job = seq; + using empty_line = seq; + ALT_BODY(andor_job_list, empty, andor_job, empty_line); +}; + +// A decorated_statement is a command with a list of arguments_or_redirections, possibly with +// "builtin" or "command" or "exec" +DEF_ALT(decorated_statement) { + using plains = single; + using cmds = seq, plain_statement>; + using builtins = seq, plain_statement>; + using execs = seq, plain_statement>; + ALT_BODY(decorated_statement, plains, cmds, builtins, execs); +}; + +DEF(plain_statement) +produces_sequence{BODY(plain_statement)}; + +DEF_ALT(argument_list) { + using empty = grammar::empty; + using arg = seq; + ALT_BODY(argument_list, empty, arg); +}; + +DEF_ALT(arguments_or_redirections_list) { + using empty = grammar::empty; + using value = seq; + ALT_BODY(arguments_or_redirections_list, empty, value); +}; + +DEF_ALT(argument_or_redirection) { + using arg = single; + using redir = single; + ALT_BODY(argument_or_redirection, arg, redir); +}; + +DEF(argument) produces_single{BODY(argument)}; +DEF(redirection) produces_sequence{BODY(redirection)}; + +DEF_ALT(optional_background) { + using empty = grammar::empty; + using background = single; + ALT_BODY(optional_background, empty, background); +}; + +DEF(end_command) produces_single>{BODY(end_command)}; + +// A freestanding_argument_list is equivalent to a normal argument list, except it may contain +// TOK_END (newlines, and even semicolons, for historical reasons) +DEF_ALT(freestanding_argument_list) { + using empty = grammar::empty; + using arg = seq; + using semicolon = seq; + ALT_BODY(freestanding_argument_list, empty, arg, semicolon); +}; +} // namespace grammar +#endif diff --git a/src/parse_grammar_elements.inc b/src/parse_grammar_elements.inc new file mode 100644 index 000000000..6f38cf99d --- /dev/null +++ b/src/parse_grammar_elements.inc @@ -0,0 +1,32 @@ +// Define ELEM before including this file. 
+ELEM(job_list) +ELEM(job) +ELEM(job_continuation) +ELEM(statement) +ELEM(if_statement) +ELEM(if_clause) +ELEM(else_clause) +ELEM(else_continuation) +ELEM(switch_statement) +ELEM(case_item_list) +ELEM(case_item) +ELEM(block_statement) +ELEM(block_header) +ELEM(for_header) +ELEM(while_header) +ELEM(begin_header) +ELEM(function_header) +ELEM(boolean_statement) +ELEM(andor_job_list) +ELEM(decorated_statement) +ELEM(plain_statement) +ELEM(argument_list) +ELEM(arguments_or_redirections_list) +ELEM(argument_or_redirection) +ELEM(argument) +ELEM(redirection) +ELEM(optional_background) +ELEM(end_command) +ELEM(freestanding_argument_list) +#undef ELEM + diff --git a/src/parse_productions.cpp b/src/parse_productions.cpp index 7a66f9d7d..20dd556a6 100644 --- a/src/parse_productions.cpp +++ b/src/parse_productions.cpp @@ -4,10 +4,12 @@ #include "common.h" #include "parse_constants.h" +#include "parse_grammar.h" #include "parse_productions.h" #include "parse_tree.h" using namespace parse_productions; +using namespace grammar; #define NO_PRODUCTION NULL @@ -21,40 +23,14 @@ using namespace parse_productions; // Productions are generally a static const array, and we return a pointer to the array (yes, // really). -#define RESOLVE(sym) \ - static const production_element_t *resolve_##sym( \ +#define RESOLVE(SYM) \ + const production_element_t *SYM::resolve( \ const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) -// This is a shorthand for symbols which always resolve to the same production sequence. Using this -// avoids repeating a lot of boilerplate code below. -#define RESOLVE_ONLY(sym, tokens...) 
\ - extern const production_element_t sym##_only[]; \ - static const production_element_t *resolve_##sym( \ - const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) { \ - UNUSED(token1); \ - UNUSED(token2); \ - UNUSED(out_tag); \ - return sym##_only; \ - } \ - const production_element_t sym##_only[] = {tokens, token_type_invalid} - -// Convert a parse_keyword_t enum to a parse_token_type_t enum. -#define KEYWORD(keyword) (keyword + LAST_TOKEN_OR_SYMBOL + 1) - -/// Helper macro to define a production sequence. Note that such sequences must always end with -/// enum `token_type_invalid`. -#define P(production_name, tokens...) \ - static const production_element_t production_name[] = {tokens, token_type_invalid} - -/// The empty production is used often enough it's worth definining once at module scope. -static const production_element_t empty[] = {token_type_invalid}; - /// A job_list is a list of jobs, separated by semicolons or newlines. RESOLVE(job_list) { UNUSED(token2); UNUSED(out_tag); - P(normal, symbol_job, symbol_job_list); - P(empty_line, parse_token_type_end, symbol_job_list); switch (token1.type) { case parse_token_type_string: { @@ -63,44 +39,38 @@ RESOLVE(job_list) { case parse_keyword_end: case parse_keyword_else: case parse_keyword_case: { - return empty; // end this job list + return production_for(); // end this job list } default: { - return normal; // normal string + return production_for(); // normal string } } } case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_type_background: { - return normal; + return production_for(); } case parse_token_type_end: { - return empty_line; + return production_for(); } case parse_token_type_terminate: { - return empty; // no more commands, just transition to empty + return production_for(); // no more commands, just transition to empty } default: { return NO_PRODUCTION; } } } -// A job is a non-empty list of statements, separated by pipes. 
(Non-empty is useful for cases like -// if statements, where we require a command). To represent "non-empty", we require a statement, -// followed by a possibly empty job_continuation. -RESOLVE_ONLY(job, symbol_statement, symbol_job_continuation, symbol_optional_background); - RESOLVE(job_continuation) { UNUSED(token2); UNUSED(out_tag); - P(piped, parse_token_type_pipe, symbol_statement, symbol_job_continuation); switch (token1.type) { case parse_token_type_pipe: { - return piped; // pipe, continuation + return production_for(); // pipe, continuation } default: { - return empty; // not a pipe, no job continuation + return production_for(); // not a pipe, no job continuation } } } @@ -108,11 +78,6 @@ RESOLVE(job_continuation) { // A statement is a normal command, or an if / while / and etc. RESOLVE(statement) { UNUSED(out_tag); - P(boolean, symbol_boolean_statement); - P(block, symbol_block_statement); - P(ifs, symbol_if_statement); - P(switchs, symbol_switch_statement); - P(decorated, symbol_decorated_statement); // The only block-like builtin that takes any parameters is 'function' So go to decorated // statements if the subsequent token looks like '--'. The logic here is subtle: @@ -125,9 +90,9 @@ RESOLVE(statement) { // If we are a function, then look for help arguments. Otherwise, if the next token looks // like an option (starts with a dash), then parse it as a decorated statement. if (token1.keyword == parse_keyword_function && token2.is_help_argument) { - return decorated; + return production_for(); } else if (token1.keyword != parse_keyword_function && token2.has_dash_prefix) { - return decorated; + return production_for(); } // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g. 
@@ -136,7 +101,7 @@ RESOLVE(statement) { (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end); if (naked_invocation_invokes_help && (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) { - return decorated; + return production_for(); } } @@ -146,28 +111,28 @@ RESOLVE(statement) { case parse_keyword_and: case parse_keyword_or: case parse_keyword_not: { - return boolean; + return production_for(); } case parse_keyword_for: case parse_keyword_while: case parse_keyword_function: case parse_keyword_begin: { - return block; + return production_for(); } case parse_keyword_if: { - return ifs; + return production_for(); } case parse_keyword_else: { return NO_PRODUCTION; } case parse_keyword_switch: { - return switchs; + return production_for(); } case parse_keyword_end: { return NO_PRODUCTION; } // All other keywords fall through to decorated statement. - default: { return decorated; } + default: { return production_for(); } } break; } @@ -181,255 +146,201 @@ RESOLVE(statement) { } } -RESOLVE_ONLY(if_statement, symbol_if_clause, symbol_else_clause, symbol_end_command, - symbol_arguments_or_redirections_list); -RESOLVE_ONLY(if_clause, KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, - symbol_andor_job_list, symbol_job_list); - RESOLVE(else_clause) { UNUSED(token2); UNUSED(out_tag); - P(else_cont, KEYWORD(parse_keyword_else), symbol_else_continuation); switch (token1.keyword) { case parse_keyword_else: { - return else_cont; + return production_for(); } - default: { return empty; } + default: { return production_for(); } } } RESOLVE(else_continuation) { UNUSED(token2); UNUSED(out_tag); - P(elseif, symbol_if_clause, symbol_else_clause); - P(elseonly, parse_token_type_end, symbol_job_list); switch (token1.keyword) { case parse_keyword_if: { - return elseif; + return production_for(); } - default: { return elseonly; } + default: { return production_for(); } } } -RESOLVE_ONLY(switch_statement, 
KEYWORD(parse_keyword_switch), symbol_argument, parse_token_type_end, - symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list); - RESOLVE(case_item_list) { UNUSED(token2); UNUSED(out_tag); - P(case_item, symbol_case_item, symbol_case_item_list); - P(blank_line, parse_token_type_end, symbol_case_item_list); if (token1.keyword == parse_keyword_case) - return case_item; + return production_for(); else if (token1.type == parse_token_type_end) - return blank_line; + return production_for(); else - return empty; + return production_for(); } -RESOLVE_ONLY(case_item, KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, - symbol_job_list); - RESOLVE(andor_job_list) { UNUSED(out_tag); - P(andor_job, symbol_job, symbol_andor_job_list); - P(empty_line, parse_token_type_end, symbol_andor_job_list); if (token1.type == parse_token_type_end) { - return empty_line; + return production_for(); } else if (token1.keyword == parse_keyword_and || token1.keyword == parse_keyword_or) { // Check that the argument to and/or is a string that's not help. Otherwise it's either 'and // --help' or a naked 'and', and not part of this list. if (token2.type == parse_token_type_string && !token2.is_help_argument) { - return andor_job; + return production_for(); } } // All other cases end the list. 
- return empty; + return production_for(); } RESOLVE(argument_list) { UNUSED(token2); UNUSED(out_tag); - P(arg, symbol_argument, symbol_argument_list); switch (token1.type) { case parse_token_type_string: { - return arg; + return production_for(); } - default: { return empty; } + default: { return production_for(); } } } RESOLVE(freestanding_argument_list) { UNUSED(token2); UNUSED(out_tag); - P(arg, symbol_argument, symbol_freestanding_argument_list); - P(semicolon, parse_token_type_end, symbol_freestanding_argument_list); switch (token1.type) { case parse_token_type_string: { - return arg; + return production_for(); } case parse_token_type_end: { - return semicolon; + return production_for(); } - default: { return empty; } + default: { return production_for(); } } } -RESOLVE_ONLY(block_statement, symbol_block_header, symbol_job_list, symbol_end_command, - symbol_arguments_or_redirections_list); - RESOLVE(block_header) { UNUSED(token2); UNUSED(out_tag); - P(forh, symbol_for_header); - P(whileh, symbol_while_header); - P(funch, symbol_function_header); - P(beginh, symbol_begin_header); switch (token1.keyword) { case parse_keyword_for: { - return forh; + return production_for(); } case parse_keyword_while: { - return whileh; + return production_for(); } case parse_keyword_function: { - return funch; + return production_for(); } case parse_keyword_begin: { - return beginh; + return production_for(); } default: { return NO_PRODUCTION; } } } -RESOLVE_ONLY(for_header, KEYWORD(parse_keyword_for), parse_token_type_string, - KEYWORD(parse_keyword_in), symbol_argument_list, parse_token_type_end); -RESOLVE_ONLY(while_header, KEYWORD(parse_keyword_while), symbol_job, parse_token_type_end, - symbol_andor_job_list); -RESOLVE_ONLY(begin_header, KEYWORD(parse_keyword_begin)); -RESOLVE_ONLY(function_header, KEYWORD(parse_keyword_function), symbol_argument, - symbol_argument_list, parse_token_type_end); - // A boolean statement is AND or OR or NOT. 
RESOLVE(boolean_statement) { UNUSED(token2); - P(ands, KEYWORD(parse_keyword_and), symbol_statement); - P(ors, KEYWORD(parse_keyword_or), symbol_statement); - P(nots, KEYWORD(parse_keyword_not), symbol_statement); switch (token1.keyword) { case parse_keyword_and: { *out_tag = parse_bool_and; - return ands; + return production_for(); } case parse_keyword_or: { *out_tag = parse_bool_or; - return ors; + return production_for(); } case parse_keyword_not: { *out_tag = parse_bool_not; - return nots; + return production_for(); } default: { return NO_PRODUCTION; } } } RESOLVE(decorated_statement) { - P(plains, symbol_plain_statement); - P(cmds, KEYWORD(parse_keyword_command), symbol_plain_statement); - P(builtins, KEYWORD(parse_keyword_builtin), symbol_plain_statement); - P(execs, KEYWORD(parse_keyword_exec), symbol_plain_statement); // If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the // second token is not a string, then this is a naked 'command' and we should execute it as // undecorated. 
if (token2.type != parse_token_type_string || token2.has_dash_prefix) { - return plains; + return production_for(); } switch (token1.keyword) { case parse_keyword_command: { *out_tag = parse_statement_decoration_command; - return cmds; + return production_for(); } case parse_keyword_builtin: { *out_tag = parse_statement_decoration_builtin; - return builtins; + return production_for(); } case parse_keyword_exec: { *out_tag = parse_statement_decoration_exec; - return execs; + return production_for(); } default: { *out_tag = parse_statement_decoration_none; - return plains; + return production_for(); } } } -RESOLVE_ONLY(plain_statement, parse_token_type_string, symbol_arguments_or_redirections_list); - RESOLVE(arguments_or_redirections_list) { UNUSED(token2); UNUSED(out_tag); - P(value, symbol_argument_or_redirection, symbol_arguments_or_redirections_list); switch (token1.type) { case parse_token_type_string: case parse_token_type_redirection: { - return value; + return production_for(); } - default: { return empty; } + default: { return production_for(); } } } RESOLVE(argument_or_redirection) { UNUSED(token2); UNUSED(out_tag); - P(arg, symbol_argument); - P(redir, symbol_redirection); switch (token1.type) { case parse_token_type_string: { - return arg; + return production_for(); } case parse_token_type_redirection: { - return redir; + return production_for(); } default: { return NO_PRODUCTION; } } } -RESOLVE_ONLY(argument, parse_token_type_string); -RESOLVE_ONLY(redirection, parse_token_type_redirection, parse_token_type_string); - RESOLVE(optional_background) { UNUSED(token2); - P(background, parse_token_type_background); switch (token1.type) { case parse_token_type_background: { *out_tag = parse_background; - return background; + return production_for(); } default: { *out_tag = parse_no_background; - return empty; + return production_for(); } } } -RESOLVE_ONLY(end_command, KEYWORD(parse_keyword_end)); - -#define TEST(sym) \ - case (symbol_##sym): \ - resolver = 
resolve_##sym; \ +#define TEST(SYM) \ + case (symbol_##SYM): \ + resolver = SYM::resolve; \ break; const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type, diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp index 53f706393..b06bb54c2 100644 --- a/src/parse_tree.cpp +++ b/src/parse_tree.cpp @@ -17,6 +17,7 @@ #include "parse_productions.h" #include "parse_tree.h" #include "proc.h" +#include "tnode.h" #include "tokenizer.h" #include "wutil.h" // IWYU pragma: keep @@ -1231,27 +1232,6 @@ const parse_node_t *parse_node_tree_t::get_parent(const parse_node_t &node, return result; } -static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, - parse_token_type_t type, - parse_node_tree_t::parse_node_list_t *result, size_t max_count) { - if (result->size() < max_count) { - if (parent.type == type) result->push_back(&parent); - for (node_offset_t i = 0; i < parent.child_count; i++) { - const parse_node_t *child = tree.get_child(parent, i); - assert(child != NULL); - find_nodes_recursive(tree, *child, type, result, max_count); - } - } -} - -parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, - parse_token_type_t type, - size_t max_count) const { - parse_node_list_t result; - find_nodes_recursive(*this, parent, type, &result, max_count); - return result; -} - /// Return true if the given node has the proposed ancestor as an ancestor (or is itself that /// ancestor). static bool node_has_ancestor(const parse_node_tree_t &tree, const parse_node_t &node, @@ -1266,23 +1246,6 @@ static bool node_has_ancestor(const parse_node_tree_t &tree, const parse_node_t return node_has_ancestor(tree, tree.at(node.parent), proposed_ancestor); } -const parse_node_t *parse_node_tree_t::find_last_node_of_type(parse_token_type_t type, - const parse_node_t *parent) const { - const parse_node_t *result = NULL; - // Find nodes of the given type in the tree, working backwards. 
- size_t idx = this->size(); - while (idx--) { - const parse_node_t &node = this->at(idx); - bool expected_type = (node.type == type); - if (expected_type && (parent == NULL || node_has_ancestor(*this, node, *parent))) { - // The types match and it has the right parent. - result = &node; - break; - } - } - return result; -} - const parse_node_t *parse_node_tree_t::find_node_matching_source_location( parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const { const parse_node_t *result = NULL; @@ -1307,191 +1270,20 @@ const parse_node_t *parse_node_tree_t::find_node_matching_source_location( return result; } -bool parse_node_tree_t::argument_list_is_root(const parse_node_t &node) const { - bool result = true; - assert(node.type == symbol_argument_list || node.type == symbol_arguments_or_redirections_list); - const parse_node_t *parent = this->get_parent(node); - if (parent != NULL) { - // We have a parent - check to make sure it's not another list! - result = parent->type != symbol_arguments_or_redirections_list && - parent->type != symbol_argument_list; - } - return result; -} - -enum parse_statement_decoration_t parse_node_tree_t::decoration_for_plain_statement( - const parse_node_t &node) const { - assert(node.type == symbol_plain_statement); - parse_statement_decoration_t decoration = parse_statement_decoration_none; - const parse_node_t *decorated_statement = this->get_parent(node, symbol_decorated_statement); - if (decorated_statement) { - decoration = static_cast(decorated_statement->tag); - } - return decoration; -} - -bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, const wcstring &src, - wcstring *out_cmd) const { - bool result = false; - assert(node.type == symbol_plain_statement); - const parse_node_t *cmd_node = this->get_child(node, 0, parse_token_type_string); - if (cmd_node != NULL && cmd_node->has_source()) { - out_cmd->assign(src, cmd_node->source_start, cmd_node->source_length); - result = true; - } 
else { - out_cmd->clear(); - } - return result; -} - -bool parse_node_tree_t::statement_is_in_pipeline(const parse_node_t &node, - bool include_first) const { - // Moderately nasty hack! Walk up our ancestor chain and see if we are in a job_continuation. - // This checks if we are in the second or greater element in a pipeline; if we are the first - // element we treat this as false. This accepts a few statement types. - bool result = false; - const parse_node_t *ancestor = &node; - - // If we're given a plain statement, try to get its decorated statement parent. - if (ancestor && ancestor->type == symbol_plain_statement) - ancestor = this->get_parent(*ancestor, symbol_decorated_statement); - if (ancestor) ancestor = this->get_parent(*ancestor, symbol_statement); - if (ancestor) ancestor = this->get_parent(*ancestor); - - if (ancestor) { - if (ancestor->type == symbol_job_continuation) { - // Second or more in a pipeline. - result = true; - } else if (ancestor->type == symbol_job && include_first) { - // Check to see if we have a job continuation that's not empty. 
- const parse_node_t *continuation = - this->get_child(*ancestor, 1, symbol_job_continuation); - result = (continuation != NULL && continuation->child_count > 0); - } - } - - return result; -} - -enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, - const wcstring &src, int *out_fd, - wcstring *out_target) const { - assert(redirection_node.type == symbol_redirection); - enum token_type result = TOK_NONE; - const parse_node_t *redirection_primitive = - this->get_child(redirection_node, 0, parse_token_type_redirection); // like 2> - const parse_node_t *redirection_target = - this->get_child(redirection_node, 1, parse_token_type_string); // like &1 or file path - - if (redirection_primitive != NULL && redirection_primitive->has_source()) { - result = redirection_type_for_string(redirection_primitive->get_source(src), out_fd); - } - if (out_target != NULL) { - *out_target = redirection_target ? redirection_target->get_source(src) : L""; - } - return result; -} - -const parse_node_t *parse_node_tree_t::header_node_for_block_statement( - const parse_node_t &node) const { +const parse_node_t *parse_node_tree_t::find_last_node_of_type(parse_token_type_t type, + const parse_node_t *parent) const { const parse_node_t *result = NULL; - if (node.type == symbol_block_statement) { - const parse_node_t *block_header = this->get_child(node, 0, symbol_block_header); - if (block_header != NULL) { - result = this->get_child(*block_header, 0); + // Find nodes of the given type in the tree, working backwards. + size_t idx = this->size(); + while (idx--) { + const parse_node_t &node = this->at(idx); + bool expected_type = (node.type == type); + if (expected_type && (parent == NULL || node_has_ancestor(*this, node, *parent))) { + // The types match and it has the right parent. 
+ result = &node; + break; } } return result; } -parse_node_tree_t::parse_node_list_t parse_node_tree_t::specific_statements_for_job( - const parse_node_t &job) const { - assert(job.type == symbol_job); - parse_node_list_t result; - - // Initial statement (non-specific). - result.push_back(get_child(job, 0, symbol_statement)); - - // Our cursor variable. Walk over the list of continuations. - const parse_node_t *continuation = get_child(job, 1, symbol_job_continuation); - while (continuation != NULL && continuation->child_count > 0) { - result.push_back(get_child(*continuation, 1, symbol_statement)); - continuation = get_child(*continuation, 2, symbol_job_continuation); - } - - // Result now contains a list of statements. But we want a list of specific statements e.g. - // symbol_switch_statement. So replace them in-place in the vector. - for (size_t i = 0; i < result.size(); i++) { - const parse_node_t *statement = result.at(i); - assert(statement->type == symbol_statement); - result.at(i) = this->get_child(*statement, 0); - } - - return result; -} - -parse_node_tree_t::parse_node_list_t parse_node_tree_t::comment_nodes_for_node( - const parse_node_t &parent) const { - parse_node_list_t result; - if (parent.has_comments()) { - // Walk all our nodes, looking for comment nodes that have the given node as a parent. 
- for (size_t i = 0; i < this->size(); i++) { - const parse_node_t &potential_comment = this->at(i); - if (potential_comment.type == parse_special_type_comment && - this->get_parent(potential_comment) == &parent) { - result.push_back(&potential_comment); - } - } - } - return result; -} - -enum parse_bool_statement_type_t parse_node_tree_t::statement_boolean_type( - const parse_node_t &node) { - assert(node.type == symbol_boolean_statement); - return static_cast(node.tag); -} - -bool parse_node_tree_t::job_should_be_backgrounded(const parse_node_t &job) const { - assert(job.type == symbol_job); - const parse_node_t *opt_background = get_child(job, 2, symbol_optional_background); - return opt_background != NULL && opt_background->tag == parse_background; -} - -const parse_node_t *parse_node_tree_t::next_node_in_node_list( - const parse_node_t &node_list, parse_token_type_t entry_type, - const parse_node_t **out_list_tail) const { - parse_token_type_t list_type = node_list.type; - - // Paranoia - it doesn't make sense for a list type to contain itself. - assert(list_type != entry_type); - - const parse_node_t *list_cursor = &node_list; - const parse_node_t *list_entry = NULL; - - // Loop while we don't have an item but do have a list. Note that some nodes may contain - // nothing; e.g. job_list contains blank lines as a production. - while (list_entry == NULL && list_cursor != NULL) { - const parse_node_t *next_cursor = NULL; - - // Walk through the children. - for (node_offset_t i = 0; i < list_cursor->child_count; i++) { - const parse_node_t *child = this->get_child(*list_cursor, i); - if (child->type == entry_type) { - // This is the list entry. - list_entry = child; - } else if (child->type == list_type) { - // This is the next in the list. - next_cursor = child; - } - } - // Go to the next entry, even if it's NULL. - list_cursor = next_cursor; - } - - // Return what we got. 
- assert(list_cursor == NULL || list_cursor->type == list_type); - assert(list_entry == NULL || list_entry->type == entry_type); - if (out_list_tail != NULL) *out_list_tail = list_cursor; - return list_entry; -} diff --git a/src/parse_tree.h b/src/parse_tree.h index e08cf1364..6f48fcfb6 100644 --- a/src/parse_tree.h +++ b/src/parse_tree.h @@ -10,7 +10,9 @@ #include #include "common.h" +#include "maybe.h" #include "parse_constants.h" +#include "parse_grammar.h" #include "tokenizer.h" class parse_node_tree_t; @@ -137,6 +139,9 @@ class parse_node_t { } }; +template +class tnode_t; + /// The parse tree itself. class parse_node_tree_t : public std::vector { public: @@ -154,6 +159,9 @@ class parse_node_tree_t : public std::vector { // Find the first direct child of the given node of the given type. asserts on failure. const parse_node_t &find_child(const parse_node_t &parent, parse_token_type_t type) const; + template + tnode_t find_child(const parse_node_t &parent) const; + // Get the node corresponding to the parent of the given node, or NULL if there is no such // child. If expected_type is provided, only returns the parent if it is of that type. Note the // asymmetry: get_child asserts since the children are known, but get_parent does not, since the @@ -161,51 +169,24 @@ class parse_node_tree_t : public std::vector { const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; - // Find all the nodes of a given type underneath a given node, up to max_count of them. - typedef std::vector parse_node_list_t; - parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type, - size_t max_count = (size_t)(-1)) const; - - // Finds the last node of a given type underneath a given node, or NULL if it could not be - // found. If parent is NULL, this finds the last node in the tree of that type. 
- const parse_node_t *find_last_node_of_type(parse_token_type_t type, - const parse_node_t *parent = NULL) const; + // Finds the last node of a given type, or empty if it could not be found. If parent is NULL, + // this finds the last node in the tree of that type. + template + tnode_t find_last_node(const parse_node_t *parent = NULL) const; // Finds a node containing the given source location. If 'parent' is not NULL, it must be an // ancestor. const parse_node_t *find_node_matching_source_location(parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const; - - // Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has - // a parent. - bool argument_list_is_root(const parse_node_t &node) const; - // Utilities - /// Given a plain statement, get the decoration (from the parent node), or none if there is no - /// decoration. - enum parse_statement_decoration_t decoration_for_plain_statement( - const parse_node_t &node) const; - - /// Given a plain statement, get the command by reference (from the child node). Returns true if - /// successful. Clears the command on failure. - bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, - wcstring *out_cmd) const; - - /// Given a plain statement, return true if the statement is part of a pipeline. If - /// include_first is set, the first command in a pipeline is considered part of it; otherwise - /// only the second or additional commands are. - bool statement_is_in_pipeline(const parse_node_t &node, bool include_first) const; - - /// Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd). - enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, - wcstring *out_target) const; - - /// If the given node is a block statement, returns the header node (for_header, while_header, - /// begin_header, or function_header). Otherwise returns NULL. 
- const parse_node_t *header_node_for_block_statement(const parse_node_t &node) const; + /// Given a node, return all of its comment nodes. + std::vector> comment_nodes_for_node(const parse_node_t &node) const; + private: + template + friend class tnode_t; /// Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return /// the next element of the given type in that list, and the tail (by reference). Returns NULL /// if we've exhausted the list. @@ -213,18 +194,10 @@ class parse_node_tree_t : public std::vector { parse_token_type_t item_type, const parse_node_t **list_tail) const; - /// Given a job, return all of its statements. These are 'specific statements' (e.g. - /// symbol_decorated_statement, not symbol_statement). - parse_node_list_t specific_statements_for_job(const parse_node_t &job) const; - - /// Given a node, return all of its comment nodes. - parse_node_list_t comment_nodes_for_node(const parse_node_t &node) const; - - /// Returns the boolean type for a boolean node. - static enum parse_bool_statement_type_t statement_boolean_type(const parse_node_t &node); - - /// Given a job, return whether it should be backgrounded, because it has a & specifier. - bool job_should_be_backgrounded(const parse_node_t &job) const; + // Finds the last node of a given type underneath a given node, or NULL if it could not be + // found. If parent is NULL, this finds the last node in the tree of that type. + const parse_node_t *find_last_node_of_type(parse_token_type_t type, + const parse_node_t *parent) const; }; /// The big entry point. Parse a string, attempting to produce a tree for the given goal type. 
diff --git a/src/parse_util.cpp b/src/parse_util.cpp index 3ea58463c..86c49735d 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -17,8 +17,8 @@ #include "expand.h" #include "fallback.h" // IWYU pragma: keep #include "parse_constants.h" -#include "parse_tree.h" #include "parse_util.h" +#include "tnode.h" #include "tokenizer.h" #include "util.h" #include "wildcard.h" @@ -760,15 +760,13 @@ bool parse_util_argument_is_help(const wchar_t *s) { } /// Check if the first argument under the given node is --help. -static bool first_argument_is_help(const parse_node_tree_t &node_tree, const parse_node_t &node, +static bool first_argument_is_help(tnode_t statement, const wcstring &src) { bool is_help = false; - const parse_node_tree_t::parse_node_list_t arg_nodes = - node_tree.find_nodes(node, symbol_argument, 1); + auto arg_nodes = get_argument_nodes(statement.child<1>()); if (!arg_nodes.empty()) { // Check the first argument only. - const parse_node_t &arg = *arg_nodes.at(0); - const wcstring first_arg_src = arg.get_source(src); + wcstring first_arg_src = arg_nodes.front().get_source(src); is_help = parse_util_argument_is_help(first_arg_src.c_str()); } return is_help; @@ -940,11 +938,11 @@ static parser_test_error_bits_t detect_dollar_cmdsub_errors(size_t arg_src_offse /// Test if this argument contains any errors. Detected errors include syntax errors in command /// substitutions, improperly escaped characters and improper use of the variable expansion /// operator. 
-parser_test_error_bits_t parse_util_detect_errors_in_argument(const parse_node_t &node, +parser_test_error_bits_t parse_util_detect_errors_in_argument(tnode_t node, const wcstring &arg_src, parse_error_list_t *out_errors) { - assert(node.type == symbol_argument); - + assert(node.has_source() && "argument has no source"); + auto source_start = node.source_range()->start; int err = 0; wchar_t *paran_begin, *paran_end; int do_loop = 1; @@ -956,7 +954,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const parse_node_t case -1: { err = 1; if (out_errors) { - append_syntax_error(out_errors, node.source_start, L"Mismatched parenthesis"); + append_syntax_error(out_errors, source_start, L"Mismatched parenthesis"); } return err; } @@ -979,7 +977,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const parse_node_t // Our command substitution produced error offsets relative to its source. Tweak the // offsets of the errors in the command substitution to account for both its offset // within the string, and the offset of the node. - size_t error_offset = cmd_sub_start + 1 + node.source_start; + size_t error_offset = cmd_sub_start + 1 + source_start; parse_error_offset_source_start(&subst_errors, error_offset); if (out_errors != NULL) { @@ -990,9 +988,8 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const parse_node_t // "" and (), and also we no longer have the source of the command substitution. // As an optimization, this is only necessary if the last character is a $. 
if (cmd_sub_start > 0 && working_copy.at(cmd_sub_start - 1) == L'$') { - err |= detect_dollar_cmdsub_errors(node.source_start, - working_copy.substr(0, cmd_sub_start), - subst, out_errors); + err |= detect_dollar_cmdsub_errors( + source_start, working_copy.substr(0, cmd_sub_start), subst, out_errors); } } break; @@ -1007,7 +1004,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const parse_node_t wcstring unesc; if (!unescape_string(working_copy, &unesc, UNESCAPE_SPECIAL)) { if (out_errors) { - append_syntax_error(out_errors, node.source_start, L"Invalid token '%ls'", + append_syntax_error(out_errors, source_start, L"Invalid token '%ls'", working_copy.c_str()); } return 1; @@ -1031,8 +1028,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const parse_node_t unesc.at(first_dollar - 1) == VARIABLE_EXPAND_SINGLE)) { first_dollar--; } - parse_util_expand_variable_error(unesc, node.source_start, first_dollar, - out_errors); + parse_util_expand_variable_error(unesc, source_start, first_dollar, out_errors); } } } @@ -1040,6 +1036,49 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const parse_node_t return err; } +/// Given that the job given by node should be backgrounded, return true if we detect any errors. 
+static bool detect_errors_in_backgrounded_job(const parse_node_tree_t &node_tree, + tnode_t job, + parse_error_list_t *parse_errors) { + auto source_range = job.source_range(); + if (!source_range) return false; + + bool errored = false; + // Disallow background in the following cases: + // foo & ; and bar + // foo & ; or bar + // if foo & ; end + // while foo & ; end + if (job.try_get_parent()) { + errored = append_syntax_error(parse_errors, source_range->start, + BACKGROUND_IN_CONDITIONAL_ERROR_MSG); + } else if (job.try_get_parent()) { + errored = append_syntax_error(parse_errors, source_range->start, + BACKGROUND_IN_CONDITIONAL_ERROR_MSG); + } else if (auto job_list = job.try_get_parent()) { + // This isn't very complete, e.g. we don't catch 'foo & ; not and bar'. + // Build the job list and then advance it by one. + auto first_job = job_list.next_in_list(); + assert(first_job == job && "Expected first job to be the node we found"); + (void)first_job; + // Try getting the next job as a boolean statement. + auto next_job = job_list.next_in_list(); + tnode_t next_stmt = next_job.child<0>(); + if (auto bool_stmt = next_stmt.try_get_child()) { + // The next job is indeed a boolean statement. + parse_bool_statement_type_t bool_type = bool_statement_type(bool_stmt); + if (bool_type == parse_bool_and) { // this is not allowed + errored = append_syntax_error(parse_errors, bool_stmt.source_range()->start, + BOOL_AFTER_BACKGROUND_ERROR_MSG, L"and"); + } else if (bool_type == parse_bool_or) { // this is not allowed + errored = append_syntax_error(parse_errors, bool_stmt.source_range()->start, + BOOL_AFTER_BACKGROUND_ERROR_MSG, L"or"); + } + } + } + return errored; +} + parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors, bool allow_incomplete, @@ -1097,90 +1136,46 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, // Verify no variable expansions. 
if (!errored) { - const size_t node_tree_size = node_tree.size(); - for (size_t i = 0; i < node_tree_size; i++) { - const parse_node_t &node = node_tree.at(i); + for (const parse_node_t &node : node_tree) { if (node.type == symbol_end_command && !node.has_source()) { // An 'end' without source is an unclosed block. has_unclosed_block = true; } else if (node.type == symbol_boolean_statement) { // 'or' and 'and' can be in a pipeline, as long as they're first. - parse_bool_statement_type_t type = parse_node_tree_t::statement_boolean_type(node); + tnode_t gbs{&node_tree, &node}; + parse_bool_statement_type_t type = bool_statement_type(gbs); if ((type == parse_bool_and || type == parse_bool_or) && - node_tree.statement_is_in_pipeline(node, false /* don't count first */)) { + statement_is_in_pipeline(gbs.try_get_parent(), + false /* don't count first */)) { errored = append_syntax_error(&parse_errors, node.source_start, EXEC_ERR_MSG, (type == parse_bool_and) ? L"and" : L"or"); } } else if (node.type == symbol_argument) { + tnode_t arg{&node_tree, &node}; const wcstring arg_src = node.get_source(buff_src); - res |= parse_util_detect_errors_in_argument(node, arg_src, &parse_errors); + res |= parse_util_detect_errors_in_argument(arg, arg_src, &parse_errors); } else if (node.type == symbol_job) { - if (node_tree.job_should_be_backgrounded(node)) { - // Disallow background in the following cases: - // - // foo & ; and bar - // foo & ; or bar - // if foo & ; end - // while foo & ; end - const parse_node_t *job_parent = node_tree.get_parent(node); - assert(job_parent != NULL); - switch (job_parent->type) { - case symbol_if_clause: - case symbol_while_header: { - assert(node_tree.get_child(*job_parent, 1) == &node); - errored = append_syntax_error(&parse_errors, node.source_start, - BACKGROUND_IN_CONDITIONAL_ERROR_MSG); - break; - } - case symbol_job_list: { - // This isn't very complete, e.g. we don't catch 'foo & ; not and bar'. 
- assert(node_tree.get_child(*job_parent, 0) == &node); - const parse_node_t *next_job_list = - node_tree.get_child(*job_parent, 1, symbol_job_list); - assert(next_job_list != NULL); - const parse_node_t *next_job = - node_tree.next_node_in_node_list(*next_job_list, symbol_job, NULL); - if (next_job == NULL) { - break; - } - - const parse_node_t *next_statement = - node_tree.get_child(*next_job, 0, symbol_statement); - if (next_statement == NULL) { - break; - } - - const parse_node_t *spec_statement = - node_tree.get_child(*next_statement, 0); - if (!spec_statement || - spec_statement->type != symbol_boolean_statement) { - break; - } - - parse_bool_statement_type_t bool_type = - parse_node_tree_t::statement_boolean_type(*spec_statement); - if (bool_type == parse_bool_and) { // this is not allowed - errored = - append_syntax_error(&parse_errors, spec_statement->source_start, - BOOL_AFTER_BACKGROUND_ERROR_MSG, L"and"); - } else if (bool_type == parse_bool_or) { // this is not allowed - errored = - append_syntax_error(&parse_errors, spec_statement->source_start, - BOOL_AFTER_BACKGROUND_ERROR_MSG, L"or"); - } - break; - } - default: { break; } - } + // Disallow background in the following cases: + // + // foo & ; and bar + // foo & ; or bar + // if foo & ; end + // while foo & ; end + // If it's not a background job, nothing to do. + auto job = tnode_t{&node_tree, &node}; + if (job_node_is_background(job)) { + errored |= detect_errors_in_backgrounded_job(node_tree, job, &parse_errors); } } else if (node.type == symbol_plain_statement) { + using namespace grammar; + tnode_t pst{&node_tree, &node}; // In a few places below, we want to know if we are in a pipeline. - const bool is_in_pipeline = - node_tree.statement_is_in_pipeline(node, true /* count first */); + tnode_t st = + pst.try_get_parent().try_get_parent(); + const bool is_in_pipeline = statement_is_in_pipeline(st, true /* count first */); // We need to know the decoration. 
- const enum parse_statement_decoration_t decoration = - node_tree.decoration_for_plain_statement(node); + const enum parse_statement_decoration_t decoration = get_decoration(pst); // Check that we don't try to pipe through exec. if (is_in_pipeline && decoration == parse_statement_decoration_exec) { @@ -1188,8 +1183,8 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, L"exec"); } - wcstring command; - if (node_tree.command_for_plain_statement(node, buff_src, &command)) { + if (maybe_t mcommand = command_for_plain_statement(pst, buff_src)) { + wcstring command = std::move(*mcommand); // Check that we can expand the command. if (!expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS, @@ -1208,19 +1203,18 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, // Check that we don't return from outside a function. But we allow it if it's // 'return --help'. if (!errored && command == L"return") { - const parse_node_t *ancestor = &node; bool found_function = false; - while (ancestor != NULL) { - const parse_node_t *possible_function_header = - node_tree.header_node_for_block_statement(*ancestor); - if (possible_function_header != NULL && - possible_function_header->type == symbol_function_header) { + for (const parse_node_t *ancestor = &node; ancestor != nullptr; + ancestor = node_tree.get_parent(*ancestor)) { + auto fh = tnode_t::try_create(&node_tree, ancestor) + .child<0>() + .try_get_child(); + if (fh) { found_function = true; break; } - ancestor = node_tree.get_parent(*ancestor); } - if (!found_function && !first_argument_is_help(node_tree, node, buff_src)) { + if (!found_function && !first_argument_is_help(pst, buff_src)) { errored = append_syntax_error(&parse_errors, node.source_start, INVALID_RETURN_ERR_MSG); } @@ -1233,38 +1227,26 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, // This is a little funny because we can't tell if it's a 'for' or 
'while' // loop from the ancestor alone; we need the header. That is, we hit a // block_statement, and have to check its header. - bool found_loop = false, end_search = false; - const parse_node_t *ancestor = &node; - while (ancestor != NULL && !end_search) { - const parse_node_t *loop_or_function_header = - node_tree.header_node_for_block_statement(*ancestor); - if (loop_or_function_header != NULL) { - switch (loop_or_function_header->type) { - case symbol_while_header: - case symbol_for_header: { - // This is a loop header, so we can break or continue. - found_loop = true; - end_search = true; - break; - } - case symbol_function_header: { - // This is a function header, so we cannot break or - // continue. We stop our search here. - found_loop = false; - end_search = true; - break; - } - default: { - // Most likely begin / end style block, which makes no - // difference. - break; - } - } + bool found_loop = false; + for (const parse_node_t *ancestor = &node; ancestor != nullptr; + ancestor = node_tree.get_parent(*ancestor)) { + tnode_t bh = + tnode_t::try_create(&node_tree, ancestor) + .child<0>(); + if (bh.try_get_child() || + bh.try_get_child()) { + // This is a loop header, so we can break or continue. + found_loop = true; + break; + } else if (bh.try_get_child()) { + // This is a function header, so we cannot break or + // continue. We stop our search here. + found_loop = false; + break; } - ancestor = node_tree.get_parent(*ancestor); } - if (!found_loop && !first_argument_is_help(node_tree, node, buff_src)) { + if (!found_loop && !first_argument_is_help(pst, buff_src)) { errored = append_syntax_error( &parse_errors, node.source_start, (command == L"break" ? INVALID_BREAK_ERR_MSG diff --git a/src/parse_util.h b/src/parse_util.h index 332737cbe..147971090 100644 --- a/src/parse_util.h +++ b/src/parse_util.h @@ -136,7 +136,8 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, /// operator. 
This does NOT currently detect unterminated quotes. class parse_node_t; parser_test_error_bits_t parse_util_detect_errors_in_argument( - const parse_node_t &node, const wcstring &arg_src, parse_error_list_t *out_errors = NULL); + tnode_t node, const wcstring &arg_src, + parse_error_list_t *out_errors = NULL); /// Given a string containing a variable expansion error, append an appropriate error to the errors /// list. The global_token_pos is the offset of the token in the larger source, and the dollar_pos diff --git a/src/parser.cpp b/src/parser.cpp index 37af8911a..4b91d5642 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -16,12 +16,12 @@ #include "intern.h" #include "parse_constants.h" #include "parse_execution.h" -#include "parse_tree.h" #include "parse_util.h" #include "parser.h" #include "proc.h" #include "reader.h" #include "sanity.h" +#include "tnode.h" #include "wutil.h" // IWYU pragma: keep class io_chain_t; @@ -329,20 +329,13 @@ void parser_t::expand_argument_list(const wcstring &arg_list_src, expand_flags_t return; } - // Get the root argument list. + // Get the root argument list and extract arguments from it. assert(!tree.empty()); //!OCLINT(multiple unary operator) - const parse_node_t *arg_list = &tree.at(0); - assert(arg_list->type == symbol_freestanding_argument_list); - - // Extract arguments from it. 
- while (arg_list != NULL) { - const parse_node_t *arg_node = - tree.next_node_in_node_list(*arg_list, symbol_argument, &arg_list); - if (arg_node != NULL) { - const wcstring arg_src = arg_node->get_source(arg_list_src); - if (expand_string(arg_src, output_arg_list, eflags, NULL) == EXPAND_ERROR) { - break; // failed to expand a string - } + tnode_t arg_list(&tree, &tree.at(0)); + while (auto arg = arg_list.next_in_list()) { + const wcstring arg_src = arg.get_source(arg_list_src); + if (expand_string(arg_src, output_arg_list, eflags, NULL) == EXPAND_ERROR) { + break; // failed to expand a string } } } @@ -742,20 +735,13 @@ bool parser_t::detect_errors_in_argument_list(const wcstring &arg_list_src, wcst } if (!errored) { - // Get the root argument list. + // Get the root argument list and extract arguments from it. assert(!tree.empty()); //!OCLINT(multiple unary operator) - const parse_node_t *arg_list = &tree.at(0); - assert(arg_list->type == symbol_freestanding_argument_list); - - // Extract arguments from it. 
- while (arg_list != NULL && !errored) { - const parse_node_t *arg_node = - tree.next_node_in_node_list(*arg_list, symbol_argument, &arg_list); - if (arg_node != NULL) { - const wcstring arg_src = arg_node->get_source(arg_list_src); - if (parse_util_detect_errors_in_argument(*arg_node, arg_src, &errors)) { - errored = true; - } + tnode_t arg_list(&tree, &tree.at(0)); + while (auto arg = arg_list.next_in_list()) { + const wcstring arg_src = arg.get_source(arg_list_src); + if (parse_util_detect_errors_in_argument(arg, arg_src, &errors)) { + errored = true; } } } diff --git a/src/reader.cpp b/src/reader.cpp index 806e3d6e7..ae22a64f5 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -62,7 +62,6 @@ #include "output.h" #include "pager.h" #include "parse_constants.h" -#include "parse_tree.h" #include "parse_util.h" #include "parser.h" #include "proc.h" @@ -70,6 +69,7 @@ #include "sanity.h" #include "screen.h" #include "signal.h" +#include "tnode.h" #include "tokenizer.h" #include "util.h" #include "wutil.h" // IWYU pragma: keep @@ -580,25 +580,25 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso &parse_tree, NULL); // Look for plain statements where the cursor is at the end of the command. - const parse_node_t *matching_cmd_node = NULL; - const size_t len = parse_tree.size(); - for (size_t i = 0; i < len; i++) { - const parse_node_t &node = parse_tree.at(i); - + using namespace grammar; + tnode_t matching_cmd_node; + for (const parse_node_t &node : parse_tree) { // Only interested in plain statements with source. if (node.type != symbol_plain_statement || !node.has_source()) continue; - // Skip decorated statements. - if (parse_tree.decoration_for_plain_statement(node) != parse_statement_decoration_none) - continue; - // Get the command node. Skip it if we can't or it has no source. 
- const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string); - if (cmd_node == NULL || !cmd_node->has_source()) continue; + tnode_t statement(&parse_tree, &node); + tnode_t cmd_node = statement.child<0>(); + + // Skip decorated statements. + if (get_decoration(statement) != parse_statement_decoration_none) continue; + + auto msource = cmd_node.source_range(); + if (!msource) continue; // Now see if its source range contains our cursor, including at the end. - if (subcmd_cursor_pos >= cmd_node->source_start && - subcmd_cursor_pos <= cmd_node->source_start + cmd_node->source_length) { + if (subcmd_cursor_pos >= msource->start && + subcmd_cursor_pos <= msource->start + msource->length) { // Success! matching_cmd_node = cmd_node; break; @@ -607,17 +607,16 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso // Now if we found a command node, expand it. bool result = false; - if (matching_cmd_node != NULL) { - assert(matching_cmd_node->type == parse_token_type_string); - const wcstring token = matching_cmd_node->get_source(subcmd); + if (matching_cmd_node) { + const wcstring token = matching_cmd_node.get_source(subcmd); wcstring abbreviation; if (expand_abbreviation(token, &abbreviation)) { // There was an abbreviation! Replace the token in the full command. Maintain the // relative position of the cursor. 
if (output != NULL) { output->assign(cmdline); - output->replace(subcmd_offset + matching_cmd_node->source_start, - matching_cmd_node->source_length, abbreviation); + source_range_t r = *matching_cmd_node.source_range(); + output->replace(subcmd_offset + r.start, r.length, abbreviation); } result = true; } diff --git a/src/tnode.cpp b/src/tnode.cpp new file mode 100644 index 000000000..630309891 --- /dev/null +++ b/src/tnode.cpp @@ -0,0 +1,129 @@ +#include "tnode.h" + +const parse_node_t *parse_node_tree_t::next_node_in_node_list( + const parse_node_t &node_list, parse_token_type_t entry_type, + const parse_node_t **out_list_tail) const { + parse_token_type_t list_type = node_list.type; + + // Paranoia - it doesn't make sense for a list type to contain itself. + assert(list_type != entry_type); + + const parse_node_t *list_cursor = &node_list; + const parse_node_t *list_entry = NULL; + + // Loop while we don't have an item but do have a list. Note that some nodes may contain + // nothing; e.g. job_list contains blank lines as a production. + while (list_entry == NULL && list_cursor != NULL) { + const parse_node_t *next_cursor = NULL; + + // Walk through the children. + for (node_offset_t i = 0; i < list_cursor->child_count; i++) { + const parse_node_t *child = this->get_child(*list_cursor, i); + if (child->type == entry_type) { + // This is the list entry. + list_entry = child; + } else if (child->type == list_type) { + // This is the next in the list. + next_cursor = child; + } + } + // Go to the next entry, even if it's NULL. + list_cursor = next_cursor; + } + + // Return what we got. 
+ assert(list_cursor == NULL || list_cursor->type == list_type); + assert(list_entry == NULL || list_entry->type == entry_type); + if (out_list_tail != NULL) *out_list_tail = list_cursor; + return list_entry; +} + +enum parse_statement_decoration_t get_decoration(tnode_t stmt) { + parse_statement_decoration_t decoration = parse_statement_decoration_none; + if (auto decorated_statement = stmt.try_get_parent()) { + decoration = static_cast(decorated_statement.tag()); + } + return decoration; +} + +enum parse_bool_statement_type_t bool_statement_type(tnode_t stmt) { + return static_cast(stmt.tag()); +} + +enum token_type redirection_type(tnode_t redirection, const wcstring &src, + int *out_fd, wcstring *out_target) { + assert(redirection && "redirection is missing"); + enum token_type result = TOK_NONE; + tnode_t prim = redirection.child<0>(); // like 2> + assert(prim && "expected to have primitive"); + + if (prim.has_source()) { + result = redirection_type_for_string(prim.get_source(src), out_fd); + } + if (out_target != NULL) { + tnode_t target = redirection.child<1>(); // like &1 or file path + *out_target = target ? target.get_source(src) : wcstring(); + } + return result; +} + +std::vector> parse_node_tree_t::comment_nodes_for_node( + const parse_node_t &parent) const { + std::vector> result; + if (parent.has_comments()) { + // Walk all our nodes, looking for comment nodes that have the given node as a parent. 
+ for (size_t i = 0; i < this->size(); i++) { + const parse_node_t &potential_comment = this->at(i); + if (potential_comment.type == parse_special_type_comment && + this->get_parent(potential_comment) == &parent) { + result.emplace_back(this, &potential_comment); + } + } + } + return result; +} + +maybe_t command_for_plain_statement(tnode_t stmt, + const wcstring &src) { + tnode_t cmd = stmt.child<0>(); + if (cmd && cmd.has_source()) { + return cmd.get_source(src); + } + return none(); +} + +arguments_node_list_t get_argument_nodes(tnode_t list, size_t max) { + return list.descendants(max); +} + +arguments_node_list_t get_argument_nodes(tnode_t list, + size_t max) { + return list.descendants(max); +} + +bool job_node_is_background(tnode_t job) { + tnode_t bg = job.child<2>(); + return bg.tag() == parse_background; +} + +bool statement_is_in_pipeline(tnode_t st, bool include_first) { + using namespace grammar; + if (!st) { + return false; + } + + // If we're part of a job continuation, we're definitely in a pipeline. + if (st.try_get_parent()) { + return true; + } + + // If include_first is set, check if we're the beginning of a job, and if so, whether that job + // has a non-empty continuation. + if (include_first) { + tnode_t jc = st.try_get_parent().child<1>(); + if (jc.try_get_child()) { + return true; + } + } + return false; +} diff --git a/src/tnode.h b/src/tnode.h new file mode 100644 index 000000000..3c663ffc3 --- /dev/null +++ b/src/tnode.h @@ -0,0 +1,258 @@ +// Type-safe access to fish parse trees. +#ifndef FISH_TNODE_H +#define FISH_TNODE_H + +#include "parse_grammar.h" +#include "parse_tree.h" + +struct source_range_t { + uint32_t start; + uint32_t length; +}; + +// Check if a child type is possible for a parent type at a given index. +template +constexpr bool child_type_possible_at_index() { + return Parent::template type_possible(); +} + +// Check if a child type is possible for a parent type at any index. 
+// 5 is arbitrary and represents the longest production we have. +template +constexpr bool child_type_possible() { + return child_type_possible_at_index() || + child_type_possible_at_index() || + child_type_possible_at_index() || + child_type_possible_at_index() || + child_type_possible_at_index() || + child_type_possible_at_index(); +} + +/// tnode_t ("typed node") is type-safe access to a parse_tree. A tnode_t holds both a pointer to a +/// parse_node_tree_t and a pointer to a parse_node_t. (Note that the parse_node_tree_t is unowned; +/// the caller must ensure that the tnode does not outlive the tree.) +/// +/// tnode_t is a lightweight value-type class. It ought to be passed by value. A tnode_t may also be +/// "missing", associated with a null parse_node_t pointer. operator bool() may be used to check if +/// a tnode_t is missing. +/// +/// A tnode_t is parametrized by a grammar element, and uses the fish grammar to statically +/// type-check accesses to children and parents. Any particular tnode either corresponds to a +/// sequence (a single child) or an alternation (multiple possible children). A sequence may have +/// its children accessed directly via child(), which is templated on the index (and returns a +/// tnode of the proper type). Alternations may be disambiguated via try_get_child(), which returns +/// an empty child if the child has the wrong type, or require_get_child() which aborts if the child +/// has the wrong type. +template +class tnode_t { + /// The tree containing our node. + const parse_node_tree_t *tree = nullptr; + + /// The node in the tree + const parse_node_t *nodeptr = nullptr; + + // Helper to get a child type at a given index.
+ template + using child_at = typename std::tuple_element::type; + + public: + tnode_t() = default; + + tnode_t(const parse_node_tree_t *t, const parse_node_t *n) : tree(t), nodeptr(n) { + assert(t && "tree cannot be null in this constructor"); + assert((!n || n->type == Type::token) && "node has wrong type"); + } + + // Try to create a tnode from the given tree and parse node. + // Returns an empty node if the parse node is null, or has the wrong type. + static tnode_t try_create(const parse_node_tree_t *tree, const parse_node_t *node) { + assert(tree && "tree cannot be null"); + return tnode_t(tree, node && node->type == Type::token ? node : nullptr); + } + + /// Temporary conversion to parse_node_t to assist in migration. + /* implicit */ operator const parse_node_t &() const { + assert(nodeptr && "Empty tnode_t"); + return *nodeptr; + } + + /* implicit */ operator const parse_node_t *() const { return nodeptr; } + + /// \return the underlying (type-erased) node. + const parse_node_t *node() const { return nodeptr; } + + /// Check whether we're populated. + explicit operator bool() const { return nodeptr != nullptr; } + + bool operator==(const tnode_t &rhs) const { return tree == rhs.tree && nodeptr == rhs.nodeptr; } + + bool operator!=(const tnode_t &rhs) const { return !(*this == rhs); } + + bool has_source() const { return nodeptr && nodeptr->has_source(); } + + // return the tag, or 0 if missing. + parse_node_tag_t tag() const { return nodeptr ? nodeptr->tag : 0; } + + // return the number of children, or 0 if missing. + uint8_t child_count() const { return nodeptr ? 
nodeptr->child_count : 0; } + + maybe_t source_range() const { + if (!has_source()) return none(); + return source_range_t{nodeptr->source_start, nodeptr->source_length}; + } + + wcstring get_source(const wcstring &str) const { + assert(has_source() && "Source missing"); + return nodeptr->get_source(str); + } + + bool location_in_or_at_end_of_source_range(size_t loc) const { + return nodeptr && nodeptr->location_in_or_at_end_of_source_range(loc); + } + + static tnode_t find_node_matching_source_location(const parse_node_tree_t *tree, + size_t source_loc, + const parse_node_t *parent) { + assert(tree && "null tree"); + return tnode_t{tree, + tree->find_node_matching_source_location(Type::token, source_loc, parent)}; + } + + /// Type-safe access to a child at the given index. + template + tnode_t> child() const { + using child_type = child_at; + const parse_node_t *child = nullptr; + if (nodeptr) child = tree->get_child(*nodeptr, Index, child_type::token); + return tnode_t{tree, child}; + } + + /// Return a parse_node_t for a child. + /// This is used to disambiguate alts. + template + const parse_node_t &get_child_node() const { + assert(nodeptr && "receiver is missing in get_child_node"); + return *tree->get_child(*nodeptr, Index); + } + + /// If the child at the given index has the given type, return it; otherwise return an empty + /// child. Note this will refuse to compile if the child type is not possible. + /// This is used for e.g. alternations. + template + tnode_t try_get_child() const { + static_assert(child_type_possible_at_index(), + "Cannot contain a child of this type"); + const parse_node_t *child = nullptr; + if (nodeptr) child = &get_child_node(); + if (child && child->type == ChildType::token) return {tree, child}; + return {}; + } + + /// assert that this is not empty and that the child at index Index has the given type, then + /// return that child. Note this will refuse to compile if the child type is not possible. 
+ template + tnode_t require_get_child() const { + assert(nodeptr && "receiver is missing in require_get_child()"); + auto result = try_get_child(); + assert(result && "require_get_child(): wrong child type"); + return result; + } + + /// Find the first direct child of the given node of the given type. asserts on failure. + template + tnode_t find_child() const { + assert(nodeptr && "receiver is missing in find_child()"); + tnode_t result{tree, &tree->find_child(*nodeptr, ChildType::token)}; + assert(result && "cannot find child"); + return result; + } + + /// Type-safe access to a node's parent. + /// If the parent exists and has type ParentType, return it. + /// Otherwise return a missing tnode. + template + tnode_t try_get_parent() const { + static_assert(child_type_possible(), "Parent cannot have us as a child"); + if (!nodeptr) return {}; + return {tree, tree->get_parent(*nodeptr, ParentType::token)}; + } + + /// Finds all descendants (up to max_count) under this node of the given type. + template + std::vector> descendants(size_t max_count = -1) const { + if (!nodeptr) return {}; + std::vector> result; + std::vector stack{nodeptr}; + while (!stack.empty() && result.size() < max_count) { + const parse_node_t *node = stack.back(); + if (node->type == DescendantType::token) result.emplace_back(tree, node); + stack.pop_back(); + node_offset_t index = node->child_count; + while (index--) { + stack.push_back(tree->get_child(*node, index)); + } + } + return result; + } + + /// Given that we are a list type, \return the next node of some Item in some node list, + /// adjusting 'this' to be the remainder of the list. + /// Returns an empty item on failure. 
+ template + tnode_t next_in_list() { + if (!nodeptr) return {}; + const parse_node_t *next = + tree->next_node_in_node_list(*nodeptr, ItemType::token, &nodeptr); + return {tree, next}; + } +}; + +template +tnode_t parse_node_tree_t::find_child(const parse_node_t &parent) const { + return tnode_t(this, &this->find_child(parent, Type::token)); +} + +template +tnode_t parse_node_tree_t::find_last_node(const parse_node_t *parent) const { + return tnode_t(this, this->find_last_node_of_type(Type::token, parent)); +} + +/// Given a plain statement, get the command from the child node. Returns the command string on +/// success, none on failure. +maybe_t command_for_plain_statement(tnode_t stmt, + const wcstring &src); + +/// Return the decoration for a plain statement. +parse_statement_decoration_t get_decoration(tnode_t stmt); + +/// Return the type for a boolean statement. +enum parse_bool_statement_type_t bool_statement_type(tnode_t stmt); + +/// Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd). +enum token_type redirection_type(tnode_t redirection, const wcstring &src, + int *out_fd, wcstring *out_target); + +/// Return the arguments under an arguments_list or arguments_or_redirection_list +/// Do not return more than max. +using arguments_node_list_t = std::vector>; +arguments_node_list_t get_argument_nodes(tnode_t, size_t max = -1); +arguments_node_list_t get_argument_nodes(tnode_t, + size_t max = -1); + +/// Return whether the given job is background because it has a & symbol. +bool job_node_is_background(tnode_t); + +/// Return whether the statement is part of a pipeline. If include_first is set, the first command +/// in a pipeline is considered part of it; otherwise only the second or additional commands are. +bool statement_is_in_pipeline(tnode_t st, bool include_first); + +/// Check whether an argument_list is a root list. 
+inline bool argument_list_is_root(tnode_t list) { + return !list.try_get_parent(); +} + +inline bool argument_list_is_root(tnode_t list) { + return !list.try_get_parent(); +} + +#endif