#include "config.h" // IWYU pragma: keep #include #include "common.h" #include "parse_constants.h" #include "parse_productions.h" #include "parse_tree.h" using namespace parse_productions; #define NO_PRODUCTION NULL // Herein are encoded the productions for our LL2 fish grammar. // // Each symbol (e.g. symbol_job_list) has a corresponding function (e.g. resolve_job_lits). The // function accepts two tokens, representing the first and second lookahead, and returns returns a // production representing the rule, or NULL on error. There is also a tag value which is returned // by reference; the tag is a sort of node annotation. // // Productions are generally a static const array, and we return a pointer to the array (yes, // really). #define RESOLVE(sym) \ static const production_element_t *resolve_##sym( \ const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) // This is a shorthand for symbols which always resolve to the same production sequence. Using this // avoids repeating a lot of boilerplate code below. #define RESOLVE_ONLY(sym, tokens...) \ extern const production_element_t sym##_only[]; \ static const production_element_t *resolve_##sym( \ const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) { \ UNUSED(token1); \ UNUSED(token2); \ UNUSED(out_tag); \ return sym##_only; \ } \ const production_element_t sym##_only[] = {tokens, token_type_invalid} // Convert a parse_keyword_t enum to a parse_token_type_t enum. #define KEYWORD(keyword) (keyword + LAST_TOKEN_OR_SYMBOL + 1) /// Helper macro to define a production sequence. Note that such sequences must always end with /// enum `token_type_invalid`. #define P(production_name, tokens...) \ static const production_element_t production_name[] = {tokens, token_type_invalid} /// The empty production is used often enough it's worth definining once at module scope. static const production_element_t empty[] = {token_type_invalid}; /// A job_list is a list of jobs, separated by semicolons or newlines. RESOLVE(job_list) { UNUSED(token2); UNUSED(out_tag); P(normal, symbol_job, symbol_job_list); P(empty_line, parse_token_type_end, symbol_job_list); switch (token1.type) { case parse_token_type_string: { // Some keywords are special. switch (token1.keyword) { case parse_keyword_end: case parse_keyword_else: case parse_keyword_case: { return empty; // end this job list } default: { return normal; // normal string } } } case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_type_background: { return normal; } case parse_token_type_end: { return empty_line; } case parse_token_type_terminate: { return empty; // no more commands, just transition to empty } default: { return NO_PRODUCTION; } } } // A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like // if statements, where we require a command). To represent "non-empty", we require a statement, // followed by a possibly empty job_continuation. RESOLVE_ONLY(job, symbol_statement, symbol_job_continuation, symbol_optional_background); RESOLVE(job_continuation) { UNUSED(token2); UNUSED(out_tag); P(piped, parse_token_type_pipe, symbol_statement, symbol_job_continuation); switch (token1.type) { case parse_token_type_pipe: { return piped; // pipe, continuation } default: { return empty; // not a pipe, no job continuation } } } // A statement is a normal command, or an if / while / and etc. RESOLVE(statement) { UNUSED(out_tag); P(boolean, symbol_boolean_statement); P(block, symbol_block_statement); P(ifs, symbol_if_statement); P(switchs, symbol_switch_statement); P(decorated, symbol_decorated_statement); // The only block-like builtin that takes any parameters is 'function' So go to decorated // statements if the subsequent token looks like '--'. The logic here is subtle: // // If we are 'begin', then we expect to be invoked with no arguments. // If we are 'function', then we are a non-block if we are invoked with -h or --help // If we are anything else, we require an argument, so do the same thing if the subsequent token // is a statement terminator. if (token1.type == parse_token_type_string) { // If we are a function, then look for help arguments. Otherwise, if the next token looks // like an option (starts with a dash), then parse it as a decorated statement. if (token1.keyword == parse_keyword_function && token2.is_help_argument) { return decorated; } else if (token1.keyword != parse_keyword_function && token2.has_dash_prefix) { return decorated; } // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g. // a "naked if". bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end); if (naked_invocation_invokes_help && (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) { return decorated; } } switch (token1.type) { case parse_token_type_string: { switch (token1.keyword) { case parse_keyword_and: case parse_keyword_or: case parse_keyword_not: { return boolean; } case parse_keyword_for: case parse_keyword_while: case parse_keyword_function: case parse_keyword_begin: { return block; } case parse_keyword_if: { return ifs; } case parse_keyword_else: { return NO_PRODUCTION; } case parse_keyword_switch: { return switchs; } case parse_keyword_end: { return NO_PRODUCTION; } // All other keywords fall through to decorated statement. default: { return decorated; } } break; } case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_type_background: case parse_token_type_terminate: { return NO_PRODUCTION; } default: { return NO_PRODUCTION; } } } RESOLVE_ONLY(if_statement, symbol_if_clause, symbol_else_clause, symbol_end_command, symbol_arguments_or_redirections_list); RESOLVE_ONLY(if_clause, KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_andor_job_list, symbol_job_list); RESOLVE(else_clause) { UNUSED(token2); UNUSED(out_tag); P(else_cont, KEYWORD(parse_keyword_else), symbol_else_continuation); switch (token1.keyword) { case parse_keyword_else: { return else_cont; } default: { return empty; } } } RESOLVE(else_continuation) { UNUSED(token2); UNUSED(out_tag); P(elseif, symbol_if_clause, symbol_else_clause); P(elseonly, parse_token_type_end, symbol_job_list); switch (token1.keyword) { case parse_keyword_if: { return elseif; } default: { return elseonly; } } } RESOLVE_ONLY(switch_statement, KEYWORD(parse_keyword_switch), symbol_argument, parse_token_type_end, symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list); RESOLVE(case_item_list) { UNUSED(token2); UNUSED(out_tag); P(case_item, symbol_case_item, symbol_case_item_list); P(blank_line, parse_token_type_end, symbol_case_item_list); if (token1.keyword == parse_keyword_case) return case_item; else if (token1.type == parse_token_type_end) return blank_line; else return empty; } RESOLVE_ONLY(case_item, KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list); RESOLVE(andor_job_list) { UNUSED(out_tag); P(andor_job, symbol_job, symbol_andor_job_list); P(empty_line, parse_token_type_end, symbol_andor_job_list); if (token1.type == parse_token_type_end) { return empty_line; } else if (token1.keyword == parse_keyword_and || token1.keyword == parse_keyword_or) { // Check that the argument to and/or is a string that's not help. Otherwise it's either 'and // --help' or a naked 'and', and not part of this list. if (token2.type == parse_token_type_string && !token2.is_help_argument) { return andor_job; } } // All other cases end the list. return empty; } RESOLVE(argument_list) { UNUSED(token2); UNUSED(out_tag); P(arg, symbol_argument, symbol_argument_list); switch (token1.type) { case parse_token_type_string: { return arg; } default: { return empty; } } } RESOLVE(freestanding_argument_list) { UNUSED(token2); UNUSED(out_tag); P(arg, symbol_argument, symbol_freestanding_argument_list); P(semicolon, parse_token_type_end, symbol_freestanding_argument_list); switch (token1.type) { case parse_token_type_string: { return arg; } case parse_token_type_end: { return semicolon; } default: { return empty; } } } RESOLVE_ONLY(block_statement, symbol_block_header, symbol_job_list, symbol_end_command, symbol_arguments_or_redirections_list); RESOLVE(block_header) { UNUSED(token2); UNUSED(out_tag); P(forh, symbol_for_header); P(whileh, symbol_while_header); P(funch, symbol_function_header); P(beginh, symbol_begin_header); switch (token1.keyword) { case parse_keyword_for: { return forh; } case parse_keyword_while: { return whileh; } case parse_keyword_function: { return funch; } case parse_keyword_begin: { return beginh; } default: { return NO_PRODUCTION; } } } RESOLVE_ONLY(for_header, KEYWORD(parse_keyword_for), parse_token_type_string, KEYWORD(parse_keyword_in), symbol_argument_list, parse_token_type_end); RESOLVE_ONLY(while_header, KEYWORD(parse_keyword_while), symbol_job, parse_token_type_end, symbol_andor_job_list); RESOLVE_ONLY(begin_header, KEYWORD(parse_keyword_begin)); RESOLVE_ONLY(function_header, KEYWORD(parse_keyword_function), symbol_argument, symbol_argument_list, parse_token_type_end); // A boolean statement is AND or OR or NOT. RESOLVE(boolean_statement) { UNUSED(token2); P(ands, KEYWORD(parse_keyword_and), symbol_statement); P(ors, KEYWORD(parse_keyword_or), symbol_statement); P(nots, KEYWORD(parse_keyword_not), symbol_statement); switch (token1.keyword) { case parse_keyword_and: { *out_tag = parse_bool_and; return ands; } case parse_keyword_or: { *out_tag = parse_bool_or; return ors; } case parse_keyword_not: { *out_tag = parse_bool_not; return nots; } default: { return NO_PRODUCTION; } } } RESOLVE(decorated_statement) { P(plains, symbol_plain_statement); P(cmds, KEYWORD(parse_keyword_command), symbol_plain_statement); P(builtins, KEYWORD(parse_keyword_builtin), symbol_plain_statement); P(execs, KEYWORD(parse_keyword_exec), symbol_plain_statement); // If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the // second token is not a string, then this is a naked 'command' and we should execute it as // undecorated. if (token2.type != parse_token_type_string || token2.has_dash_prefix) { return plains; } switch (token1.keyword) { case parse_keyword_command: { *out_tag = parse_statement_decoration_command; return cmds; } case parse_keyword_builtin: { *out_tag = parse_statement_decoration_builtin; return builtins; } case parse_keyword_exec: { *out_tag = parse_statement_decoration_exec; return execs; } default: { *out_tag = parse_statement_decoration_none; return plains; } } } RESOLVE_ONLY(plain_statement, parse_token_type_string, symbol_arguments_or_redirections_list); RESOLVE(arguments_or_redirections_list) { UNUSED(token2); UNUSED(out_tag); P(value, symbol_argument_or_redirection, symbol_arguments_or_redirections_list); switch (token1.type) { case parse_token_type_string: case parse_token_type_redirection: { return value; } default: { return empty; } } } RESOLVE(argument_or_redirection) { UNUSED(token2); UNUSED(out_tag); P(arg, symbol_argument); P(redir, symbol_redirection); switch (token1.type) { case parse_token_type_string: { return arg; } case parse_token_type_redirection: { return redir; } default: { return NO_PRODUCTION; } } } RESOLVE_ONLY(argument, parse_token_type_string); RESOLVE_ONLY(redirection, parse_token_type_redirection, parse_token_type_string); RESOLVE(optional_background) { UNUSED(token2); P(background, parse_token_type_background); switch (token1.type) { case parse_token_type_background: { *out_tag = parse_background; return background; } default: { *out_tag = parse_no_background; return empty; } } } RESOLVE_ONLY(end_command, KEYWORD(parse_keyword_end)); #define TEST(sym) \ case (symbol_##sym): \ resolver = resolve_##sym; \ break; const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, parse_node_tag_t *out_tag) { debug(5, "Resolving production for %ls with input token <%ls>\n", token_type_description(node_type), input1.describe().c_str()); // Fetch the function to resolve the list of productions. const production_element_t *(*resolver)(const parse_token_t &input1, //!OCLINT(unused param) const parse_token_t &input2, //!OCLINT(unused param) parse_node_tag_t *out_tag) = //!OCLINT(unused param) NULL; switch (node_type) { TEST(job_list) TEST(job) TEST(statement) TEST(job_continuation) TEST(boolean_statement) TEST(block_statement) TEST(if_statement) TEST(if_clause) TEST(else_clause) TEST(else_continuation) TEST(switch_statement) TEST(decorated_statement) TEST(case_item_list) TEST(case_item) TEST(argument_list) TEST(freestanding_argument_list) TEST(block_header) TEST(for_header) TEST(while_header) TEST(begin_header) TEST(function_header) TEST(plain_statement) TEST(andor_job_list) TEST(arguments_or_redirections_list) TEST(argument_or_redirection) TEST(argument) TEST(redirection) TEST(optional_background) TEST(end_command) case parse_token_type_string: case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_type_background: case parse_token_type_end: case parse_token_type_terminate: { debug(0, "Terminal token type %ls passed to %s", token_type_description(node_type), __FUNCTION__); PARSER_DIE(); break; } case parse_special_type_parse_error: case parse_special_type_tokenizer_error: case parse_special_type_comment: { debug(0, "Special type %ls passed to %s\n", token_type_description(node_type), __FUNCTION__); PARSER_DIE(); break; } case token_type_invalid: { debug(0, "token_type_invalid passed to %s", __FUNCTION__); PARSER_DIE(); break; } } PARSE_ASSERT(resolver != NULL); const production_element_t *result = resolver(input1, input2, out_tag); if (result == NULL) { debug(5, "Node type '%ls' has no production for input '%ls' (in %s)\n", token_type_description(node_type), input1.describe().c_str(), __FUNCTION__); } return result; }