diff --git a/src/parse_grammar.h b/src/parse_grammar.h index 23e79bd79..243f77355 100644 --- a/src/parse_grammar.h +++ b/src/parse_grammar.h @@ -4,160 +4,269 @@ #include "parse_constants.h" #include "tokenizer.h" +#include +struct parse_token_t; +typedef uint8_t parse_node_tag_t; + +using parse_node_tag_t = uint8_t; +struct parse_token_t; namespace grammar { + using production_element_t = uint8_t; -// Forward declarations. +// Define primitive types. +template +struct primitive { + static constexpr production_element_t element() { return Token; } +}; + +using tok_end = primitive; +using tok_string = primitive; +using tok_pipe = primitive; +using tok_background = primitive; +using tok_redirection = primitive; + +// Define keyword types. +template +struct keyword { + static constexpr production_element_t element() { + // Convert a parse_keyword_t enum to a production_element_t enum. + return Keyword + LAST_TOKEN_OR_SYMBOL + 1; + } +}; + +// Forward declare all the symbol types. #define ELEM(T) struct T; #include "parse_grammar_elements.inc" + // A production is a sequence of production elements. -template -struct production_t { - production_element_t elems[count]; -}; +// +1 to hold the terminating token_type_invalid +template +using production_t = std::array; -template -struct prim {}; - -using tok_end = prim; -using tok_string = prim; - -template -struct keyword {}; - -// A producer holds various productions. +// This is an ugly hack to avoid ODR violations +// Given some type, return a pointer to its production. template -struct producer {}; +const production_element_t *production_for() { + static constexpr auto prod = T::production; + return prod.data(); +} + +// Get some production element. +template +constexpr production_element_t element() { + return T::element(); +} + +// Partial specialization hack. +#define ELEM(T) \ + template <> \ + constexpr production_element_t element() { \ + return symbol_##T; \ + } +#include "parse_grammar_elements.inc" // Empty produces nothing. -struct empty : public producer> {}; +struct empty { + static constexpr production_t<0> production = {{token_type_invalid}}; + static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, + parse_node_tag_t *) { + return production_for(); + } +}; -// Not sure if we need this. -template -struct single {}; +// Sequence represents a list of (at least two) productions. +template +struct seq { + static constexpr production_t<1 + sizeof...(Ts)> production = { + {element(), element()..., token_type_invalid}}; + static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, + parse_node_tag_t *) { + return production_for(); + } +}; -template -using produces_single = producer>; +template +using produces_sequence = seq; + +// Ergonomic way to create a production for a single element. +template +using single = seq; + +template +using produces_single = single; // Alternative represents a choice. -template -struct alternative {}; - -template -using produces_alternative = producer>; - -// Sequence represents a list of productions. -template -struct seq {}; - -template -using produces_sequence = producer>; +struct alternative { +}; // Following are the grammar productions. +#define BODY(T) #define DEF(T) struct T : public +#define DEF_ALT(T) struct T : public alternative +#define ALT_BODY(T) \ + BODY(T) \ + static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, \ + parse_node_tag_t *); + // A job_list is a list of jobs, separated by semicolons or newlines -DEF(job_list) -produces_alternative, // - seq>{}; +DEF_ALT(job_list) { + using normal = seq; + using empty_line = seq; + using empty = grammar::empty; + ALT_BODY(job_list); +}; // A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases // like if statements, where we require a command). To represent "non-empty", we require a // statement, followed by a possibly empty job_continuation, and then optionally a background // specifier '&' -DEF(job) produces_sequence{}; +DEF(job) produces_sequence{BODY(job)}; -DEF(job_continuation) -produces_alternative, statement, job_continuation>>{}; +DEF_ALT(job_continuation) { + using piped = seq; + ALT_BODY(job_continuation); +}; // A statement is a normal command, or an if / while / and etc -DEF(statement) -produces_alternative{}; +DEF_ALT(statement) { + using boolean = single; + using block = single; + using ifs = single; + using switchs = single; + using decorated = single; + ALT_BODY(statement); +}; // A block is a conditional, loop, or begin/end DEF(if_statement) -produces_sequence{}; +produces_sequence{ + BODY(if_statement)}; DEF(if_clause) -produces_sequence, job, tok_end, andor_job_list, job_list>{}; +produces_sequence, job, tok_end, andor_job_list, job_list>{ + BODY(if_clause)}; -DEF(else_clause) produces_alternative, else_continuation>>{}; -DEF(else_continuation) -produces_alternative, // - seq>{}; +DEF_ALT(else_clause) { + using empty = grammar::empty; + using else_cont = seq, else_continuation>; + ALT_BODY(else_clause); +}; + +DEF_ALT(else_continuation) { + using else_if = seq; + using else_only = seq; + ALT_BODY(else_continuation); +}; DEF(switch_statement) produces_sequence, argument, tok_end, case_item_list, end_command, - arguments_or_redirections_list>{}; + arguments_or_redirections_list>{BODY(switch_statement)}; -DEF(case_item_list) -produces_alternative, // - seq>{}; +DEF_ALT(case_item_list) { + using empty = grammar::empty; + using case_items = seq; + using blank_line = seq; + ALT_BODY(case_item_list); +}; -DEF(case_item) produces_sequence, argument_list, tok_end, job_list>{}; +DEF(case_item) produces_sequence, argument_list, tok_end, job_list> { + BODY(case_item); +}; DEF(block_statement) produces_sequence{}; -DEF(block_header) produces_alternative{}; + +DEF_ALT(block_header) { + using forh = single; + using whileh = single; + using funch = single; + using beginh = single; + ALT_BODY(block_header); +}; DEF(for_header) produces_sequence, tok_string, keyword, argument_list, tok_end>{}; -DEF(while_header) produces_sequence, job, tok_end, andor_job_list>{}; +DEF(while_header) +produces_sequence, job, tok_end, andor_job_list>{BODY(while_header)}; -struct begin_header : produces_single> {}; +DEF(begin_header) produces_single>{BODY(begin_header)}; // Functions take arguments, and require at least one (the name). No redirections allowed. DEF(function_header) produces_sequence, argument, argument_list, tok_end>{}; // A boolean statement is AND or OR or NOT -DEF(boolean_statement) -produces_alternative, statement>, // - seq, statement>, // - seq, statement>>{}; +DEF_ALT(boolean_statement) { + using ands = seq, statement>; + using ors = seq, statement>; + using nots = seq, statement>; + ALT_BODY(boolean_statement); +}; + // An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean // statement. -DEF(andor_job_list) -produces_alternative, // - seq>{}; +DEF_ALT(andor_job_list) { + using empty = grammar::empty; + using andor_job = seq; + using empty_line = seq; + ALT_BODY(andor_job_list); +}; // A decorated_statement is a command with a list of arguments_or_redirections, possibly with // "builtin" or "command" or "exec" -DEF(decorated_statement) -produces_alternative, plain_statement>, // - seq, plain_statement>, // - seq, plain_statement>>{}; +DEF_ALT(decorated_statement) { + using plains = single; + using cmds = seq, plain_statement>; + using builtins = seq, plain_statement>; + using execs = seq, plain_statement>; + ALT_BODY(decorated_statement); +}; -DEF(plain_statement) produces_sequence{}; +DEF(plain_statement) +produces_sequence{BODY(plain_statement)}; -DEF(argument_list) produces_alternative>{}; -DEF(arguments_or_redirections_list) -produces_alternative>{}; +DEF_ALT(argument_list) { + using empty = grammar::empty; + using arg = seq; + ALT_BODY(argument_list); +}; -DEF(argument_or_redirection) produces_alternative{}; -DEF(argument) produces_single{}; -DEF(optional_background) produces_alternative>{}; -DEF(end_command) produces_single>{}; +DEF_ALT(arguments_or_redirections_list) { + using empty = grammar::empty; + using value = seq; + ALT_BODY(arguments_or_redirections_list); +}; + +DEF_ALT(argument_or_redirection) { + using arg = single; + using redir = single; + ALT_BODY(argument_or_redirection); +}; + +DEF(argument) produces_single{BODY(argument)}; +DEF(redirection) produces_sequence{BODY(redirection)}; + +DEF_ALT(optional_background) { + using empty = grammar::empty; + using background = single; + ALT_BODY(optional_background); +}; + +DEF(end_command) produces_single>{BODY(end_command)}; // A freestanding_argument_list is equivalent to a normal argument list, except it may contain // TOK_END (newlines, and even semicolons, for historical reasons) -DEF(freestanding_argument_list) -produces_alternative, // - seq>{}; +DEF_ALT(freestanding_argument_list) { + using empty = grammar::empty; + using arg = seq; + using semicolon = seq; + ALT_BODY(freestanding_argument_list); +}; } #endif diff --git a/src/parse_productions.cpp b/src/parse_productions.cpp index ae0df3303..20dd556a6 100644 --- a/src/parse_productions.cpp +++ b/src/parse_productions.cpp @@ -9,6 +9,7 @@ #include "parse_tree.h" using namespace parse_productions; +using namespace grammar; #define NO_PRODUCTION NULL @@ -22,40 +23,14 @@ using namespace parse_productions; // Productions are generally a static const array, and we return a pointer to the array (yes, // really). -#define RESOLVE(sym) \ - static const production_element_t *resolve_##sym( \ +#define RESOLVE(SYM) \ + const production_element_t *SYM::resolve( \ const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) -// This is a shorthand for symbols which always resolve to the same production sequence. Using this -// avoids repeating a lot of boilerplate code below. -#define RESOLVE_ONLY(sym, tokens...) \ - extern const production_element_t sym##_only[]; \ - static const production_element_t *resolve_##sym( \ - const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag) { \ - UNUSED(token1); \ - UNUSED(token2); \ - UNUSED(out_tag); \ - return sym##_only; \ - } \ - const production_element_t sym##_only[] = {tokens, token_type_invalid} - -// Convert a parse_keyword_t enum to a parse_token_type_t enum. -#define KEYWORD(keyword) (keyword + LAST_TOKEN_OR_SYMBOL + 1) - -/// Helper macro to define a production sequence. Note that such sequences must always end with -/// enum `token_type_invalid`. -#define P(production_name, tokens...) \ - static const production_element_t production_name[] = {tokens, token_type_invalid} - -/// The empty production is used often enough it's worth definining once at module scope. -static const production_element_t empty[] = {token_type_invalid}; - /// A job_list is a list of jobs, separated by semicolons or newlines. RESOLVE(job_list) { UNUSED(token2); UNUSED(out_tag); - P(normal, symbol_job, symbol_job_list); - P(empty_line, parse_token_type_end, symbol_job_list); switch (token1.type) { case parse_token_type_string: { @@ -64,44 +39,38 @@ RESOLVE(job_list) { case parse_keyword_end: case parse_keyword_else: case parse_keyword_case: { - return empty; // end this job list + return production_for(); // end this job list } default: { - return normal; // normal string + return production_for(); // normal string } } } case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_type_background: { - return normal; + return production_for(); } case parse_token_type_end: { - return empty_line; + return production_for(); } case parse_token_type_terminate: { - return empty; // no more commands, just transition to empty + return production_for(); // no more commands, just transition to empty } default: { return NO_PRODUCTION; } } } -// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like -// if statements, where we require a command). To represent "non-empty", we require a statement, -// followed by a possibly empty job_continuation. -RESOLVE_ONLY(job, symbol_statement, symbol_job_continuation, symbol_optional_background); - RESOLVE(job_continuation) { UNUSED(token2); UNUSED(out_tag); - P(piped, parse_token_type_pipe, symbol_statement, symbol_job_continuation); switch (token1.type) { case parse_token_type_pipe: { - return piped; // pipe, continuation + return production_for(); // pipe, continuation } default: { - return empty; // not a pipe, no job continuation + return production_for(); // not a pipe, no job continuation } } } @@ -109,11 +78,6 @@ RESOLVE(job_continuation) { // A statement is a normal command, or an if / while / and etc. RESOLVE(statement) { UNUSED(out_tag); - P(boolean, symbol_boolean_statement); - P(block, symbol_block_statement); - P(ifs, symbol_if_statement); - P(switchs, symbol_switch_statement); - P(decorated, symbol_decorated_statement); // The only block-like builtin that takes any parameters is 'function' So go to decorated // statements if the subsequent token looks like '--'. The logic here is subtle: @@ -126,9 +90,9 @@ RESOLVE(statement) { // If we are a function, then look for help arguments. Otherwise, if the next token looks // like an option (starts with a dash), then parse it as a decorated statement. if (token1.keyword == parse_keyword_function && token2.is_help_argument) { - return decorated; + return production_for(); } else if (token1.keyword != parse_keyword_function && token2.has_dash_prefix) { - return decorated; + return production_for(); } // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g. @@ -137,7 +101,7 @@ RESOLVE(statement) { (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end); if (naked_invocation_invokes_help && (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) { - return decorated; + return production_for(); } } @@ -147,28 +111,28 @@ RESOLVE(statement) { case parse_keyword_and: case parse_keyword_or: case parse_keyword_not: { - return boolean; + return production_for(); } case parse_keyword_for: case parse_keyword_while: case parse_keyword_function: case parse_keyword_begin: { - return block; + return production_for(); } case parse_keyword_if: { - return ifs; + return production_for(); } case parse_keyword_else: { return NO_PRODUCTION; } case parse_keyword_switch: { - return switchs; + return production_for(); } case parse_keyword_end: { return NO_PRODUCTION; } // All other keywords fall through to decorated statement. - default: { return decorated; } + default: { return production_for(); } } break; } @@ -182,255 +146,201 @@ RESOLVE(statement) { } } -RESOLVE_ONLY(if_statement, symbol_if_clause, symbol_else_clause, symbol_end_command, - symbol_arguments_or_redirections_list); -RESOLVE_ONLY(if_clause, KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, - symbol_andor_job_list, symbol_job_list); - RESOLVE(else_clause) { UNUSED(token2); UNUSED(out_tag); - P(else_cont, KEYWORD(parse_keyword_else), symbol_else_continuation); switch (token1.keyword) { case parse_keyword_else: { - return else_cont; + return production_for(); } - default: { return empty; } + default: { return production_for(); } } } RESOLVE(else_continuation) { UNUSED(token2); UNUSED(out_tag); - P(elseif, symbol_if_clause, symbol_else_clause); - P(elseonly, parse_token_type_end, symbol_job_list); switch (token1.keyword) { case parse_keyword_if: { - return elseif; + return production_for(); } - default: { return elseonly; } + default: { return production_for(); } } } -RESOLVE_ONLY(switch_statement, KEYWORD(parse_keyword_switch), symbol_argument, parse_token_type_end, - symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list); - RESOLVE(case_item_list) { UNUSED(token2); UNUSED(out_tag); - P(case_item, symbol_case_item, symbol_case_item_list); - P(blank_line, parse_token_type_end, symbol_case_item_list); if (token1.keyword == parse_keyword_case) - return case_item; + return production_for(); else if (token1.type == parse_token_type_end) - return blank_line; + return production_for(); else - return empty; + return production_for(); } -RESOLVE_ONLY(case_item, KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, - symbol_job_list); - RESOLVE(andor_job_list) { UNUSED(out_tag); - P(andor_job, symbol_job, symbol_andor_job_list); - P(empty_line, parse_token_type_end, symbol_andor_job_list); if (token1.type == parse_token_type_end) { - return empty_line; + return production_for(); } else if (token1.keyword == parse_keyword_and || token1.keyword == parse_keyword_or) { // Check that the argument to and/or is a string that's not help. Otherwise it's either 'and // --help' or a naked 'and', and not part of this list. if (token2.type == parse_token_type_string && !token2.is_help_argument) { - return andor_job; + return production_for(); } } // All other cases end the list. - return empty; + return production_for(); } RESOLVE(argument_list) { UNUSED(token2); UNUSED(out_tag); - P(arg, symbol_argument, symbol_argument_list); switch (token1.type) { case parse_token_type_string: { - return arg; + return production_for(); } - default: { return empty; } + default: { return production_for(); } } } RESOLVE(freestanding_argument_list) { UNUSED(token2); UNUSED(out_tag); - P(arg, symbol_argument, symbol_freestanding_argument_list); - P(semicolon, parse_token_type_end, symbol_freestanding_argument_list); switch (token1.type) { case parse_token_type_string: { - return arg; + return production_for(); } case parse_token_type_end: { - return semicolon; + return production_for(); } - default: { return empty; } + default: { return production_for(); } } } -RESOLVE_ONLY(block_statement, symbol_block_header, symbol_job_list, symbol_end_command, - symbol_arguments_or_redirections_list); - RESOLVE(block_header) { UNUSED(token2); UNUSED(out_tag); - P(forh, symbol_for_header); - P(whileh, symbol_while_header); - P(funch, symbol_function_header); - P(beginh, symbol_begin_header); switch (token1.keyword) { case parse_keyword_for: { - return forh; + return production_for(); } case parse_keyword_while: { - return whileh; + return production_for(); } case parse_keyword_function: { - return funch; + return production_for(); } case parse_keyword_begin: { - return beginh; + return production_for(); } default: { return NO_PRODUCTION; } } } -RESOLVE_ONLY(for_header, KEYWORD(parse_keyword_for), parse_token_type_string, - KEYWORD(parse_keyword_in), symbol_argument_list, parse_token_type_end); -RESOLVE_ONLY(while_header, KEYWORD(parse_keyword_while), symbol_job, parse_token_type_end, - symbol_andor_job_list); -RESOLVE_ONLY(begin_header, KEYWORD(parse_keyword_begin)); -RESOLVE_ONLY(function_header, KEYWORD(parse_keyword_function), symbol_argument, - symbol_argument_list, parse_token_type_end); - // A boolean statement is AND or OR or NOT. RESOLVE(boolean_statement) { UNUSED(token2); - P(ands, KEYWORD(parse_keyword_and), symbol_statement); - P(ors, KEYWORD(parse_keyword_or), symbol_statement); - P(nots, KEYWORD(parse_keyword_not), symbol_statement); switch (token1.keyword) { case parse_keyword_and: { *out_tag = parse_bool_and; - return ands; + return production_for(); } case parse_keyword_or: { *out_tag = parse_bool_or; - return ors; + return production_for(); } case parse_keyword_not: { *out_tag = parse_bool_not; - return nots; + return production_for(); } default: { return NO_PRODUCTION; } } } RESOLVE(decorated_statement) { - P(plains, symbol_plain_statement); - P(cmds, KEYWORD(parse_keyword_command), symbol_plain_statement); - P(builtins, KEYWORD(parse_keyword_builtin), symbol_plain_statement); - P(execs, KEYWORD(parse_keyword_exec), symbol_plain_statement); // If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the // second token is not a string, then this is a naked 'command' and we should execute it as // undecorated. if (token2.type != parse_token_type_string || token2.has_dash_prefix) { - return plains; + return production_for(); } switch (token1.keyword) { case parse_keyword_command: { *out_tag = parse_statement_decoration_command; - return cmds; + return production_for(); } case parse_keyword_builtin: { *out_tag = parse_statement_decoration_builtin; - return builtins; + return production_for(); } case parse_keyword_exec: { *out_tag = parse_statement_decoration_exec; - return execs; + return production_for(); } default: { *out_tag = parse_statement_decoration_none; - return plains; + return production_for(); } } } -RESOLVE_ONLY(plain_statement, parse_token_type_string, symbol_arguments_or_redirections_list); - RESOLVE(arguments_or_redirections_list) { UNUSED(token2); UNUSED(out_tag); - P(value, symbol_argument_or_redirection, symbol_arguments_or_redirections_list); switch (token1.type) { case parse_token_type_string: case parse_token_type_redirection: { - return value; + return production_for(); } - default: { return empty; } + default: { return production_for(); } } } RESOLVE(argument_or_redirection) { UNUSED(token2); UNUSED(out_tag); - P(arg, symbol_argument); - P(redir, symbol_redirection); switch (token1.type) { case parse_token_type_string: { - return arg; + return production_for(); } case parse_token_type_redirection: { - return redir; + return production_for(); } default: { return NO_PRODUCTION; } } } -RESOLVE_ONLY(argument, parse_token_type_string); -RESOLVE_ONLY(redirection, parse_token_type_redirection, parse_token_type_string); - RESOLVE(optional_background) { UNUSED(token2); - P(background, parse_token_type_background); switch (token1.type) { case parse_token_type_background: { *out_tag = parse_background; - return background; + return production_for(); } default: { *out_tag = parse_no_background; - return empty; + return production_for(); } } } -RESOLVE_ONLY(end_command, KEYWORD(parse_keyword_end)); - -#define TEST(sym) \ - case (symbol_##sym): \ - resolver = resolve_##sym; \ +#define TEST(SYM) \ + case (symbol_##SYM): \ + resolver = SYM::resolve; \ break; const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type,