// Programmatic representation of fish grammar #ifndef FISH_PARSE_GRAMMAR_H #define FISH_PARSE_GRAMMAR_H #include "parse_constants.h" #include "tokenizer.h" #include struct parse_token_t; typedef uint8_t parse_node_tag_t; using parse_node_tag_t = uint8_t; struct parse_token_t; namespace grammar { using production_element_t = uint8_t; // Define primitive types. template struct primitive { using type_tuple = std::tuple<>; static constexpr parse_token_type_t token = Token; static constexpr production_element_t element() { return Token; } }; using tok_end = primitive; using tok_string = primitive; using tok_pipe = primitive; using tok_background = primitive; using tok_redirection = primitive; // Define keyword types. template struct keyword { using type_tuple = std::tuple<>; static constexpr production_element_t element() { // Convert a parse_keyword_t enum to a production_element_t enum. return Keyword + LAST_TOKEN_OR_SYMBOL + 1; } }; // Forward declare all the symbol types. #define ELEM(T) struct T; #include "parse_grammar_elements.inc" // A production is a sequence of production elements. // +1 to hold the terminating token_type_invalid template using production_t = std::array; // This is an ugly hack to avoid ODR violations // Given some type, return a pointer to its production. template const production_element_t *production_for() { static constexpr auto prod = T::production; return prod.data(); } // Get some production element. template constexpr production_element_t element() { return T::element(); } // Partial specialization hack. #define ELEM(T) \ template <> \ constexpr production_element_t element() { \ return symbol_##T; \ } #include "parse_grammar_elements.inc" // Empty produces nothing. struct empty { static constexpr production_t<0> production = {{token_type_invalid}}; static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, parse_node_tag_t *) { return production_for(); } }; // Sequence represents a list of (at least two) productions. template struct seq { static constexpr production_t<1 + sizeof...(Ts)> production = { {element(), element()..., token_type_invalid}}; using type_tuple = std::tuple; static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, parse_node_tag_t *) { return production_for(); } }; template using produces_sequence = seq; // Ergonomic way to create a production for a single element. template using single = seq; template using produces_single = single; // Alternative represents a choice. struct alternative { }; // Following are the grammar productions. #define BODY(T) static constexpr parse_token_type_t token = symbol_##T; #define DEF(T) struct T : public #define DEF_ALT(T) struct T : public alternative #define ALT_BODY(T) \ BODY(T) \ using type_tuple = std::tuple<>; \ static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, \ parse_node_tag_t *); // A job_list is a list of jobs, separated by semicolons or newlines DEF_ALT(job_list) { using normal = seq; using empty_line = seq; using empty = grammar::empty; ALT_BODY(job_list); }; // A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases // like if statements, where we require a command). To represent "non-empty", we require a // statement, followed by a possibly empty job_continuation, and then optionally a background // specifier '&' DEF(job) produces_sequence{BODY(job)}; DEF_ALT(job_continuation) { using piped = seq; ALT_BODY(job_continuation); }; // A statement is a normal command, or an if / while / and etc DEF_ALT(statement) { using boolean = single; using block = single; using ifs = single; using switchs = single; using decorated = single; ALT_BODY(statement); }; // A block is a conditional, loop, or begin/end DEF(if_statement) produces_sequence{ BODY(if_statement)}; DEF(if_clause) produces_sequence, job, tok_end, andor_job_list, job_list>{ BODY(if_clause)}; DEF_ALT(else_clause) { using empty = grammar::empty; using else_cont = seq, else_continuation>; ALT_BODY(else_clause); }; DEF_ALT(else_continuation) { using else_if = seq; using else_only = seq; ALT_BODY(else_continuation); }; DEF(switch_statement) produces_sequence, argument, tok_end, case_item_list, end_command, arguments_or_redirections_list>{BODY(switch_statement)}; DEF_ALT(case_item_list) { using empty = grammar::empty; using case_items = seq; using blank_line = seq; ALT_BODY(case_item_list); }; DEF(case_item) produces_sequence, argument_list, tok_end, job_list> { BODY(case_item); }; DEF(block_statement) produces_sequence{}; DEF_ALT(block_header) { using forh = single; using whileh = single; using funch = single; using beginh = single; ALT_BODY(block_header); }; DEF(for_header) produces_sequence, tok_string, keyword, argument_list, tok_end>{}; DEF(while_header) produces_sequence, job, tok_end, andor_job_list>{BODY(while_header)}; DEF(begin_header) produces_single>{BODY(begin_header)}; // Functions take arguments, and require at least one (the name). No redirections allowed. DEF(function_header) produces_sequence, argument, argument_list, tok_end>{}; // A boolean statement is AND or OR or NOT DEF_ALT(boolean_statement) { using ands = seq, statement>; using ors = seq, statement>; using nots = seq, statement>; ALT_BODY(boolean_statement); }; // An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean // statement. DEF_ALT(andor_job_list) { using empty = grammar::empty; using andor_job = seq; using empty_line = seq; ALT_BODY(andor_job_list); }; // A decorated_statement is a command with a list of arguments_or_redirections, possibly with // "builtin" or "command" or "exec" DEF_ALT(decorated_statement) { using plains = single; using cmds = seq, plain_statement>; using builtins = seq, plain_statement>; using execs = seq, plain_statement>; ALT_BODY(decorated_statement); }; DEF(plain_statement) produces_sequence{BODY(plain_statement)}; DEF_ALT(argument_list) { using empty = grammar::empty; using arg = seq; ALT_BODY(argument_list); }; DEF_ALT(arguments_or_redirections_list) { using empty = grammar::empty; using value = seq; ALT_BODY(arguments_or_redirections_list); }; DEF_ALT(argument_or_redirection) { using arg = single; using redir = single; ALT_BODY(argument_or_redirection); }; DEF(argument) produces_single{BODY(argument)}; DEF(redirection) produces_sequence{BODY(redirection)}; DEF_ALT(optional_background) { using empty = grammar::empty; using background = single; ALT_BODY(optional_background); }; DEF(end_command) produces_single>{BODY(end_command)}; // A freestanding_argument_list is equivalent to a normal argument list, except it may contain // TOK_END (newlines, and even semicolons, for historical reasons) DEF_ALT(freestanding_argument_list) { using empty = grammar::empty; using arg = seq; using semicolon = seq; ALT_BODY(freestanding_argument_list); }; } #endif