// fish-shell/src/parse_grammar.h
// Programmatic representation of fish grammar
#ifndef FISH_PARSE_GRAMMAR_H
#define FISH_PARSE_GRAMMAR_H
#include <array>
#include <cstddef>
#include <cstdint>
#include <tuple>
#include <type_traits>

#include "parse_constants.h"
#include "tokenizer.h"
// Forward declaration: this header only ever takes parse_token_t by reference.
struct parse_token_t;

// Type of the tag pointer accepted by every production's resolve() function.
using parse_node_tag_t = std::uint8_t;
namespace grammar {
// A single entry of a production: a token type, a symbol, or an encoded keyword.
using production_element_t = uint8_t;

enum {
    // The maximum length of any seq production.
    MAX_PRODUCTION_LENGTH = 6
};
2018-01-08 07:53:36 +08:00
// Define primitive types.
template <enum parse_token_type_t Token>
struct primitive {
using type_tuple = std::tuple<>;
static constexpr parse_token_type_t token = Token;
2018-01-08 07:53:36 +08:00
static constexpr production_element_t element() { return Token; }
};
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
// Shorthand names for the primitive token types used by the productions below.
// Statement terminator and plain strings.
using tok_end = primitive<parse_token_type_end>;
using tok_string = primitive<parse_token_type_string>;
// Operators that join or modify jobs and statements.
using tok_pipe = primitive<parse_token_type_pipe>;
using tok_andand = primitive<parse_token_type_andand>;
using tok_oror = primitive<parse_token_type_oror>;
using tok_background = primitive<parse_token_type_background>;
using tok_redirection = primitive<parse_token_type_redirection>;
2018-01-08 07:53:36 +08:00
// Define keyword types.
template <parse_keyword_t Keyword>
struct keyword {
using type_tuple = std::tuple<>;
static constexpr parse_token_type_t token = parse_token_type_string;
2018-01-08 07:53:36 +08:00
static constexpr production_element_t element() {
// Convert a parse_keyword_t enum to a production_element_t enum.
return Keyword + LAST_TOKEN_OR_SYMBOL + 1;
}
2018-01-08 05:34:04 +08:00
};
2018-01-16 14:13:37 +08:00
// Define special types.
// A comment never appears inside a production; the parser emits it specially. It therefore
// carries a token type and an empty type_tuple, but no element().
struct comment {
    static constexpr parse_token_type_t token = parse_special_type_comment;
    using type_tuple = std::tuple<>;
};
// Forward declare all the symbol types.
// NOTE(review): parse_grammar_elements.inc presumably invokes ELEM once per grammar symbol and
// undefines it afterwards (ELEM is defined again further down) - confirm against that file.
#define ELEM(T) struct T;
#include "parse_grammar_elements.inc"

// A production is an array of production elements. It holds one slot more than Count so that
// the terminating token_type_invalid fits after the real elements.
template <size_t Count>
using production_t = std::array<const production_element_t, 1 + Count>;
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
// This is an ugly hack to avoid ODR violations
// Given some type, return a pointer to its production.
2018-01-08 05:34:04 +08:00
template <typename T>
2018-01-08 07:53:36 +08:00
const production_element_t *production_for() {
static constexpr auto prod = T::production;
return prod.data();
}
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
// Get the production element of a type by forwarding to its static element() member.
template <typename Elem>
constexpr production_element_t element() { return Elem::element(); }
// Template goo.
namespace detail {
// tuple_contains<T, Tuple>::value is true iff T is one of the element types of the std::tuple
// named by Tuple.
template <typename T, typename Tuple>
struct tuple_contains;

// Base case: an empty tuple contains nothing.
template <typename T>
struct tuple_contains<T, std::tuple<>> : std::false_type {};

// The head is some other type U: keep searching the tail.
template <typename T, typename U, typename... Ts>
struct tuple_contains<T, std::tuple<U, Ts...>> : tuple_contains<T, std::tuple<Ts...>> {};

// The head is T itself: found.
template <typename T, typename... Ts>
struct tuple_contains<T, std::tuple<T, Ts...>> : std::true_type {};

// Stand-in whose nested 'type' is void; selected when a tuple index is out of bounds.
struct void_type {
    using type = void;
};

// Support for checking whether the index N is valid for T::type_tuple.
// Deliberately not 'static': a constexpr function template is already implicitly inline, and
// internal linkage would make the templates below name a different entity in every
// translation unit that includes this header.
template <size_t N, typename T>
constexpr bool index_valid() {
    return N < std::tuple_size<typename T::type_tuple>::value;
}

// Get the Nth type of T::type_tuple.
template <size_t N, typename T>
using tuple_element = std::tuple_element<N, typename T::type_tuple>;

// Get the Nth type of T::type_tuple, or void if N is out of bounds.
// std::conditional picks between two class types and only the chosen one has its nested 'type'
// looked up, so std::tuple_element is never instantiated with an invalid index.
template <size_t N, typename T>
using tuple_element_or_void =
    typename std::conditional<index_valid<N, T>(), tuple_element<N, T>, void_type>::type::type;

// Make a tuple by mapping the Nth item of a list of 'seq's.
template <size_t N, typename... Ts>
struct tuple_nther {
    // A tuple of the Nth types of tuples (or voids).
    using type = std::tuple<tuple_element_or_void<N, Ts>...>;
};

// Given a list of Options, each one a seq, check to see if any of them contain type Desired at
// index Index.
template <typename Desired, size_t Index, typename... Options>
constexpr bool type_possible() {
    using nths = typename tuple_nther<Index, Options...>::type;
    return tuple_contains<Desired, nths>::value;
}
}  // namespace detail
// Specialization hack: for every symbol type T listed in parse_grammar_elements.inc, provide an
// explicit specialization of element<T>() that returns symbol_T. The generic element<T>()
// forwards to T::element(), which the symbol structs themselves do not define in this header.
// NOTE(review): relies on the .inc file invoking ELEM(T) once per symbol - confirm there.
#define ELEM(T) \
template <> \
constexpr production_element_t element<T>() { \
return symbol_##T; \
}
#include "parse_grammar_elements.inc"
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
// Empty produces nothing.
struct empty {
using type_tuple = std::tuple<>;
2018-01-08 07:53:36 +08:00
static constexpr production_t<0> production = {{token_type_invalid}};
static const production_element_t *resolve(const parse_token_t &, const parse_token_t &,
parse_node_tag_t *) {
return production_for<empty>();
}
};
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
// Sequence represents a list of (at least two) productions.
template <class T0, class... Ts>
struct seq {
static constexpr production_t<1 + sizeof...(Ts)> production = {
{element<T0>(), element<Ts>()..., token_type_invalid}};
2018-01-08 11:07:49 +08:00
static_assert(1 + sizeof...(Ts) <= MAX_PRODUCTION_LENGTH, "MAX_PRODUCTION_LENGTH too small");
2018-01-08 11:07:49 +08:00
using type_tuple = std::tuple<T0, Ts...>;
template <typename Desired, size_t Index>
static constexpr bool type_possible() {
using element_t = detail::tuple_element_or_void<Index, seq>;
return std::is_same<Desired, element_t>::value;
}
2018-01-08 07:53:36 +08:00
static const production_element_t *resolve(const parse_token_t &, const parse_token_t &,
parse_node_tag_t *) {
return production_for<seq>();
}
};
2018-01-08 05:34:04 +08:00
template <class... Args>
2018-01-08 07:53:36 +08:00
using produces_sequence = seq<Args...>;
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
// Convenience alias: a production made of exactly one element is a one-item seq.
template <class Elem>
using single = seq<Elem>;
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
template <class T>
using produces_single = single<T>;
// Alternative represents a choice. It is an empty base class used to mark such symbols.
struct alternative {};

// Following are the grammar productions.

// BODY(T): declares the 'token' member that identifies struct T as symbol_T.
#define BODY(T) static constexpr parse_token_type_t token = symbol_##T;

// DEF(T): opens struct T, publicly inheriting from the production written after the macro.
#define DEF(T) struct T : public

// DEF_ALT(T): opens struct T as a choice between several productions.
#define DEF_ALT(T) struct T : public alternative

// ALT_BODY(T, ...): the common members of an alternative T whose options are the listed seqs.
// It emits the token, an empty type_tuple, a type_possible() that asks whether any option has
// type Desired at position Index, and the declaration of resolve(). The declaration is left
// without a terminator so the invocation supplies the trailing ';'; resolve() is not defined
// in this header.
#define ALT_BODY(T, ...)                                                                      \
    BODY(T)                                                                                   \
    using type_tuple = std::tuple<>;                                                          \
    template <typename Desired, size_t Index>                                                 \
    static constexpr bool type_possible() {                                                   \
        return detail::type_possible<Desired, Index, __VA_ARGS__>();                          \
    }                                                                                         \
    static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, \
                                               parse_node_tag_t *)

// A job_list is a list of job_conjunctions, separated by semicolons or newlines
2018-01-08 07:53:36 +08:00
DEF_ALT(job_list) {
using normal = seq<job_decorator, job_conjunction, job_list>;
2018-01-08 07:53:36 +08:00
using empty_line = seq<tok_end, job_list>;
using empty = grammar::empty;
ALT_BODY(job_list, normal, empty_line, empty);
2018-01-08 07:53:36 +08:00
};
2018-01-08 05:34:04 +08:00
// A job decorator is the keyword 'and', the keyword 'or', or nothing. It applies to the whole
// job that follows it.
DEF_ALT(job_decorator) {
    using empty = grammar::empty;
    using ands = seq<keyword<parse_keyword_and>>;
    using ors = seq<keyword<parse_keyword_or>>;
    ALT_BODY(job_decorator, ands, ors, empty);
};
// A job_conjunction is one job plus a job_conjunction_continuation, which may be empty.
DEF(job_conjunction)
seq<job, job_conjunction_continuation>{BODY(job_conjunction)};
// The continuation of a job_conjunction: a tok_andand or tok_oror, then optional newlines and
// another job_conjunction; or nothing at all.
DEF_ALT(job_conjunction_continuation) {
    using empty = grammar::empty;
    using andands = seq<tok_andand, optional_newlines, job_conjunction>;
    using orors = seq<tok_oror, optional_newlines, job_conjunction>;
    ALT_BODY(job_conjunction_continuation, andands, orors, empty);
};
2018-01-08 05:34:04 +08:00
// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases
// like if statements, where we require a command). To represent "non-empty", we require a
// statement, followed by a possibly empty job_continuation, and then optionally a background
// specifier '&'
2018-01-08 07:53:36 +08:00
DEF(job) produces_sequence<statement, job_continuation, optional_background>{BODY(job)};
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
// A job_continuation is either empty, or a pipe followed by optional newlines, another
// statement, and a further job_continuation.
DEF_ALT(job_continuation) {
    using piped = seq<tok_pipe, optional_newlines, statement, job_continuation>;
    using empty = grammar::empty;
    ALT_BODY(job_continuation, piped, empty);
};
// A statement is a normal command, or an if / while / and etc
2018-01-08 07:53:36 +08:00
DEF_ALT(statement) {
using nots = single<not_statement>;
2018-01-08 07:53:36 +08:00
using block = single<block_statement>;
using ifs = single<if_statement>;
using switchs = single<switch_statement>;
using decorated = single<decorated_statement>;
ALT_BODY(statement, nots, block, ifs, switchs, decorated);
2018-01-08 07:53:36 +08:00
};
2018-01-08 05:34:04 +08:00
// An if_statement is an if_clause, an else_clause (possibly empty), the end command, and a
// trailing list of arguments or redirections.
DEF(if_statement)
produces_sequence<if_clause, else_clause, end_command, arguments_or_redirections_list>{
    BODY(if_statement)};
// An if_clause is the 'if' keyword, the condition (a job_conjunction terminated by tok_end),
// an andor_job_list, and then a job_list.
DEF(if_clause)
produces_sequence<keyword<parse_keyword_if>, job_conjunction, tok_end, andor_job_list, job_list>{
    BODY(if_clause)};

// An else_clause is either empty, or the 'else' keyword followed by an else_continuation.
DEF_ALT(else_clause) {
    using empty = grammar::empty;
    using else_cont = seq<keyword<parse_keyword_else>, else_continuation>;
    ALT_BODY(else_clause, empty, else_cont);
};
// What follows 'else': either another if_clause with its own else_clause ("else if"), or a
// tok_end followed by a job_list (a plain "else").
DEF_ALT(else_continuation) {
    using else_if = seq<if_clause, else_clause>;
    using else_only = seq<tok_end, job_list>;
    ALT_BODY(else_continuation, else_if, else_only);
};
// A switch_statement is the 'switch' keyword, one argument, tok_end, a case_item_list, the end
// command, and a trailing list of arguments or redirections.
DEF(switch_statement)
produces_sequence<keyword<parse_keyword_switch>, argument, tok_end, case_item_list, end_command,
                  arguments_or_redirections_list>{BODY(switch_statement)};

// A case_item_list is zero or more case_items, with blank lines (bare tok_end) allowed between
// them.
DEF_ALT(case_item_list) {
    using empty = grammar::empty;
    using case_items = seq<case_item, case_item_list>;
    using blank_line = seq<tok_end, case_item_list>;
    ALT_BODY(case_item_list, empty, case_items, blank_line);
};
// A case_item is the 'case' keyword, an argument_list, tok_end, and the job_list that forms
// its body.
DEF(case_item)
seq<keyword<parse_keyword_case>, argument_list, tok_end, job_list>{BODY(case_item)};
2018-01-08 05:34:04 +08:00
DEF(block_statement)
2018-01-14 17:42:58 +08:00
produces_sequence<block_header, job_list, end_command, arguments_or_redirections_list>{
BODY(block_statement)};
2018-01-08 07:53:36 +08:00
// A block_header is exactly one of the for / while / function / begin headers.
DEF_ALT(block_header) {
    using forh = single<for_header>;
    using whileh = single<while_header>;
    using funch = single<function_header>;
    using beginh = single<begin_header>;
    ALT_BODY(block_header, forh, whileh, funch, beginh);
};
// A for_header is: 'for' <string> 'in' <argument_list> <tok_end>.
DEF(for_header)
seq<keyword<parse_keyword_for>, tok_string, keyword<parse_keyword_in>, argument_list, tok_end>{
    BODY(for_header)};
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
DEF(while_header)
produces_sequence<keyword<parse_keyword_while>, job_conjunction, tok_end, andor_job_list>{
BODY(while_header)};
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
// A begin_header is just the 'begin' keyword.
DEF(begin_header) single<keyword<parse_keyword_begin>>{BODY(begin_header)};
2018-01-08 05:34:04 +08:00
// Functions take arguments, and require at least one (the name). No redirections allowed.
DEF(function_header)
2018-01-14 17:42:58 +08:00
produces_sequence<keyword<parse_keyword_function>, argument, argument_list, tok_end>{
BODY(function_header)};
2018-01-08 05:34:04 +08:00
// A not_statement is a statement prefixed by either the 'not' keyword or the exclam keyword.
DEF_ALT(not_statement) {
    using nots = seq<keyword<parse_keyword_not>, statement>;
    using exclams = seq<keyword<parse_keyword_exclam>, statement>;
    ALT_BODY(not_statement, nots, exclams);
};
// An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean
// statement.
2018-01-08 07:53:36 +08:00
DEF_ALT(andor_job_list) {
using empty = grammar::empty;
using andor_job = seq<job_decorator, job_conjunction, andor_job_list>;
2018-01-08 07:53:36 +08:00
using empty_line = seq<tok_end, andor_job_list>;
ALT_BODY(andor_job_list, empty, andor_job, empty_line);
2018-01-08 07:53:36 +08:00
};
2018-01-08 05:34:04 +08:00
// A decorated_statement is a command with a list of arguments_or_redirections, possibly with
// "builtin" or "command" or "exec"
2018-01-08 07:53:36 +08:00
DEF_ALT(decorated_statement) {
using plains = single<plain_statement>;
using cmds = seq<keyword<parse_keyword_command>, plain_statement>;
using builtins = seq<keyword<parse_keyword_builtin>, plain_statement>;
using execs = seq<keyword<parse_keyword_exec>, plain_statement>;
ALT_BODY(decorated_statement, plains, cmds, builtins, execs);
2018-01-08 07:53:36 +08:00
};
// A plain_statement is a string (the command) followed by its arguments and redirections.
DEF(plain_statement)
seq<tok_string, arguments_or_redirections_list>{BODY(plain_statement)};
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
DEF_ALT(argument_list) {
using empty = grammar::empty;
using arg = seq<argument, argument_list>;
ALT_BODY(argument_list, empty, arg);
2018-01-08 07:53:36 +08:00
};
2018-01-08 05:34:04 +08:00
2018-01-08 07:53:36 +08:00
// An arguments_or_redirections_list is zero or more arguments and redirections, in any order.
DEF_ALT(arguments_or_redirections_list) {
    using empty = grammar::empty;
    using arg = seq<argument, arguments_or_redirections_list>;
    using redir = seq<redirection, arguments_or_redirections_list>;
    ALT_BODY(arguments_or_redirections_list, empty, arg, redir);
};
// An argument is a single string token.
DEF(argument) single<tok_string>{BODY(argument)};
// A redirection is a redirection token followed by a string token.
DEF(redirection) seq<tok_redirection, tok_string>{BODY(redirection)};
// An optional_background is either nothing or a single background token.
DEF_ALT(optional_background) {
    using empty = grammar::empty;
    using background = single<tok_background>;
    ALT_BODY(optional_background, empty, background);
};
// An end_command is just the 'end' keyword.
DEF(end_command) single<keyword<parse_keyword_end>>{BODY(end_command)};
2018-01-08 05:34:04 +08:00
// Note optional_newlines only allows newline-style tok_end, not semicolons.
DEF_ALT(optional_newlines) {
using empty = grammar::empty;
using newlines = seq<tok_end, optional_newlines>;
ALT_BODY(optional_newlines, empty, newlines);
};
2018-01-08 05:34:04 +08:00
// A freestanding_argument_list is equivalent to a normal argument list, except it may contain
// TOK_END (newlines, and even semicolons, for historical reasons)
2018-01-08 07:53:36 +08:00
DEF_ALT(freestanding_argument_list) {
using empty = grammar::empty;
using arg = seq<argument, freestanding_argument_list>;
using semicolon = seq<tok_end, freestanding_argument_list>;
ALT_BODY(freestanding_argument_list, empty, arg, semicolon);
2018-01-08 07:53:36 +08:00
};
} // namespace grammar
#endif  // FISH_PARSE_GRAMMAR_H