fish-shell/parse_tree.h

279 lines
7.8 KiB
C
Raw Normal View History

/**\file parse_tree.h
Programmatic representation of fish code.
*/
2013-07-26 06:24:22 +08:00
#ifndef FISH_PARSE_PRODUCTIONS_H
#define FISH_PARSE_PRODUCTIONS_H
#include <wchar.h>
#include "config.h"
#include "util.h"
#include "common.h"
2013-06-02 13:14:47 +08:00
#include "tokenizer.h"
2013-06-12 00:37:51 +08:00
#include <vector>
/* Assertion used by the parser code; forwards directly to assert(), so it follows assert()'s NDEBUG behavior. */
#define PARSE_ASSERT(a) assert(a)

/* Report a fatal parser failure on stderr and terminate via exit_without_destructors(-1).
   Wrapped in do { } while (0) so that it behaves as a single statement (e.g. inside an unbraced if/else). */
#define PARSER_DIE() do { fprintf(stderr, "Parser dying!\n"); exit_without_destructors(-1); } while (0)
/* Forward declarations; both classes are defined later in this header. */
class parse_node_t;
class parse_node_tree_t;

/* Type used to refer to a node by offset (see parse_node_t::child_start / child_offset). */
typedef size_t node_offset_t;

/* Sentinel offset meaning "no node": the largest value representable by node_offset_t. */
#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))
/** A single error produced while parsing: a message plus the source range of the token that triggered it. */
struct parse_error_t
{
/** Text of the error */
wcstring text;
/** Offset and length of the token in the source code that triggered this error */
size_t source_start;
size_t source_length;
/** Return a string describing the error, suitable for presentation to the user */
/* NOTE(review): src is presumably the source text that source_start / source_length index into - confirm against the implementation. */
wcstring describe(const wcstring &src) const;
};
/** A list of parse errors; the parse_t entry points take a pointer to one of these as their errors parameter. */
typedef std::vector<parse_error_t> parse_error_list_t;
2013-06-12 00:37:51 +08:00
/* Every kind of node / token known to the parser.
   The enumerators are laid out in three contiguous groups (non-terminal symbols, terminal tokens,
   special terminals); the FIRST_* / LAST_* aliases at the bottom name the boundaries of those
   groups, so the declaration order must be preserved. */
enum parse_token_type_t
{
    token_type_invalid,

    // Non-terminal tokens
    symbol_job_list,
    symbol_job,
    symbol_job_continuation,
    symbol_statement,
    symbol_block_statement,
    symbol_block_header,
    symbol_for_header,
    symbol_while_header,
    symbol_begin_header,
    symbol_function_header,

    symbol_if_statement,
    symbol_if_clause,
    symbol_else_clause,
    symbol_else_continuation,

    symbol_switch_statement,
    symbol_case_item_list,
    symbol_case_item,

    symbol_boolean_statement,
    symbol_decorated_statement,
    symbol_plain_statement,
    symbol_arguments_or_redirections_list,
    symbol_argument_or_redirection,

    symbol_argument_list_nonempty,
    symbol_argument_list,

    symbol_argument,
    symbol_redirection,

    symbol_optional_background,

    // Terminal types
    parse_token_type_string,
    parse_token_type_pipe,
    parse_token_type_redirection,
    parse_token_type_background,
    parse_token_type_end,
    parse_token_type_terminate,

    // Very special terminal types that don't appear in the production list
    parse_special_type_parse_error,
    parse_special_type_tokenizer_error,
    parse_special_type_comment,

    // Range markers. These alias existing enumerators and do not introduce new values.
    // Note that the special types above lie beyond LAST_TERMINAL_TYPE / LAST_TOKEN_OR_SYMBOL.
    FIRST_TERMINAL_TYPE = parse_token_type_string,
    LAST_TERMINAL_TYPE = parse_token_type_terminate,

    LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,

    FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
};
/* Keywords recognized by the parser. parse_keyword_none is the "not a keyword" value;
   LAST_KEYWORD aliases the final real keyword and must be updated if a keyword is appended. */
enum parse_keyword_t
{
    parse_keyword_none,
    parse_keyword_if,
    parse_keyword_else,
    parse_keyword_for,
    parse_keyword_in,
    parse_keyword_while,
    parse_keyword_begin,
    parse_keyword_function,
    parse_keyword_switch,
    parse_keyword_case,
    parse_keyword_end,
    parse_keyword_and,
    parse_keyword_or,
    parse_keyword_not,
    parse_keyword_command,
    parse_keyword_builtin,

    LAST_KEYWORD = parse_keyword_builtin
};
/* Bit flags controlling a parse; combine with bitwise OR and pass as a parse_tree_flags_t. */
enum
{
    parse_flag_none = 0,

    /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
    parse_flag_continue_after_error = 1 << 0,

    /* Include comment tokens */
    parse_flag_include_comments = 1 << 1
};

/* Integer type holding a combination of the parse_flag_* values above. */
typedef unsigned int parse_tree_flags_t;
/* Forward declaration; parse_t only holds a pointer to it here. */
class parse_ll_t;
/* Public entry point to the parser. Parsing results are written to a caller-supplied
   parse_node_tree_t and errors to a caller-supplied parse_error_list_t. */
class parse_t
{
/* Pointer to the parse_ll_t this object works through; the pointer itself is const and cannot be reseated. */
/* NOTE(review): this class declares a destructor but no copy constructor. If ~parse_t() frees
   `parser` (not visible from this header), copying a parse_t would lead to a double free - confirm
   and consider disabling copying. */
parse_ll_t * const parser;
public:
parse_t();
~parse_t();
/* Parse a string */
/* str is the text to parse and flags is a combination of parse_flag_* values. output and errors
   receive the results. NOTE(review): the meaning of the bool result, whether output / errors may be
   NULL, and the exact effect of log_it are not visible from this header - confirm against the implementation. */
bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
/* Parse a single token */
/* Takes an already-classified token (type plus keyword) instead of source text. */
bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors);
/* Reset, ready to parse something else */
void clear();
};
/* Produce a textual dump of the given tree. src is passed alongside the tree, presumably so that
   node source ranges can be resolved to text (confirm against the implementation). */
wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);

/* Return a string description of the given token type. */
wcstring token_type_description(parse_token_type_t type);

/* Return a string description of the given keyword. */
wcstring keyword_description(parse_keyword_t type);
2013-06-12 00:37:51 +08:00
/** Base class for nodes of a parse tree */
class parse_node_t
{
public:
2013-06-12 00:37:51 +08:00
/* Type of the node */
enum parse_token_type_t type;
2013-06-12 00:37:51 +08:00
/* Start in the source code */
size_t source_start;
2013-06-12 00:37:51 +08:00
/* Length of our range in the source code */
size_t source_length;
/* Children */
node_offset_t child_start;
node_offset_t child_count;
2013-06-12 00:37:51 +08:00
/* Type-dependent data */
uint32_t tag;
2013-08-11 15:35:00 +08:00
2013-07-29 06:19:38 +08:00
/* Which production was used */
uint8_t production_idx;
2013-06-12 00:37:51 +08:00
/* Description */
wcstring describe(void) const;
2013-06-12 00:37:51 +08:00
/* Constructor */
2013-08-09 06:06:46 +08:00
explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), child_start(0), child_count(0), tag(0)
2013-06-12 00:37:51 +08:00
{
}
2013-06-23 17:09:46 +08:00
node_offset_t child_offset(node_offset_t which) const
{
PARSE_ASSERT(which < child_count);
return child_start + which;
}
2013-08-11 15:35:00 +08:00
2013-08-09 06:06:46 +08:00
bool has_source() const
{
return source_start != (size_t)(-1);
}
2013-06-12 00:37:51 +08:00
};
class parse_node_tree_t : public std::vector<parse_node_t>
{
2013-08-11 15:35:00 +08:00
public:
2013-08-09 06:06:46 +08:00
/* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
2013-08-11 15:35:00 +08:00
2013-08-09 06:06:46 +08:00
/* Find all the nodes of a given type underneath a given node */
typedef std::vector<const parse_node_t *> parse_node_list_t;
parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const;
};
2013-06-09 10:20:26 +08:00
/* Fish grammar:

# A job_list is a list of jobs, separated by semicolons or newlines

    job_list = <empty> |
               job job_list |
               <TOK_END> job_list

# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation

    job = statement job_continuation
    job_continuation = <empty> |
                       <TOK_PIPE> statement job_continuation

# A statement is a normal command, or an if / while / and etc

    statement = boolean_statement | block_statement | if_statement | switch_statement | decorated_statement

# A block is a conditional, loop, or begin/end

    if_statement = if_clause else_clause <END> arguments_or_redirections_list
    if_clause = <IF> job STATEMENT_TERMINATOR job_list
    else_clause = <empty> |
                  <ELSE> else_continuation
    else_continuation = if_clause else_clause |
                        STATEMENT_TERMINATOR job_list

    switch_statement = SWITCH <TOK_STRING> STATEMENT_TERMINATOR case_item_list <END>
    case_item_list = <empty> |
                     case_item case_item_list
    case_item = CASE argument_list STATEMENT_TERMINATOR job_list

    argument_list_nonempty = <TOK_STRING> argument_list
    argument_list = <empty> | argument_list_nonempty

    block_statement = block_header <TOK_END> job_list <END> arguments_or_redirections_list
    block_header = for_header | while_header | function_header | begin_header
    for_header = FOR var_name IN arguments_or_redirections_list
    while_header = WHILE statement
    begin_header = BEGIN
    function_header = FUNCTION function_name argument_list

# A boolean statement is AND or OR or NOT

    boolean_statement = AND statement | OR statement | NOT statement

# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command"

    decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement
    plain_statement = COMMAND arguments_or_redirections_list optional_background

    arguments_or_redirections_list = <empty> |
                                     argument_or_redirection arguments_or_redirections_list
    argument_or_redirection = argument | redirection
    argument = <TOK_STRING>
    redirection = <TOK_REDIRECTION>

    terminator = <TOK_END> | <TOK_BACKGROUND>

    optional_background = <empty> | <TOK_BACKGROUND>

*/
#endif