2013-06-10 05:21:24 +08:00
/**\file parse_tree.h
2013-05-27 03:12:16 +08:00
Programmatic representation of fish code.
*/
2013-06-10 05:21:24 +08:00
# ifndef FISH_PARSE_TREE_H
# define FISH_PARSE_TREE_H
2013-05-27 03:12:16 +08:00
# include <wchar.h>
# include "config.h"
# include "util.h"
# include "common.h"
2013-06-02 13:14:47 +08:00
# include "tokenizer.h"
2013-06-12 00:37:51 +08:00
# include <vector>
/* Assertion macro used by the parse tree code; expands to a plain assert(a). */
# define PARSE_ASSERT(a) assert(a)
2013-06-28 06:12:27 +08:00
/* Fatal-error macro for the parser; expands to exit_without_destructors(-1), which is declared outside this header. */
# define PARSER_DIE() exit_without_destructors(-1)
2013-05-27 03:12:16 +08:00
2013-06-16 05:32:38 +08:00
/* Forward declaration; the class is defined further down in this header. */
class parse_node_t ;
2013-06-25 03:33:40 +08:00
/* Forward declaration, needed because parse_t::parse() takes a parse_node_tree_t pointer before the class is defined. */
class parse_node_tree_t ;
2013-06-16 05:32:38 +08:00
/* Integer type used by parse_node_t for child_start, child_count and child_offset(). */
typedef size_t node_offset_t ;
2013-06-23 17:09:46 +08:00
/* Sentinel node offset: -1 converted to node_offset_t, i.e. the largest value that type can hold. */
# define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))
2013-06-16 05:32:38 +08:00
2013-06-16 06:21:35 +08:00
struct parse_error_t
{
/** Text of the error */
wcstring text ;
/** Offset and length of the token in the source code that triggered this error */
size_t source_start ;
size_t source_length ;
/** Return a string describing the error, suitable for presentation to the user */
wcstring describe ( const wcstring & src ) const ;
} ;
typedef std : : vector < parse_error_t > parse_error_list_t ;
2013-05-27 03:12:16 +08:00
2013-06-02 13:14:47 +08:00
class parse_ll_t ;
class parse_t
{
2013-06-07 12:49:40 +08:00
parse_ll_t * const parser ;
2013-06-09 10:20:26 +08:00
public :
2013-06-02 13:14:47 +08:00
parse_t ( ) ;
2013-06-16 06:21:35 +08:00
bool parse ( const wcstring & str , parse_node_tree_t * output , parse_error_list_t * errors ) ;
2013-06-02 13:14:47 +08:00
} ;
2013-06-12 00:37:51 +08:00
/** Every kind of node that can appear in a parse tree. Non-terminal symbols come first,
    followed by the terminal token types. Enumerator order determines the numeric values,
    so new entries must not be inserted without checking users of those values. */
enum parse_token_type_t
{
    token_type_invalid,

    // Non-terminal tokens
    symbol_job_list,
    symbol_job,
    symbol_job_continuation,
    symbol_statement,
    symbol_block_statement,
    symbol_block_header,
    symbol_for_header,
    symbol_while_header,
    symbol_begin_header,
    symbol_function_header,

    symbol_if_statement,
    symbol_if_clause,
    symbol_else_clause,
    symbol_else_continuation,

    symbol_switch_statement,
    symbol_case_item_list,
    symbol_case_item,

    symbol_boolean_statement,
    symbol_decorated_statement,
    symbol_plain_statement,
    symbol_arguments_or_redirections_list,
    symbol_argument_or_redirection,

    symbol_argument_list_nonempty,
    symbol_argument_list,

    // Terminal types
    parse_token_type_string,
    parse_token_type_pipe,
    parse_token_type_redirection,
    parse_token_background,
    parse_token_type_end,
    parse_token_type_terminate,

    // Alias marking the first terminal type; everything before it is a non-terminal
    FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
};
/** Keywords recognised by the parser. parse_keyword_none is the first enumerator and
    therefore has value 0. */
enum parse_keyword_t
{
    parse_keyword_none,

    // Block and conditional keywords
    parse_keyword_if,
    parse_keyword_else,
    parse_keyword_for,
    parse_keyword_in,
    parse_keyword_while,
    parse_keyword_begin,
    parse_keyword_function,
    parse_keyword_switch,
    parse_keyword_case,
    parse_keyword_end,

    // Boolean keywords
    parse_keyword_and,
    parse_keyword_or,
    parse_keyword_not,

    // Statement decorations
    parse_keyword_command,
    parse_keyword_builtin
};
2013-06-23 17:09:46 +08:00
/* Return a wcstring for the given token type; presumably a human-readable name (defined outside this header). */
wcstring token_type_description ( parse_token_type_t type ) ;
/* Return a wcstring for the given keyword; presumably a human-readable name (defined outside this header). */
wcstring keyword_description ( parse_keyword_t type ) ;
2013-06-12 00:37:51 +08:00
/** A node of a parse tree. A node records its type, the range of source text it covers,
    and the range of node offsets [child_start, child_start + child_count) of its children. */
class parse_node_t
{
public:
    /* Type of the node */
    enum parse_token_type_t type;

    /* Start in the source code */
    size_t source_start;

    /* Length of our range in the source code */
    size_t source_length;

    /* Children: offset of the first child, and how many children there are */
    node_offset_t child_start;
    node_offset_t child_count;

    /* Type-dependent data */
    uint32_t tag;

    /* Description */
    wcstring describe(void) const;

    /* Constructor: records the type and zeroes every other field */
    explicit parse_node_t(parse_token_type_t ty) :
        type(ty),
        source_start(0),
        source_length(0),
        child_start(0),
        child_count(0),
        tag(0)
    {
    }

    /* Returns the node offset of the child at index 'which'. Asserts (via PARSE_ASSERT)
       that 'which' is less than child_count. */
    node_offset_t child_offset(node_offset_t which) const
    {
        PARSE_ASSERT(which < child_count);
        return child_start + which;
    }
};
2013-06-25 03:33:40 +08:00
class parse_node_tree_t : public std : : vector < parse_node_t >
{
} ;
2013-06-09 10:20:26 +08:00
2013-07-11 14:45:09 +08:00
namespace parse_symbols
{
# define SYMBOL(x) static inline parse_token_type_t get_token() { return x; }
/* Placeholder */
struct none
{
SYMBOL ( token_type_invalid ) ;
} ;
struct EMPTY
{
typedef none t0 ;
typedef none t1 ;
typedef none t2 ;
typedef none t3 ;
typedef none t4 ;
typedef none t5 ;
} ;
template < typename T0 , typename T1 , typename T2 = none , typename T3 = none , typename T4 = none , typename T5 = none >
struct Seq
{
typedef T0 t0 ;
typedef T1 t1 ;
typedef T2 t2 ;
typedef T3 t3 ;
typedef T4 t4 ;
typedef T5 t5 ;
} ;
template < typename P0 , typename P1 , typename P2 = none , typename P3 = none , typename P4 = none , typename P5 = none >
struct OR
{
typedef P0 p0 ;
typedef P1 p1 ;
typedef P2 p2 ;
typedef P3 p3 ;
typedef P4 p4 ;
typedef P5 p5 ;
} ;
template < parse_token_type_t WHICH >
struct Token
{
SYMBOL ( WHICH ) ;
} ;
template < parse_keyword_t WHICH >
struct Keyword
{
static inline parse_keyword_t get_token ( ) { return WHICH ; }
} ;
struct job ;
struct statement ;
struct job_continuation ;
struct boolean_statement ;
struct block_statement ;
struct if_statement ;
struct if_clause ;
struct else_clause ;
struct else_continuation ;
struct switch_statement ;
struct decorated_statement ;
struct else_clause ;
struct else_continuation ;
struct switch_statement ;
struct case_item_list ;
struct case_item ;
struct argument_list_nonempty ;
struct argument_list ;
struct block_statement ;
struct block_header ;
struct for_header ;
struct while_header ;
struct begin_header ;
struct function_header ;
struct boolean_statement ;
struct decorated_statement ;
struct plain_statement ;
struct arguments_or_redirections_list ;
struct argument_or_redirection ;
struct redirection ;
struct statement_terminator ;
/* A job_list is a list of jobs, separated by semicolons or newlines */
struct job_list : OR <
EMPTY ,
Seq < job , job_list > ,
Seq < Token < parse_token_type_end > , job_list >
>
{
SYMBOL ( symbol_job_list )
} ;
/* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */
struct job : Seq < statement , job_continuation >
{
SYMBOL ( symbol_job ) ;
} ;
struct job_continuation : OR <
EMPTY ,
Seq < Token < parse_token_type_pipe > , statement , job_continuation >
>
{
SYMBOL ( symbol_job_continuation ) ;
} ;
/* A statement is a normal command, or an if / while / and etc */
struct statement : OR <
boolean_statement ,
block_statement ,
if_statement ,
switch_statement ,
decorated_statement
>
{
SYMBOL ( symbol_statement ) ;
} ;
struct if_statement : Seq < if_clause , else_clause , Keyword < parse_keyword_end > >
{
SYMBOL ( symbol_if_statement ) ;
} ;
struct if_clause : Seq < Keyword < parse_keyword_if > , job , statement_terminator , job_list >
{
SYMBOL ( symbol_if_clause ) ;
} ;
struct else_clause : OR <
EMPTY ,
Keyword < parse_keyword_else > , else_continuation
>
{
SYMBOL ( symbol_else_clause ) ;
} ;
struct else_continuation : OR <
Seq < if_clause , else_clause > ,
Seq < statement_terminator , job_list >
>
{
SYMBOL ( symbol_else_continuation ) ;
} ;
struct switch_statement : Seq < Keyword < parse_keyword_switch > , Token < parse_token_type_string > , statement_terminator , case_item_list , Keyword < parse_keyword_end >
>
{
SYMBOL ( symbol_switch_statement ) ;
} ;
struct case_item_list : OR
<
EMPTY ,
case_item , case_item_list
>
{
SYMBOL ( symbol_case_item_list ) ;
} ;
struct case_item : Seq < Keyword < parse_keyword_case > , argument_list , statement_terminator , job_list >
{
SYMBOL ( symbol_case_item ) ;
} ;
struct argument_list_nonempty : Seq < Token < parse_token_type_string > , argument_list >
{
SYMBOL ( symbol_argument_list_nonempty ) ;
} ;
struct argument_list : OR < EMPTY , argument_list_nonempty >
{
SYMBOL ( symbol_argument_list ) ;
} ;
struct block_statement : Seq < block_header , statement_terminator , job_list , Keyword < parse_keyword_end > , arguments_or_redirections_list >
{
SYMBOL ( symbol_block_statement ) ;
} ;
struct block_header : OR < for_header , while_header , function_header , begin_header >
{
SYMBOL ( symbol_block_header ) ;
} ;
struct for_header : Seq < Keyword < parse_keyword_for > , Token < parse_token_type_string > , Keyword < parse_keyword_in > , arguments_or_redirections_list >
{
SYMBOL ( symbol_for_header ) ;
} ;
struct while_header : Seq < Keyword < parse_keyword_while > , statement >
{
SYMBOL ( symbol_while_header ) ;
} ;
struct begin_header : Keyword < parse_keyword_begin >
{
SYMBOL ( symbol_begin_header ) ;
} ;
struct function_header : Keyword < parse_keyword_function >
{
SYMBOL ( symbol_function_header ) ;
} ;
/* A boolean statement is AND or OR or NOT */
struct boolean_statement : OR <
Seq < Keyword < parse_keyword_and > , statement > ,
Seq < Keyword < parse_keyword_or > , statement > ,
Seq < Keyword < parse_keyword_not > , statement >
>
{
SYMBOL ( symbol_boolean_statement ) ;
} ;
/* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */
struct decorated_statement : OR <
Seq < Keyword < parse_keyword_command > , plain_statement > ,
Seq < Keyword < parse_keyword_builtin > , plain_statement > ,
plain_statement
>
{
SYMBOL ( symbol_decorated_statement ) ;
} ;
struct plain_statement : Seq < Token < parse_token_type_string > , arguments_or_redirections_list >
{
SYMBOL ( symbol_plain_statement ) ;
} ;
struct arguments_or_redirections_list : OR <
EMPTY ,
Seq < argument_or_redirection , arguments_or_redirections_list > >
{
SYMBOL ( symbol_arguments_or_redirections_list ) ;
} ;
struct argument_or_redirection : OR <
Token < parse_token_type_string > ,
redirection
>
{
SYMBOL ( symbol_argument_or_redirection ) ;
} ;
struct redirection : Token < parse_token_type_redirection >
{
SYMBOL ( parse_token_type_redirection ) ;
} ;
struct statement_terminator : Token < parse_token_type_end >
{
SYMBOL ( parse_token_type_end ) ;
} ;
}
2013-06-09 10:20:26 +08:00
2013-05-27 03:12:16 +08:00
/* Fish grammar:
2013-06-23 17:09:46 +08:00
# A job_list is a list of jobs, separated by semicolons or newlines
2013-05-27 03:12:16 +08:00
2013-06-23 17:09:46 +08:00
job_list = < empty > |
< TOK_END > job_list |
job job_list
2013-05-27 03:12:16 +08:00
2013-06-23 17:09:46 +08:00
# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation
2013-05-27 03:12:16 +08:00
2013-06-23 17:09:46 +08:00
job = statement job_continuation
job_continuation = < empty > |
< TOK_PIPE > statement job_continuation
# A statement is a normal command, or an if / while / and etc
2013-07-01 06:38:31 +08:00
statement = boolean_statement | block_statement | if_statement | switch_statement | decorated_statement
2013-05-27 03:12:16 +08:00
# A block is a conditional, loop, or begin / end
2013-06-28 06:12:27 +08:00
if_statement = if_clause else_clause < END >
if_clause = < IF > job STATEMENT_TERMINATOR job_list
else_clause = < empty > |
< ELSE > else_continuation
else_continuation = if_clause else_clause |
STATEMENT_TERMINATOR job_list
2013-07-01 06:38:31 +08:00
switch_statement = SWITCH < TOK_STRING > STATEMENT_TERMINATOR case_item_list < END >
case_item_list = < empty > |
case_item case_item_list
case_item = CASE argument_list STATEMENT_TERMINATOR job_list
argument_list_nonempty = < TOK_STRING > argument_list
argument_list = < empty > | argument_list_nonempty
2013-06-28 06:12:27 +08:00
2013-06-23 17:09:46 +08:00
block_statement = block_header STATEMENT_TERMINATOR job_list < END > arguments_or_redirections_list
2013-06-28 06:12:27 +08:00
block_header = for_header | while_header | function_header | begin_header
for_header = FOR var_name IN arguments_or_redirections_list
2013-05-27 03:12:16 +08:00
while_header = WHILE statement
2013-06-23 17:09:46 +08:00
begin_header = BEGIN
2013-07-01 06:38:31 +08:00
function_header = FUNCTION function_name argument_list
2013-06-23 17:09:46 +08:00
2013-05-27 03:12:16 +08:00
# A boolean statement is AND or OR or NOT
boolean_statement = AND statement | OR statement | NOT statement
# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command"
decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement
2013-07-11 14:45:09 +08:00
plain_statement = < TOK_STRING > arguments_or_redirections_list
2013-05-27 03:12:16 +08:00
2013-06-02 13:14:47 +08:00
arguments_or_redirections_list = < empty > |
argument_or_redirection arguments_or_redirections_list
argument_or_redirection = redirection | < TOK_STRING >
redirection = < TOK_REDIRECTION >
terminator = < TOK_END > | < TOK_BACKGROUND >
2013-05-27 03:12:16 +08:00
2013-06-02 13:14:47 +08:00
*/
2013-05-27 03:12:16 +08:00
# endif