2013-06-10 05:21:24 +08:00
/**\file parse_tree.h

    Programmatic representation of fish code.
*/
2013-07-26 06:24:22 +08:00
# ifndef FISH_PARSE_PRODUCTIONS_H
# define FISH_PARSE_PRODUCTIONS_H
2013-05-27 03:12:16 +08:00
2015-07-25 23:14:25 +08:00
# include <assert.h>
# include <stddef.h>
2016-04-21 14:00:54 +08:00
# include <vector>
# include <memory>
# include <sys/types.h>
# include <stdbool.h>
2013-05-27 03:12:16 +08:00
# include "common.h"
2013-06-02 13:14:47 +08:00
# include "tokenizer.h"
2013-12-09 13:54:06 +08:00
# include "parse_constants.h"
2013-06-12 00:37:51 +08:00
2013-06-25 03:33:40 +08:00
/* Forward declaration; the full class definition appears further down in this header. */
class parse_node_tree_t;

/* Offset (index) of a node within a parse_node_tree_t. */
typedef uint32_t node_offset_t;

/* Sentinel node offset: -1 converted to node_offset_t, i.e. the largest representable value. */
#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))

/* Offset or length measured in the source text. */
typedef uint32_t source_offset_t;

/* Sentinel source offset: -1 converted to source_offset_t, i.e. the largest representable value. */
#define SOURCE_OFFSET_INVALID (static_cast<source_offset_t>(-1))
2013-10-12 17:46:49 +08:00
/** A struct representing the token type that we use internally */
struct parse_token_t
{
enum parse_token_type_t type ; // The type of the token as represented by the parser
enum parse_keyword_t keyword ; // Any keyword represented by this token
bool has_dash_prefix ; // Hackish: whether the source contains a dash prefix
2014-01-14 18:29:53 +08:00
bool is_help_argument ; // Hackish: whether the source looks like '-h' or '--help'
2014-03-26 11:06:34 +08:00
source_offset_t source_start ;
source_offset_t source_length ;
2013-08-11 15:35:00 +08:00
2013-10-12 17:46:49 +08:00
wcstring describe ( ) const ;
2014-01-01 16:04:02 +08:00
wcstring user_presentable_description ( ) const ;
2013-08-11 15:35:00 +08:00
} ;
/* Parse flags. Each non-zero flag occupies its own bit, so they can be combined with bitwise OR and stored in a parse_tree_flags_t. */
enum
{
    parse_flag_none = 0,

    /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
    parse_flag_continue_after_error = 1 << 0,

    /* Include comment tokens */
    parse_flag_include_comments = 1 << 1,

    /* Indicate that the tokenizer should accept incomplete tokens */
    parse_flag_accept_incomplete_tokens = 1 << 2,

    /* Indicate that the parser should not generate the terminate token, allowing an 'unfinished' tree where some nodes may have no productions. */
    parse_flag_leave_unterminated = 1 << 3,

    /* Indicate that the parser should generate job_list entries for blank lines. */
    parse_flag_show_blank_lines = 1 << 4
};

/* Integer type holding a bitwise OR of the parse_flag_* values above. */
typedef unsigned int parse_tree_flags_t;
/* Declared here, defined elsewhere. Takes a parse tree and a source string and returns a wcstring.
   NOTE(review): presumably 'src' is the text the tree was parsed from and the result is a human-readable dump for debugging - confirm against the implementation. */
wcstring parse_dump_tree ( const parse_node_tree_t & tree , const wcstring & src ) ;
2013-06-12 00:37:51 +08:00
2016-04-11 10:08:07 +08:00
/* Declared here, defined elsewhere. Each takes an enum value and returns a pointer to a wide C string describing it.
   NOTE(review): presumably the returned strings are static and must not be freed - confirm against the implementation. */
const wchar_t * token_type_description ( parse_token_type_t type ) ;
const wchar_t * keyword_description ( parse_keyword_t type ) ;
2013-06-23 17:09:46 +08:00
2015-12-16 06:59:03 +08:00
/* Node flags. Bit values stored in parse_node_t::flags. */
enum
{
    /* Flag indicating that the node has associated comment nodes */
    parse_node_flag_has_comments = 1 << 0
};

/* Integer type holding a bitwise OR of the parse_node_flag_* values. */
typedef uint8_t parse_node_flags_t;

/* Node-type specific tag value */
typedef uint8_t parse_node_tag_t;
2014-03-26 11:06:34 +08:00
/** Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields is important. */
2013-06-12 00:37:51 +08:00
class parse_node_t
{
2013-07-23 09:26:15 +08:00
public :
2013-06-12 00:37:51 +08:00
/* Start in the source code */
2014-03-26 11:06:34 +08:00
source_offset_t source_start ;
2013-07-23 09:26:15 +08:00
2013-06-12 00:37:51 +08:00
/* Length of our range in the source code */
2014-03-26 11:06:34 +08:00
source_offset_t source_length ;
2014-01-15 17:40:40 +08:00
2013-10-07 16:04:37 +08:00
/* Parent */
node_offset_t parent ;
2013-06-12 00:37:51 +08:00
/* Children */
node_offset_t child_start ;
2014-04-01 01:01:39 +08:00
2014-03-26 11:06:34 +08:00
/* Number of children */
2013-10-09 18:45:58 +08:00
uint8_t child_count ;
2013-07-23 09:26:15 +08:00
2014-03-26 11:06:34 +08:00
/* Type of the node */
enum parse_token_type_t type ;
2015-12-16 06:59:03 +08:00
/* Keyword associated with node */
enum parse_keyword_t keyword ;
2013-07-23 09:26:15 +08:00
2014-12-24 02:58:45 +08:00
/* Node flags */
2015-12-20 10:09:41 +08:00
parse_node_flags_t flags : 4 ;
2015-12-16 06:59:03 +08:00
/* This is used to store e.g. the statement decoration. */
2015-12-20 10:09:41 +08:00
parse_node_tag_t tag : 4 ;
2014-12-24 02:58:45 +08:00
2013-06-12 00:37:51 +08:00
/* Description */
wcstring describe ( void ) const ;
2013-07-23 09:26:15 +08:00
2013-06-12 00:37:51 +08:00
/* Constructor */
2016-02-20 07:45:12 +08:00
explicit parse_node_t ( parse_token_type_t ty ) :
source_start ( SOURCE_OFFSET_INVALID ) ,
source_length ( 0 ) ,
parent ( NODE_OFFSET_INVALID ) ,
child_start ( 0 ) ,
child_count ( 0 ) ,
type ( ty ) ,
keyword ( parse_keyword_none ) ,
flags ( 0 ) ,
tag ( 0 )
2013-06-12 00:37:51 +08:00
{
}
2013-07-23 09:26:15 +08:00
2013-06-23 17:09:46 +08:00
node_offset_t child_offset ( node_offset_t which ) const
{
PARSE_ASSERT ( which < child_count ) ;
return child_start + which ;
}
2013-08-11 15:35:00 +08:00
2013-10-07 07:23:45 +08:00
/* Indicate if this node has a range of source code associated with it */
2013-08-09 06:06:46 +08:00
bool has_source ( ) const
{
2014-09-30 02:29:50 +08:00
/* Should never have a nonempty range with an invalid offset */
assert ( this - > source_start ! = SOURCE_OFFSET_INVALID | | this - > source_length = = 0 ) ;
return this - > source_length > 0 ;
2013-08-09 06:06:46 +08:00
}
2014-01-15 17:40:40 +08:00
2014-12-24 02:58:45 +08:00
/* Indicate if the node has comment nodes */
bool has_comments ( ) const
{
return ! ! ( this - > flags & parse_node_flag_has_comments ) ;
}
2013-10-09 06:05:30 +08:00
/* Gets source for the node, or the empty string if it has no source */
wcstring get_source ( const wcstring & str ) const
{
if ( ! has_source ( ) )
return wcstring ( ) ;
else
return wcstring ( str , this - > source_start , this - > source_length ) ;
}
2014-01-15 17:40:40 +08:00
2013-10-13 09:17:03 +08:00
/* Returns whether the given location is within the source range or at its end */
bool location_in_or_at_end_of_source_range ( size_t loc ) const
{
return has_source ( ) & & source_start < = loc & & loc - source_start < = source_length ;
}
2013-06-12 00:37:51 +08:00
} ;
2013-10-07 07:23:45 +08:00
/* The parse tree itself */
2013-06-25 03:33:40 +08:00
class parse_node_tree_t : public std : : vector < parse_node_t >
{
2013-08-11 15:35:00 +08:00
public :
2016-02-28 16:33:11 +08:00
parse_node_tree_t ( ) { }
parse_node_tree_t ( moved_ref < parse_node_tree_t > t )
{
this - > swap ( t . val ) ;
}
2013-08-11 15:35:00 +08:00
2013-12-12 10:34:28 +08:00
/* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type.
*/
const parse_node_t * get_child ( const parse_node_t & parent , node_offset_t which , parse_token_type_t expected_type = token_type_invalid ) const ;
2014-01-15 17:40:40 +08:00
2013-12-24 06:53:56 +08:00
/* Find the first direct child of the given node of the given type. asserts on failure
*/
const parse_node_t & find_child ( const parse_node_t & parent , parse_token_type_t type ) const ;
2014-01-15 17:40:40 +08:00
2013-10-07 16:04:37 +08:00
/* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */
const parse_node_t * get_parent ( const parse_node_t & node , parse_token_type_t expected_type = token_type_invalid ) const ;
2014-01-15 17:40:40 +08:00
2013-12-12 10:34:28 +08:00
/* Find all the nodes of a given type underneath a given node, up to max_count of them */
2013-08-09 06:06:46 +08:00
typedef std : : vector < const parse_node_t * > parse_node_list_t ;
2013-12-12 10:34:28 +08:00
parse_node_list_t find_nodes ( const parse_node_t & parent , parse_token_type_t type , size_t max_count = ( size_t ) ( - 1 ) ) const ;
2014-01-15 17:40:40 +08:00
2013-10-09 06:05:30 +08:00
/* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */
const parse_node_t * find_last_node_of_type ( parse_token_type_t type , const parse_node_t * parent = NULL ) const ;
2014-01-15 17:40:40 +08:00
2014-01-13 18:24:11 +08:00
/* Finds a node containing the given source location. If 'parent' is not NULL, it must be an ancestor. */
2013-10-13 09:17:03 +08:00
const parse_node_t * find_node_matching_source_location ( parse_token_type_t type , size_t source_loc , const parse_node_t * parent ) const ;
2014-01-15 17:40:40 +08:00
2013-10-07 18:56:09 +08:00
/* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */
bool argument_list_is_root ( const parse_node_t & node ) const ;
2014-01-15 17:40:40 +08:00
2013-10-09 17:03:50 +08:00
/* Utilities */
2014-01-15 17:40:40 +08:00
2013-10-10 06:57:10 +08:00
/* Given a plain statement, get the decoration (from the parent node), or none if there is no decoration */
2013-10-09 17:03:50 +08:00
enum parse_statement_decoration_t decoration_for_plain_statement ( const parse_node_t & node ) const ;
2014-01-15 17:40:40 +08:00
2013-10-10 06:57:10 +08:00
/* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. */
bool command_for_plain_statement ( const parse_node_t & node , const wcstring & src , wcstring * out_cmd ) const ;
2014-01-15 17:40:40 +08:00
2013-12-12 10:34:28 +08:00
/* Given a plain statement, return true if the statement is part of a pipeline. If include_first is set, the first command in a pipeline is considered part of it; otherwise only the second or additional commands are */
2014-01-14 05:14:18 +08:00
bool statement_is_in_pipeline ( const parse_node_t & node , bool include_first ) const ;
2014-01-15 17:40:40 +08:00
2013-10-14 07:58:40 +08:00
/* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */
2013-12-24 06:53:56 +08:00
enum token_type type_for_redirection ( const parse_node_t & node , const wcstring & src , int * out_fd , wcstring * out_target ) const ;
2014-01-15 17:40:40 +08:00
2013-12-12 10:34:28 +08:00
/* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */
2014-01-02 07:29:56 +08:00
const parse_node_t * header_node_for_block_statement ( const parse_node_t & node ) const ;
2014-01-15 17:40:40 +08:00
2014-01-06 07:23:42 +08:00
/* Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return the next element of the given type in that list, and the tail (by reference). Returns NULL if we've exhausted the list. */
const parse_node_t * next_node_in_node_list ( const parse_node_t & node_list , parse_token_type_t item_type , const parse_node_t * * list_tail ) const ;
2014-01-15 17:40:40 +08:00
2014-01-02 07:29:56 +08:00
/* Given a job, return all of its statements. These are 'specific statements' (e.g. symbol_decorated_statement, not symbol_statement) */
parse_node_list_t specific_statements_for_job ( const parse_node_t & job ) const ;
2014-04-01 01:01:39 +08:00
2014-12-24 02:58:45 +08:00
/* Given a node, return all of its comment nodes. */
parse_node_list_t comment_nodes_for_node ( const parse_node_t & node ) const ;
2014-11-03 05:11:27 +08:00
/* Returns the boolean type for a boolean node */
static enum parse_bool_statement_type_t statement_boolean_type ( const parse_node_t & node ) ;
2014-03-29 05:39:47 +08:00
/* Given a job, return whether it should be backgrounded, because it has a & specifier */
bool job_should_be_backgrounded ( const parse_node_t & job ) const ;
2013-10-07 16:04:37 +08:00
} ;
2014-03-03 08:03:05 +08:00
/* The big entry point. Parse a string, attempting to produce a tree for the given goal type.
   'str' is the text to parse, 'flags' is a combination of parse_flag_* values, 'output' and 'errors' are out-parameters passed by pointer, and 'goal' defaults to symbol_job_list.
   NOTE(review): presumably returns true on a successful parse and fills 'errors' otherwise; whether 'output' or 'errors' may be NULL is not visible here - confirm against the implementation. */
bool parse_tree_from_string ( const wcstring & str , parse_tree_flags_t flags , parse_node_tree_t * output , parse_error_list_t * errors , parse_token_type_t goal = symbol_job_list ) ;
2014-01-13 14:39:12 +08:00
2013-05-27 03:12:16 +08:00
/* Fish grammar:

# A job_list is a list of jobs, separated by semicolons or newlines

    job_list = <empty> |
               job job_list |
               <TOK_END> job_list

# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation, and then optionally a background specifier '&'

    job = statement job_continuation optional_background
    job_continuation = <empty> |
                       <TOK_PIPE> statement job_continuation

# A statement is a normal command, or an if / while / and etc

    statement = boolean_statement | block_statement | if_statement | switch_statement | decorated_statement

# A block is a conditional, loop, or begin / end

    if_statement = if_clause else_clause end_command arguments_or_redirections_list
    if_clause = <IF> job <TOK_END> andor_job_list job_list
    else_clause = <empty> |
                  <ELSE> else_continuation
    else_continuation = if_clause else_clause |
                        <TOK_END> job_list

    switch_statement = SWITCH argument <TOK_END> case_item_list end_command arguments_or_redirections_list
    case_item_list = <empty> |
                     case_item case_item_list |
                     <TOK_END> case_item_list

    case_item = CASE argument_list <TOK_END> job_list

    block_statement = block_header job_list end_command arguments_or_redirections_list
    block_header = for_header | while_header | function_header | begin_header
    for_header = FOR var_name IN argument_list <TOK_END>
    while_header = WHILE job <TOK_END> andor_job_list
    begin_header = BEGIN

# Functions take arguments, and require at least one (the name). No redirections allowed.

    function_header = FUNCTION argument argument_list <TOK_END>

# A boolean statement is AND or OR or NOT

    boolean_statement = AND statement | OR statement | NOT statement

# An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean statement

    andor_job_list = <empty> |
                     job andor_job_list |
                     <TOK_END> andor_job_list

# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" or "exec"

    decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement | EXEC plain_statement
    plain_statement = <TOK_STRING> arguments_or_redirections_list

    argument_list = <empty> | argument argument_list
    arguments_or_redirections_list = <empty> |
                                     argument_or_redirection arguments_or_redirections_list
    argument_or_redirection = argument | redirection
    argument = <TOK_STRING>

    redirection = <TOK_REDIRECTION> <TOK_STRING>

    optional_background = <empty> | <TOK_BACKGROUND>

    end_command = END

# A freestanding_argument_list is equivalent to a normal argument list, except it may contain TOK_END (newlines, and even semicolons, for historical reasons)

    freestanding_argument_list = <empty> |
                                 argument freestanding_argument_list |
                                 <TOK_END> freestanding_argument_list
*/
2013-05-27 03:12:16 +08:00
# endif