2014-02-27 18:55:04 +08:00
/** \file parse_productions.cpp
*/
2013-07-26 06:24:22 +08:00
# include "parse_productions.h"
using namespace parse_productions ;
2013-07-29 06:19:38 +08:00
/* Sentinel index returned by a resolver when no production applies: the value -1 cast to production_option_idx_t */
# define NO_PRODUCTION ((production_option_idx_t)(-1))
2013-07-26 06:24:22 +08:00
2013-07-29 06:19:38 +08:00
/* Reports whether the given production is empty, i.e. whether its first slot holds token_type_invalid */
static bool production_is_empty(const production_t production)
{
    const bool first_slot_unused = (production[0] == token_type_invalid);
    return first_slot_unused;
}
2013-10-10 06:57:10 +08:00
/* Empty productions are allowed but must be first. Validate that the given production is in the valid range, i.e. it is either not empty or there is a non-empty production after it */
2013-07-29 06:19:38 +08:00
static bool production_is_valid ( const production_options_t production_list , production_option_idx_t which )
{
if ( which < 0 | | which > = MAX_PRODUCTIONS )
return false ;
2013-08-11 15:35:00 +08:00
2013-07-29 06:19:38 +08:00
bool nonempty_found = false ;
for ( int i = which ; i < MAX_PRODUCTIONS ; i + + )
{
if ( ! production_is_empty ( production_list [ i ] ) )
{
nonempty_found = true ;
break ;
}
}
return nonempty_found ;
}
/* PRODUCTIONS(sym) begins the declaration of the file-local constant table productions_<sym>; the initializer is written after the macro */
# define PRODUCTIONS(sym) static const production_options_t productions_##sym
2013-10-12 17:46:49 +08:00
/* RESOLVE(sym) opens the definition of the file-local function resolve_<sym>; the function body follows the macro
   and sees the two input tokens as token1 and token2. It returns the index of the production to use. */
# define RESOLVE(sym) static production_option_idx_t resolve_##sym (const parse_token_t &token1, const parse_token_t &token2)
/* RESOLVE_ONLY(sym) defines a complete resolve_<sym> that ignores its inputs and always returns index 0 */
# define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (const parse_token_t &input1, const parse_token_t &input2) { return 0; }
2013-07-26 06:24:22 +08:00
2013-10-07 07:23:45 +08:00
/* KEYWORD(x) maps a keyword value to x + LAST_TOKEN_OR_SYMBOL + 1 so it can be listed as an element of a production.
   NOTE(review): presumably this keeps keyword entries distinct from token/symbol entries - confirm against the header. */
# define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1)
2013-07-29 06:44:09 +08:00
2013-07-27 14:59:12 +08:00
/* A job_list is a (possibly empty) list of jobs, separated by semicolons or newlines */
PRODUCTIONS(job_list) =
{
    {},
    {symbol_job, symbol_job_list},
    {parse_token_type_end, symbol_job_list}
};

/* Picks the job_list production from the first token only */
RESOLVE(job_list)
{
    if (token1.type == parse_token_type_string)
    {
        // The keywords 'end', 'else' and 'case' finish this job list; any other string begins a job
        const bool closes_list = (token1.keyword == parse_keyword_end
                                  || token1.keyword == parse_keyword_else
                                  || token1.keyword == parse_keyword_case);
        if (closes_list)
            return 0;
        return 1;
    }

    switch (token1.type)
    {
        case parse_token_type_pipe:
        case parse_token_type_redirection:
        case parse_token_type_background:
            return 1;

        case parse_token_type_end:
            // An empty line
            return 2;

        case parse_token_type_terminate:
            // Nothing left to run, so take the empty production
            return 0;

        default:
            return NO_PRODUCTION;
    }
}
/* A job is a non-empty, pipe-separated list of statements. Non-emptiness matters for constructs such as 'if',
   which require a command. It is expressed as one mandatory statement followed by a job_continuation that may be empty. */
PRODUCTIONS(job) =
{
    {symbol_statement, symbol_job_continuation}
};

RESOLVE_ONLY(job)
/* A job_continuation is either empty, or a pipe followed by a statement and a further continuation */
PRODUCTIONS(job_continuation) =
{
    {},
    {parse_token_type_pipe, symbol_statement, symbol_job_continuation}
};

RESOLVE(job_continuation)
{
    // A pipe extends the job; anything else ends it
    if (token1.type == parse_token_type_pipe)
        return 1;
    return 0;
}
/* A statement is a normal command, or an if / while / and etc */
PRODUCTIONS(statement) =
{
    {symbol_boolean_statement},
    {symbol_block_statement},
    {symbol_if_statement},
    {symbol_switch_statement},
    {symbol_decorated_statement}
};

/* Chooses the statement production from the first two tokens.

   'function' is the only block-like builtin that takes parameters, so several shapes are routed to the
   decorated statement (index 4) before the keyword itself is considered:
     - 'function' followed by a help argument;
     - any other first token followed by something with a dash prefix;
     - any first token except 'begin' and 'end' followed directly by a statement terminator (e.g. a "naked if").
*/
RESOLVE(statement)
{
    // Every non-string first token yields no production
    if (token1.type != parse_token_type_string)
        return NO_PRODUCTION;

    const bool first_is_function = (token1.keyword == parse_keyword_function);
    if (first_is_function)
    {
        if (token2.is_help_argument)
            return 4;
    }
    else if (token2.has_dash_prefix)
    {
        return 4;
    }

    // 'begin' and 'end' are legitimately invoked with nothing after them
    const bool naked_invocation_invokes_help = ! (token1.keyword == parse_keyword_begin || token1.keyword == parse_keyword_end);
    if (naked_invocation_invokes_help)
    {
        const bool second_ends_statement = (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate);
        if (second_ends_statement)
            return 4;
    }

    switch (token1.keyword)
    {
        case parse_keyword_and:
        case parse_keyword_or:
        case parse_keyword_not:
            return 0;

        case parse_keyword_for:
        case parse_keyword_while:
        case parse_keyword_function:
        case parse_keyword_begin:
            return 1;

        case parse_keyword_if:
            return 2;

        case parse_keyword_switch:
            return 3;

        case parse_keyword_else:
        case parse_keyword_end:
            return NO_PRODUCTION;

        default:
            // Every remaining keyword is handled as a decorated statement
            return 4;
    }
}
/* if_statement: an if clause, an else clause, the end command, then an arguments-or-redirections list */
PRODUCTIONS(if_statement) =
{
    {symbol_if_clause, symbol_else_clause, symbol_end_command, symbol_arguments_or_redirections_list}
};

RESOLVE_ONLY(if_statement)
/* if_clause: the 'if' keyword, the job being tested, an end token, then the job list forming the body */
PRODUCTIONS(if_clause) =
{
    {KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list}
};

RESOLVE_ONLY(if_clause)
/* else_clause: empty, or the 'else' keyword followed by an else_continuation */
PRODUCTIONS(else_clause) =
{
    {},
    {KEYWORD(parse_keyword_else), symbol_else_continuation}
};

RESOLVE(else_clause)
{
    // Only an 'else' keyword selects the non-empty production
    if (token1.keyword == parse_keyword_else)
        return 1;
    return 0;
}
/* else_continuation: either another if clause with its own else clause, or an end token followed by a job list */
PRODUCTIONS(else_continuation) =
{
    {symbol_if_clause, symbol_else_clause},
    {parse_token_type_end, symbol_job_list}
};

RESOLVE(else_continuation)
{
    // An 'if' keyword selects the first production; everything else selects the second
    if (token1.keyword == parse_keyword_if)
        return 0;
    return 1;
}
/* switch_statement: 'switch', a string, an end token, the case items, the end command, then arguments or redirections */
PRODUCTIONS(switch_statement) =
{
    {
        KEYWORD(parse_keyword_switch),
        parse_token_type_string,
        parse_token_type_end,
        symbol_case_item_list,
        symbol_end_command,
        symbol_arguments_or_redirections_list
    }
};

RESOLVE_ONLY(switch_statement)
/* case_item_list: empty, a case item followed by more items, or an end token followed by more items */
PRODUCTIONS(case_item_list) =
{
    {},
    {symbol_case_item, symbol_case_item_list},
    {parse_token_type_end, symbol_case_item_list}
};

RESOLVE(case_item_list)
{
    if (token1.keyword == parse_keyword_case)
    {
        return 1;
    }
    if (token1.type == parse_token_type_end)
    {
        // An empty line between case items
        return 2;
    }
    return 0;
}
/* case_item: the 'case' keyword, an argument list, an end token, then the job list forming the body */
PRODUCTIONS(case_item) =
{
    {KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list}
};

RESOLVE_ONLY(case_item)
/* argument_list: empty, or an argument followed by a further argument list */
PRODUCTIONS(argument_list) =
{
    {},
    {symbol_argument, symbol_argument_list}
};

RESOLVE(argument_list)
{
    // A string token continues the list; any other token ends it
    if (token1.type == parse_token_type_string)
        return 1;
    return 0;
}
/* block_statement: a block header, an end token, the body job list, the end command, then arguments or redirections */
PRODUCTIONS(block_statement) =
{
    {
        symbol_block_header,
        parse_token_type_end,
        symbol_job_list,
        symbol_end_command,
        symbol_arguments_or_redirections_list
    }
};

RESOLVE_ONLY(block_statement)
/* block_header: one of the for / while / function / begin headers */
PRODUCTIONS(block_header) =
{
    {symbol_for_header},
    {symbol_while_header},
    {symbol_function_header},
    {symbol_begin_header}
};

RESOLVE(block_header)
{
    // The leading keyword selects the header; the indices follow the table order above
    if (token1.keyword == parse_keyword_for)
        return 0;
    if (token1.keyword == parse_keyword_while)
        return 1;
    if (token1.keyword == parse_keyword_function)
        return 2;
    if (token1.keyword == parse_keyword_begin)
        return 3;
    return NO_PRODUCTION;
}
/* for_header: 'for', a string, the 'in' keyword, then an argument list */
PRODUCTIONS(for_header) =
{
    {KEYWORD(parse_keyword_for), parse_token_type_string, KEYWORD(parse_keyword_in), symbol_argument_list}
};

RESOLVE_ONLY(for_header)
/* while_header: the 'while' keyword followed by a job */
PRODUCTIONS(while_header) =
{
    {KEYWORD(parse_keyword_while), symbol_job}
};

RESOLVE_ONLY(while_header)
/* begin_header: just the 'begin' keyword */
PRODUCTIONS(begin_header) =
{
    {KEYWORD(parse_keyword_begin)}
};

RESOLVE_ONLY(begin_header)
/* function_header: 'function', one mandatory argument, then an argument list */
PRODUCTIONS(function_header) =
{
    {KEYWORD(parse_keyword_function), symbol_argument, symbol_argument_list}
};

RESOLVE_ONLY(function_header)
/* A boolean statement is 'and', 'or' or 'not' followed by a statement */
PRODUCTIONS(boolean_statement) =
{
    {KEYWORD(parse_keyword_and), symbol_statement},
    {KEYWORD(parse_keyword_or), symbol_statement},
    {KEYWORD(parse_keyword_not), symbol_statement}
};

RESOLVE(boolean_statement)
{
    // The indices follow the table order above
    if (token1.keyword == parse_keyword_and)
        return 0;
    if (token1.keyword == parse_keyword_or)
        return 1;
    if (token1.keyword == parse_keyword_not)
        return 2;
    return NO_PRODUCTION;
}
/* decorated_statement: a plain statement, optionally preceded by the 'command', 'builtin' or 'exec' decoration */
PRODUCTIONS(decorated_statement) =
{
    {symbol_plain_statement},
    {KEYWORD(parse_keyword_command), symbol_plain_statement},
    {KEYWORD(parse_keyword_builtin), symbol_plain_statement},
    {KEYWORD(parse_keyword_exec), symbol_plain_statement}
};

RESOLVE(decorated_statement)
{
    /* In e.g. 'command --help' the word 'command' is the command itself, not a decoration. Likewise, when the
       second token is not a string this is a naked 'command', which is run undecorated. */
    const bool second_is_plain_string = (token2.type == parse_token_type_string && ! token2.has_dash_prefix);
    if (! second_is_plain_string)
    {
        return 0;
    }

    switch (token1.keyword)
    {
        case parse_keyword_command:
            return 1;
        case parse_keyword_builtin:
            return 2;
        case parse_keyword_exec:
            return 3;
        default:
            return 0;
    }
}
/* plain_statement: a string, its arguments and redirections, then an optional background marker */
PRODUCTIONS(plain_statement) =
{
    {parse_token_type_string, symbol_arguments_or_redirections_list, symbol_optional_background}
};

RESOLVE_ONLY(plain_statement)
/* arguments_or_redirections_list: empty, or one argument-or-redirection followed by a further list */
PRODUCTIONS(arguments_or_redirections_list) =
{
    {},
    {symbol_argument_or_redirection, symbol_arguments_or_redirections_list}
};

RESOLVE(arguments_or_redirections_list)
{
    // Strings and redirections continue the list; any other token ends it
    const bool continues_list = (token1.type == parse_token_type_string || token1.type == parse_token_type_redirection);
    if (continues_list)
        return 1;
    return 0;
}
2013-07-26 06:24:22 +08:00
2013-07-29 06:19:38 +08:00
/* argument_or_redirection: either an argument or a redirection */
PRODUCTIONS(argument_or_redirection) =
{
    {symbol_argument},
    {symbol_redirection}
};

RESOLVE(argument_or_redirection)
{
    if (token1.type == parse_token_type_string)
        return 0;
    if (token1.type == parse_token_type_redirection)
        return 1;
    // Neither a string nor a redirection
    return NO_PRODUCTION;
}
2013-08-09 06:06:46 +08:00
/* argument: a single string token */
PRODUCTIONS(argument) =
{
    {parse_token_type_string}
};

RESOLVE_ONLY(argument)
/* redirection: a redirection token followed by a string token */
PRODUCTIONS(redirection) =
{
    {parse_token_type_redirection, parse_token_type_string}
};

RESOLVE_ONLY(redirection)
2013-07-29 06:19:38 +08:00
/* optional_background: empty, or a single background token */
PRODUCTIONS(optional_background) =
{
    {},
    {parse_token_type_background}
};

RESOLVE(optional_background)
{
    // Only a background token selects the non-empty production
    if (token1.type == parse_token_type_background)
        return 1;
    return 0;
}
2013-12-12 10:34:28 +08:00
/* end_command: just the 'end' keyword */
PRODUCTIONS(end_command) =
{
    {KEYWORD(parse_keyword_end)}
};

RESOLVE_ONLY(end_command)
2013-07-29 06:19:38 +08:00
/* TEST(sym) expands to a switch case for symbol_<sym> that points production_list at productions_<sym>, sets resolver
   to resolve_<sym>, and breaks. The expansion site must have locals named production_list and resolver in scope. */
# define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break;
2013-10-12 17:46:49 +08:00
/* Selects the production to apply to a node of type node_type, given the next two input tokens.

   The node type picks a production table and a resolver function; the resolver is then called with input1 and input2.
   The index it returns is always stored in *out_which_production. The return value is a pointer to the selected
   production, or NULL when the resolver returned NO_PRODUCTION. Terminal, special and invalid node types are
   reported on stderr and passed to PARSER_DIE(). out_error_text is not used by this function. */
const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text)
{
    // Set to true to trace resolution on stderr
    bool log_it = false;
    if (log_it)
    {
        fprintf(stderr, " Resolving production for %ls with input token <%ls> \n ", token_type_description(node_type).c_str(), input1.describe().c_str());
    }

    /* Find the production table and the resolver for this node type. The TEST macro assigns both locals by name. */
    typedef production_option_idx_t (*resolver_func_t)(const parse_token_t &input1, const parse_token_t &input2);
    const production_options_t *production_list = NULL;
    resolver_func_t resolver = NULL;
    switch (node_type)
    {
            TEST(job_list)
            TEST(job)
            TEST(statement)
            TEST(job_continuation)
            TEST(boolean_statement)
            TEST(block_statement)
            TEST(if_statement)
            TEST(if_clause)
            TEST(else_clause)
            TEST(else_continuation)
            TEST(switch_statement)
            TEST(decorated_statement)
            TEST(case_item_list)
            TEST(case_item)
            TEST(argument_list)
            TEST(block_header)
            TEST(for_header)
            TEST(while_header)
            TEST(begin_header)
            TEST(function_header)
            TEST(plain_statement)
            TEST(arguments_or_redirections_list)
            TEST(argument_or_redirection)
            TEST(argument)
            TEST(redirection)
            TEST(optional_background)
            TEST(end_command)

        case parse_token_type_string:
        case parse_token_type_pipe:
        case parse_token_type_redirection:
        case parse_token_type_background:
        case parse_token_type_end:
        case parse_token_type_terminate:
            // Terminal tokens have no productions
            fprintf(stderr, " Terminal token type %ls passed to %s \n ", token_type_description(node_type).c_str(), __FUNCTION__);
            PARSER_DIE();
            break;

        case parse_special_type_parse_error:
        case parse_special_type_tokenizer_error:
        case parse_special_type_comment:
            fprintf(stderr, " Special type %ls passed to %s \n ", token_type_description(node_type).c_str(), __FUNCTION__);
            PARSER_DIE();
            break;

        case token_type_invalid:
            fprintf(stderr, " token_type_invalid passed to %s \n ", __FUNCTION__);
            PARSER_DIE();
            break;
    }
    PARSE_ASSERT(production_list != NULL);
    PARSE_ASSERT(resolver != NULL);

    // Ask the resolver which entry of the table applies
    production_option_idx_t which = resolver(input1, input2);
    if (log_it)
    {
        fprintf(stderr, " \t resolved to %u \n ", (unsigned)which);
    }

    const production_t *result = NULL;
    if (which != NO_PRODUCTION)
    {
        PARSE_ASSERT(production_is_valid(*production_list, which));
        result = &((*production_list)[which]);
    }
    else if (log_it)
    {
        fprintf(stderr, " Node type '%ls' has no production for input '%ls' (in %s) \n ", token_type_description(node_type).c_str(), input1.describe().c_str(), __FUNCTION__);
    }

    *out_which_production = which;
    return result;
}
2013-10-12 17:46:49 +08:00