fish-shell/parse_productions.cpp

545 lines
14 KiB
C++
Raw Normal View History

2013-07-26 06:24:22 +08:00
#include "parse_productions.h"
using namespace parse_productions;
2013-07-29 06:19:38 +08:00
/* Sentinel index a resolver returns when no production applies to the input; production_for_token() answers NULL when it sees this value. */
#define NO_PRODUCTION ((production_option_idx_t)(-1))
2013-07-26 06:24:22 +08:00
2013-07-29 06:19:38 +08:00
/* A production counts as empty when its first slot holds token_type_invalid. */
static bool production_is_empty(const production_t production)
{
    const bool first_slot_unused = (token_type_invalid == production[0]);
    return first_slot_unused;
}
/* Empty productions are allowed but must be first. Validate that the given production is in the valid range, i.e. it is either not empty or there is a non-empty production after it */
2013-07-29 06:19:38 +08:00
static bool production_is_valid(const production_options_t production_list, production_option_idx_t which)
{
if (which < 0 || which >= MAX_PRODUCTIONS)
return false;
2013-08-11 15:35:00 +08:00
2013-07-29 06:19:38 +08:00
bool nonempty_found = false;
for (int i=which; i < MAX_PRODUCTIONS; i++)
{
if (! production_is_empty(production_list[i]))
{
nonempty_found = true;
break;
}
}
return nonempty_found;
}
/* Begins the declaration of a symbol's production table: a static const production_options_t named productions_<sym>. The initializer follows the macro invocation. */
#define PRODUCTIONS(sym) static const production_options_t productions_##sym
/* Begins the definition of resolve_<sym>, which receives two tokens (token1, token2) and returns the index of the production to use, or NO_PRODUCTION. The function body follows the macro invocation. */
#define RESOLVE(sym) static production_option_idx_t resolve_##sym (const parse_token_t &token1, const parse_token_t &token2)
/* Defines a complete resolve_<sym> that ignores its inputs and always selects production 0. In this file it is used for the symbols whose table holds a single production. */
#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (const parse_token_t &input1, const parse_token_t &input2) { return 0; }
2013-07-26 06:24:22 +08:00
2013-10-07 07:23:45 +08:00
/* Converts a keyword value into the form used inside production tables by adding LAST_TOKEN_OR_SYMBOL + 1 to it. NOTE(review): presumably this keeps keyword entries from colliding with token/symbol type values - confirm against the header. */
#define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1)
2013-07-29 06:44:09 +08:00
2013-07-27 14:59:12 +08:00
/* A job_list is a sequence of jobs separated by semicolons or newlines; it may be empty */
PRODUCTIONS(job_list) =
{
    {},                                         // 0: nothing more in this list
    {symbol_job, symbol_job_list},              // 1: one job, then the rest of the list
    {parse_token_type_end, symbol_job_list}     // 2: an empty line, then the rest of the list
};

RESOLVE(job_list)
{
    if (token1.type == parse_token_type_string)
    {
        /* Some keywords are special: 'end', 'else' and 'case' finish this job list. Every other string is a normal one and starts a job. */
        const bool finishes_list = (token1.keyword == parse_keyword_end
                                    || token1.keyword == parse_keyword_else
                                    || token1.keyword == parse_keyword_case);
        if (finishes_list)
        {
            return 0;
        }
        return 1;
    }

    if (token1.type == parse_token_type_pipe
            || token1.type == parse_token_type_redirection
            || token1.type == parse_token_type_background)
    {
        return 1;
    }

    if (token1.type == parse_token_type_end)
    {
        /* Empty line */
        return 2;
    }

    if (token1.type == parse_token_type_terminate)
    {
        /* No more commands, just transition to empty */
        return 0;
    }

    return NO_PRODUCTION;
}
/* A job is one or more statements joined by pipes. Requiring at least one statement matters for constructs such as 'if', which need a command. "At least one" is expressed as a mandatory statement followed by a job_continuation that is allowed to be empty. */
PRODUCTIONS(job) =
{
    {symbol_statement, symbol_job_continuation}     // 0: the only form
};

RESOLVE_ONLY(job)
/* A job_continuation is the possibly empty remainder of a job: a pipe, a statement, and a further continuation */
PRODUCTIONS(job_continuation) =
{
    {},                                                                     // 0: the job ends here
    {parse_token_type_pipe, symbol_statement, symbol_job_continuation}      // 1: piped into another statement
};

RESOLVE(job_continuation)
{
    /* Only a pipe continues the job; any other token means there is no continuation */
    if (token1.type == parse_token_type_pipe)
    {
        return 1;
    }
    return 0;
}
/* A statement is a normal command, or an if / while / and etc */
PRODUCTIONS(statement) =
{
    {symbol_boolean_statement},     // 0: and / or / not
    {symbol_block_statement},       // 1: for / while / function / begin
    {symbol_if_statement},          // 2: if
    {symbol_switch_statement},      // 3: switch
    {symbol_decorated_statement}    // 4: everything else
};

/* Picks the statement production from the first token (token1) and a one-token lookahead (token2). Returns NO_PRODUCTION when token1 cannot begin a statement. */
RESOLVE(statement)
{
    // Go to decorated statements if the subsequent token looks like '--'
    // If we are 'begin', then we expect to be invoked with no arguments. But if we are anything else, we require an argument, so do the same thing if the subsequent token is a line end.
    if (token1.type == parse_token_type_string)
    {
        // If the next token looks like an option (starts with a dash), then parse it as a decorated statement
        if (token2.has_dash_prefix)
        {
            return 4;
        }

        // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g. a "naked if".
        bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end);
        if (naked_invocation_invokes_help && (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate))
        {
            return 4;
        }
    }

    switch (token1.type)
    {
        case parse_token_type_string:
            switch (token1.keyword)
            {
                case parse_keyword_and:
                case parse_keyword_or:
                case parse_keyword_not:
                    return 0;

                case parse_keyword_for:
                case parse_keyword_while:
                case parse_keyword_function:
                case parse_keyword_begin:
                    return 1;

                case parse_keyword_if:
                    return 2;

                case parse_keyword_else:
                    return NO_PRODUCTION;

                case parse_keyword_switch:
                    return 3;

                case parse_keyword_end:
                    return NO_PRODUCTION;

                // 'in' is only special within a for_header
                case parse_keyword_in:
                case parse_keyword_none:
                case parse_keyword_command:
                case parse_keyword_builtin:
                case parse_keyword_case:
                    return 4;
            }
            // No default above on purpose, so the cases stay an explicit list; a keyword value not listed there is handled by the final return below.
            break;

        case parse_token_type_pipe:
        case parse_token_type_redirection:
        case parse_token_type_background:
        case parse_token_type_terminate:
            return NO_PRODUCTION;

        default:
            return NO_PRODUCTION;
    }

    /* Only reached when token1 is a string whose keyword value matched none of the cases. Without this return, control would flow off the end of a value-returning function, which is undefined behaviour. */
    return NO_PRODUCTION;
}
/* An if_statement is an if_clause, an else_clause, the closing 'end' command, and any trailing arguments or redirections */
PRODUCTIONS(if_statement) =
{
    {symbol_if_clause, symbol_else_clause, symbol_end_command, symbol_arguments_or_redirections_list}   // 0: the only form
};

RESOLVE_ONLY(if_statement)
/* An if_clause is the 'if' keyword, a job, an end token, and a job_list */
PRODUCTIONS(if_clause) =
{
    {KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list}      // 0: the only form
};

RESOLVE_ONLY(if_clause)
/* An else_clause is either absent, or the 'else' keyword followed by an else_continuation */
PRODUCTIONS(else_clause) =
{
    {},                                                         // 0: no else
    {KEYWORD(parse_keyword_else), symbol_else_continuation}     // 1: else ...
};

RESOLVE(else_clause)
{
    /* Only the 'else' keyword selects the non-empty form */
    if (token1.keyword == parse_keyword_else)
    {
        return 1;
    }
    return 0;
}
/* What follows 'else': either another if_clause with its own else_clause, or an end token and a job_list */
PRODUCTIONS(else_continuation) =
{
    {symbol_if_clause, symbol_else_clause},     // 0: else if ...
    {parse_token_type_end, symbol_job_list}     // 1: plain else
};

RESOLVE(else_continuation)
{
    /* An 'if' keyword chains into another clause; everything else takes the plain form */
    if (token1.keyword == parse_keyword_if)
    {
        return 0;
    }
    return 1;
}
/* A switch_statement is the 'switch' keyword, a string, an end token, a case_item_list, the closing 'end' command, and any trailing arguments or redirections */
PRODUCTIONS(switch_statement) =
{
    {
        KEYWORD(parse_keyword_switch),
        parse_token_type_string,
        parse_token_type_end,
        symbol_case_item_list,
        symbol_end_command,
        symbol_arguments_or_redirections_list
    }                                           // 0: the only form
};

RESOLVE_ONLY(switch_statement)
/* A case_item_list is a possibly empty run of case_items, with empty lines allowed between them */
PRODUCTIONS(case_item_list) =
{
    {},                                             // 0: no more case items
    {symbol_case_item, symbol_case_item_list},      // 1: a case item, then the rest
    {parse_token_type_end, symbol_case_item_list}   // 2: an empty line, then the rest
};

RESOLVE(case_item_list)
{
    /* The keyword check takes precedence over the token type check */
    production_option_idx_t choice = 0;
    if (token1.keyword == parse_keyword_case)
    {
        choice = 1;
    }
    else if (token1.type == parse_token_type_end)
    {
        choice = 2; //empty line
    }
    return choice;
}
/* A case_item is the 'case' keyword, an argument_list, an end token, and a job_list */
PRODUCTIONS(case_item) =
{
    {KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list}      // 0: the only form
};

RESOLVE_ONLY(case_item)
/* An argument_list is a possibly empty run of arguments */
PRODUCTIONS(argument_list) =
{
    {},                                         // 0: no more arguments
    {symbol_argument, symbol_argument_list}     // 1: an argument, then the rest
};

RESOLVE(argument_list)
{
    /* The list continues only while the next token is a string */
    if (token1.type == parse_token_type_string)
    {
        return 1;
    }
    return 0;
}
/* A block_statement is a block_header, an end token, a job_list, the closing 'end' command, and any trailing arguments or redirections */
PRODUCTIONS(block_statement) =
{
    {
        symbol_block_header,
        parse_token_type_end,
        symbol_job_list,
        symbol_end_command,
        symbol_arguments_or_redirections_list
    }                                           // 0: the only form
};

RESOLVE_ONLY(block_statement)
/* A block_header is one of the four block-opening headers, chosen by keyword */
PRODUCTIONS(block_header) =
{
    {symbol_for_header},        // 0: for
    {symbol_while_header},      // 1: while
    {symbol_function_header},   // 2: function
    {symbol_begin_header}       // 3: begin
};

RESOLVE(block_header)
{
    if (token1.keyword == parse_keyword_for)
    {
        return 0;
    }
    if (token1.keyword == parse_keyword_while)
    {
        return 1;
    }
    if (token1.keyword == parse_keyword_function)
    {
        return 2;
    }
    if (token1.keyword == parse_keyword_begin)
    {
        return 3;
    }
    /* Any other keyword cannot open a block */
    return NO_PRODUCTION;
}
/* A for_header is the 'for' keyword, a string, the 'in' keyword, and an argument_list */
PRODUCTIONS(for_header) =
{
    {KEYWORD(parse_keyword_for), parse_token_type_string, KEYWORD(parse_keyword_in), symbol_argument_list}      // 0: the only form
};

RESOLVE_ONLY(for_header)
/* A while_header is the 'while' keyword followed by a job */
PRODUCTIONS(while_header) =
{
    {KEYWORD(parse_keyword_while), symbol_job}      // 0: the only form
};

RESOLVE_ONLY(while_header)
/* A begin_header is just the 'begin' keyword */
PRODUCTIONS(begin_header) =
{
    {KEYWORD(parse_keyword_begin)}      // 0: the only form
};

RESOLVE_ONLY(begin_header)
/* A function_header is the 'function' keyword, one required argument, and an argument_list */
PRODUCTIONS(function_header) =
{
    {KEYWORD(parse_keyword_function), symbol_argument, symbol_argument_list}    // 0: the only form
};

RESOLVE_ONLY(function_header)
/* A boolean statement is one of the keywords 'and', 'or' or 'not' followed by a statement */
PRODUCTIONS(boolean_statement) =
{
    {KEYWORD(parse_keyword_and), symbol_statement},     // 0: and
    {KEYWORD(parse_keyword_or), symbol_statement},      // 1: or
    {KEYWORD(parse_keyword_not), symbol_statement}      // 2: not
};

RESOLVE(boolean_statement)
{
    if (token1.keyword == parse_keyword_and)
    {
        return 0;
    }
    if (token1.keyword == parse_keyword_or)
    {
        return 1;
    }
    if (token1.keyword == parse_keyword_not)
    {
        return 2;
    }
    /* Not a boolean keyword */
    return NO_PRODUCTION;
}
/* A decorated_statement is a plain_statement, optionally preceded by the 'command' or 'builtin' keyword */
PRODUCTIONS(decorated_statement) =
{
    {symbol_plain_statement},                                   // 0: undecorated
    {KEYWORD(parse_keyword_command), symbol_plain_statement},   // 1: command ...
    {KEYWORD(parse_keyword_builtin), symbol_plain_statement}    // 2: builtin ...
};

RESOLVE(decorated_statement)
{
    /* In e.g. 'command --help' the word 'command' is the command itself rather than a decoration. Likewise, when the second token is not a string this is a naked 'command', which should run undecorated. */
    const bool second_is_plain_string = (token2.type == parse_token_type_string && ! token2.has_dash_prefix);
    if (! second_is_plain_string)
    {
        return 0;
    }

    if (token1.keyword == parse_keyword_command)
    {
        return 1;
    }
    if (token1.keyword == parse_keyword_builtin)
    {
        return 2;
    }
    return 0;
}
/* A plain_statement is a string, then an arguments_or_redirections_list, then an optional_background */
PRODUCTIONS(plain_statement) =
{
    {parse_token_type_string, symbol_arguments_or_redirections_list, symbol_optional_background}    // 0: the only form
};

RESOLVE_ONLY(plain_statement)
/* An arguments_or_redirections_list is a possibly empty run of arguments and redirections */
PRODUCTIONS(arguments_or_redirections_list) =
{
    {},                                                                         // 0: the list ends here
    {symbol_argument_or_redirection, symbol_arguments_or_redirections_list}     // 1: one item, then the rest
};

RESOLVE(arguments_or_redirections_list)
{
    /* The list continues while the next token is a string or a redirection */
    const bool continues = (token1.type == parse_token_type_string
                            || token1.type == parse_token_type_redirection);
    if (continues)
    {
        return 1;
    }
    return 0;
}
2013-07-26 06:24:22 +08:00
2013-07-29 06:19:38 +08:00
/* An argument_or_redirection is exactly one argument or one redirection */
PRODUCTIONS(argument_or_redirection) =
{
    {symbol_argument},      // 0: a string token
    {symbol_redirection}    // 1: a redirection token
};

RESOLVE(argument_or_redirection)
{
    if (token1.type == parse_token_type_string)
    {
        return 0;
    }
    if (token1.type == parse_token_type_redirection)
    {
        return 1;
    }
    /* Neither an argument nor a redirection */
    return NO_PRODUCTION;
}
2013-08-09 06:06:46 +08:00
/* An argument is a single string token */
PRODUCTIONS(argument) =
{
    {parse_token_type_string}       // 0: the only form
};

RESOLVE_ONLY(argument)
/* A redirection is a redirection token followed by a string token */
PRODUCTIONS(redirection) =
{
    {parse_token_type_redirection, parse_token_type_string}     // 0: the only form
};

RESOLVE_ONLY(redirection)
2013-07-29 06:19:38 +08:00
/* An optional_background is either nothing or a single background token */
PRODUCTIONS(optional_background) =
{
    {},                                 // 0: no background token
    {parse_token_type_background}       // 1: background token present
};

RESOLVE(optional_background)
{
    if (token1.type == parse_token_type_background)
    {
        return 1;
    }
    return 0;
}
/* An end_command is just the 'end' keyword */
PRODUCTIONS(end_command) =
{
    {KEYWORD(parse_keyword_end)}    // 0: the only form
};

RESOLVE_ONLY(end_command)
2013-07-29 06:19:38 +08:00
/* Expands to a switch case for symbol_<sym> that points production_list at productions_<sym> and resolver at resolve_<sym>. It relies on variables with exactly those two names being in scope where it is used. */
#define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break;
/*
   Chooses which production to apply for the symbol node_type, given the next two input tokens.

   node_type selects a production table and its resolver; the resolver is then called with input1 and input2.
   The index it returns is written to *out_which_production. The return value is a pointer to the chosen
   production, or NULL when the resolver answered NO_PRODUCTION.

   Terminal token types, the special types and token_type_invalid have no table; passing one prints a
   message to stderr and invokes PARSER_DIE(). out_error_text is not used here.
*/
const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text)
{
    /* Flip to true for tracing output on stderr */
    const bool verbose = false;
    if (verbose)
    {
        fprintf(stderr, "Resolving production for %ls with input token <%ls>\n", token_type_description(node_type).c_str(), input1.describe().c_str());
    }

    /* Look up the production table and the resolver for this symbol. These two names are fixed because the TEST macro assigns to them. */
    const production_options_t *production_list = NULL;
    production_option_idx_t (*resolver)(const parse_token_t &, const parse_token_t &) = NULL;
    switch (node_type)
    {
            /* Symbols: each TEST line is one case */
            TEST(job_list)
            TEST(job)
            TEST(statement)
            TEST(job_continuation)
            TEST(boolean_statement)
            TEST(block_statement)
            TEST(if_statement)
            TEST(if_clause)
            TEST(else_clause)
            TEST(else_continuation)
            TEST(switch_statement)
            TEST(decorated_statement)
            TEST(case_item_list)
            TEST(case_item)
            TEST(argument_list)
            TEST(block_header)
            TEST(for_header)
            TEST(while_header)
            TEST(begin_header)
            TEST(function_header)
            TEST(plain_statement)
            TEST(arguments_or_redirections_list)
            TEST(argument_or_redirection)
            TEST(argument)
            TEST(redirection)
            TEST(optional_background)
            TEST(end_command)

            /* Terminal tokens have no productions */
        case parse_token_type_string:
        case parse_token_type_pipe:
        case parse_token_type_redirection:
        case parse_token_type_background:
        case parse_token_type_end:
        case parse_token_type_terminate:
            fprintf(stderr, "Terminal token type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__);
            PARSER_DIE();
            break;

            /* Neither do the special types */
        case parse_special_type_parse_error:
        case parse_special_type_tokenizer_error:
        case parse_special_type_comment:
            fprintf(stderr, "Special type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__);
            PARSER_DIE();
            break;

        case token_type_invalid:
            fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__);
            PARSER_DIE();
            break;
    }
    PARSE_ASSERT(production_list != NULL);
    PARSE_ASSERT(resolver != NULL);

    /* Ask the resolver which entry of the table applies */
    const production_option_idx_t chosen_idx = resolver(input1, input2);
    if (verbose)
    {
        fprintf(stderr, "\tresolved to %u\n", (unsigned)chosen_idx);
    }

    const production_t *chosen = NULL;
    if (chosen_idx != NO_PRODUCTION)
    {
        PARSE_ASSERT(production_is_valid(*production_list, chosen_idx));
        chosen = &((*production_list)[chosen_idx]);
    }
    else if (verbose)
    {
        fprintf(stderr, "Node type '%ls' has no production for input '%ls' (in %s)\n", token_type_description(node_type).c_str(), input1.describe().c_str(), __FUNCTION__);
    }

    *out_which_production = chosen_idx;
    return chosen;
}