From 54f683fc6b41bd5575a8b3eb7307b69a1b725df6 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 26 May 2013 12:12:16 -0700 Subject: [PATCH 001/108] Initial work on parser bringup (squash) --- expression.cpp | 9 +++ expression.h | 103 +++++++++++++++++++++++++++++++++ fish.xcodeproj/project.pbxproj | 12 +++- 3 files changed, 121 insertions(+), 3 deletions(-) create mode 100644 expression.cpp create mode 100644 expression.h diff --git a/expression.cpp b/expression.cpp new file mode 100644 index 000000000..d8d5386e9 --- /dev/null +++ b/expression.cpp @@ -0,0 +1,9 @@ +// +// expression.cpp +// fish +// +// Created by Peter Ammon on 5/25/13. +// +// + +#include "expression.h" diff --git a/expression.h b/expression.h new file mode 100644 index 000000000..390c1bb79 --- /dev/null +++ b/expression.h @@ -0,0 +1,103 @@ +/**\file expression.h + + Programmatic representation of fish code. + +*/ + +#ifndef FISH_EXPRESSION_H +#define FISH_EXPRESSION_H + +#include + +#include "config.h" +#include "util.h" +#include "common.h" + + +/* Fish grammar: + +# A statement_list is a list of statements, separated by semicolons or newlines + + statement_list = | statement | statement statement_list + +# A statement is a normal job, or an if / while / and etc. 
+ + statement = boolean_statement | block_statement | decorated_statement + +# A block is a conditional, loop, or begin/end + + block_statement = block_header statement_list END arguments_or_redirections_list + block_header = if_header | for_header | while_header | function_header | begin_header + if_header = IF statement + for_header = FOR var_name IN arguments_or_redirections_list STATEMENT_TERMINATOR + while_header = WHILE statement + begin_header = BEGIN STATEMENT_TERMINATOR + function_header = FUNCTION arguments_or_redirections_list STATEMENT_TERMINATOR + +# A boolean statement is AND or OR or NOT + + boolean_statement = AND statement | OR statement | NOT statement + +# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" + + decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement + plain_statement = command arguments_or_redirections_list terminator + + arguments_or_redirections_list = | argument_or_redirection + argument_or_redirection = redirection | + redirection = REDIRECTION + +*/ + + +class parse_command_t; + +/** Root of a parse tree */ +class parse_tree_t +{ + /** Literal source code */ + wcstring source; + + /** Initial node */ + parse_command_list_t *child; +}; + +/** Base class for nodes of a parse tree */ +class parse_node_base_t +{ + /* Backreference to the tree */ + parse_tree_t * const tree; + + /* Start in the source code */ + const unsigned int source_start; + + /* Length of our range in the source code */ + const unsigned int source_length; +}; + +class parse_statement_list_t : public parse_node_base_t +{ + std::vector statements; +}; + +class parse_statement_t : public parse_node_base_t +{ + +}; + +class parse_boolean_statement_t : public parse_statement_t +{ + +}; + +class parse_plain_statement_t : public parse_statement_t +{ + +}; + +class parse_block_statement_t : public parse_statement_t +{ + +}; + +#endif diff --git 
a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index b48ea5771..23b06b5ed 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -72,6 +72,7 @@ D07D266D15E33B86009E43F6 /* functions in Copy Files */ = {isa = PBXBuildFile; fileRef = D025C02815D1FEA100B9DB63 /* functions */; }; D07D266E15E33B86009E43F6 /* tools in Copy Files */ = {isa = PBXBuildFile; fileRef = D025C02915D1FEA100B9DB63 /* tools */; }; D07D267215E34171009E43F6 /* config.fish in Copy Files */ = {isa = PBXBuildFile; fileRef = D0CBD580159EE48F0024809C /* config.fish */; }; + D07FEA311751E6AF003066C3 /* expression.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D07FEA2F1751E6AF003066C3 /* expression.cpp */; }; D0879AC816BF9AAB00E98E56 /* fish_term_icon.icns in Resources */ = {isa = PBXBuildFile; fileRef = D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */; }; D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; @@ -336,6 +337,8 @@ D03EE83814DF88B200FC7150 /* lru.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = lru.h; sourceTree = ""; }; D07B247215BCC15700D4ADB4 /* add-shell */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "add-shell"; path = "build_tools/osx_package_scripts/add-shell"; sourceTree = ""; }; D07B247515BCC4BE00D4ADB4 /* install.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = install.sh; path = osx/install.sh; sourceTree = ""; }; + D07FEA2F1751E6AF003066C3 /* expression.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = expression.cpp; sourceTree = ""; }; + D07FEA301751E6AF003066C3 /* expression.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = expression.h; 
sourceTree = ""; }; D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; name = fish_term_icon.icns; path = osx/fish_term_icon.icns; sourceTree = ""; }; D09B1C1914FC7B5B00F91077 /* postfork.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = postfork.cpp; sourceTree = ""; }; D09B1C1A14FC7B5B00F91077 /* postfork.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = postfork.h; sourceTree = ""; }; @@ -582,6 +585,8 @@ D0A0853C13B3ACEE0099B651 /* exec.cpp */, D0A0850C13B3ACEE0099B651 /* expand.h */, D0A0853D13B3ACEE0099B651 /* expand.cpp */, + D07FEA301751E6AF003066C3 /* expression.h */, + D07FEA2F1751E6AF003066C3 /* expression.cpp */, D0A0850D13B3ACEE0099B651 /* fallback.h */, D0A0853E13B3ACEE0099B651 /* fallback.cpp */, D0A0850E13B3ACEE0099B651 /* function.h */, @@ -1102,6 +1107,7 @@ D0D02A7A15983916008E62BD /* env_universal.cpp in Sources */, D0D02A7B15983928008E62BD /* env_universal_common.cpp in Sources */, D0D02A89159839DF008E62BD /* fish.cpp in Sources */, + D07FEA311751E6AF003066C3 /* expression.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -1189,7 +1195,7 @@ "PREFIX=L\\\"/usr/local\\\"", "DATADIR=L\\\"/usr/local/share\\\"", "SYSCONFDIR=L\\\"/usr/local/etc\\\"", - "BINDIR=L\\\"/usr/local/bin\\\"", + "BINDIR=L\\\"/usr/local/bin\\\"", ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES; @@ -1339,7 +1345,7 @@ "PREFIX=L\\\"/usr/local\\\"", "DATADIR=L\\\"/usr/local/share\\\"", "SYSCONFDIR=L\\\"/usr/local/etc\\\"", - "BINDIR=L\\\"/usr/local/bin\\\"", + "BINDIR=L\\\"/usr/local/bin\\\"", ); GCC_SYMBOLS_PRIVATE_EXTERN = NO; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; @@ -1365,7 +1371,7 @@ "PREFIX=L\\\"/usr/local\\\"", "DATADIR=L\\\"/usr/local/share\\\"", "SYSCONFDIR=L\\\"/usr/local/etc\\\"", - "BINDIR=L\\\"/usr/local/bin\\\"", + "BINDIR=L\\\"/usr/local/bin\\\"", ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 
GCC_WARN_ABOUT_RETURN_TYPE = YES; From d54346b2055935cc54aa212e2dccd40c5487b18f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 1 Jun 2013 22:14:47 -0700 Subject: [PATCH 002/108] Stuff --- expression.cpp | 614 ++++++++++++++++++++++++++++++++++++++++++++++++- expression.h | 75 ++---- parser.cpp | 1 + tokenizer.h | 2 +- 4 files changed, 632 insertions(+), 60 deletions(-) diff --git a/expression.cpp b/expression.cpp index d8d5386e9..955621c60 100644 --- a/expression.cpp +++ b/expression.cpp @@ -1,9 +1,607 @@ -// -// expression.cpp -// fish -// -// Created by Peter Ammon on 5/25/13. -// -// - #include "expression.h" +#include +#include + +/* Fish grammar: + +# A statement_list is a list of statements, separated by semicolons or newlines + + statement_list = | + statement statement_list + +# A statement is a normal job, or an if / while / and etc. + + statement = boolean_statement | block_statement | decorated_statement + +# A block is a conditional, loop, or begin/end + + block_statement = block_header statement_list END arguments_or_redirections_list + block_header = if_header | for_header | while_header | function_header | begin_header + if_header = IF statement + for_header = FOR var_name IN arguments_or_redirections_list STATEMENT_TERMINATOR + while_header = WHILE statement + begin_header = BEGIN STATEMENT_TERMINATOR + function_header = FUNCTION arguments_or_redirections_list STATEMENT_TERMINATOR + +# A boolean statement is AND or OR or NOT + + boolean_statement = AND statement | OR statement | NOT statement + +# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" + + decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement + plain_statement = command arguments_or_redirections_list terminator + + arguments_or_redirections_list = | + argument_or_redirection arguments_or_redirections_list + argument_or_redirection = redirection | + redirection = + + terminator = | 
+ +*/ + +#define PARSE_ASSERT(a) assert(a) + +#define PARSER_DIE() assert(0) + +#if 1 +class parse_command_t; + +enum parse_token_type_t +{ + token_type_invalid, + + // Non-terminal tokens + symbol_statement_list, + symbol_statement, + symbol_block_statement, + symbol_block_header, + symbol_if_header, + symbol_for_header, + symbol_while_header, + symbol_begin_header, + symbol_function_header, + symbol_boolean_statement, + symbol_decorated_statement, + symbol_plain_statement, + symbol_arguments_or_redirections_list, + symbol_argument_or_redirection, + + + // Terminal types + parse_token_type_string, + parse_token_type_pipe, + parse_token_type_redirection, + parse_token_background, + parse_token_type_end, + parse_token_type_terminate, +}; + +enum parse_keyword_t +{ + parse_keyword_none, + parse_keyword_if, + parse_keyword_else, + parse_keyword_for, + parse_keyword_in, + parse_keyword_while, + parse_keyword_begin, + parse_keyword_function, + parse_keyword_switch, + parse_keyword_end, + parse_keyword_and, + parse_keyword_or, + parse_keyword_not, + parse_keyword_command, + parse_keyword_builtin +}; + +struct parse_stack_element_t +{ + enum parse_token_type_t type; + enum parse_keyword_t keyword; + + // Construct a token type, with no keyword + parse_stack_element_t(enum parse_token_type_t t) : type(t), keyword(parse_keyword_none) + { + } + + // Construct a string type from a keyword + parse_stack_element_t(enum parse_keyword_t k) : type(parse_token_type_string), keyword(k) + { + } +}; + +struct parse_token_t +{ + enum parse_token_type_t type; // The type of the token as represnted by the parser + enum token_type tokenizer_type; // The type of the token as represented by the tokenizer + enum parse_keyword_t keyword; // Any keyword represented by this parser + size_t source_start; + size_t source_end; +}; + +// Convert from tokenizer_t's token type to our token +static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type) +{ + parse_token_t 
result = {}; + result.tokenizer_type = tokenizer_token_type; + switch (tokenizer_token_type) + { + case TOK_STRING: + result.type = parse_token_type_string; + break; + + case TOK_PIPE: + result.type = parse_token_type_pipe; + break; + + case TOK_END: + result.type = parse_token_type_end; + break; + + case TOK_BACKGROUND: + result.type = parse_token_background; + break; + + default: + fprintf(stderr, "Bad token type %d passed to %s\n", (int)tokenizer_token_type, __FUNCTION__); + assert(0); + break; + } + return result; +} + +/** Root of a parse tree */ +class parse_statement_list_t; +class parse_tree_t +{ + friend class parse_ll_t; + + parse_statement_list_t *root; +}; + +/** Base class for nodes of a parse tree */ +class parse_node_base_t +{ + /* Backreference to the tree */ + parse_tree_t * const tree; + + /* Type of the node */ + const enum parse_token_type_t type; + + /* Start in the source code */ + const unsigned int source_start; + + /* Length of our range in the source code */ + const unsigned int source_length; + + public: + parse_node_base_t(parse_tree_t *tr, parse_token_type_t ty) : tree(tr), type(ty), source_start(0), source_length(0) + { + } + + virtual ~parse_node_base_t() + { + } +}; + +class parse_statement_t; +class parse_statement_list_t : public parse_node_base_t +{ + std::vector statements; //deleted by destructor + public: + parse_statement_list_t(parse_tree_t *t) : parse_node_base_t(t, symbol_statement_list) + { + } +}; + +class parse_statement_t : public parse_node_base_t +{ + // abstract class + + public: + parse_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_node_base_t(t, ty) + { + } +}; + +class parse_boolean_statement_t : public parse_statement_t +{ + enum { + boolean_and, + boolean_or, + boolean_not + }; + parse_statement_t *subject; + + parse_boolean_statement_t(parse_tree_t *t) : subject(NULL), parse_statement_t(t, symbol_boolean_statement) + { + } +}; + +class parse_plain_statement_t; +class parse_decorated_statement_t : 
public parse_statement_t +{ + enum { + decoration_command, + decoration_builtin + } decoration; + + parse_plain_statement_t *subject; + + parse_decorated_statement_t(parse_tree_t *t) : subject(NULL), parse_statement_t(t, symbol_decorated_statement) + { + } + +}; + +class parse_plain_statement_t : public parse_statement_t +{ + wcstring_list_t arguments; + wcstring_list_t redirections; + + parse_plain_statement_t(parse_tree_t *t) : parse_statement_t(t, symbol_plain_statement) + { + } +}; + +class parse_block_statement_t : public parse_statement_t +{ + // abstract class + parse_block_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_statement_t(t, ty) + { + } +}; + +class parse_ll_t +{ + friend class parse_t; + + std::stack symbol_stack; // LL parser stack + std::stack node_stack; // stack of nodes we are constructing; owned by the tree (not by us!) + parse_tree_t *tree; //tree we are constructing + + // Constructor + parse_ll_t() + { + this->tree = new parse_tree_t(); + tree->root = new parse_statement_list_t(this->tree);; + + symbol_stack.push(symbol_statement_list); // goal token + node_stack.push(tree->root); //outermost node + } + + // implementation of certain parser constructions + void accept_token(parse_token_t token); + void accept_token_statement_list(parse_token_t token); + void accept_token_statement(parse_token_t token); + void accept_token_block_header(parse_token_t token); + void accept_token_boolean_statement(parse_token_t token); + void accept_token_decorated_statement(parse_token_t token); + void accept_token_arguments_or_redirections_list(parse_token_t token); + void accept_token_argument_or_redirection(parse_token_t token); + + void token_unhandled(parse_token_t token, const char *function); + + void parse_error(const wchar_t *expected, parse_token_t token); + + parse_token_type_t stack_top_type() const + { + return symbol_stack.top().type; + } + + // Pop from the top of the symbol stack, then push. 
Note that these are pushed in reverse order, so the first argument will be on the top of the stack + inline void symbol_stack_pop_push(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) + { + symbol_stack.pop(); + if (tok5.type != token_type_invalid) symbol_stack.push(tok5); + if (tok4.type != token_type_invalid) symbol_stack.push(tok4); + if (tok3.type != token_type_invalid) symbol_stack.push(tok3); + if (tok2.type != token_type_invalid) symbol_stack.push(tok2); + if (tok1.type != token_type_invalid) symbol_stack.push(tok1); + } +}; + +void parse_ll_t::token_unhandled(parse_token_t token, const char *function) +{ + fprintf(stderr, "Unhandled token with type %d in function %s\n", (int)token.type, function); + PARSER_DIE(); +} + +void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) +{ + fprintf(stderr, "Expected a %ls, instead got a token of type %d\n", expected, (int)token.type); +} + +void parse_ll_t::accept_token_statement_list(parse_token_t token) +{ + PARSE_ASSERT(symbol_stack.top().type == symbol_statement_list); + switch (token.type) + { + case parse_token_type_string: + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_background: + case parse_token_type_end: + symbol_stack_pop_push(symbol_statement, symbol_statement_list); + + break; + + case parse_token_type_terminate: + // no more commands, just transition to empty + symbol_stack_pop_push(); + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token_statement(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_statement); + switch (token.type) + { + case parse_token_type_string: + switch (token.keyword) + { + case parse_keyword_and: + case parse_keyword_or: + case parse_keyword_not: + 
symbol_stack_pop_push(symbol_boolean_statement); + break; + + case parse_keyword_if: + case parse_keyword_else: + case parse_keyword_for: + case parse_keyword_in: + case parse_keyword_while: + case parse_keyword_begin: + case parse_keyword_function: + case parse_keyword_switch: + symbol_stack_pop_push(symbol_block_statement); + break; + + case parse_keyword_end: + // TODO + break; + + case parse_keyword_none: + case parse_keyword_command: + case parse_keyword_builtin: + symbol_stack_pop_push(symbol_decorated_statement); + break; + + } + break; + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_background: + case parse_token_type_end: + case parse_token_type_terminate: + parse_error(L"command", token); + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token_block_header(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_block_header); + switch (token.type) + { + case parse_token_type_string: + switch (token.keyword) + { + case parse_keyword_if: + symbol_stack_pop_push(symbol_if_header); + break; + + case parse_keyword_else: + //todo + break; + + case parse_keyword_for: + symbol_stack_pop_push(symbol_for_header); + break; + + + case parse_keyword_while: + symbol_stack_pop_push(symbol_while_header); + break; + + case parse_keyword_begin: + symbol_stack_pop_push(symbol_begin_header); + break; + + case parse_keyword_function: + symbol_stack_pop_push(symbol_function_header); + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + + } + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token_boolean_statement(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); + switch (token.type) + { + case parse_token_type_string: + switch (token.keyword) + { + case parse_keyword_and: + symbol_stack_pop_push(parse_keyword_and, symbol_statement); + break; + case parse_keyword_or: 
+ symbol_stack_pop_push(parse_keyword_or, symbol_statement); + break; + case parse_keyword_not: + symbol_stack_pop_push(parse_keyword_not, symbol_statement); + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + } + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token_decorated_statement(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); + switch (token.type) + { + case parse_token_type_string: + switch (token.keyword) + { + case parse_keyword_command: + symbol_stack_pop_push(parse_keyword_command, symbol_statement); + break; + case parse_keyword_builtin: + symbol_stack_pop_push(parse_keyword_builtin, symbol_statement); + break; + default: + symbol_stack_pop_push(symbol_plain_statement); + break; + } + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_arguments_or_redirections_list); + switch (token.type) + { + case parse_token_type_string: + case parse_token_type_redirection: + symbol_stack_pop_push(symbol_argument_or_redirection, symbol_arguments_or_redirections_list); + break; + + default: + // Some other token, end of list + symbol_stack_pop_push(); + break; + } +} + +void parse_ll_t::accept_token_argument_or_redirection(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_argument_or_redirection); + switch (token.type) + { + case parse_token_type_string: + symbol_stack_pop_push(); + // Got an argument + break; + + case parse_token_type_redirection: + symbol_stack_pop_push(); + // Got a redirection + break; + + default: + token_unhandled(token, __FUNCTION__); + break; + } +} + +void parse_ll_t::accept_token(parse_token_t token) +{ + assert(! 
symbol_stack.empty()); + switch (stack_top_type()) + { + case symbol_statement_list: + accept_token_statement_list(token); + break; + + case symbol_statement: + accept_token_statement(token); + break; + + case symbol_block_statement: + symbol_stack_pop_push(symbol_block_header, symbol_statement_list, parse_keyword_end, symbol_arguments_or_redirections_list); + break; + + case symbol_block_header: + accept_token_block_header(token); + break; + + case symbol_if_header: + break; + + case symbol_for_header: + symbol_stack_pop_push(parse_keyword_for, parse_token_type_string, parse_keyword_in, symbol_arguments_or_redirections_list, parse_token_type_end); + break; + + case symbol_while_header: + symbol_stack_pop_push(parse_keyword_while, symbol_statement); + break; + + case symbol_begin_header: + symbol_stack_pop_push(parse_keyword_begin, parse_token_type_end); + break; + + case symbol_function_header: + symbol_stack_pop_push(parse_keyword_function, symbol_arguments_or_redirections_list, parse_token_type_end); + break; + + case symbol_boolean_statement: + accept_token_boolean_statement(token); + break; + + case symbol_decorated_statement: + accept_token_decorated_statement(token); + break; + + case symbol_plain_statement: + symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); + break; + + case symbol_arguments_or_redirections_list: + accept_token_arguments_or_redirections_list(token); + break; + + case symbol_argument_or_redirection: + accept_token_argument_or_redirection(token); + break; + } +} +#endif + + +class parse_sr_t +{ + friend class parse_t; + + std::vector node_stack; + void accept_token(parse_token_t token); +}; + +parse_t::parse_t() : parser(new parse_sr_t()) +{ +} diff --git a/expression.h b/expression.h index 390c1bb79..c458800a7 100644 --- a/expression.h +++ b/expression.h @@ -12,13 +12,23 @@ #include "config.h" #include "util.h" #include "common.h" +#include "tokenizer.h" +class parse_ll_t; +class 
parse_sr_t; +class parse_t +{ + parse_sr_t * const parser; + parse_t(); +}; + /* Fish grammar: # A statement_list is a list of statements, separated by semicolons or newlines - statement_list = | statement | statement statement_list + statement_list = | + statement statement_list # A statement is a normal job, or an if / while / and etc. @@ -43,61 +53,24 @@ decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement plain_statement = command arguments_or_redirections_list terminator - arguments_or_redirections_list = | argument_or_redirection - argument_or_redirection = redirection | - redirection = REDIRECTION + arguments_or_redirections_list = | + argument_or_redirection arguments_or_redirections_list + argument_or_redirection = redirection | + redirection = + + terminator = | */ -class parse_command_t; +/* fish Shift-Reduce grammar: -/** Root of a parse tree */ -class parse_tree_t -{ - /** Literal source code */ - wcstring source; - - /** Initial node */ - parse_command_list_t *child; -}; + + IF <- if_statement + FOR <- for_statement + + -/** Base class for nodes of a parse tree */ -class parse_node_base_t -{ - /* Backreference to the tree */ - parse_tree_t * const tree; - - /* Start in the source code */ - const unsigned int source_start; - - /* Length of our range in the source code */ - const unsigned int source_length; -}; - -class parse_statement_list_t : public parse_node_base_t -{ - std::vector statements; -}; - -class parse_statement_t : public parse_node_base_t -{ - -}; - -class parse_boolean_statement_t : public parse_statement_t -{ - -}; - -class parse_plain_statement_t : public parse_statement_t -{ - -}; - -class parse_block_statement_t : public parse_statement_t -{ - -}; +*/ #endif diff --git a/parser.cpp b/parser.cpp index e0a79ea7b..89c1e31b8 100644 --- a/parser.cpp +++ b/parser.cpp @@ -1311,6 +1311,7 @@ void parser_t::parse_job_argument_list(process_t *p, case TOK_BACKGROUND: { job_set_flag(j, JOB_FOREGROUND, 0); + 
// PCA note fall through, this is deliberate. The background modifier & terminates a command } case TOK_END: diff --git a/tokenizer.h b/tokenizer.h index 4357757dc..40390bcf8 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -22,7 +22,7 @@ enum token_type TOK_INVALID,/**< Invalid token */ TOK_STRING,/**< String token */ TOK_PIPE,/**< Pipe token */ - TOK_END,/**< End token */ + TOK_END,/**< End token (semicolon or newline, not literal end) */ TOK_REDIRECT_OUT, /**< redirection token */ TOK_REDIRECT_APPEND,/**< redirection append token */ TOK_REDIRECT_IN,/**< input redirection token */ From 99494afd08081fe90c16777117b2434c3051acc3 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 6 Jun 2013 21:49:40 -0700 Subject: [PATCH 003/108] Parser work. Not sure whether to have a different class for each node or not. --- expression.cpp | 283 ++++++++++++++++++++++++++++++++++++++++--------- expression.h | 2 +- 2 files changed, 234 insertions(+), 51 deletions(-) diff --git a/expression.cpp b/expression.cpp index 955621c60..bbc83fe10 100644 --- a/expression.cpp +++ b/expression.cpp @@ -97,21 +97,6 @@ enum parse_keyword_t parse_keyword_builtin }; -struct parse_stack_element_t -{ - enum parse_token_type_t type; - enum parse_keyword_t keyword; - - // Construct a token type, with no keyword - parse_stack_element_t(enum parse_token_type_t t) : type(t), keyword(parse_keyword_none) - { - } - - // Construct a string type from a keyword - parse_stack_element_t(enum parse_keyword_t k) : type(parse_token_type_string), keyword(k) - { - } -}; struct parse_token_t { @@ -165,9 +150,6 @@ class parse_tree_t /** Base class for nodes of a parse tree */ class parse_node_base_t { - /* Backreference to the tree */ - parse_tree_t * const tree; - /* Type of the node */ const enum parse_token_type_t type; @@ -177,8 +159,12 @@ class parse_node_base_t /* Length of our range in the source code */ const unsigned int source_length; + protected: + /* Index of the production used */ + unsigned char branch; + 
public: - parse_node_base_t(parse_tree_t *tr, parse_token_type_t ty) : tree(tr), type(ty), source_start(0), source_length(0) + parse_node_base_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0) { } @@ -187,12 +173,25 @@ class parse_node_base_t } }; +class parse_node_t : public parse_node_base_t +{ + public: + parse_node_t *p1; + parse_node_t *p2; + uint32_t c1; + + parse_node_t(parse_token_type_t ty) : parse_node_base_t(ty), p1(NULL), p2(NULL), c1(0) + { + } +}; + class parse_statement_t; class parse_statement_list_t : public parse_node_base_t { - std::vector statements; //deleted by destructor public: - parse_statement_list_t(parse_tree_t *t) : parse_node_base_t(t, symbol_statement_list) + parse_statement_t *statement; + parse_statement_list_t *next; + parse_statement_list_t() : parse_node_base_t(symbol_statement_list), statement(NULL), next(NULL) { } }; @@ -200,49 +199,82 @@ class parse_statement_list_t : public parse_node_base_t class parse_statement_t : public parse_node_base_t { // abstract class - public: - parse_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_node_base_t(t, ty) + parse_statement_t(parse_token_type_t ty) : parse_node_base_t(ty) + { + } +}; + +class parse_abstract_statement_t : public parse_statement_t +{ + public: + parse_statement_t *subject; + parse_abstract_statement_t() : parse_statement_t(symbol_statement), subject(NULL) { } }; class parse_boolean_statement_t : public parse_statement_t { + public: enum { + boolean_invalid, boolean_and, boolean_or, boolean_not - }; - parse_statement_t *subject; + } condition; - parse_boolean_statement_t(parse_tree_t *t) : subject(NULL), parse_statement_t(t, symbol_boolean_statement) + parse_boolean_statement_t() : parse_statement_t(symbol_boolean_statement), condition(boolean_invalid) { +#if 0 + switch (keyword) + { + case parse_keyword_and: + condition = boolean_and; + break; + + case parse_keyword_or: + condition = boolean_or; + break; + + case parse_keyword_not: + 
condition = boolean_not; + break; + + default: + PARSE_ASSERT(0 && "Unknown keyword"); + break; + } +#endif } }; class parse_plain_statement_t; class parse_decorated_statement_t : public parse_statement_t { +public: enum { + decoration_none, decoration_command, decoration_builtin } decoration; parse_plain_statement_t *subject; - parse_decorated_statement_t(parse_tree_t *t) : subject(NULL), parse_statement_t(t, symbol_decorated_statement) + parse_decorated_statement_t() : parse_statement_t(symbol_decorated_statement), subject(NULL), decoration(decoration_none) { } }; +class parse_string_t; class parse_plain_statement_t : public parse_statement_t { - wcstring_list_t arguments; - wcstring_list_t redirections; + parse_string_t *command; + parse_arguments_or_redirection_list_t *arguments_or_redirections_list; - parse_plain_statement_t(parse_tree_t *t) : parse_statement_t(t, symbol_plain_statement) + public: + parse_plain_statement_t() : parse_statement_t(symbol_plain_statement) { } }; @@ -250,27 +282,107 @@ class parse_plain_statement_t : public parse_statement_t class parse_block_statement_t : public parse_statement_t { // abstract class - parse_block_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_statement_t(t, ty) + parse_block_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_statement_t(ty) { } }; +class parse_string_t : public parse_node_base_t +{ +}; + +class parse_arguments_or_redirection_list_t : public parse_node_base_t +{ +}; + + +struct parse_stack_element_t +{ + enum parse_token_type_t type; + enum parse_keyword_t keyword; + parse_node_base_t *node; + + private: + void allocate_node(void) + { + assert(node == NULL); + switch (type) + { + // Set up our node + case symbol_statement_list: + node = new parse_statement_list_t(); + break; + + case symbol_statement: + node = new parse_abstract_statement_t(); + break; + + case symbol_block_statement: + case symbol_block_header: + case symbol_if_header: + case symbol_for_header: + case 
symbol_while_header: + case symbol_begin_header: + case symbol_function_header: + break; + + case symbol_boolean_statement: + node = new parse_boolean_statement_t(); + break; + + case symbol_decorated_statement: + node = new parse_decorated_statement_t(); + break; + + case symbol_plain_statement: + node = new parse_plain_statement_t(); + break; + + case symbol_arguments_or_redirections_list: + case symbol_argument_or_redirection: + + default: + ; + // nothing + } + } + + + public: + + // Construct a token type, with no keyword + parse_stack_element_t(enum parse_token_type_t t) : type(t), keyword(parse_keyword_none) + { + allocate_node(); + } + + // Construct a string type from a keyword + parse_stack_element_t(enum parse_keyword_t k) : type(parse_token_type_string), keyword(k), node(NULL) + { + allocate_node(); + } +}; + +template +static T* cast_node(parse_node_base_t *node) +{ + return static_cast(node); +} + class parse_ll_t { friend class parse_t; - std::stack symbol_stack; // LL parser stack - std::stack node_stack; // stack of nodes we are constructing; owned by the tree (not by us!) 
+ std::vector symbol_stack; // LL parser stack parse_tree_t *tree; //tree we are constructing // Constructor parse_ll_t() { this->tree = new parse_tree_t(); - tree->root = new parse_statement_list_t(this->tree);; - symbol_stack.push(symbol_statement_list); // goal token - node_stack.push(tree->root); //outermost node + symbol_stack.push_back(symbol_statement_list); // goal token + tree->root = stack_get_node_cast(0); } // implementation of certain parser constructions @@ -280,6 +392,7 @@ class parse_ll_t void accept_token_block_header(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); void accept_token_decorated_statement(parse_token_t token); + void accept_token_plain_statement(parse_token_t token); void accept_token_arguments_or_redirections_list(parse_token_t token); void accept_token_argument_or_redirection(parse_token_t token); @@ -289,18 +402,33 @@ class parse_ll_t parse_token_type_t stack_top_type() const { - return symbol_stack.top().type; + return symbol_stack.back().type; + } + + template + T* stack_get_node_cast(unsigned int idx) + { + assert(idx < symbol_stack.size()); + parse_node_base_t *base_node = symbol_stack.at(symbol_stack.size() - idx - 1).node; + return static_cast(base_node); + + } + + parse_node_base_t *stack_get_node(unsigned int idx) const + { + assert(idx < symbol_stack.size()); + return symbol_stack.at(symbol_stack.size() - idx - 1).node; } // Pop from the top of the symbol stack, then push. 
Note that these are pushed in reverse order, so the first argument will be on the top of the stack inline void symbol_stack_pop_push(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) { - symbol_stack.pop(); - if (tok5.type != token_type_invalid) symbol_stack.push(tok5); - if (tok4.type != token_type_invalid) symbol_stack.push(tok4); - if (tok3.type != token_type_invalid) symbol_stack.push(tok3); - if (tok2.type != token_type_invalid) symbol_stack.push(tok2); - if (tok1.type != token_type_invalid) symbol_stack.push(tok1); + symbol_stack.pop_back(); + if (tok5.type != token_type_invalid) symbol_stack.push_back(tok5); + if (tok4.type != token_type_invalid) symbol_stack.push_back(tok4); + if (tok3.type != token_type_invalid) symbol_stack.push_back(tok3); + if (tok2.type != token_type_invalid) symbol_stack.push_back(tok2); + if (tok1.type != token_type_invalid) symbol_stack.push_back(tok1); } }; @@ -317,7 +445,8 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) void parse_ll_t::accept_token_statement_list(parse_token_t token) { - PARSE_ASSERT(symbol_stack.top().type == symbol_statement_list); + PARSE_ASSERT(stack_top_type() == symbol_statement_list); + parse_statement_list_t *list = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: @@ -326,7 +455,8 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token) case parse_token_background: case parse_token_type_end: symbol_stack_pop_push(symbol_statement, symbol_statement_list); - + list->next = stack_get_node_cast(0); + list->statement = stack_get_node_cast(1); break; case parse_token_type_terminate: @@ -343,6 +473,7 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token) void parse_ll_t::accept_token_statement(parse_token_t token) { 
PARSE_ASSERT(stack_top_type() == symbol_statement); + parse_abstract_statement_t *statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: @@ -352,6 +483,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_or: case parse_keyword_not: symbol_stack_pop_push(symbol_boolean_statement); + statement->subject = stack_get_node_cast(0); break; case parse_keyword_if: @@ -363,6 +495,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_function: case parse_keyword_switch: symbol_stack_pop_push(symbol_block_statement); + assert(0 && "Need assignment"); break; case parse_keyword_end: @@ -373,6 +506,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_command: case parse_keyword_builtin: symbol_stack_pop_push(symbol_decorated_statement); + statement->subject = stack_get_node_cast(0); break; } @@ -441,18 +575,22 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) void parse_ll_t::accept_token_boolean_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); + parse_boolean_statement_t *statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: switch (token.keyword) { case parse_keyword_and: + statement->condition = parse_boolean_statement_t::boolean_and; symbol_stack_pop_push(parse_keyword_and, symbol_statement); break; case parse_keyword_or: + statement->condition = parse_boolean_statement_t::boolean_or; symbol_stack_pop_push(parse_keyword_or, symbol_statement); break; case parse_keyword_not: + statement->condition = parse_boolean_statement_t::boolean_not; symbol_stack_pop_push(parse_keyword_not, symbol_statement); break; @@ -470,19 +608,29 @@ void parse_ll_t::accept_token_boolean_statement(parse_token_t token) void parse_ll_t::accept_token_decorated_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); + parse_decorated_statement_t 
*statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: switch (token.keyword) { case parse_keyword_command: - symbol_stack_pop_push(parse_keyword_command, symbol_statement); + symbol_stack_pop_push(parse_keyword_command, symbol_plain_statement); + statement->subject = stack_get_node_cast(0); + statement->decoration = parse_decorated_statement_t::decoration_command; + break; + case parse_keyword_builtin: - symbol_stack_pop_push(parse_keyword_builtin, symbol_statement); + symbol_stack_pop_push(parse_keyword_builtin, symbol_plain_statement); + statement->subject = stack_get_node_cast(0); + statement->decoration = parse_decorated_statement_t::decoration_builtin; break; + default: symbol_stack_pop_push(symbol_plain_statement); + statement->subject = stack_get_node_cast(0); + statement->decoration = parse_decorated_statement_t::decoration_none; break; } @@ -492,6 +640,14 @@ void parse_ll_t::accept_token_decorated_statement(parse_token_t token) } } +void parse_ll_t::accept_token_plain_statement(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); + parse_plain_statement_t *statement = stack_get_node_cast(0); + symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); + statement-> +} + void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_arguments_or_redirections_list); @@ -579,7 +735,7 @@ void parse_ll_t::accept_token(parse_token_t token) break; case symbol_plain_statement: - symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); + accept_token_plain_statement(token); break; case symbol_arguments_or_redirections_list: @@ -593,15 +749,42 @@ void parse_ll_t::accept_token(parse_token_t token) } #endif - +#if 0 class parse_sr_t { friend class parse_t; std::vector node_stack; void accept_token(parse_token_t token); + + void 
accept_token_string(parse_token_t token); }; -parse_t::parse_t() : parser(new parse_sr_t()) +void parse_sr_t::accept_token_string(parse_token_t token) +{ + assert(token.type == parse_token_type_string); +} + +void parse_sr_t::accept_token(parse_token_t token) +{ + // We are a SR parser. Our action depends on a combination of the top element(s) of our node stack and the token type. + // Switch on the token type to make progress + switch (token.type) + { + case parse_token_type_string: + accept_token_string(token); + break; + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_background: + case parse_token_type_end: + case parse_token_type_terminate: + } +} + +#endif + +parse_t::parse_t() : parser(new parse_ll_t()) { } diff --git a/expression.h b/expression.h index c458800a7..bb79a2d04 100644 --- a/expression.h +++ b/expression.h @@ -19,7 +19,7 @@ class parse_ll_t; class parse_sr_t; class parse_t { - parse_sr_t * const parser; + parse_ll_t * const parser; parse_t(); }; From f3e5262dc0d0676e77374a77a0835329a20af03d Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 8 Jun 2013 19:20:26 -0700 Subject: [PATCH 004/108] Parser limps into existence --- Makefile.in | 2 +- configure.ac | 2 +- expression.cpp | 665 ++++++++++++++++++++++++------------------------- expression.h | 5 + fish_tests.cpp | 12 +- tokenizer.h | 1 - 6 files changed, 350 insertions(+), 337 deletions(-) diff --git a/Makefile.in b/Makefile.in index c59ac0de3..61a9ef279 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,7 +95,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \ env_universal.o env_universal_common.o input_common.o event.o \ signal.o io.o parse_util.o common.o screen.o path.o autoload.o \ parser_keywords.o iothread.o color.o postfork.o \ - builtin_test.o + builtin_test.o expression.o FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \ parser_keywords.o wutil.o tokenizer.o diff --git a/configure.ac b/configure.ac index 
34f25e1f4..631ba9c7c 100644 --- a/configure.ac +++ b/configure.ac @@ -92,7 +92,7 @@ fi # So ensure this happens before we modify CXXFLAGS below # -AC_PROG_CXX([g++ c++]) +AC_PROG_CXX([clang++ g++ c++]) AC_PROG_CPP AC_PROG_INSTALL diff --git a/expression.cpp b/expression.cpp index bbc83fe10..75245bd13 100644 --- a/expression.cpp +++ b/expression.cpp @@ -1,7 +1,8 @@ #include "expression.h" -#include +#include "tokenizer.h" #include + /* Fish grammar: # A statement_list is a list of statements, separated by semicolons or newlines @@ -41,11 +42,12 @@ */ +typedef size_t node_offset_t; + #define PARSE_ASSERT(a) assert(a) #define PARSER_DIE() assert(0) -#if 1 class parse_command_t; enum parse_token_type_t @@ -67,7 +69,6 @@ enum parse_token_type_t symbol_plain_statement, symbol_arguments_or_redirections_list, symbol_argument_or_redirection, - // Terminal types parse_token_type_string, @@ -76,8 +77,42 @@ enum parse_token_type_t parse_token_background, parse_token_type_end, parse_token_type_terminate, + + FIRST_PARSE_TOKEN_TYPE = parse_token_type_string }; +static wcstring token_type_description(parse_token_type_t type) +{ + switch (type) + { + case token_type_invalid: return L"invalid"; + + case symbol_statement_list: return L"statement_list"; + case symbol_statement: return L"statement"; + case symbol_block_statement: return L"block_statement"; + case symbol_block_header: return L"block_header"; + case symbol_if_header: return L"if_header"; + case symbol_for_header: return L"for_header"; + case symbol_while_header: return L"while_header"; + case symbol_begin_header: return L"begin_header"; + case symbol_function_header: return L"function_header"; + case symbol_boolean_statement: return L"boolean_statement"; + case symbol_decorated_statement: return L"decorated_statement"; + case symbol_plain_statement: return L"plain_statement"; + case symbol_arguments_or_redirections_list: return L"arguments_or_redirections_list"; + case symbol_argument_or_redirection: return 
L"argument_or_redirection"; + + case parse_token_type_string: return L"token_string"; + case parse_token_type_pipe: return L"token_pipe"; + case parse_token_type_redirection: return L"token_redirection"; + case parse_token_background: return L"token_background"; + case parse_token_type_end: return L"token_end"; + case parse_token_type_terminate: return L"token_terminate"; + + default: return format_string(L"Unknown token type %ld", static_cast(type)); + } +} + enum parse_keyword_t { parse_keyword_none, @@ -100,11 +135,11 @@ enum parse_keyword_t struct parse_token_t { - enum parse_token_type_t type; // The type of the token as represnted by the parser + enum parse_token_type_t type; // The type of the token as represented by the parser enum token_type tokenizer_type; // The type of the token as represented by the tokenizer enum parse_keyword_t keyword; // Any keyword represented by this parser size_t source_start; - size_t source_end; + size_t source_length; }; // Convert from tokenizer_t's token type to our token @@ -138,253 +173,114 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_ return result; } -/** Root of a parse tree */ -class parse_statement_list_t; -class parse_tree_t -{ - friend class parse_ll_t; - - parse_statement_list_t *root; -}; - /** Base class for nodes of a parse tree */ -class parse_node_base_t +class parse_node_t { - /* Type of the node */ - const enum parse_token_type_t type; - + public: + /* Start in the source code */ - const unsigned int source_start; + size_t source_start; /* Length of our range in the source code */ - const unsigned int source_length; + size_t source_length; + + /* Children */ + node_offset_t child_start; + node_offset_t child_count; - protected: - /* Index of the production used */ - unsigned char branch; + /* Type-dependent data */ + uint32_t tag; - public: - parse_node_base_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0) + /* Type of the node */ + enum 
parse_token_type_t type; + + + /* Description */ + wcstring describe(void) const { + wcstring result = token_type_description(type); + return result; } - virtual ~parse_node_base_t() + /* Constructor */ + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) { } }; -class parse_node_t : public parse_node_base_t +static void dump_tree_recursive(const std::vector &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) { - public: - parse_node_t *p1; - parse_node_t *p2; - uint32_t c1; + assert(start < nodes.size()); + const parse_node_t &node = nodes.at(start); - parse_node_t(parse_token_type_t ty) : parse_node_base_t(ty), p1(NULL), p2(NULL), c1(0) + append_format(*result, L"%2lu ", *line); + result->append(indent, L' ');; + result->append(node.describe()); + if (node.child_count > 0) { + append_format(*result, L" <%lu children>", node.child_count); } -}; - -class parse_statement_t; -class parse_statement_list_t : public parse_node_base_t -{ - public: - parse_statement_t *statement; - parse_statement_list_t *next; - parse_statement_list_t() : parse_node_base_t(symbol_statement_list), statement(NULL), next(NULL) + if (node.type == parse_token_type_string) { + result->append(L": \""); + result->append(src, node.source_start, node.source_length); + result->append(L"\""); } -}; - -class parse_statement_t : public parse_node_base_t -{ - // abstract class - public: - parse_statement_t(parse_token_type_t ty) : parse_node_base_t(ty) + result->push_back(L'\n'); + ++*line; + for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++) { + dump_tree_recursive(nodes, src, child_idx, indent + 2, result, line); } -}; +} -class parse_abstract_statement_t : public parse_statement_t +static wcstring dump_tree(const std::vector &nodes, const wcstring &src) { - public: - parse_statement_t *subject; - parse_abstract_statement_t() : 
parse_statement_t(symbol_statement), subject(NULL) - { - } -}; - -class parse_boolean_statement_t : public parse_statement_t -{ - public: - enum { - boolean_invalid, - boolean_and, - boolean_or, - boolean_not - } condition; + if (nodes.empty()) + return L"(empty!)"; - parse_boolean_statement_t() : parse_statement_t(symbol_boolean_statement), condition(boolean_invalid) - { -#if 0 - switch (keyword) - { - case parse_keyword_and: - condition = boolean_and; - break; - - case parse_keyword_or: - condition = boolean_or; - break; - - case parse_keyword_not: - condition = boolean_not; - break; - - default: - PARSE_ASSERT(0 && "Unknown keyword"); - break; - } -#endif - } -}; - -class parse_plain_statement_t; -class parse_decorated_statement_t : public parse_statement_t -{ -public: - enum { - decoration_none, - decoration_command, - decoration_builtin - } decoration; - - parse_plain_statement_t *subject; - - parse_decorated_statement_t() : parse_statement_t(symbol_decorated_statement), subject(NULL), decoration(decoration_none) - { - } - -}; - -class parse_string_t; -class parse_plain_statement_t : public parse_statement_t -{ - parse_string_t *command; - parse_arguments_or_redirection_list_t *arguments_or_redirections_list; - - public: - parse_plain_statement_t() : parse_statement_t(symbol_plain_statement) - { - } -}; - -class parse_block_statement_t : public parse_statement_t -{ - // abstract class - parse_block_statement_t(parse_tree_t *t, parse_token_type_t ty) : parse_statement_t(ty) - { - } -}; - -class parse_string_t : public parse_node_base_t -{ -}; - -class parse_arguments_or_redirection_list_t : public parse_node_base_t -{ -}; + size_t line = 0; + wcstring result; + dump_tree_recursive(nodes, src, 0, 0, &result, &line); + return result; +} struct parse_stack_element_t { enum parse_token_type_t type; enum parse_keyword_t keyword; - parse_node_base_t *node; + node_offset_t node_idx; - private: - void allocate_node(void) + parse_stack_element_t(parse_token_type_t t) : 
type(t), keyword(parse_keyword_none), node_idx(-1) { - assert(node == NULL); - switch (type) - { - // Set up our node - case symbol_statement_list: - node = new parse_statement_list_t(); - break; - - case symbol_statement: - node = new parse_abstract_statement_t(); - break; - - case symbol_block_statement: - case symbol_block_header: - case symbol_if_header: - case symbol_for_header: - case symbol_while_header: - case symbol_begin_header: - case symbol_function_header: - break; - - case symbol_boolean_statement: - node = new parse_boolean_statement_t(); - break; - - case symbol_decorated_statement: - node = new parse_decorated_statement_t(); - break; - - case symbol_plain_statement: - node = new parse_plain_statement_t(); - break; - - case symbol_arguments_or_redirections_list: - case symbol_argument_or_redirection: - - default: - ; - // nothing - } - } - - - public: - - // Construct a token type, with no keyword - parse_stack_element_t(enum parse_token_type_t t) : type(t), keyword(parse_keyword_none) - { - allocate_node(); } - // Construct a string type from a keyword - parse_stack_element_t(enum parse_keyword_t k) : type(parse_token_type_string), keyword(k), node(NULL) + parse_stack_element_t(parse_keyword_t k) : type(parse_token_type_string), keyword(k), node_idx(-1) { - allocate_node(); } }; -template -static T* cast_node(parse_node_base_t *node) -{ - return static_cast(node); -} - class parse_ll_t { friend class parse_t; std::vector symbol_stack; // LL parser stack - parse_tree_t *tree; //tree we are constructing + std::vector nodes; + bool errored; // Constructor - parse_ll_t() + parse_ll_t() : errored(false) { - this->tree = new parse_tree_t(); - - symbol_stack.push_back(symbol_statement_list); // goal token - tree->root = stack_get_node_cast(0); + // initial node + parse_stack_element_t elem = symbol_statement_list; + elem.node_idx = 0; + symbol_stack.push_back(elem); // goal token + nodes.push_back(parse_node_t(symbol_statement_list)); } + bool 
top_node_match_token(parse_token_t token); + // implementation of certain parser constructions void accept_token(parse_token_t token); void accept_token_statement_list(parse_token_t token); @@ -395,34 +291,63 @@ class parse_ll_t void accept_token_plain_statement(parse_token_t token); void accept_token_arguments_or_redirections_list(parse_token_t token); void accept_token_argument_or_redirection(parse_token_t token); + bool accept_token_string(parse_token_t token); void token_unhandled(parse_token_t token, const char *function); void parse_error(const wchar_t *expected, parse_token_t token); + // Get the node corresponding to the top element of the stack + parse_node_t &node_for_top_symbol() + { + PARSE_ASSERT(! symbol_stack.empty()); + const parse_stack_element_t &top_symbol = symbol_stack.back(); + PARSE_ASSERT(top_symbol.node_idx != -1); + PARSE_ASSERT(top_symbol.node_idx < nodes.size()); + return nodes.at(top_symbol.node_idx); + } + parse_token_type_t stack_top_type() const { return symbol_stack.back().type; } - template - T* stack_get_node_cast(unsigned int idx) + void top_node_set_tag(uint32_t tag) { - assert(idx < symbol_stack.size()); - parse_node_base_t *base_node = symbol_stack.at(symbol_stack.size() - idx - 1).node; - return static_cast(base_node); + this->node_for_top_symbol().tag = tag; + } + + inline void add_child_to_node(size_t parent_node_idx, parse_stack_element_t *tok) + { + PARSE_ASSERT(tok->type != token_type_invalid); + tok->node_idx = nodes.size(); + nodes.push_back(parse_node_t(tok->type)); + nodes.at(parent_node_idx).child_count += 1; + } - } - parse_node_base_t *stack_get_node(unsigned int idx) const - { - assert(idx < symbol_stack.size()); - return symbol_stack.at(symbol_stack.size() - idx - 1).node; - } - - // Pop from the top of the symbol stack, then push. Note that these are pushed in reverse order, so the first argument will be on the top of the stack + // Pop from the top of the symbol stack, then push, updating node counts. 
Note that these are pushed in reverse order, so the first argument will be on the top of the stack. inline void symbol_stack_pop_push(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) { + // Get the node for the top symbol and tell it about its children + size_t node_idx = symbol_stack.back().node_idx; + parse_node_t &node = nodes.at(node_idx); + + // Should have no children yet + PARSE_ASSERT(node.child_count == 0); + + // Tell the node where its children start + node.child_start = nodes.size(); + + // Add nodes for the children + // Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) + if (tok1.type != token_type_invalid) add_child_to_node(node_idx, &tok1); + if (tok2.type != token_type_invalid) add_child_to_node(node_idx, &tok2); + if (tok3.type != token_type_invalid) add_child_to_node(node_idx, &tok3); + if (tok4.type != token_type_invalid) add_child_to_node(node_idx, &tok4); + if (tok5.type != token_type_invalid) add_child_to_node(node_idx, &tok5); + + // The above set the node_idx. Now replace the top of the stack. 
symbol_stack.pop_back(); if (tok5.type != token_type_invalid) symbol_stack.push_back(tok5); if (tok4.type != token_type_invalid) symbol_stack.push_back(tok4); @@ -446,7 +371,6 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) void parse_ll_t::accept_token_statement_list(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_statement_list); - parse_statement_list_t *list = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: @@ -455,8 +379,6 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token) case parse_token_background: case parse_token_type_end: symbol_stack_pop_push(symbol_statement, symbol_statement_list); - list->next = stack_get_node_cast(0); - list->statement = stack_get_node_cast(1); break; case parse_token_type_terminate: @@ -473,7 +395,6 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token) void parse_ll_t::accept_token_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_statement); - parse_abstract_statement_t *statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: @@ -483,7 +404,6 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_or: case parse_keyword_not: symbol_stack_pop_push(symbol_boolean_statement); - statement->subject = stack_get_node_cast(0); break; case parse_keyword_if: @@ -506,7 +426,6 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_command: case parse_keyword_builtin: symbol_stack_pop_push(symbol_decorated_statement); - statement->subject = stack_get_node_cast(0); break; } @@ -575,23 +494,16 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) void parse_ll_t::accept_token_boolean_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); - parse_boolean_statement_t *statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: switch (token.keyword) { case 
parse_keyword_and: - statement->condition = parse_boolean_statement_t::boolean_and; - symbol_stack_pop_push(parse_keyword_and, symbol_statement); - break; case parse_keyword_or: - statement->condition = parse_boolean_statement_t::boolean_or; - symbol_stack_pop_push(parse_keyword_or, symbol_statement); - break; case parse_keyword_not: - statement->condition = parse_boolean_statement_t::boolean_not; - symbol_stack_pop_push(parse_keyword_not, symbol_statement); + top_node_set_tag(token.keyword); + symbol_stack_pop_push(token.keyword, symbol_statement); break; default: @@ -608,31 +520,27 @@ void parse_ll_t::accept_token_boolean_statement(parse_token_t token) void parse_ll_t::accept_token_decorated_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); - parse_decorated_statement_t *statement = stack_get_node_cast(0); switch (token.type) { case parse_token_type_string: switch (token.keyword) { case parse_keyword_command: + top_node_set_tag(parse_keyword_command); symbol_stack_pop_push(parse_keyword_command, symbol_plain_statement); - statement->subject = stack_get_node_cast(0); - statement->decoration = parse_decorated_statement_t::decoration_command; - break; case parse_keyword_builtin: + top_node_set_tag(parse_keyword_builtin); symbol_stack_pop_push(parse_keyword_builtin, symbol_plain_statement); - statement->subject = stack_get_node_cast(0); - statement->decoration = parse_decorated_statement_t::decoration_builtin; break; default: + top_node_set_tag(parse_keyword_none); symbol_stack_pop_push(symbol_plain_statement); - statement->subject = stack_get_node_cast(0); - statement->decoration = parse_decorated_statement_t::decoration_none; break; } + break; default: token_unhandled(token, __FUNCTION__); @@ -642,10 +550,8 @@ void parse_ll_t::accept_token_decorated_statement(parse_token_t token) void parse_ll_t::accept_token_plain_statement(parse_token_t token) { - PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); - 
parse_plain_statement_t *statement = stack_get_node_cast(0); + PARSE_ASSERT(stack_top_type() == symbol_plain_statement); symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); - statement-> } void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) @@ -671,12 +577,12 @@ void parse_ll_t::accept_token_argument_or_redirection(parse_token_t token) switch (token.type) { case parse_token_type_string: - symbol_stack_pop_push(); + symbol_stack_pop_push(parse_token_type_string); // Got an argument break; case parse_token_type_redirection: - symbol_stack_pop_push(); + symbol_stack_pop_push(parse_token_type_redirection); // Got a redirection break; @@ -686,105 +592,198 @@ void parse_ll_t::accept_token_argument_or_redirection(parse_token_t token) } } -void parse_ll_t::accept_token(parse_token_t token) +bool parse_ll_t::accept_token_string(parse_token_t token) { - assert(! symbol_stack.empty()); - switch (stack_top_type()) - { - case symbol_statement_list: - accept_token_statement_list(token); - break; - - case symbol_statement: - accept_token_statement(token); - break; - - case symbol_block_statement: - symbol_stack_pop_push(symbol_block_header, symbol_statement_list, parse_keyword_end, symbol_arguments_or_redirections_list); - break; - - case symbol_block_header: - accept_token_block_header(token); - break; - - case symbol_if_header: - break; - - case symbol_for_header: - symbol_stack_pop_push(parse_keyword_for, parse_token_type_string, parse_keyword_in, symbol_arguments_or_redirections_list, parse_token_type_end); - break; - - case symbol_while_header: - symbol_stack_pop_push(parse_keyword_while, symbol_statement); - break; - - case symbol_begin_header: - symbol_stack_pop_push(parse_keyword_begin, parse_token_type_end); - break; - - case symbol_function_header: - symbol_stack_pop_push(parse_keyword_function, symbol_arguments_or_redirections_list, parse_token_type_end); - break; - - case 
symbol_boolean_statement: - accept_token_boolean_statement(token); - break; - - case symbol_decorated_statement: - accept_token_decorated_statement(token); - break; - - case symbol_plain_statement: - accept_token_plain_statement(token); - break; - - case symbol_arguments_or_redirections_list: - accept_token_arguments_or_redirections_list(token); - break; - - case symbol_argument_or_redirection: - accept_token_argument_or_redirection(token); - break; - } -} -#endif - -#if 0 -class parse_sr_t -{ - friend class parse_t; - - std::vector node_stack; - void accept_token(parse_token_t token); - - void accept_token_string(parse_token_t token); -}; - -void parse_sr_t::accept_token_string(parse_token_t token) -{ - assert(token.type == parse_token_type_string); -} - -void parse_sr_t::accept_token(parse_token_t token) -{ - // We are a SR parser. Our action depends on a combination of the top element(s) of our node stack and the token type. - // Switch on the token type to make progress + PARSE_ASSERT(stack_top_type() == parse_token_type_string); + bool result = false; switch (token.type) { case parse_token_type_string: - accept_token_string(token); + // Got our string + symbol_stack_pop_push(); + result = true; break; - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_background: - case parse_token_type_end: - case parse_token_type_terminate: + default: + token_unhandled(token, __FUNCTION__); + break; } + return result; } -#endif +bool parse_ll_t::top_node_match_token(parse_token_t token) +{ + PARSE_ASSERT(! symbol_stack.empty()); + PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); + bool result = false; + parse_stack_element_t &stack_top = symbol_stack.back(); + if (stack_top.type == token.type) + { + // So far so good. See if we need a particular keyword. + if (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword) + { + // Success. 
Tell the node that it matched this token + parse_node_t &node = node_for_top_symbol(); + node.source_start = token.source_start; + node.source_length = token.source_length; + + // We consumed this symbol + symbol_stack.pop_back(); + result = true; + } + } + return result; +} + +void parse_ll_t::accept_token(parse_token_t token) +{ + PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); + PARSE_ASSERT(! symbol_stack.empty()); + bool consumed = false; + while (! consumed && ! this->errored) + { + fprintf(stderr, "Top type %ls\n", token_type_description(stack_top_type()).c_str()); + if (top_node_match_token(token)) + { + consumed = true; + break; + } + + switch (stack_top_type()) + { + /* Symbols */ + case symbol_statement_list: + accept_token_statement_list(token); + break; + + case symbol_statement: + accept_token_statement(token); + break; + + case symbol_block_statement: + symbol_stack_pop_push(symbol_block_header, symbol_statement_list, parse_keyword_end, symbol_arguments_or_redirections_list); + break; + + case symbol_block_header: + accept_token_block_header(token); + break; + + case symbol_if_header: + break; + + case symbol_for_header: + symbol_stack_pop_push(parse_keyword_for, parse_token_type_string, parse_keyword_in, symbol_arguments_or_redirections_list, parse_token_type_end); + break; + + case symbol_while_header: + symbol_stack_pop_push(parse_keyword_while, symbol_statement); + break; + + case symbol_begin_header: + symbol_stack_pop_push(parse_keyword_begin, parse_token_type_end); + break; + + case symbol_function_header: + symbol_stack_pop_push(parse_keyword_function, symbol_arguments_or_redirections_list, parse_token_type_end); + break; + + case symbol_boolean_statement: + accept_token_boolean_statement(token); + break; + + case symbol_decorated_statement: + accept_token_decorated_statement(token); + break; + + case symbol_plain_statement: + accept_token_plain_statement(token); + break; + + case symbol_arguments_or_redirections_list: + 
accept_token_arguments_or_redirections_list(token); + break; + + case symbol_argument_or_redirection: + accept_token_argument_or_redirection(token); + break; + + /* Tokens */ + case parse_token_type_string: + consumed = accept_token_string(token); + break; + + default: + fprintf(stderr, "Bailing with token type %d\n", (int)token.type); + break; + } + } +} parse_t::parse_t() : parser(new parse_ll_t()) { } + +static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) +{ + parse_keyword_t result = parse_keyword_none; + if (tok == TOK_STRING) + { + + const struct { + const wchar_t *txt; + parse_keyword_t keyword; + } keywords[] = { + {L"if", parse_keyword_if}, + {L"else", parse_keyword_else}, + {L"for", parse_keyword_for}, + {L"in", parse_keyword_in}, + {L"while", parse_keyword_while}, + {L"begin", parse_keyword_begin}, + {L"function", parse_keyword_function}, + {L"switch", parse_keyword_switch}, + {L"end", parse_keyword_end}, + {L"and", parse_keyword_and}, + {L"or", parse_keyword_or}, + {L"not", parse_keyword_not}, + {L"command", parse_keyword_command}, + {L"builtin", parse_keyword_builtin} + }; + + for (size_t i=0; i < sizeof keywords / sizeof *keywords; i++) + { + if (! 
wcscmp(keywords[i].txt, tok_txt)) + { + result = keywords[i].keyword; + break; + } + } + } + return result; +} + +void parse_t::parse(const wcstring &str) +{ + tokenizer_t tok = tokenizer_t(str.c_str(), 0); + for (; tok_has_next(&tok); tok_next(&tok)) + { + token_type tok_type = static_cast(tok_last_type(&tok)); + const wchar_t *tok_txt = tok_last(&tok); + int tok_start = tok_get_pos(&tok); + + if (tok_type == TOK_ERROR) + { + fprintf(stderr, "Tokenizer error\n"); + break; + } + + parse_token_t token = parse_token_from_tokenizer_token(tok_type); + token.tokenizer_type = tok_type; + token.source_start = (size_t)tok_start; + token.source_length = wcslen(tok_txt); + token.keyword = keyword_for_token(tok_type, tok_txt); + this->parser->accept_token(token); + } + wcstring result = dump_tree(this->parser->nodes, str); + fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); + fprintf(stderr, "node size %ld", sizeof(parse_node_t)); +} diff --git a/expression.h b/expression.h index bb79a2d04..379314cdf 100644 --- a/expression.h +++ b/expression.h @@ -20,9 +20,14 @@ class parse_sr_t; class parse_t { parse_ll_t * const parser; + + public: parse_t(); + void parse(const wcstring &str); }; + + /* Fish grammar: # A statement_list is a list of statements, separated by semicolons or newlines diff --git a/fish_tests.cpp b/fish_tests.cpp index 5146e88e0..fb35f15ef 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -60,6 +60,7 @@ #include "postfork.h" #include "signal.h" #include "highlight.h" +#include "expression.h" /** The number of tests to run @@ -1717,6 +1718,12 @@ void history_tests_t::test_history_speed(void) delete hist; } +static void test_new_parser(void) +{ + say(L"Testing new parser!"); + parse_t parser; + parser.parse(L"echo hello"); +} /** Main test @@ -1733,12 +1740,15 @@ int main(int argc, char **argv) say(L"Lines beginning with '(ignore):' are not errors, they are warning messages\ngenerated by the fish parser library when given 
broken input, and can be\nignored. All actual errors begin with 'Error:'."); set_main_thread(); setup_fork_guards(); - proc_init(); + //proc_init(); event_init(); function_init(); builtin_init(); reader_init(); env_init(); + + test_new_parser(); + return 0; test_format(); test_escape(); diff --git a/tokenizer.h b/tokenizer.h index 40390bcf8..f2d6c0c0b 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -19,7 +19,6 @@ enum token_type { TOK_NONE, /**< Tokenizer not yet constructed */ TOK_ERROR, /**< Error reading token */ - TOK_INVALID,/**< Invalid token */ TOK_STRING,/**< String token */ TOK_PIPE,/**< Pipe token */ TOK_END,/**< End token (semicolon or newline, not literal end) */ From 048f08080d3d2fe60bcc6690c8ef124194980c32 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 9 Jun 2013 14:21:24 -0700 Subject: [PATCH 005/108] Rename expression.h to parse_tree.h and parse_exec.h --- Makefile.in | 2 +- fish.xcodeproj/project.pbxproj | 16 ++++--- fish_tests.cpp | 3 +- parse_exec.cpp | 1 + parse_exec.h | 11 +++++ expression.cpp => parse_tree.cpp | 76 +++++++++++++------------------- expression.h => parse_tree.h | 18 ++------ parser.cpp | 9 ++-- parser.h | 4 +- 9 files changed, 63 insertions(+), 77 deletions(-) create mode 100644 parse_exec.cpp create mode 100644 parse_exec.h rename expression.cpp => parse_tree.cpp (92%) rename expression.h => parse_tree.h (91%) diff --git a/Makefile.in b/Makefile.in index 61a9ef279..56e32e59d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,7 +95,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \ env_universal.o env_universal_common.o input_common.o event.o \ signal.o io.o parse_util.o common.o screen.o path.o autoload.o \ parser_keywords.o iothread.o color.o postfork.o \ - builtin_test.o expression.o + builtin_test.o parse_tree.o parse_exec.o FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \ parser_keywords.o wutil.o tokenizer.o diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj 
index 23b06b5ed..165fb2785 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -72,10 +72,10 @@ D07D266D15E33B86009E43F6 /* functions in Copy Files */ = {isa = PBXBuildFile; fileRef = D025C02815D1FEA100B9DB63 /* functions */; }; D07D266E15E33B86009E43F6 /* tools in Copy Files */ = {isa = PBXBuildFile; fileRef = D025C02915D1FEA100B9DB63 /* tools */; }; D07D267215E34171009E43F6 /* config.fish in Copy Files */ = {isa = PBXBuildFile; fileRef = D0CBD580159EE48F0024809C /* config.fish */; }; - D07FEA311751E6AF003066C3 /* expression.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D07FEA2F1751E6AF003066C3 /* expression.cpp */; }; D0879AC816BF9AAB00E98E56 /* fish_term_icon.icns in Resources */ = {isa = PBXBuildFile; fileRef = D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */; }; D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; + D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; D0CBD587159EF0E10024809C /* launch_fish.scpt in Resources */ = {isa = PBXBuildFile; fileRef = D0CBD586159EF0E10024809C /* launch_fish.scpt */; }; D0D02A67159837AD008E62BD /* complete.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853713B3ACEE0099B651 /* complete.cpp */; }; D0D02A69159837B2008E62BD /* env.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853A13B3ACEE0099B651 /* env.cpp */; }; @@ -337,8 +337,6 @@ D03EE83814DF88B200FC7150 /* lru.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = lru.h; sourceTree = ""; }; D07B247215BCC15700D4ADB4 /* add-shell */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "add-shell"; path = "build_tools/osx_package_scripts/add-shell"; sourceTree = ""; }; 
D07B247515BCC4BE00D4ADB4 /* install.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = install.sh; path = osx/install.sh; sourceTree = ""; }; - D07FEA2F1751E6AF003066C3 /* expression.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = expression.cpp; sourceTree = ""; }; - D07FEA301751E6AF003066C3 /* expression.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = expression.h; sourceTree = ""; }; D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; name = fish_term_icon.icns; path = osx/fish_term_icon.icns; sourceTree = ""; }; D09B1C1914FC7B5B00F91077 /* postfork.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = postfork.cpp; sourceTree = ""; }; D09B1C1A14FC7B5B00F91077 /* postfork.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = postfork.h; sourceTree = ""; }; @@ -444,6 +442,10 @@ D0B6B0FE14E88BA400AD6C10 /* color.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = color.cpp; sourceTree = ""; }; D0B6B0FF14E88BA400AD6C10 /* color.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = color.h; sourceTree = ""; }; D0C4FD9415A7D7EE00212EF1 /* config.fish */ = {isa = PBXFileReference; lastKnownFileType = text; name = config.fish; path = etc/config.fish; sourceTree = ""; }; + D0C52F331765281F00BFAB82 /* parse_exec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_exec.cpp; sourceTree = ""; }; + D0C52F341765281F00BFAB82 /* parse_exec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_exec.h; sourceTree = ""; }; + D0C52F351765284C00BFAB82 /* parse_tree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = 
parse_tree.cpp; sourceTree = ""; }; + D0C52F361765284C00BFAB82 /* parse_tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_tree.h; sourceTree = ""; }; D0C6FCC914CFA4B0004CE8AD /* autoload.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = autoload.cpp; sourceTree = ""; }; D0C6FCCB14CFA4B7004CE8AD /* autoload.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = autoload.h; sourceTree = ""; }; D0C861EA16CC7054003B5A04 /* builtin_set_color.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_set_color.cpp; sourceTree = ""; }; @@ -585,8 +587,10 @@ D0A0853C13B3ACEE0099B651 /* exec.cpp */, D0A0850C13B3ACEE0099B651 /* expand.h */, D0A0853D13B3ACEE0099B651 /* expand.cpp */, - D07FEA301751E6AF003066C3 /* expression.h */, - D07FEA2F1751E6AF003066C3 /* expression.cpp */, + D0C52F361765284C00BFAB82 /* parse_tree.h */, + D0C52F351765284C00BFAB82 /* parse_tree.cpp */, + D0C52F341765281F00BFAB82 /* parse_exec.h */, + D0C52F331765281F00BFAB82 /* parse_exec.cpp */, D0A0850D13B3ACEE0099B651 /* fallback.h */, D0A0853E13B3ACEE0099B651 /* fallback.cpp */, D0A0850E13B3ACEE0099B651 /* function.h */, @@ -1107,7 +1111,7 @@ D0D02A7A15983916008E62BD /* env_universal.cpp in Sources */, D0D02A7B15983928008E62BD /* env_universal_common.cpp in Sources */, D0D02A89159839DF008E62BD /* fish.cpp in Sources */, - D07FEA311751E6AF003066C3 /* expression.cpp in Sources */, + D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/fish_tests.cpp b/fish_tests.cpp index 69368993b..35c1e3277 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -60,7 +60,8 @@ #include "postfork.h" #include "signal.h" #include "highlight.h" -#include "expression.h" +#include "parse_tree.h" +#include "parse_exec.h" /** The number of tests to run diff --git a/parse_exec.cpp b/parse_exec.cpp new 
file mode 100644 index 000000000..5f4cf2d1d --- /dev/null +++ b/parse_exec.cpp @@ -0,0 +1 @@ +#include "parse_exec.h" diff --git a/parse_exec.h b/parse_exec.h new file mode 100644 index 000000000..50a0ddd68 --- /dev/null +++ b/parse_exec.h @@ -0,0 +1,11 @@ +/**\file parse_exec.h + + Programmatic execution of a parse tree +*/ + +#ifndef FISH_PARSE_TREE_H +#define FISH_PARSE_TREE_H + +#include "parse_tree.h" + +#endif diff --git a/expression.cpp b/parse_tree.cpp similarity index 92% rename from expression.cpp rename to parse_tree.cpp index 75245bd13..bdc3cf2bd 100644 --- a/expression.cpp +++ b/parse_tree.cpp @@ -2,47 +2,9 @@ #include "tokenizer.h" #include - -/* Fish grammar: - -# A statement_list is a list of statements, separated by semicolons or newlines - - statement_list = | - statement statement_list - -# A statement is a normal job, or an if / while / and etc. - - statement = boolean_statement | block_statement | decorated_statement - -# A block is a conditional, loop, or begin/end - - block_statement = block_header statement_list END arguments_or_redirections_list - block_header = if_header | for_header | while_header | function_header | begin_header - if_header = IF statement - for_header = FOR var_name IN arguments_or_redirections_list STATEMENT_TERMINATOR - while_header = WHILE statement - begin_header = BEGIN STATEMENT_TERMINATOR - function_header = FUNCTION arguments_or_redirections_list STATEMENT_TERMINATOR - -# A boolean statement is AND or OR or NOT - - boolean_statement = AND statement | OR statement | NOT statement - -# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" - - decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = command arguments_or_redirections_list terminator - - arguments_or_redirections_list = | - argument_or_redirection arguments_or_redirections_list - argument_or_redirection = redirection | - redirection = - - 
terminator = | - -*/ - +struct parse_node_t; typedef size_t node_offset_t; +typedef std::vector parse_node_tree_t; #define PARSE_ASSERT(a) assert(a) @@ -208,7 +170,7 @@ class parse_node_t } }; -static void dump_tree_recursive(const std::vector &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) +static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) { assert(start < nodes.size()); const parse_node_t &node = nodes.at(start); @@ -234,7 +196,7 @@ static void dump_tree_recursive(const std::vector &nodes, const wc } } -static wcstring dump_tree(const std::vector &nodes, const wcstring &src) +static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) { if (nodes.empty()) return L"(empty!)"; @@ -245,7 +207,6 @@ static wcstring dump_tree(const std::vector &nodes, const wcstring return result; } - struct parse_stack_element_t { enum parse_token_type_t type; @@ -261,12 +222,36 @@ struct parse_stack_element_t } }; +class parse_execution_context_t +{ + wcstring src; + const parse_node_tree_t nodes; + size_t node_idx; + + public: + parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) : src(s), nodes(n), node_idx(0) + { + } + + wcstring simulate(void); +}; + +wcstring parse_execution_context_t::simulate() +{ + if (nodes.empty()) + return L"(empty!"); + + PARSE_ASSERT(node_idx < nodes.size()); + PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list); + +} + class parse_ll_t { friend class parse_t; std::vector symbol_stack; // LL parser stack - std::vector nodes; + parse_node_tree_t nodes; bool errored; // Constructor @@ -642,7 +627,6 @@ void parse_ll_t::accept_token(parse_token_t token) bool consumed = false; while (! consumed && ! 
this->errored) { - fprintf(stderr, "Top type %ls\n", token_type_description(stack_top_type()).c_str()); if (top_node_match_token(token)) { consumed = true; @@ -785,5 +769,5 @@ void parse_t::parse(const wcstring &str) } wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); - fprintf(stderr, "node size %ld", sizeof(parse_node_t)); + fprintf(stderr, "node size %ld\n", sizeof(parse_node_t)); } diff --git a/expression.h b/parse_tree.h similarity index 91% rename from expression.h rename to parse_tree.h index 379314cdf..9536479bd 100644 --- a/expression.h +++ b/parse_tree.h @@ -1,11 +1,10 @@ -/**\file expression.h +/**\file parse_tree.h Programmatic representation of fish code. - */ -#ifndef FISH_EXPRESSION_H -#define FISH_EXPRESSION_H +#ifndef FISH_PARSE_TREE_H +#define FISH_PARSE_TREE_H #include @@ -65,17 +64,6 @@ class parse_t terminator = | -*/ - - -/* fish Shift-Reduce grammar: - - - IF <- if_statement - FOR <- for_statement - - - */ #endif diff --git a/parser.cpp b/parser.cpp index 53c68c3cd..d2b0f0690 100644 --- a/parser.cpp +++ b/parser.cpp @@ -722,7 +722,7 @@ void parser_t::print_errors_stderr() } -int parser_t::eval_args(const wchar_t *line, std::vector &args) +void parser_t::eval_args(const wchar_t *line, std::vector &args) { expand_flags_t eflags = 0; @@ -731,10 +731,9 @@ int parser_t::eval_args(const wchar_t *line, std::vector &args) if (this->parser_type != PARSER_TYPE_GENERAL) eflags |= EXPAND_SKIP_CMDSUBST; - int do_loop=1; + bool do_loop=1; - CHECK(line, 1); -// CHECK( args, 1 ); + if (! line) return; // PCA we need to suppress calling proc_push_interactive off of the main thread. I'm not sure exactly what it does. 
if (this->parser_type == PARSER_TYPE_GENERAL) @@ -803,8 +802,6 @@ int parser_t::eval_args(const wchar_t *line, std::vector &args) if (this->parser_type == PARSER_TYPE_GENERAL) proc_pop_interactive(); - - return 1; } void parser_t::stack_trace(block_t *b, wcstring &buff) diff --git a/parser.h b/parser.h index c2ad6c9b7..0c90641ba 100644 --- a/parser.h +++ b/parser.h @@ -396,7 +396,7 @@ public: /** Evaluate line as a list of parameters, i.e. tokenize it and perform parameter expansion and cmdsubst execution on the tokens. - The output is inserted into output, and should be freed by the caller. + The output is inserted into output. \param line Line to evaluate \param output List to insert output to @@ -405,7 +405,7 @@ public: \param line Line to evaluate \param output List to insert output to */ - int eval_args(const wchar_t *line, std::vector &output); + void eval_args(const wchar_t *line, std::vector &output); /** Sets the current evaluation error. This function should only be used by libraries that are called by From e2a506e54a297dcbd13cdb881936e50bbefceb28 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 11 Jun 2013 09:37:51 -0700 Subject: [PATCH 006/108] More work on new parser --- fish.xcodeproj/project.pbxproj | 2 + parse_exec.cpp | 147 +++++++++++++++++++++++++++++++++ parse_exec.h | 15 +++- parse_tree.cpp | 121 ++------------------------- parse_tree.h | 90 ++++++++++++++++++++ 5 files changed, 257 insertions(+), 118 deletions(-) diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 165fb2785..6f6c83ba1 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -76,6 +76,7 @@ D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef 
= D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; + D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F331765281F00BFAB82 /* parse_exec.cpp */; }; D0CBD587159EF0E10024809C /* launch_fish.scpt in Resources */ = {isa = PBXBuildFile; fileRef = D0CBD586159EF0E10024809C /* launch_fish.scpt */; }; D0D02A67159837AD008E62BD /* complete.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853713B3ACEE0099B651 /* complete.cpp */; }; D0D02A69159837B2008E62BD /* env.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853A13B3ACEE0099B651 /* env.cpp */; }; @@ -1112,6 +1113,7 @@ D0D02A7B15983928008E62BD /* env_universal_common.cpp in Sources */, D0D02A89159839DF008E62BD /* fish.cpp in Sources */, D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */, + D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/parse_exec.cpp b/parse_exec.cpp index 5f4cf2d1d..30503857e 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -1 +1,148 @@ #include "parse_exec.h" +#include + + +struct exec_node_t +{ + node_offset_t parse_node_idx; + + exec_node_t(size_t pni) : parse_node_idx(pni) + { + } + + virtual ~exec_node_t(); +}; + +exec_node_t::~exec_node_t() +{ +} + +struct exec_redirection_t : public exec_node_t +{ + +}; + +struct exec_argument_t : public exec_node_t +{ + +}; + +struct exec_statement_t +{ + enum + { + decoration_plain, + decoration_command, + decoration_builtin + } decoration; + + std::vector arguments; + std::vector redirections; +}; + +class parse_exec_t +{ + parse_node_tree_t parse_tree; + wcstring src; + std::vector exec_nodes; + + parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s) + { + } + + void pop_push(uint32_t child_idx) + { + exec_node_t &top = exec_nodes.back(); + const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + PARSE_ASSERT(child_idx < parse_node.child_count); + node_offset_t child_node_idx = 
parse_node.child_start + child_idx; + exec_nodes.pop_back(); + exec_nodes.push_back(child_node_idx); + + } + + void simulate(void); + void enter_parse_node(size_t idx); + void run_top_node(void); +}; + +void parse_exec_t::run_top_node() +{ + PARSE_ASSERT(! exec_nodes.empty()); + exec_node_t &top = exec_nodes.back(); + const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + + switch (parse_node.type) + { + case symbol_statement_list: + PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); + if (parse_node.child_count == 0) + { + // Statement list done + exec_nodes.pop_back(); + } + else + { + // First child is a statement, next is the rest of the list + node_offset_t head = parse_node.child_start; + node_offset_t tail = parse_node.child_start + 1; + exec_nodes.pop_back(); + exec_nodes.push_back(tail); + exec_nodes.push_back(head); + } + break; + + case symbol_statement: + PARSE_ASSERT(parse_node.child_count == 1); + pop_push(0); + break; + + case decorated_statement: + PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 ); + pop_push(0); + break; + + default: + fprintf(stderr, "Unhandled token type %ld\n", (long)parse_node.type); + PARSER_DIE(); + break; + + } +} + +void parse_exec_t::enter_parse_node(size_t idx) +{ + PARSE_ASSERT(idx < parse_tree.size()); + exec_node_t exec(idx); + exec_nodes.push_back(exec); +} + +wcstring parse_exec_t::simulate(void) +{ + PARSE_ASSERT(exec_nodes.empty()); + assemble_statement_list(0); + enter_parse_node(0); + run_node(); +} + +wcstring parse_execution_context_t::simulate() +{ + if (parse_tree.empty()) + return L"(empty!)"; + + PARSE_ASSERT(node_idx < nodes.size()); + PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list); + + wcstring result; + +} + +parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) +{ + ctx = new parse_exec_t(n, s); +} + +wcstring parse_execution_context_t::simulate(void) +{ + return ctx->simulate(); 
+} diff --git a/parse_exec.h b/parse_exec.h index 50a0ddd68..635ebb836 100644 --- a/parse_exec.h +++ b/parse_exec.h @@ -3,9 +3,20 @@ Programmatic execution of a parse tree */ -#ifndef FISH_PARSE_TREE_H -#define FISH_PARSE_TREE_H +#ifndef FISH_PARSE_EXEC_H +#define FISH_PARSE_EXEC_H #include "parse_tree.h" +class parse_exec_t; +class parse_execution_context_t +{ + parse_exec_t *ctx; + + public: + parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s); + wcstring simulate(void); +}; + + #endif diff --git a/parse_tree.cpp b/parse_tree.cpp index bdc3cf2bd..8c38ff0cd 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1,48 +1,10 @@ -#include "expression.h" +#include "parse_tree.h" #include "tokenizer.h" #include -struct parse_node_t; -typedef size_t node_offset_t; -typedef std::vector parse_node_tree_t; - -#define PARSE_ASSERT(a) assert(a) - -#define PARSER_DIE() assert(0) class parse_command_t; -enum parse_token_type_t -{ - token_type_invalid, - - // Non-terminal tokens - symbol_statement_list, - symbol_statement, - symbol_block_statement, - symbol_block_header, - symbol_if_header, - symbol_for_header, - symbol_while_header, - symbol_begin_header, - symbol_function_header, - symbol_boolean_statement, - symbol_decorated_statement, - symbol_plain_statement, - symbol_arguments_or_redirections_list, - symbol_argument_or_redirection, - - // Terminal types - parse_token_type_string, - parse_token_type_pipe, - parse_token_type_redirection, - parse_token_background, - parse_token_type_end, - parse_token_type_terminate, - - FIRST_PARSE_TOKEN_TYPE = parse_token_type_string -}; - static wcstring token_type_description(parse_token_type_t type) { switch (type) @@ -75,25 +37,11 @@ static wcstring token_type_description(parse_token_type_t type) } } -enum parse_keyword_t +wcstring parse_node_t::describe(void) const { - parse_keyword_none, - parse_keyword_if, - parse_keyword_else, - parse_keyword_for, - parse_keyword_in, - parse_keyword_while, - parse_keyword_begin, 
- parse_keyword_function, - parse_keyword_switch, - parse_keyword_end, - parse_keyword_and, - parse_keyword_or, - parse_keyword_not, - parse_keyword_command, - parse_keyword_builtin -}; - + wcstring result = token_type_description(type); + return result; +} struct parse_token_t { @@ -135,41 +83,6 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_ return result; } -/** Base class for nodes of a parse tree */ -class parse_node_t -{ - public: - - /* Start in the source code */ - size_t source_start; - - /* Length of our range in the source code */ - size_t source_length; - - /* Children */ - node_offset_t child_start; - node_offset_t child_count; - - /* Type-dependent data */ - uint32_t tag; - - /* Type of the node */ - enum parse_token_type_t type; - - - /* Description */ - wcstring describe(void) const - { - wcstring result = token_type_description(type); - return result; - } - - /* Constructor */ - explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) - { - } -}; - static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) { assert(start < nodes.size()); @@ -222,30 +135,6 @@ struct parse_stack_element_t } }; -class parse_execution_context_t -{ - wcstring src; - const parse_node_tree_t nodes; - size_t node_idx; - - public: - parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) : src(s), nodes(n), node_idx(0) - { - } - - wcstring simulate(void); -}; - -wcstring parse_execution_context_t::simulate() -{ - if (nodes.empty()) - return L"(empty!"); - - PARSE_ASSERT(node_idx < nodes.size()); - PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list); - -} - class parse_ll_t { friend class parse_t; diff --git a/parse_tree.h b/parse_tree.h index 9536479bd..892c36cdd 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -12,6 +12,10 @@ #include "util.h" #include 
"common.h" #include "tokenizer.h" +#include + +#define PARSE_ASSERT(a) assert(a) +#define PARSER_DIE() assert(0) class parse_ll_t; @@ -25,6 +29,92 @@ class parse_t void parse(const wcstring &str); }; +class parse_node_t; +typedef std::vector parse_node_tree_t; +typedef size_t node_offset_t; + + +enum parse_token_type_t +{ + token_type_invalid, + + // Non-terminal tokens + symbol_statement_list, + symbol_statement, + symbol_block_statement, + symbol_block_header, + symbol_if_header, + symbol_for_header, + symbol_while_header, + symbol_begin_header, + symbol_function_header, + symbol_boolean_statement, + symbol_decorated_statement, + symbol_plain_statement, + symbol_arguments_or_redirections_list, + symbol_argument_or_redirection, + + // Terminal types + parse_token_type_string, + parse_token_type_pipe, + parse_token_type_redirection, + parse_token_background, + parse_token_type_end, + parse_token_type_terminate, + + FIRST_PARSE_TOKEN_TYPE = parse_token_type_string +}; + +enum parse_keyword_t +{ + parse_keyword_none, + parse_keyword_if, + parse_keyword_else, + parse_keyword_for, + parse_keyword_in, + parse_keyword_while, + parse_keyword_begin, + parse_keyword_function, + parse_keyword_switch, + parse_keyword_end, + parse_keyword_and, + parse_keyword_or, + parse_keyword_not, + parse_keyword_command, + parse_keyword_builtin +}; + +/** Base class for nodes of a parse tree */ +class parse_node_t +{ + public: + + /* Type of the node */ + enum parse_token_type_t type; + + /* Start in the source code */ + size_t source_start; + + /* Length of our range in the source code */ + size_t source_length; + + /* Children */ + node_offset_t child_start; + node_offset_t child_count; + + /* Type-dependent data */ + uint32_t tag; + + + /* Description */ + wcstring describe(void) const; + + /* Constructor */ + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) + { + } +}; + /* Fish grammar: From 
827a9d640c97b95931280e55b8567f69985c2ff1 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 15 Jun 2013 14:32:38 -0700 Subject: [PATCH 007/108] Added parse builtin for testing. Lots of work on simulated execution. --- builtin.cpp | 28 ++++- fish_tests.cpp | 10 +- parse_exec.cpp | 289 +++++++++++++++++++++++++++++++++++++++++-------- parse_tree.cpp | 10 +- parse_tree.h | 11 +- 5 files changed, 295 insertions(+), 53 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 9796d356f..12b331b82 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -64,6 +64,8 @@ #include "expand.h" #include "path.h" #include "history.h" +#include "parse_tree.h" +#include "parse_exec.h" /** The default prompt for the read command @@ -3938,6 +3940,30 @@ static int builtin_history(parser_t &parser, wchar_t **argv) return STATUS_BUILTIN_ERROR; } +int builtin_parse(parser_t &parser, wchar_t **argv) +{ + std::vector txt; + for (;;) + { + char buff[256]; + ssize_t amt = read_loop(builtin_stdin, buff, sizeof buff); + if (amt <= 0) break; + txt.insert(txt.end(), buff, buff + amt); + } + if (! 
txt.empty()) + { + const wcstring src = str2wcstring(&txt.at(0), txt.size()); + parse_node_tree_t parse_tree; + parse_t parser; + parser.parse(src, &parse_tree); + parse_execution_context_t ctx(parse_tree, src); + stdout_buffer.append(L"Simulating execution:"); + wcstring simulation = ctx.simulate(); + stdout_buffer.append(simulation); + stdout_buffer.push_back(L'\n'); + } + return STATUS_BUILTIN_OK; +} /* END OF BUILTIN COMMANDS @@ -3985,6 +4011,7 @@ static const builtin_data_t builtin_datas[]= { L"jobs", &builtin_jobs, N_(L"Print currently running jobs") }, { L"not", &builtin_generic, N_(L"Negate exit status of job") }, { L"or", &builtin_generic, N_(L"Execute command if previous command failed") }, + { L"parse", &builtin_parse, N_(L"Try out the new parser") }, { L"printf", &builtin_printf, N_(L"Prints formatted text") }, { L"pwd", &builtin_pwd, N_(L"Print the working directory") }, { L"random", &builtin_random, N_(L"Generate random number") }, @@ -4144,4 +4171,3 @@ void builtin_pop_io(parser_t &parser) builtin_stdin = 0; } } - diff --git a/fish_tests.cpp b/fish_tests.cpp index 35c1e3277..20d79288b 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1722,8 +1722,16 @@ void history_tests_t::test_history_speed(void) static void test_new_parser(void) { say(L"Testing new parser!"); + const wcstring src = L"echo hello world"; + parse_node_tree_t parse_tree; parse_t parser; - parser.parse(L"echo hello"); + parser.parse(src, &parse_tree); + parse_execution_context_t ctx(parse_tree, src); + say(L"Simulating execution:"); + wcstring simulation = ctx.simulate(); + printf("%ls\n", simulation.c_str()); + + } /** diff --git a/parse_exec.cpp b/parse_exec.cpp index 30503857e..04601e5ba 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -1,34 +1,41 @@ #include "parse_exec.h" #include +typedef uint16_t sanity_id_t; +static sanity_id_t next_sanity_id() +{ + static sanity_id_t last_sanity_id; + return ++last_sanity_id; +} struct exec_node_t { node_offset_t parse_node_idx; + 
sanity_id_t command_sanity_id; exec_node_t(size_t pni) : parse_node_idx(pni) { } - virtual ~exec_node_t(); }; -exec_node_t::~exec_node_t() +struct exec_argument_t { -} + node_offset_t parse_node_idx; + sanity_id_t command_sanity_id; +}; -struct exec_redirection_t : public exec_node_t +struct exec_redirection_t { }; -struct exec_argument_t : public exec_node_t +struct exec_basic_statement_t { + // Node containing the command + node_offset_t command_idx; -}; - -struct exec_statement_t -{ + // Decoration enum { decoration_plain, @@ -38,68 +45,260 @@ struct exec_statement_t std::vector arguments; std::vector redirections; + uint16_t sanity_id; + + exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) + { + sanity_id = next_sanity_id(); + } + + void set_decoration(uint32_t k) + { + PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin); + switch (k) + { + case parse_keyword_none: + decoration = decoration_plain; + break; + case parse_keyword_command: + decoration = decoration_command; + break; + case parse_keyword_builtin: + decoration = decoration_builtin; + break; + default: + PARSER_DIE(); + break; + } + + } }; class parse_exec_t { parse_node_tree_t parse_tree; wcstring src; + + bool simulating; + wcstring_list_t simulation_result; + + /* The stack of nodes as we execute them */ std::vector exec_nodes; - parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s) + /* The stack of commands being built */ + std::vector assembling_statements; + + void get_node_string(node_offset_t idx, wcstring *output) const { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.source_start <= src.size()); + PARSE_ASSERT(node.source_start + node.source_length <= src.size()); + output->assign(src, node.source_start, node.source_length); + } + + void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) + { + PARSE_ASSERT(! 
exec_nodes.empty()); + if (child_count == 0) + { + // No children, just remove the top node + exec_nodes.pop_back(); + } + else + { + // Figure out the offset of the children + exec_node_t &top = exec_nodes.back(); + const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + PARSE_ASSERT(child_idx < parse_node.child_count); + node_offset_t child_node_idx = parse_node.child_start + child_idx; + + // Remove the top node + exec_nodes.pop_back(); + + // Append the given children, backwards + sanity_id_t command_sanity_id = assembling_statements.empty() ? 0 : assembling_statements.back().sanity_id; + node_offset_t cursor = child_count; + while (cursor--) + { + exec_nodes.push_back(child_node_idx + cursor); + exec_nodes.back().command_sanity_id = command_sanity_id; + } + } } - void pop_push(uint32_t child_idx) + void pop() + { + PARSE_ASSERT(! exec_nodes.empty()); + exec_nodes.pop_back(); + } + + void pop_push_all() { exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - PARSE_ASSERT(child_idx < parse_node.child_count); - node_offset_t child_node_idx = parse_node.child_start + child_idx; - exec_nodes.pop_back(); - exec_nodes.push_back(child_node_idx); + pop_push(0, parse_node.child_count); + } + + void assemble_command(node_offset_t idx) + { + // Set the command for our top basic statement + PARSE_ASSERT(! assembling_statements.empty()); + assembling_statements.back().command_idx = idx; + } + + void assemble_argument_or_redirection(node_offset_t idx) + { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(! 
assembling_statements.empty()); + exec_basic_statement_t &statement = assembling_statements.back(); + switch (node.type) + { + case parse_token_type_string: + // Argument + { + exec_argument_t arg = exec_argument_t(); + arg.parse_node_idx = idx; + arg.command_sanity_id = statement.sanity_id; + statement.arguments.push_back(arg); + } + break; + + case parse_token_type_redirection: + // Redirection + break; + + default: + PARSER_DIE(); + break; + } } - void simulate(void); + void assembly_complete() + { + // Finished building a command + PARSE_ASSERT(! assembling_statements.empty()); + const exec_basic_statement_t &statement = assembling_statements.back(); + + if (simulating) + { + simulate_statement(statement); + } + assembling_statements.pop_back(); + } + + void simulate_statement(const exec_basic_statement_t &statement) + { + PARSE_ASSERT(simulating); + wcstring line; + switch (statement.decoration) + { + case exec_basic_statement_t::decoration_builtin: + line.append(L" "); + break; + + case exec_basic_statement_t::decoration_command: + line.append(L" "); + break; + + default: + break; + } + + wcstring tmp; + get_node_string(statement.command_idx, &tmp); + line.append(L"cmd:"); + line.append(tmp); + for (size_t i=0; i < statement.arguments.size(); i++) + { + const exec_argument_t &arg = statement.arguments.at(i); + get_node_string(arg.parse_node_idx, &tmp); + line.append(L" "); + line.append(L"arg:"); + line.append(tmp); + } + simulation_result.push_back(line); + } + void enter_parse_node(size_t idx); void run_top_node(void); + exec_basic_statement_t *create_basic_statement(void); + + public: + parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false) + { + } + wcstring simulate(void); }; +exec_basic_statement_t *parse_exec_t::create_basic_statement() +{ + assembling_statements.push_back(exec_basic_statement_t()); + return &assembling_statements.back(); +} + void parse_exec_t::run_top_node() { PARSE_ASSERT(! 
exec_nodes.empty()); exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + bool log = false; + + if (log) + { + wcstring tmp; + tmp.append(exec_nodes.size(), L' '); + tmp.append(parse_node.describe()); + printf("%ls\n", tmp.c_str()); + } switch (parse_node.type) { case symbol_statement_list: PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); - if (parse_node.child_count == 0) - { - // Statement list done - exec_nodes.pop_back(); - } - else - { - // First child is a statement, next is the rest of the list - node_offset_t head = parse_node.child_start; - node_offset_t tail = parse_node.child_start + 1; - exec_nodes.pop_back(); - exec_nodes.push_back(tail); - exec_nodes.push_back(head); - } + pop_push_all(); break; case symbol_statement: PARSE_ASSERT(parse_node.child_count == 1); - pop_push(0); + pop_push_all(); break; - case decorated_statement: + case symbol_decorated_statement: + { PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 ); - pop_push(0); + exec_basic_statement_t *cmd = create_basic_statement(); + cmd->set_decoration(parse_node.tag); + + // Push the last node (skip any decoration) + pop_push(parse_node.child_count - 1, 1); + break; + } + + case symbol_plain_statement: + PARSE_ASSERT(parse_node.child_count == 3); + // Extract the command + PARSE_ASSERT(! 
assembling_statements.empty()); + assemble_command(parse_node.child_start + 0); + // Jump to statement list, then terminator + pop_push(1, 2); + break; + + case symbol_arguments_or_redirections_list: + PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); + pop_push_all(); + break; + + case symbol_argument_or_redirection: + PARSE_ASSERT(parse_node.child_count == 1); + assemble_argument_or_redirection(parse_node.child_start + 0); + pop(); + break; + + case parse_token_type_end: + PARSE_ASSERT(parse_node.child_count == 0); + assembly_complete(); + pop(); break; default: @@ -118,23 +317,27 @@ void parse_exec_t::enter_parse_node(size_t idx) } wcstring parse_exec_t::simulate(void) -{ - PARSE_ASSERT(exec_nodes.empty()); - assemble_statement_list(0); - enter_parse_node(0); - run_node(); -} - -wcstring parse_execution_context_t::simulate() { if (parse_tree.empty()) return L"(empty!)"; - PARSE_ASSERT(node_idx < nodes.size()); - PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list); + PARSE_ASSERT(exec_nodes.empty()); + simulating = true; + + enter_parse_node(0); + while (! 
exec_nodes.empty()) + { + run_top_node(); + } wcstring result; + for (size_t i=0; i < simulation_result.size(); i++) + { + result.append(simulation_result.at(i)); + result.append(L"\n"); + } + return result; } parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) diff --git a/parse_tree.cpp b/parse_tree.cpp index 8c38ff0cd..a58b0ff80 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -634,7 +634,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } -void parse_t::parse(const wcstring &str) +void parse_t::parse(const wcstring &str, parse_node_tree_t *output) { tokenizer_t tok = tokenizer_t(str.c_str(), 0); for (; tok_has_next(&tok); tok_next(&tok)) @@ -658,5 +658,11 @@ void parse_t::parse(const wcstring &str) } wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); - fprintf(stderr, "node size %ld\n", sizeof(parse_node_t)); + fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); + + if (output != NULL) + { + output->swap(this->parser->nodes); + this->parser->nodes.clear(); + } } diff --git a/parse_tree.h b/parse_tree.h index 892c36cdd..0b63efa82 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -17,22 +17,21 @@ #define PARSE_ASSERT(a) assert(a) #define PARSER_DIE() assert(0) +class parse_node_t; +typedef std::vector parse_node_tree_t; +typedef size_t node_offset_t; + class parse_ll_t; -class parse_sr_t; class parse_t { parse_ll_t * const parser; public: parse_t(); - void parse(const wcstring &str); + void parse(const wcstring &str, parse_node_tree_t *output); }; -class parse_node_t; -typedef std::vector parse_node_tree_t; -typedef size_t node_offset_t; - enum parse_token_type_t { From 4ca46b7883291fd78469369ce87dce451b73838b Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 15 
Jun 2013 15:21:35 -0700 Subject: [PATCH 008/108] Beginning support for new parser error messages --- builtin.cpp | 25 ++++++++++++++----- fish_tests.cpp | 19 +++++++++------ parse_tree.cpp | 65 ++++++++++++++++++++++++++++++++++++++++++++------ parse_tree.h | 15 +++++++++++- 4 files changed, 103 insertions(+), 21 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 12b331b82..fe09f4f68 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3954,13 +3954,26 @@ int builtin_parse(parser_t &parser, wchar_t **argv) { const wcstring src = str2wcstring(&txt.at(0), txt.size()); parse_node_tree_t parse_tree; + parse_error_list_t errors; parse_t parser; - parser.parse(src, &parse_tree); - parse_execution_context_t ctx(parse_tree, src); - stdout_buffer.append(L"Simulating execution:"); - wcstring simulation = ctx.simulate(); - stdout_buffer.append(simulation); - stdout_buffer.push_back(L'\n'); + bool success = parser.parse(src, &parse_tree, &errors); + if (! success) + { + stdout_buffer.append(L"Parsing failed:\n"); + for (size_t i=0; i < errors.size(); i++) + { + stdout_buffer.append(errors.at(i).describe(src)); + stdout_buffer.push_back(L'\n'); + } + } + else + { + parse_execution_context_t ctx(parse_tree, src); + stdout_buffer.append(L"Simulating execution:\n"); + wcstring simulation = ctx.simulate(); + stdout_buffer.append(simulation); + stdout_buffer.push_back(L'\n'); + } } return STATUS_BUILTIN_OK; } diff --git a/fish_tests.cpp b/fish_tests.cpp index 20d79288b..85ead75ac 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1725,13 +1725,18 @@ static void test_new_parser(void) const wcstring src = L"echo hello world"; parse_node_tree_t parse_tree; parse_t parser; - parser.parse(src, &parse_tree); - parse_execution_context_t ctx(parse_tree, src); - say(L"Simulating execution:"); - wcstring simulation = ctx.simulate(); - printf("%ls\n", simulation.c_str()); - - + bool success = parser.parse(src, &parse_tree, NULL); + if (! 
success) + { + say(L"Parsing failed"); + } + else + { + parse_execution_context_t ctx(parse_tree, src); + say(L"Simulating execution:"); + wcstring simulation = ctx.simulate(); + say(simulation.c_str()); + } } /** diff --git a/parse_tree.cpp b/parse_tree.cpp index a58b0ff80..22a178408 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -3,7 +3,40 @@ #include -class parse_command_t; +wcstring parse_error_t::describe(const wcstring &src) const +{ + wcstring result = text; + if (source_start < src.size() && source_start + source_length <= src.size()) + { + // Locate the beginning of this line of source + size_t line_start = 0; + + // Look for a newline prior to source_start. If we don't find one, start at the beginning of the string; otherwise start one past the newline + size_t newline = (source_start > 0) ? src.find_last_of(L'\n', source_start - 1) : wcstring::npos; /* find_last_of includes its pos argument, so search strictly before source_start; otherwise a token that is itself a newline would put line_start past source_start */ + if (newline != wcstring::npos) + { + line_start = newline + 1; + } + + size_t line_end = src.find(L'\n', source_start + source_length); + if (line_end == wcstring::npos) + { + line_end = src.size(); + } + assert(line_end >= line_start); + assert(source_start >= line_start); + + // Append the line of text + result.push_back(L'\n'); + result.append(src, line_start, line_end - line_start); + + // Append the caret line + result.push_back(L'\n'); + result.append(source_start - line_start, L' '); + result.push_back(L'^'); + } + return result; +} static wcstring token_type_description(parse_token_type_t type) { @@ -141,10 +174,12 @@ class parse_ll_t std::vector symbol_stack; // LL parser stack parse_node_tree_t nodes; - bool errored; + + bool fatal_errored; + parse_error_list_t errors; // Constructor - parse_ll_t() : errored(false) + parse_ll_t() : fatal_errored(false) { // initial node parse_stack_element_t elem = symbol_statement_list; @@ -170,6 +205,7 @@ class parse_ll_t void token_unhandled(parse_token_t token, const char *function); void parse_error(const wchar_t *expected, parse_token_t token); + void append_error_callout(wcstring 
&error_message, parse_token_t token); // Get the node corresponding to the top element of the stack parse_node_t &node_for_top_symbol() @@ -239,7 +275,13 @@ void parse_ll_t::token_unhandled(parse_token_t token, const char *function) void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) { - fprintf(stderr, "Expected a %ls, instead got a token of type %d\n", expected, (int)token.type); + wcstring desc = token_type_description(token.type); + parse_error_t error; + error.text = format_string(L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); + error.source_start = token.source_start; + error.source_length = token.source_length; /* record both the offset and the length of the offending token */ + errors.push_back(error); + fatal_errored = true; } void parse_ll_t::accept_token_statement_list(parse_token_t token) @@ -514,7 +556,7 @@ void parse_ll_t::accept_token(parse_token_t token) PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); PARSE_ASSERT(! symbol_stack.empty()); bool consumed = false; - while (! consumed && ! this->errored) + while (! consumed && ! this->fatal_errored) { if (top_node_match_token(token)) { @@ -634,10 +676,10 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } -void parse_t::parse(const wcstring &str, parse_node_tree_t *output) +bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors) { tokenizer_t tok = tokenizer_t(str.c_str(), 0); - for (; tok_has_next(&tok); tok_next(&tok)) + for (; tok_has_next(&tok) && ! 
this->parser->fatal_errored; tok_next(&tok)) { token_type tok_type = static_cast(tok_last_type(&tok)); const wchar_t *tok_txt = tok_last(&tok); @@ -656,6 +698,7 @@ void parse_t::parse(const wcstring &str, parse_node_tree_t *output) token.keyword = keyword_for_token(tok_type, tok_txt); this->parser->accept_token(token); } + wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); @@ -665,4 +708,12 @@ void parse_t::parse(const wcstring &str, parse_node_tree_t *output) output->swap(this->parser->nodes); this->parser->nodes.clear(); } + + if (errors != NULL) + { + errors->swap(this->parser->errors); + this->parser->errors.clear(); + } + + return ! this->parser->fatal_errored; } diff --git a/parse_tree.h b/parse_tree.h index 0b63efa82..271f8cf80 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -21,6 +21,19 @@ class parse_node_t; typedef std::vector parse_node_tree_t; typedef size_t node_offset_t; +struct parse_error_t +{ + /** Text of the error */ + wcstring text; + + /** Offset and length of the token in the source code that triggered this error */ + size_t source_start; + size_t source_length; + + /** Return a string describing the error, suitable for presentation to the user */ + wcstring describe(const wcstring &src) const; +}; +typedef std::vector parse_error_list_t; class parse_ll_t; class parse_t @@ -29,7 +42,7 @@ class parse_t public: parse_t(); - void parse(const wcstring &str, parse_node_tree_t *output); + bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors); }; From b771e97ac61d3377eb74f037db4424c6c4010729 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 18 Jun 2013 23:35:04 -0700 Subject: [PATCH 009/108] More work on simulating execution of parse trees. 
Pipes are totally screwed up. --- parse_exec.cpp | 15 +++++++++++- parse_tree.cpp | 63 ++++++++++++++++++++++++++++++++++++++++++-------- parse_tree.h | 4 ++-- 3 files changed, 70 insertions(+), 12 deletions(-) diff --git a/parse_exec.cpp b/parse_exec.cpp index 04601e5ba..6ab90117e 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -260,9 +260,22 @@ void parse_exec_t::run_top_node() break; case symbol_statement: + { PARSE_ASSERT(parse_node.child_count == 1); - pop_push_all(); + // See if we're just an empty statement + const parse_node_t &child = parse_tree.at(parse_node.child_start + 0); + if (child.type == parse_token_type_end) + { + // Empty statement + pop(); + } + else + { + // We have a statement to execute + pop_push_all(); + } break; + } case symbol_decorated_statement: { diff --git a/parse_tree.cpp b/parse_tree.cpp index 22a178408..d2c34b3bb 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -121,8 +121,16 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & assert(start < nodes.size()); const parse_node_t &node = nodes.at(start); + const size_t spacesPerIndent = 2; + + // unindent statement lists by 1 to flatten them + if (node.type == symbol_statement_list || node.type == symbol_arguments_or_redirections_list) + { + if (indent > 0) indent -= 1; + } + append_format(*result, L"%2lu ", *line); - result->append(indent, L' ');; + result->append(indent * spacesPerIndent, L' ');; result->append(node.describe()); if (node.child_count > 0) { @@ -138,7 +146,7 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & ++*line; for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++) { - dump_tree_recursive(nodes, src, child_idx, indent + 2, result, line); + dump_tree_recursive(nodes, src, child_idx, indent + 1, result, line); } } @@ -239,6 +247,18 @@ class parse_ll_t // Pop from the top of the symbol stack, then push, updating node counts. 
Note that these are pushed in reverse order, so the first argument will be on the top of the stack. inline void symbol_stack_pop_push(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) { + + // Logging? + if (1) + { + fprintf(stderr, "Pop %ls\n", token_type_description(symbol_stack.back().type).c_str()); + if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok5.type).c_str()); + if (tok4.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok4.type).c_str()); + if (tok3.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok3.type).c_str()); + if (tok2.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok2.type).c_str()); + if (tok1.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok1.type).c_str()); + } + // Get the node for the top symbol and tell it about its children size_t node_idx = symbol_stack.back().node_idx; parse_node_t &node = nodes.at(node_idx); @@ -323,21 +343,38 @@ void parse_ll_t::accept_token_statement(parse_token_t token) break; case parse_keyword_if: - case parse_keyword_else: + symbol_stack_pop_push(symbol_if_header); + break; + case parse_keyword_for: - case parse_keyword_in: + symbol_stack_pop_push(symbol_for_header); + break; + case parse_keyword_while: + symbol_stack_pop_push(symbol_while_header); + break; + case parse_keyword_begin: + symbol_stack_pop_push(symbol_begin_header); + break; + case parse_keyword_function: + symbol_stack_pop_push(symbol_function_header); + break; + + case parse_keyword_else: case parse_keyword_switch: symbol_stack_pop_push(symbol_block_statement); - assert(0 && "Need assignment"); + fprintf(stderr, "Unimplemented type\n"); + PARSER_DIE(); break; case 
parse_keyword_end: // TODO break; - + + // 'in' is only special within a for_header + case parse_keyword_in: case parse_keyword_none: case parse_keyword_command: case parse_keyword_builtin: @@ -347,12 +384,16 @@ void parse_ll_t::accept_token_statement(parse_token_t token) } break; + case parse_token_type_end: + // Empty line, or just a semicolon + symbol_stack_pop_push(parse_token_type_end); + break; + case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_background: - case parse_token_type_end: case parse_token_type_terminate: - parse_error(L"command", token); + parse_error(L"statement", token); break; default: @@ -553,6 +594,10 @@ bool parse_ll_t::top_node_match_token(parse_token_t token) void parse_ll_t::accept_token(parse_token_t token) { + if (1) + { + fprintf(stderr, "Accept token of type %ls\n", token_type_description(token.type).c_str()); + } PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); PARSE_ASSERT(! symbol_stack.empty()); bool consumed = false; @@ -628,7 +673,7 @@ void parse_ll_t::accept_token(parse_token_t token) break; default: - fprintf(stderr, "Bailing with token type %d\n", (int)token.type); + fprintf(stderr, "Bailing with token type %ls\n", token_type_description(token.type).c_str()); break; } } diff --git a/parse_tree.h b/parse_tree.h index 271f8cf80..b95e371a6 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -136,9 +136,9 @@ class parse_node_t statement_list = | statement statement_list -# A statement is a normal job, or an if / while / and etc. +# A statement is a normal job, or an if / while / and etc, or just a nothing (i.e. 
newline) - statement = boolean_statement | block_statement | decorated_statement + statement = boolean_statement | block_statement | decorated_statement | # A block is a conditional, loop, or begin/end From d840643cb33a12ea9433b12fe7443e15db2df75c Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 23 Jun 2013 02:09:46 -0700 Subject: [PATCH 010/108] More work on new parser --- parse_exec.cpp | 328 ++++++++++++++++++++++++++++++++++++------------- parse_tree.cpp | 173 +++++++++++++++++++------- parse_tree.h | 43 +++++-- 3 files changed, 403 insertions(+), 141 deletions(-) diff --git a/parse_exec.cpp b/parse_exec.cpp index 6ab90117e..88fdd00a6 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -24,10 +24,18 @@ struct exec_argument_t node_offset_t parse_node_idx; sanity_id_t command_sanity_id; }; +typedef std::vector exec_argument_list_t; struct exec_redirection_t { + node_offset_t parse_node_idx; +}; +typedef std::vector exec_redirection_list_t; +struct exec_arguments_and_redirections_t +{ + exec_argument_list_t arguments; + exec_redirection_list_t redirections; }; struct exec_basic_statement_t @@ -35,6 +43,9 @@ struct exec_basic_statement_t // Node containing the command node_offset_t command_idx; + // Arguments + exec_arguments_and_redirections_t arguments_and_redirections; + // Decoration enum { @@ -43,8 +54,6 @@ struct exec_basic_statement_t decoration_builtin } decoration; - std::vector arguments; - std::vector redirections; uint16_t sanity_id; exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) @@ -70,10 +79,48 @@ struct exec_basic_statement_t PARSER_DIE(); break; } - + } + + const exec_argument_list_t &arguments() const + { + return arguments_and_redirections.arguments; + } + + const exec_redirection_list_t &redirections() const + { + return arguments_and_redirections.redirections; } }; +struct exec_block_statement_t +{ + // Arguments + exec_arguments_and_redirections_t arguments_and_redirections; + + const exec_argument_list_t 
&arguments() const + { + return arguments_and_redirections.arguments; + } + + const exec_redirection_list_t &redirections() const + { + return arguments_and_redirections.redirections; + } + +}; + +struct exec_job_t +{ + // List of statements (separated with pipes) + std::vector statements; + + void add_statement(const exec_basic_statement_t &statement) + { + statements.push_back(statement); + } +}; + + class parse_exec_t { parse_node_tree_t parse_tree; @@ -85,6 +132,9 @@ class parse_exec_t /* The stack of nodes as we execute them */ std::vector exec_nodes; + /* The stack of jobs being built */ + std::vector assembling_jobs; + /* The stack of commands being built */ std::vector assembling_statements; @@ -95,7 +145,39 @@ class parse_exec_t PARSE_ASSERT(node.source_start + node.source_length <= src.size()); output->assign(src, node.source_start, node.source_length); } + + const parse_node_t &get_child(parse_node_t &parent, node_offset_t which) const + { + return parse_tree.at(parent.child_offset(which)); + } + + void pop_push_specific(node_offset_t idx1, node_offset_t idx2 = NODE_OFFSET_INVALID, node_offset_t idx3 = NODE_OFFSET_INVALID, node_offset_t idx4 = NODE_OFFSET_INVALID, node_offset_t idx5 = NODE_OFFSET_INVALID) + { + PARSE_ASSERT(! exec_nodes.empty()); + // Figure out the offset of the children + exec_node_t &top = exec_nodes.back(); + const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + node_offset_t child_node_idx = parse_node.child_start; + // Remove the top node + exec_nodes.pop_back(); + + // Append the given children, backwards + sanity_id_t command_sanity_id = assembling_statements.empty() ? 
0 : assembling_statements.back().sanity_id; + const node_offset_t idxs[] = {idx5, idx4, idx3, idx2, idx1}; + for (size_t q=0; q < sizeof idxs / sizeof *idxs; q++) + { + node_offset_t idx = idxs[q]; + if (idx != (node_offset_t)(-1)) + { + PARSE_ASSERT(idx < parse_node.child_count); + exec_nodes.push_back(child_node_idx + idx); + exec_nodes.back().command_sanity_id = command_sanity_id; + } + } + + } + void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) { PARSE_ASSERT(! exec_nodes.empty()); @@ -139,90 +221,130 @@ class parse_exec_t pop_push(0, parse_node.child_count); } - void assemble_command(node_offset_t idx) - { - // Set the command for our top basic statement - PARSE_ASSERT(! assembling_statements.empty()); - assembling_statements.back().command_idx = idx; - } - - void assemble_argument_or_redirection(node_offset_t idx) + void assemble_1_argument_or_redirection(node_offset_t idx, exec_arguments_and_redirections_t *output) const { const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(! 
assembling_statements.empty()); - exec_basic_statement_t &statement = assembling_statements.back(); - switch (node.type) + PARSE_ASSERT(output != NULL); + PARSE_ASSERT(node.type == symbol_argument_or_redirection); + PARSE_ASSERT(node.child_count == 1); + node_offset_t child_idx = node.child_offset(0); + const parse_node_t &child = parse_tree.at(child_idx); + switch (child.type) { case parse_token_type_string: // Argument { exec_argument_t arg = exec_argument_t(); arg.parse_node_idx = idx; - arg.command_sanity_id = statement.sanity_id; - statement.arguments.push_back(arg); + output->arguments.push_back(arg); } break; case parse_token_type_redirection: // Redirection + { + exec_redirection_t redirect = exec_redirection_t(); + redirect.parse_node_idx = idx; + output->redirections.push_back(redirect); + } break; default: PARSER_DIE(); break; } - } - void assembly_complete() + void assemble_arguments_and_redirections(node_offset_t start_idx, exec_arguments_and_redirections_t *output) const { - // Finished building a command - PARSE_ASSERT(! 
assembling_statements.empty()); - const exec_basic_statement_t &statement = assembling_statements.back(); + node_offset_t idx = start_idx; + for (;;) + { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.type == symbol_arguments_or_redirections_list); + PARSE_ASSERT(node.child_count == 0 || node.child_count == 2); + if (node.child_count == 0) + { + // No more children + break; + } + else + { + // Skip to next child + assemble_1_argument_or_redirection(node.child_offset(0), output); + idx = node.child_offset(1); + } + } + } + + void assemble_command_for_plain_statement(node_offset_t idx, parse_keyword_t decoration) + { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.type == symbol_plain_statement); + PARSE_ASSERT(node.child_count == 2); + exec_basic_statement_t statement; + statement.set_decoration(decoration); + statement.command_idx = node.child_offset(0); + assemble_arguments_and_redirections(node.child_offset(1), &statement.arguments_and_redirections); + assembling_jobs.back().add_statement(statement); + } + + void job_assembly_complete() + { + PARSE_ASSERT(! 
assembling_jobs.empty()); + const exec_job_t &job = assembling_jobs.back(); if (simulating) { - simulate_statement(statement); + simulate_job(job); } - assembling_statements.pop_back(); + assembling_jobs.pop_back(); } - void simulate_statement(const exec_basic_statement_t &statement) + void simulate_job(const exec_job_t &job) { PARSE_ASSERT(simulating); wcstring line; - switch (statement.decoration) + for (size_t i=0; i < job.statements.size(); i++) { - case exec_basic_statement_t::decoration_builtin: - line.append(L" "); - break; - - case exec_basic_statement_t::decoration_command: - line.append(L" "); - break; + if (i > 0) + { + line.append(L" "); + } + const exec_basic_statement_t &statement = job.statements.at(i); + switch (statement.decoration) + { + case exec_basic_statement_t::decoration_builtin: + line.append(L" "); + break; - default: - break; - } - - wcstring tmp; - get_node_string(statement.command_idx, &tmp); - line.append(L"cmd:"); - line.append(tmp); - for (size_t i=0; i < statement.arguments.size(); i++) - { - const exec_argument_t &arg = statement.arguments.at(i); - get_node_string(arg.parse_node_idx, &tmp); - line.append(L" "); - line.append(L"arg:"); + case exec_basic_statement_t::decoration_command: + line.append(L" "); + break; + + default: + break; + } + + wcstring tmp; + get_node_string(statement.command_idx, &tmp); + line.append(L"cmd:"); line.append(tmp); + for (size_t i=0; i < statement.arguments().size(); i++) + { + const exec_argument_t &arg = statement.arguments().at(i); + get_node_string(arg.parse_node_idx, &tmp); + line.append(L" "); + line.append(L"arg:"); + line.append(tmp); + } } - simulation_result.push_back(line); + simulation_result.push_back(line); } void enter_parse_node(size_t idx); void run_top_node(void); - exec_basic_statement_t *create_basic_statement(void); + exec_job_t *create_job(void); public: parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false) @@ -231,10 +353,10 
@@ class parse_exec_t wcstring simulate(void); }; -exec_basic_statement_t *parse_exec_t::create_basic_statement() +exec_job_t *parse_exec_t::create_job() { - assembling_statements.push_back(exec_basic_statement_t()); - return &assembling_statements.back(); + assembling_jobs.push_back(exec_job_t()); + return &assembling_jobs.back(); } void parse_exec_t::run_top_node() @@ -242,7 +364,7 @@ void parse_exec_t::run_top_node() PARSE_ASSERT(! exec_nodes.empty()); exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - bool log = false; + bool log = true; if (log) { @@ -254,68 +376,102 @@ void parse_exec_t::run_top_node() switch (parse_node.type) { - case symbol_statement_list: + case symbol_job_list: PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); + if (parse_node.child_count == 0) + { + // No more jobs, done + pop(); + } + else if (parse_tree.at(parse_node.child_start + 0).type == parse_token_type_end) + { + // Empty job, so just skip it + pop_push(1, 1); + } + else + { + // Normal job + pop_push(0, 2); + } + break; + + case symbol_job: + { + PARSE_ASSERT(parse_node.child_count == 2); + exec_job_t *job = create_job(); pop_push_all(); break; - - case symbol_statement: - { - PARSE_ASSERT(parse_node.child_count == 1); - // See if we're just an empty statement - const parse_node_t &child = parse_tree.at(parse_node.child_start + 0); - if (child.type == parse_token_type_end) + } + + case symbol_job_continuation: + PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 3); + if (parse_node.child_count == 0) { - // Empty statement + // All done with this job + job_assembly_complete(); pop(); } else { - // We have a statement to execute - pop_push_all(); + // Skip the pipe + pop_push(1, 2); } + break; + + case symbol_statement: + { + PARSE_ASSERT(parse_node.child_count == 1); + pop_push_all(); + break; + } + + case symbol_block_statement: + { + PARSE_ASSERT(parse_node.child_count == 5); + 
pop_push_specific(0, 2, 4); + break; + } + + case symbol_block_header: + { + PARSE_ASSERT(parse_node.child_count == 1); + pop_push_all(); + break; + } + + case symbol_function_header: + { + PARSE_ASSERT(parse_node.child_count == 3); + //pop_push_all(); + pop(); break; } case symbol_decorated_statement: { - PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 ); - exec_basic_statement_t *cmd = create_basic_statement(); - cmd->set_decoration(parse_node.tag); + PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2); - // Push the last node (skip any decoration) - pop_push(parse_node.child_count - 1, 1); + node_offset_t plain_statement_idx = parse_node.child_offset(parse_node.child_count - 1); + parse_keyword_t decoration = static_cast(parse_node.tag); + assemble_command_for_plain_statement(plain_statement_idx, decoration); + pop(); break; } - + + // The following symbols should be handled by their parents, i.e. never pushed on our stack case symbol_plain_statement: - PARSE_ASSERT(parse_node.child_count == 3); - // Extract the command - PARSE_ASSERT(! 
assembling_statements.empty()); - assemble_command(parse_node.child_start + 0); - // Jump to statement list, then terminator - pop_push(1, 2); - break; - case symbol_arguments_or_redirections_list: - PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); - pop_push_all(); - break; - case symbol_argument_or_redirection: - PARSE_ASSERT(parse_node.child_count == 1); - assemble_argument_or_redirection(parse_node.child_start + 0); - pop(); + PARSER_DIE(); break; case parse_token_type_end: PARSE_ASSERT(parse_node.child_count == 0); - assembly_complete(); pop(); break; default: - fprintf(stderr, "Unhandled token type %ld\n", (long)parse_node.type); + fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), top.parse_node_idx); PARSER_DIE(); break; diff --git a/parse_tree.cpp b/parse_tree.cpp index d2c34b3bb..019afc53b 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -38,13 +38,16 @@ wcstring parse_error_t::describe(const wcstring &src) const return result; } -static wcstring token_type_description(parse_token_type_t type) +wcstring token_type_description(parse_token_type_t type) { switch (type) { case token_type_invalid: return L"invalid"; - case symbol_statement_list: return L"statement_list"; + case symbol_job_list: return L"job_list"; + case symbol_job: return L"job"; + case symbol_job_continuation: return L"job_continuation"; + case symbol_statement: return L"statement"; case symbol_block_statement: return L"block_statement"; case symbol_block_header: return L"block_header"; @@ -70,6 +73,30 @@ static wcstring token_type_description(parse_token_type_t type) } } +wcstring keyword_description(parse_keyword_t k) +{ + switch (k) + { + case parse_keyword_none: return L"none"; + case parse_keyword_if: return L"if"; + case parse_keyword_else: return L"else"; + case parse_keyword_for: return L"for"; + case parse_keyword_in: return L"in"; + case parse_keyword_while: return L"while"; + case 
parse_keyword_begin: return L"begin"; + case parse_keyword_function: return L"function"; + case parse_keyword_switch: return L"switch"; + case parse_keyword_end: return L"end"; + case parse_keyword_and: return L"and"; + case parse_keyword_or: return L"or"; + case parse_keyword_not: return L"not"; + case parse_keyword_command: return L"command"; + case parse_keyword_builtin: return L"builtin"; + default: + return format_string(L"Unknown keyword type %ld", static_cast(k)); + } +} + wcstring parse_node_t::describe(void) const { wcstring result = token_type_description(type); @@ -83,8 +110,20 @@ struct parse_token_t enum parse_keyword_t keyword; // Any keyword represented by this parser size_t source_start; size_t source_length; + + wcstring describe() const; }; +wcstring parse_token_t::describe(void) const +{ + wcstring result = token_type_description(type); + if (keyword != parse_keyword_none) + { + append_format(result, L" <%ls>", keyword_description(keyword).c_str()); + } + return result; +} + // Convert from tokenizer_t's token type to our token static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type) { @@ -124,12 +163,12 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & const size_t spacesPerIndent = 2; // unindent statement lists by 1 to flatten them - if (node.type == symbol_statement_list || node.type == symbol_arguments_or_redirections_list) + if (node.type == symbol_job_list || node.type == symbol_arguments_or_redirections_list) { if (indent > 0) indent -= 1; } - append_format(*result, L"%2lu ", *line); + append_format(*result, L"%2lu - %l2u ", *line, start); result->append(indent * spacesPerIndent, L' ');; result->append(node.describe()); if (node.child_count > 0) @@ -190,17 +229,19 @@ class parse_ll_t parse_ll_t() : fatal_errored(false) { // initial node - parse_stack_element_t elem = symbol_statement_list; + parse_stack_element_t elem = symbol_job_list; elem.node_idx = 0; 
symbol_stack.push_back(elem); // goal token - nodes.push_back(parse_node_t(symbol_statement_list)); + nodes.push_back(parse_node_t(symbol_job_list)); } bool top_node_match_token(parse_token_t token); // implementation of certain parser constructions - void accept_token(parse_token_t token); - void accept_token_statement_list(parse_token_t token); + void accept_token(parse_token_t token, const wcstring &src); + void accept_token_job_list(parse_token_t token); + void accept_token_job(parse_token_t token); + void accept_token_job_continuation(parse_token_t token); void accept_token_statement(parse_token_t token); void accept_token_block_header(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); @@ -289,7 +330,7 @@ class parse_ll_t void parse_ll_t::token_unhandled(parse_token_t token, const char *function) { - fprintf(stderr, "Unhandled token with type %d in function %s\n", (int)token.type, function); + fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function); PARSER_DIE(); } @@ -304,17 +345,33 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } -void parse_ll_t::accept_token_statement_list(parse_token_t token) +void parse_ll_t::accept_token_job_list(parse_token_t token) { - PARSE_ASSERT(stack_top_type() == symbol_statement_list); + PARSE_ASSERT(stack_top_type() == symbol_job_list); switch (token.type) { case parse_token_type_string: + // 'end' is special + if (token.keyword == parse_keyword_end) + { + // End this job list + symbol_stack_pop_push(); + } + else + { + // Normal string + symbol_stack_pop_push(symbol_job, symbol_job_list); + } + break; + case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_background: + symbol_stack_pop_push(symbol_job, symbol_job_list); + break; + case parse_token_type_end: - symbol_stack_pop_push(symbol_statement, symbol_statement_list); + 
symbol_stack_pop_push(parse_token_type_end, symbol_job_list); break; case parse_token_type_terminate: @@ -328,6 +385,30 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token) } } +void parse_ll_t::accept_token_job(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_job); + symbol_stack_pop_push(symbol_statement, symbol_job_continuation); +} + +void parse_ll_t::accept_token_job_continuation(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_job_continuation); + switch (token.type) + { + case parse_token_type_pipe: + // Pipe, continuation + symbol_stack_pop_push(parse_token_type_pipe, symbol_statement, symbol_job_continuation); + break; + + default: + // Not a pipe, no job continuation + symbol_stack_pop_push(); + break; + } +} + + void parse_ll_t::accept_token_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_statement); @@ -341,25 +422,13 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_not: symbol_stack_pop_push(symbol_boolean_statement); break; - + case parse_keyword_if: - symbol_stack_pop_push(symbol_if_header); - break; - case parse_keyword_for: - symbol_stack_pop_push(symbol_for_header); - break; - case parse_keyword_while: - symbol_stack_pop_push(symbol_while_header); - break; - - case parse_keyword_begin: - symbol_stack_pop_push(symbol_begin_header); - break; - case parse_keyword_function: - symbol_stack_pop_push(symbol_function_header); + case parse_keyword_begin: + symbol_stack_pop_push(symbol_block_statement); break; case parse_keyword_else: @@ -370,7 +439,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) break; case parse_keyword_end: - // TODO + PARSER_DIE(); //todo break; // 'in' is only special within a for_header @@ -384,11 +453,6 @@ void parse_ll_t::accept_token_statement(parse_token_t token) } break; - case parse_token_type_end: - // Empty line, or just a semicolon - symbol_stack_pop_push(parse_token_type_end); - break; - case 
parse_token_type_pipe: case parse_token_type_redirection: case parse_token_background: @@ -415,7 +479,7 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) break; case parse_keyword_else: - //todo + PARSER_DIE(); //todo break; case parse_keyword_for: @@ -508,7 +572,7 @@ void parse_ll_t::accept_token_decorated_statement(parse_token_t token) void parse_ll_t::accept_token_plain_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_plain_statement); - symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); + symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list); } void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) @@ -588,15 +652,23 @@ bool parse_ll_t::top_node_match_token(parse_token_t token) symbol_stack.pop_back(); result = true; } + else if (token.type == parse_token_type_pipe) + { + // Pipes are primitive + symbol_stack.pop_back(); + result = true; + } } return result; } -void parse_ll_t::accept_token(parse_token_t token) +void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) { - if (1) + bool logit = true; + if (logit) { - fprintf(stderr, "Accept token of type %ls\n", token_type_description(token.type).c_str()); + const wcstring txt = wcstring(src, token.source_start, token.source_length); + fprintf(stderr, "Accept token %ls\n", token.describe().c_str()); } PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); PARSE_ASSERT(! 
symbol_stack.empty()); @@ -605,6 +677,10 @@ void parse_ll_t::accept_token(parse_token_t token) { if (top_node_match_token(token)) { + if (logit) + { + fprintf(stderr, "Consumed token %ls\n", token.describe().c_str()); + } consumed = true; break; } @@ -612,16 +688,24 @@ void parse_ll_t::accept_token(parse_token_t token) switch (stack_top_type()) { /* Symbols */ - case symbol_statement_list: - accept_token_statement_list(token); + case symbol_job_list: + accept_token_job_list(token); break; + case symbol_job: + accept_token_job(token); + break; + + case symbol_job_continuation: + accept_token_job_continuation(token); + break; + case symbol_statement: accept_token_statement(token); break; case symbol_block_statement: - symbol_stack_pop_push(symbol_block_header, symbol_statement_list, parse_keyword_end, symbol_arguments_or_redirections_list); + symbol_stack_pop_push(symbol_block_header, parse_token_type_end, symbol_job_list, parse_keyword_end, symbol_arguments_or_redirections_list); break; case symbol_block_header: @@ -644,7 +728,7 @@ void parse_ll_t::accept_token(parse_token_t token) break; case symbol_function_header: - symbol_stack_pop_push(parse_keyword_function, symbol_arguments_or_redirections_list, parse_token_type_end); + symbol_stack_pop_push(parse_keyword_function, parse_token_type_string, symbol_arguments_or_redirections_list); break; case symbol_boolean_statement: @@ -673,7 +757,8 @@ void parse_ll_t::accept_token(parse_token_t token) break; default: - fprintf(stderr, "Bailing with token type %ls\n", token_type_description(token.type).c_str()); + fprintf(stderr, "Bailing with token type %ls and stack top %ls\n", token_type_description(token.type).c_str(), token_type_description(stack_top_type()).c_str()); + exit_without_destructors(EXIT_FAILURE); break; } } @@ -741,7 +826,7 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_ token.source_start = (size_t)tok_start; token.source_length = wcslen(tok_txt); token.keyword = 
keyword_for_token(tok_type, tok_txt); - this->parser->accept_token(token); + this->parser->accept_token(token, str); } wcstring result = dump_tree(this->parser->nodes, str); diff --git a/parse_tree.h b/parse_tree.h index b95e371a6..49ec6a132 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -20,6 +20,7 @@ class parse_node_t; typedef std::vector parse_node_tree_t; typedef size_t node_offset_t; +#define NODE_OFFSET_INVALID (static_cast(-1)) struct parse_error_t { @@ -51,7 +52,9 @@ enum parse_token_type_t token_type_invalid, // Non-terminal tokens - symbol_statement_list, + symbol_job_list, + symbol_job, + symbol_job_continuation, symbol_statement, symbol_block_statement, symbol_block_header, @@ -96,6 +99,9 @@ enum parse_keyword_t parse_keyword_builtin }; +wcstring token_type_description(parse_token_type_t type); +wcstring keyword_description(parse_keyword_t type); + /** Base class for nodes of a parse tree */ class parse_node_t { @@ -125,31 +131,46 @@ class parse_node_t explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) { } + + node_offset_t child_offset(node_offset_t which) const + { + PARSE_ASSERT(which < child_count); + return child_start + which; + } }; /* Fish grammar: -# A statement_list is a list of statements, separated by semicolons or newlines +# A job_list is a list of jobs, separated by semicolons or newlines - statement_list = | - statement statement_list + job_list = | + job_list | + job job_list -# A statement is a normal job, or an if / while / and etc, or just a nothing (i.e. newline) +# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). 
To represent "non-empty", we require a statement, followed by a possibly empty job_continuation - statement = boolean_statement | block_statement | decorated_statement | + job = statement job_continuation + job_continuation = | + statement job_continuation + +# A statement is a normal command, or an if / while / and etc + + statement = boolean_statement | block_statement | decorated_statement # A block is a conditional, loop, or begin/end - block_statement = block_header statement_list END arguments_or_redirections_list + block_statement = block_header STATEMENT_TERMINATOR job_list arguments_or_redirections_list block_header = if_header | for_header | while_header | function_header | begin_header if_header = IF statement - for_header = FOR var_name IN arguments_or_redirections_list STATEMENT_TERMINATOR + for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement - begin_header = BEGIN STATEMENT_TERMINATOR - function_header = FUNCTION arguments_or_redirections_list STATEMENT_TERMINATOR + begin_header = BEGIN + function_header = FUNCTION function_name arguments_or_redirections_list +#(TODO: functions should not support taking redirections in their arguments) + # A boolean statement is AND or OR or NOT boolean_statement = AND statement | OR statement | NOT statement @@ -157,7 +178,7 @@ class parse_node_t # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = command arguments_or_redirections_list terminator + plain_statement = command arguments_or_redirections_list arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list From 66af0c1a53b8108e6509a24921a068870120244c Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 24 Jun 2013 12:33:40 -0700 Subject: [PATCH 011/108] More work on the AST. block statements worked out a bit more. 
--- builtin.cpp | 112 ++++++++++++++- parse_exec.cpp | 366 ++++++++++++++++++++----------------------------- parse_exec.h | 125 ++++++++++++++++- parse_tree.cpp | 2 + parse_tree.h | 8 +- proc.h | 6 - 6 files changed, 389 insertions(+), 230 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index fe09f4f68..d77d6361b 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3940,6 +3940,105 @@ static int builtin_history(parser_t &parser, wchar_t **argv) return STATUS_BUILTIN_ERROR; } +#pragma mark Simulator + +struct parse_execution_simulator_t : public parse_execution_visitor_t +{ + wcstring_list_t result; + + wcstring &back() + { + assert(! result.empty()); + return result.back(); + } + + void append_src(node_offset_t idx) + { + wcstring tmp; + context->get_source(idx, &tmp); + back().append(tmp); + } + + void append(const wchar_t *s) + { + back().append(s); + } + + bool enter_job_list(void) + { + return true; + } + + bool enter_job(void) + { + result.resize(result.size() + 1); + return true; + } + + void visit_statement(void) + { + } + + virtual void visit_boolean_statement(void) + { + } + + virtual void enter_if_header(const if_header_t &statement) + { + } + + virtual void exit_if_header(const if_header_t &statement) + { + append_format(back(), L"\nIF successful jump to %lu", (unsigned long)statement.body); + } + + void visit_basic_statement(const exec_basic_statement_t &statement) + { + wcstring &line = this->back(); + if (! 
line.empty()) + { + line.append(L" "); + } + switch (statement.decoration) + { + case exec_basic_statement_t::decoration_builtin: + line.append(L" "); + break; + + case exec_basic_statement_t::decoration_command: + line.append(L" "); + break; + + default: + break; + } + + line.append(L"cmd:"); + this->append_src(statement.command_idx); + for (size_t i=0; i < statement.arguments().size(); i++) + { + const exec_argument_t &arg = statement.arguments().at(i); + append(L" "); + append(L"arg:"); + append_src(arg.parse_node_idx); + } + } + + void visit_function(const exec_function_header_t &function) { + wcstring &line = this->back(); + line.append(L"define function: "); + wcstring tmp; + context->get_source(function.name_idx, &tmp); + line.append(tmp); + } + + void exit_job_list(void) + { + } +}; + + + int builtin_parse(parser_t &parser, wchar_t **argv) { std::vector txt; @@ -3969,10 +4068,17 @@ int builtin_parse(parser_t &parser, wchar_t **argv) else { parse_execution_context_t ctx(parse_tree, src); + parse_execution_simulator_t sim; + sim.context = &ctx; + while (ctx.visit_next_node(&sim)) + { + } stdout_buffer.append(L"Simulating execution:\n"); - wcstring simulation = ctx.simulate(); - stdout_buffer.append(simulation); - stdout_buffer.push_back(L'\n'); + for (size_t i=0; i < sim.result.size(); i++) + { + stdout_buffer.append(sim.result.at(i)); + stdout_buffer.push_back(L'\n'); + } } } return STATUS_BUILTIN_OK; diff --git a/parse_exec.cpp b/parse_exec.cpp index 88fdd00a6..f12f757a7 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -1,124 +1,25 @@ #include "parse_exec.h" #include -typedef uint16_t sanity_id_t; -static sanity_id_t next_sanity_id() -{ - static sanity_id_t last_sanity_id; - return ++last_sanity_id; -} - struct exec_node_t { node_offset_t parse_node_idx; - sanity_id_t command_sanity_id; + node_offset_t body_parse_node_idx; + bool visited; - exec_node_t(size_t pni) : parse_node_idx(pni) + explicit exec_node_t(node_offset_t pni) : parse_node_idx(pni), 
body_parse_node_idx(NODE_OFFSET_INVALID), visited(false) { } + explicit exec_node_t(node_offset_t pni, node_offset_t body_pni) : parse_node_idx(pni), body_parse_node_idx(body_pni), visited(false) + { + } }; -struct exec_argument_t +exec_basic_statement_t::exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) { - node_offset_t parse_node_idx; - sanity_id_t command_sanity_id; -}; -typedef std::vector exec_argument_list_t; - -struct exec_redirection_t -{ - node_offset_t parse_node_idx; -}; -typedef std::vector exec_redirection_list_t; - -struct exec_arguments_and_redirections_t -{ - exec_argument_list_t arguments; - exec_redirection_list_t redirections; -}; - -struct exec_basic_statement_t -{ - // Node containing the command - node_offset_t command_idx; - // Arguments - exec_arguments_and_redirections_t arguments_and_redirections; - - // Decoration - enum - { - decoration_plain, - decoration_command, - decoration_builtin - } decoration; - - uint16_t sanity_id; - - exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) - { - sanity_id = next_sanity_id(); - } - - void set_decoration(uint32_t k) - { - PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin); - switch (k) - { - case parse_keyword_none: - decoration = decoration_plain; - break; - case parse_keyword_command: - decoration = decoration_command; - break; - case parse_keyword_builtin: - decoration = decoration_builtin; - break; - default: - PARSER_DIE(); - break; - } - } - - const exec_argument_list_t &arguments() const - { - return arguments_and_redirections.arguments; - } - - const exec_redirection_list_t &redirections() const - { - return arguments_and_redirections.redirections; - } -}; - -struct exec_block_statement_t -{ - // Arguments - exec_arguments_and_redirections_t arguments_and_redirections; - - const exec_argument_list_t &arguments() const - { - return arguments_and_redirections.arguments; - } - - const 
exec_redirection_list_t &redirections() const - { - return arguments_and_redirections.redirections; - } - -}; - -struct exec_job_t -{ - // List of statements (separated with pipes) - std::vector statements; - - void add_statement(const exec_basic_statement_t &statement) - { - statements.push_back(statement); - } -}; +} class parse_exec_t @@ -126,26 +27,15 @@ class parse_exec_t parse_node_tree_t parse_tree; wcstring src; - bool simulating; - wcstring_list_t simulation_result; - /* The stack of nodes as we execute them */ std::vector exec_nodes; - /* The stack of jobs being built */ - std::vector assembling_jobs; - /* The stack of commands being built */ std::vector assembling_statements; - void get_node_string(node_offset_t idx, wcstring *output) const - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.source_start <= src.size()); - PARSE_ASSERT(node.source_start + node.source_length <= src.size()); - output->assign(src, node.source_start, node.source_length); - } - + /* Current visitor (very transient) */ + struct parse_execution_visitor_t * visitor; + const parse_node_t &get_child(parse_node_t &parent, node_offset_t which) const { return parse_tree.at(parent.child_offset(which)); @@ -163,7 +53,6 @@ class parse_exec_t exec_nodes.pop_back(); // Append the given children, backwards - sanity_id_t command_sanity_id = assembling_statements.empty() ? 
0 : assembling_statements.back().sanity_id; const node_offset_t idxs[] = {idx5, idx4, idx3, idx2, idx1}; for (size_t q=0; q < sizeof idxs / sizeof *idxs; q++) { @@ -171,13 +60,23 @@ class parse_exec_t if (idx != (node_offset_t)(-1)) { PARSE_ASSERT(idx < parse_node.child_count); - exec_nodes.push_back(child_node_idx + idx); - exec_nodes.back().command_sanity_id = command_sanity_id; + exec_nodes.push_back(exec_node_t(child_node_idx + idx)); } } } + void push(node_offset_t global_idx) + { + exec_nodes.push_back(exec_node_t(global_idx)); + } + + void push(const exec_node_t &node) + { + exec_nodes.push_back(node); + } + + void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) { PARSE_ASSERT(! exec_nodes.empty()); @@ -198,12 +97,10 @@ class parse_exec_t exec_nodes.pop_back(); // Append the given children, backwards - sanity_id_t command_sanity_id = assembling_statements.empty() ? 0 : assembling_statements.back().sanity_id; node_offset_t cursor = child_count; while (cursor--) { - exec_nodes.push_back(child_node_idx + cursor); - exec_nodes.back().command_sanity_id = command_sanity_id; + exec_nodes.push_back(exec_node_t(child_node_idx + cursor)); } } } @@ -235,7 +132,7 @@ class parse_exec_t // Argument { exec_argument_t arg = exec_argument_t(); - arg.parse_node_idx = idx; + arg.parse_node_idx = child_idx; output->arguments.push_back(arg); } break; @@ -244,7 +141,7 @@ class parse_exec_t // Redirection { exec_redirection_t redirect = exec_redirection_t(); - redirect.parse_node_idx = idx; + redirect.parse_node_idx = child_idx; output->redirections.push_back(redirect); } break; @@ -286,84 +183,106 @@ class parse_exec_t statement.set_decoration(decoration); statement.command_idx = node.child_offset(0); assemble_arguments_and_redirections(node.child_offset(1), &statement.arguments_and_redirections); - assembling_jobs.back().add_statement(statement); + visitor->visit_basic_statement(statement); } - void job_assembly_complete() + void 
assemble_block_statement(node_offset_t parse_node_idx) { - PARSE_ASSERT(! assembling_jobs.empty()); - const exec_job_t &job = assembling_jobs.back(); - if (simulating) - { - simulate_job(job); - } - assembling_jobs.pop_back(); + const parse_node_t &node = parse_tree.at(parse_node_idx); + PARSE_ASSERT(node.type == symbol_block_statement); + PARSE_ASSERT(node.child_count == 5); + + // Fetch arguments and redirections. These ought to be evaluated before the job list + exec_block_statement_t statement; + assemble_arguments_and_redirections(node.child_offset(4), &statement.arguments_and_redirections); + + // Generic visit + visitor->enter_block_statement(statement); + + // Dig into the header to discover the type + const parse_node_t &header_parent = parse_tree.at(node.child_offset(0)); + PARSE_ASSERT(header_parent.type == symbol_block_header); + PARSE_ASSERT(header_parent.child_count == 1); + const node_offset_t header_idx = header_parent.child_offset(0); + + // Fetch body (job list) + node_offset_t body_idx = node.child_offset(2); + PARSE_ASSERT(parse_tree.at(body_idx).type == symbol_job_list); + + pop(); + push(exec_node_t(header_idx, body_idx)); } - void simulate_job(const exec_job_t &job) + void assemble_function_header(const exec_node_t &exec_node, const parse_node_t &header) { - PARSE_ASSERT(simulating); - wcstring line; - for (size_t i=0; i < job.statements.size(); i++) + PARSE_ASSERT(header.type == symbol_function_header); + PARSE_ASSERT(&header == &parse_tree.at(exec_node.parse_node_idx)); + PARSE_ASSERT(exec_node.body_parse_node_idx != NODE_OFFSET_INVALID); + exec_function_header_t function_info; + function_info.name_idx = header.child_offset(1); + function_info.body_idx = exec_node.body_parse_node_idx; + assemble_arguments_and_redirections(header.child_offset(2), &function_info.arguments_and_redirections); + visitor->visit_function(function_info); + + // Always pop + pop(); + } + + void assemble_if_header(exec_node_t &exec_node, const parse_node_t &header) + 
{ + PARSE_ASSERT(header.type == symbol_if_header); + PARSE_ASSERT(&header == &parse_tree.at(exec_node.parse_node_idx)); + PARSE_ASSERT(exec_node.body_parse_node_idx != NODE_OFFSET_INVALID); + if_header_t if_header; + if_header.body = exec_node.body_parse_node_idx; + // We may hit this on enter or exit + if (! exec_node.visited) { - if (i > 0) - { - line.append(L" "); - } - const exec_basic_statement_t &statement = job.statements.at(i); - switch (statement.decoration) - { - case exec_basic_statement_t::decoration_builtin: - line.append(L" "); - break; - - case exec_basic_statement_t::decoration_command: - line.append(L" "); - break; - - default: - break; - } - - wcstring tmp; - get_node_string(statement.command_idx, &tmp); - line.append(L"cmd:"); - line.append(tmp); - for (size_t i=0; i < statement.arguments().size(); i++) - { - const exec_argument_t &arg = statement.arguments().at(i); - get_node_string(arg.parse_node_idx, &tmp); - line.append(L" "); - line.append(L"arg:"); - line.append(tmp); - } + // Entry. Don't pop the header - just push the job. We'll pop it on exit. + exec_node.visited = true; + visitor->enter_if_header(if_header); + push(header.child_offset(1)); } - simulation_result.push_back(line); + else + { + // Exit. Pop it. 
+ visitor->exit_if_header(if_header); + pop(); + } + } void enter_parse_node(size_t idx); void run_top_node(void); - exec_job_t *create_job(void); public: - parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false) + + void get_node_string(node_offset_t idx, wcstring *output) const { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.source_start <= src.size()); + PARSE_ASSERT(node.source_start + node.source_length <= src.size()); + output->assign(src, node.source_start, node.source_length); + } + + bool visit_next_node(parse_execution_visitor_t *v); + + parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), visitor(NULL) + { + if (! parse_tree.empty()) + { + exec_nodes.push_back(exec_node_t(0)); + } } - wcstring simulate(void); }; -exec_job_t *parse_exec_t::create_job() -{ - assembling_jobs.push_back(exec_job_t()); - return &assembling_jobs.back(); -} - void parse_exec_t::run_top_node() { PARSE_ASSERT(! 
exec_nodes.empty()); - exec_node_t &top = exec_nodes.back(); - const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + exec_node_t &exec_node = exec_nodes.back(); + const node_offset_t parse_node_idx = exec_node.parse_node_idx; + const parse_node_t &parse_node = parse_tree.at(exec_node.parse_node_idx); bool log = true; if (log) @@ -381,6 +300,7 @@ void parse_exec_t::run_top_node() if (parse_node.child_count == 0) { // No more jobs, done + visitor->exit_job_list(); pop(); } else if (parse_tree.at(parse_node.child_start + 0).type == parse_token_type_end) @@ -391,6 +311,7 @@ void parse_exec_t::run_top_node() else { // Normal job + visitor->enter_job_list(); pop_push(0, 2); } break; @@ -398,7 +319,7 @@ void parse_exec_t::run_top_node() case symbol_job: { PARSE_ASSERT(parse_node.child_count == 2); - exec_job_t *job = create_job(); + visitor->enter_job(); pop_push_all(); break; } @@ -408,7 +329,7 @@ void parse_exec_t::run_top_node() if (parse_node.child_count == 0) { // All done with this job - job_assembly_complete(); + visitor->exit_job(); pop(); } else @@ -428,7 +349,7 @@ void parse_exec_t::run_top_node() case symbol_block_statement: { PARSE_ASSERT(parse_node.child_count == 5); - pop_push_specific(0, 2, 4); + assemble_block_statement(parse_node_idx); break; } @@ -442,8 +363,14 @@ void parse_exec_t::run_top_node() case symbol_function_header: { PARSE_ASSERT(parse_node.child_count == 3); - //pop_push_all(); - pop(); + assemble_function_header(exec_node, parse_node); + break; + } + + case symbol_if_header: + { + PARSE_ASSERT(parse_node.child_count == 2); + assemble_if_header(exec_node, parse_node); break; } @@ -462,6 +389,7 @@ void parse_exec_t::run_top_node() case symbol_plain_statement: case symbol_arguments_or_redirections_list: case symbol_argument_or_redirection: + fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); PARSER_DIE(); break; @@ -471,13 +399,28 @@ void 
parse_exec_t::run_top_node() break; default: - fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), top.parse_node_idx); + fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); PARSER_DIE(); break; } } +bool parse_exec_t::visit_next_node(parse_execution_visitor_t *v) +{ + PARSE_ASSERT(v != NULL); + PARSE_ASSERT(visitor == NULL); + if (exec_nodes.empty()) + { + return false; + } + + visitor = v; + run_top_node(); + visitor = NULL; + return true; +} + void parse_exec_t::enter_parse_node(size_t idx) { PARSE_ASSERT(idx < parse_tree.size()); @@ -485,36 +428,27 @@ void parse_exec_t::enter_parse_node(size_t idx) exec_nodes.push_back(exec); } -wcstring parse_exec_t::simulate(void) -{ - if (parse_tree.empty()) - return L"(empty!)"; - - PARSE_ASSERT(exec_nodes.empty()); - simulating = true; - - enter_parse_node(0); - while (! exec_nodes.empty()) - { - run_top_node(); - } - - wcstring result; - for (size_t i=0; i < simulation_result.size(); i++) - { - result.append(simulation_result.at(i)); - result.append(L"\n"); - } - - return result; -} parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) { ctx = new parse_exec_t(n, s); } -wcstring parse_execution_context_t::simulate(void) +parse_execution_context_t::~parse_execution_context_t() { - return ctx->simulate(); + delete ctx; } + +bool parse_execution_context_t::visit_next_node(parse_execution_visitor_t *visitor) +{ + return ctx->visit_next_node(visitor); +} + +void parse_execution_context_t::get_source(node_offset_t idx, wcstring *result) const +{ + return ctx->get_node_string(idx, result); +} + + + + diff --git a/parse_exec.h b/parse_exec.h index 635ebb836..533051993 100644 --- a/parse_exec.h +++ b/parse_exec.h @@ -8,15 +8,136 @@ #include "parse_tree.h" +struct parse_execution_visitor_t; class parse_exec_t; class parse_execution_context_t { - 
parse_exec_t *ctx; + parse_exec_t *ctx; //owned public: parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s); - wcstring simulate(void); + ~parse_execution_context_t(); + + bool visit_next_node(parse_execution_visitor_t *visitor); + + // Gets the source for a node at a given index + void get_source(node_offset_t idx, wcstring *result) const; }; +struct exec_argument_t +{ + node_offset_t parse_node_idx; +}; +typedef std::vector exec_argument_list_t; + +struct exec_redirection_t +{ + node_offset_t parse_node_idx; +}; +typedef std::vector exec_redirection_list_t; + +struct exec_arguments_and_redirections_t +{ + exec_argument_list_t arguments; + exec_redirection_list_t redirections; +}; + +struct exec_basic_statement_t +{ + // Node containing the command + node_offset_t command_idx; + + // Arguments + exec_arguments_and_redirections_t arguments_and_redirections; + + // Decoration + enum + { + decoration_plain, + decoration_command, + decoration_builtin + } decoration; + + exec_basic_statement_t(); + + void set_decoration(uint32_t k) + { + PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin); + switch (k) + { + case parse_keyword_none: + decoration = decoration_plain; + break; + case parse_keyword_command: + decoration = decoration_command; + break; + case parse_keyword_builtin: + decoration = decoration_builtin; + break; + default: + PARSER_DIE(); + break; + } + } + + const exec_argument_list_t &arguments() const + { + return arguments_and_redirections.arguments; + } + + const exec_redirection_list_t &redirections() const + { + return arguments_and_redirections.redirections; + } +}; + +struct exec_function_header_t +{ + // Node containing the function name + node_offset_t name_idx; + + // Node containing the function body + node_offset_t body_idx; + + // Arguments + exec_arguments_and_redirections_t arguments_and_redirections; +}; + +struct exec_block_statement_t +{ + // Arguments + 
exec_arguments_and_redirections_t arguments_and_redirections; +}; + +struct if_header_t +{ + // Node containing the body of the if statement + node_offset_t body; +}; + +struct parse_execution_visitor_t +{ + node_offset_t node_idx; + parse_execution_context_t *context; + + parse_execution_visitor_t() : node_idx(0), context(NULL) + { + } + + virtual bool enter_job_list(void) { return true; } + virtual bool enter_job(void) { return true; } + virtual void visit_statement(void) { } + virtual void visit_function(const exec_function_header_t &function) { } + virtual bool enter_block_statement(const exec_block_statement_t &statement) { return true; } + + virtual void enter_if_header(const if_header_t &statement) { } + virtual void exit_if_header(const if_header_t &statement) { } + + virtual void visit_boolean_statement(void) { } + virtual void visit_basic_statement(const exec_basic_statement_t &statement) { } + virtual void exit_job(void) { } + virtual void exit_job_list(void) { } +}; + #endif diff --git a/parse_tree.cpp b/parse_tree.cpp index 019afc53b..6f6982e19 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -244,6 +244,7 @@ class parse_ll_t void accept_token_job_continuation(parse_token_t token); void accept_token_statement(parse_token_t token); void accept_token_block_header(parse_token_t token); + void accept_token_if_header(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); void accept_token_decorated_statement(parse_token_t token); void accept_token_plain_statement(parse_token_t token); @@ -713,6 +714,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_if_header: + symbol_stack_pop_push(parse_keyword_if, symbol_job); break; case symbol_for_header: diff --git a/parse_tree.h b/parse_tree.h index 49ec6a132..29365209c 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -18,7 +18,7 @@ #define PARSER_DIE() assert(0) class parse_node_t; -typedef std::vector parse_node_tree_t; +class 
parse_node_tree_t; typedef size_t node_offset_t; #define NODE_OFFSET_INVALID (static_cast(-1)) @@ -123,7 +123,6 @@ class parse_node_t /* Type-dependent data */ uint32_t tag; - /* Description */ wcstring describe(void) const; @@ -139,6 +138,9 @@ class parse_node_t } }; +class parse_node_tree_t : public std::vector +{ +}; /* Fish grammar: @@ -163,7 +165,7 @@ class parse_node_t block_statement = block_header STATEMENT_TERMINATOR job_list arguments_or_redirections_list block_header = if_header | for_header | while_header | function_header | begin_header - if_header = IF statement + if_header = IF job for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement begin_header = BEGIN diff --git a/proc.h b/proc.h index 163831116..1d4d210d4 100644 --- a/proc.h +++ b/proc.h @@ -482,18 +482,12 @@ void job_free(job_t* j); */ void job_promote(job_t *job); -/** - Create a new job. -*/ -job_t *job_create(); - /** Return the job with the specified job id. If id is 0 or less, return the last job used. */ job_t *job_get(job_id_t id); - /** Return the job with the specified pid. 
*/ From eba5b0e4c5c079aaf9b5e1cf69d096ca9965342a Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 27 Jun 2013 15:12:27 -0700 Subject: [PATCH 012/108] If statements parse --- parse_exec.cpp | 31 ++----------- parse_tree.cpp | 124 ++++++++++++++++++++++++++++++++++++++++++------- parse_tree.h | 24 +++++++--- 3 files changed, 130 insertions(+), 49 deletions(-) diff --git a/parse_exec.cpp b/parse_exec.cpp index f12f757a7..c5c15a076 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -228,30 +228,7 @@ class parse_exec_t // Always pop pop(); } - - void assemble_if_header(exec_node_t &exec_node, const parse_node_t &header) - { - PARSE_ASSERT(header.type == symbol_if_header); - PARSE_ASSERT(&header == &parse_tree.at(exec_node.parse_node_idx)); - PARSE_ASSERT(exec_node.body_parse_node_idx != NODE_OFFSET_INVALID); - if_header_t if_header; - if_header.body = exec_node.body_parse_node_idx; - // We may hit this on enter or exit - if (! exec_node.visited) - { - // Entry. Don't pop the header - just push the job. We'll pop it on exit. - exec_node.visited = true; - visitor->enter_if_header(if_header); - push(header.child_offset(1)); - } - else - { - // Exit. Pop it. 
- visitor->exit_if_header(if_header); - pop(); - } - } void enter_parse_node(size_t idx); void run_top_node(void); @@ -367,11 +344,11 @@ void parse_exec_t::run_top_node() break; } - case symbol_if_header: + case symbol_if_statement: { - PARSE_ASSERT(parse_node.child_count == 2); - assemble_if_header(exec_node, parse_node); - break; + PARSE_ASSERT(parse_node.child_count == 3); + + } case symbol_decorated_statement: diff --git a/parse_tree.cpp b/parse_tree.cpp index 6f6982e19..bdf77dc55 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -51,11 +51,16 @@ wcstring token_type_description(parse_token_type_t type) case symbol_statement: return L"statement"; case symbol_block_statement: return L"block_statement"; case symbol_block_header: return L"block_header"; - case symbol_if_header: return L"if_header"; case symbol_for_header: return L"for_header"; case symbol_while_header: return L"while_header"; case symbol_begin_header: return L"begin_header"; case symbol_function_header: return L"function_header"; + + case symbol_if_statement: return L"if_statement"; + case symbol_if_clause: return L"if_clause"; + case symbol_else_clause: return L"else_clause"; + case symbol_else_continuation: return L"else_continuation"; + case symbol_boolean_statement: return L"boolean_statement"; case symbol_decorated_statement: return L"decorated_statement"; case symbol_plain_statement: return L"plain_statement"; @@ -213,6 +218,17 @@ struct parse_stack_element_t parse_stack_element_t(parse_keyword_t k) : type(parse_token_type_string), keyword(k), node_idx(-1) { } + + wcstring describe(void) const + { + wcstring result = token_type_description(type); + if (keyword != parse_keyword_none) + { + append_format(result, L" <%ls>", keyword_description(keyword).c_str()); + } + return result; + } + }; class parse_ll_t @@ -244,7 +260,8 @@ class parse_ll_t void accept_token_job_continuation(parse_token_t token); void accept_token_statement(parse_token_t token); void accept_token_block_header(parse_token_t 
token); - void accept_token_if_header(parse_token_t token); + void accept_token_else_clause(parse_token_t token); + void accept_token_else_continuation(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); void accept_token_decorated_statement(parse_token_t token); void accept_token_plain_statement(parse_token_t token); @@ -257,6 +274,8 @@ class parse_ll_t void parse_error(const wchar_t *expected, parse_token_t token); void append_error_callout(wcstring &error_message, parse_token_t token); + void dump_stack(void) const; + // Get the node corresponding to the top element of the stack parse_node_t &node_for_top_symbol() { @@ -294,11 +313,11 @@ class parse_ll_t if (1) { fprintf(stderr, "Pop %ls\n", token_type_description(symbol_stack.back().type).c_str()); - if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok5.type).c_str()); - if (tok4.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok4.type).c_str()); - if (tok3.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok3.type).c_str()); - if (tok2.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok2.type).c_str()); - if (tok1.type != token_type_invalid) fprintf(stderr, "Push %ls\n", token_type_description(tok1.type).c_str()); + if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok5.describe().c_str()); + if (tok4.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok4.describe().c_str()); + if (tok3.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok3.describe().c_str()); + if (tok2.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok2.describe().c_str()); + if (tok1.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok1.describe().c_str()); } // Get the node for the top symbol and tell it about its children @@ -329,9 +348,41 @@ class parse_ll_t } }; +void parse_ll_t::dump_stack(void) const +{ + // Walk 
backwards from the top, looking for parents + wcstring_list_t lines; + if (symbol_stack.empty()) + { + lines.push_back(L"(empty)"); + } + else + { + node_offset_t child = symbol_stack.back().node_idx; + node_offset_t cursor = child; + lines.push_back(nodes.at(cursor).describe()); + while (cursor--) + { + const parse_node_t &node = nodes.at(cursor); + if (node.child_start <= child && node.child_start + node.child_count > child) + { + lines.push_back(node.describe()); + child = cursor; + } + } + } + + fprintf(stderr, "Stack dump (%lu elements):\n", symbol_stack.size()); + for (size_t idx = 0; idx < lines.size(); idx++) + { + fprintf(stderr, " %ls\n", lines.at(idx).c_str()); + } +} + void parse_ll_t::token_unhandled(parse_token_t token, const char *function) { fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function); + this->dump_stack(); PARSER_DIE(); } @@ -424,13 +475,16 @@ void parse_ll_t::accept_token_statement(parse_token_t token) symbol_stack_pop_push(symbol_boolean_statement); break; - case parse_keyword_if: case parse_keyword_for: case parse_keyword_while: case parse_keyword_function: case parse_keyword_begin: symbol_stack_pop_push(symbol_block_statement); break; + + case parse_keyword_if: + symbol_stack_pop_push(symbol_if_statement); + break; case parse_keyword_else: case parse_keyword_switch: @@ -475,10 +529,6 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) case parse_token_type_string: switch (token.keyword) { - case parse_keyword_if: - symbol_stack_pop_push(symbol_if_header); - break; - case parse_keyword_else: PARSER_DIE(); //todo break; @@ -513,6 +563,36 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) } } +void parse_ll_t::accept_token_else_clause(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_else_clause); + switch (token.keyword) + { + case parse_keyword_else: + symbol_stack_pop_push(parse_keyword_else, symbol_else_continuation); + 
break; + + default: + symbol_stack_pop_push(); + break; + } +} + +void parse_ll_t::accept_token_else_continuation(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_else_continuation); + switch (token.keyword) + { + case parse_keyword_if: + symbol_stack_pop_push(symbol_if_clause, symbol_else_clause); + break; + + default: + symbol_stack_pop_push(parse_token_type_end, symbol_job_list); + break; + } +} + void parse_ll_t::accept_token_boolean_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); @@ -705,6 +785,22 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) accept_token_statement(token); break; + case symbol_if_statement: + symbol_stack_pop_push(symbol_if_clause, symbol_else_clause, parse_keyword_end); + break; + + case symbol_if_clause: + symbol_stack_pop_push(parse_keyword_if, symbol_job, parse_token_type_end, symbol_job_list); + break; + + case symbol_else_clause: + accept_token_else_clause(token); + break; + + case symbol_else_continuation: + accept_token_else_continuation(token); + break; + case symbol_block_statement: symbol_stack_pop_push(symbol_block_header, parse_token_type_end, symbol_job_list, parse_keyword_end, symbol_arguments_or_redirections_list); break; @@ -713,10 +809,6 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) accept_token_block_header(token); break; - case symbol_if_header: - symbol_stack_pop_push(parse_keyword_if, symbol_job); - break; - case symbol_for_header: symbol_stack_pop_push(parse_keyword_for, parse_token_type_string, parse_keyword_in, symbol_arguments_or_redirections_list, parse_token_type_end); break; diff --git a/parse_tree.h b/parse_tree.h index 29365209c..4530a6326 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -15,7 +15,7 @@ #include #define PARSE_ASSERT(a) assert(a) -#define PARSER_DIE() assert(0) +#define PARSER_DIE() exit_without_destructors(-1) class parse_node_t; class parse_node_tree_t; @@ -58,11 +58,16 @@ enum 
parse_token_type_t symbol_statement, symbol_block_statement, symbol_block_header, - symbol_if_header, symbol_for_header, symbol_while_header, symbol_begin_header, symbol_function_header, + + symbol_if_statement, + symbol_if_clause, + symbol_else_clause, + symbol_else_continuation, + symbol_boolean_statement, symbol_decorated_statement, symbol_plain_statement, @@ -159,17 +164,24 @@ class parse_node_tree_t : public std::vector # A statement is a normal command, or an if / while / and etc - statement = boolean_statement | block_statement | decorated_statement + statement = boolean_statement | block_statement | if_statement | decorated_statement # A block is a conditional, loop, or begin/end + if_statement = if_clause else_clause + if_clause = job STATEMENT_TERMINATOR job_list + else_clause = | + else_continuation + else_continuation = if_clause else_clause | + STATEMENT_TERMINATOR job_list + block_statement = block_header STATEMENT_TERMINATOR job_list arguments_or_redirections_list - block_header = if_header | for_header | while_header | function_header | begin_header - if_header = IF job - for_header = FOR var_name IN arguments_or_redirections_list + block_header = for_header | while_header | function_header | begin_header + for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement begin_header = BEGIN function_header = FUNCTION function_name arguments_or_redirections_list + #(TODO: functions should not support taking redirections in their arguments) From 70b83a3bbbb7b4b7809d7164fc4ed62342355eb3 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 30 Jun 2013 15:38:31 -0700 Subject: [PATCH 013/108] Implement support for switch --- builtin.cpp | 10 ++- parse_exec.cpp | 172 +++++++++++++++++++++++++++++++++++++++++++++++-- parse_exec.h | 29 +++++++-- parse_tree.cpp | 94 +++++++++++++++++++++++---- parse_tree.h | 23 +++++-- 5 files changed, 298 insertions(+), 30 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 
d77d6361b..331f96308 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3983,11 +3983,11 @@ struct parse_execution_simulator_t : public parse_execution_visitor_t { } - virtual void enter_if_header(const if_header_t &statement) + virtual void enter_if_clause(const exec_if_clause_t &statement) { } - virtual void exit_if_header(const if_header_t &statement) + virtual void exit_if_clause(const exec_if_clause_t &statement) { append_format(back(), L"\nIF successful jump to %lu", (unsigned long)statement.body); } @@ -4041,6 +4041,12 @@ struct parse_execution_simulator_t : public parse_execution_visitor_t int builtin_parse(parser_t &parser, wchar_t **argv) { + struct sigaction act; + sigemptyset(& act.sa_mask); + act.sa_flags=0; + act.sa_handler=SIG_DFL; + sigaction(SIGINT, &act, 0); + std::vector txt; for (;;) { diff --git a/parse_exec.cpp b/parse_exec.cpp index c5c15a076..0028d530c 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -36,7 +36,7 @@ class parse_exec_t /* Current visitor (very transient) */ struct parse_execution_visitor_t * visitor; - const parse_node_t &get_child(parse_node_t &parent, node_offset_t which) const + const parse_node_t &get_child(const parse_node_t &parent, node_offset_t which) const { return parse_tree.at(parent.child_offset(which)); } @@ -214,6 +214,118 @@ class parse_exec_t push(exec_node_t(header_idx, body_idx)); } + /* which: 0 -> if, 1 -> else if, 2 -> else */ + void assemble_if_else_clause(exec_node_t &exec_node, const parse_node_t &node, int which) + { + if (which == 0) + { + PARSE_ASSERT(node.type == symbol_if_clause); + PARSE_ASSERT(node.child_count == 4); + } + else if (which == 2) + { + PARSE_ASSERT(node.type == symbol_else_continuation); + PARSE_ASSERT(node.child_count == 2); + } + + struct exec_if_clause_t clause; + if (which == 0) + { + clause.body = node.child_offset(3); + } + else + { + clause.body = node.child_offset(1); + } + if (! 
exec_node.visited) + { + visitor->enter_if_clause(clause); + exec_node.visited = true; + if (which == 0) + { + push(node.child_offset(1)); + } + } + else + { + visitor->exit_if_clause(clause); + pop(); + } + } + + void assemble_arguments(node_offset_t start_idx, exec_argument_list_t *output) const + { + node_offset_t idx = start_idx; + for (;;) + { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.type == symbol_argument_list || node.type == symbol_argument_list_nonempty); + if (node.type == symbol_argument_list) + { + // argument list, may be empty + PARSE_ASSERT(node.child_count == 0 || node.child_count == 1); + if (node.child_count == 0) + { + break; + } + else + { + idx = node.child_offset(0); + } + } + else + { + // nonempty argument list + PARSE_ASSERT(node.child_count == 2); + output->push_back(exec_argument_t(node.child_offset(0))); + idx = node.child_offset(1); + } + } + } + + void assemble_1_case_item(exec_switch_statement_t *statement, node_offset_t node_idx) + { + const parse_node_t &node = parse_tree.at(node_idx); + PARSE_ASSERT(node.type == symbol_case_item); + + // add a new case + size_t len = statement->cases.size(); + statement->cases.resize(len + 1); + exec_switch_case_t &new_case = statement->cases.back(); + + // assemble it + new_case.body = node.child_offset(3); + assemble_arguments(node.child_offset(1), &new_case.arguments); + + + } + + void assemble_case_item_list(exec_switch_statement_t *statement, node_offset_t node_idx) + { + const parse_node_t &node = parse_tree.at(node_idx); + PARSE_ASSERT(node.type == symbol_case_item_list); + PARSE_ASSERT(node.child_count == 0 || node.child_count == 2); + if (node.child_count == 2) + { + assemble_1_case_item(statement, node.child_offset(0)); + assemble_case_item_list(statement, node.child_offset(1)); + } + } + + void assemble_switch_statement(const exec_node_t &exec_node, const parse_node_t &parse_node) + { + PARSE_ASSERT(parse_node.type == symbol_switch_statement); + 
exec_switch_statement_t statement; + + statement.argument.parse_node_idx = parse_node.child_offset(1); + assemble_case_item_list(&statement, parse_node.child_offset(3)); + + visitor->visit_switch_statement(statement); + + // pop off the switch + pop(); + } + void assemble_function_header(const exec_node_t &exec_node, const parse_node_t &header) { PARSE_ASSERT(header.type == symbol_function_header); @@ -222,7 +334,7 @@ class parse_exec_t exec_function_header_t function_info; function_info.name_idx = header.child_offset(1); function_info.body_idx = exec_node.body_parse_node_idx; - assemble_arguments_and_redirections(header.child_offset(2), &function_info.arguments_and_redirections); + assemble_arguments(header.child_offset(2), &function_info.arguments); visitor->visit_function(function_info); // Always pop @@ -347,10 +459,59 @@ void parse_exec_t::run_top_node() case symbol_if_statement: { PARSE_ASSERT(parse_node.child_count == 3); - - + pop_push(0, 2); + break; } - + + case symbol_if_clause: + { + PARSE_ASSERT(parse_node.child_count == 4); + assemble_if_else_clause(exec_node, parse_node, 0); + pop(); + break; + } + + case symbol_else_clause: + { + PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); + if (parse_node.child_count == 0) + { + // No else + pop(); + } + else + { + // We have an else + pop_push(1); + } + break; + } + + case symbol_else_continuation: + { + // Figure out if this is an else if or a terminating else + PARSE_ASSERT(parse_node.child_count == 2); + const parse_node_t &first_child = get_child(parse_node, 1); + PARSE_ASSERT(first_child.type == symbol_if_clause || first_child.type == parse_token_type_end); + if (first_child.type == symbol_if_clause) + { + pop_push_all(); + } + else + { + // else + assemble_if_else_clause(exec_node, parse_node, 2); + pop(); + } + break; + } + + case symbol_switch_statement: + { + assemble_switch_statement(exec_node, parse_node); + break; + } + case symbol_decorated_statement: { 
PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2); @@ -363,6 +524,7 @@ void parse_exec_t::run_top_node() } // The following symbols should be handled by their parents, i.e. never pushed on our stack + case symbol_case_item_list: case symbol_plain_statement: case symbol_arguments_or_redirections_list: case symbol_argument_or_redirection: diff --git a/parse_exec.h b/parse_exec.h index 533051993..197f656d3 100644 --- a/parse_exec.h +++ b/parse_exec.h @@ -28,6 +28,12 @@ class parse_execution_context_t struct exec_argument_t { node_offset_t parse_node_idx; + exec_argument_t(node_offset_t p) : parse_node_idx(p) + { + } + exec_argument_t() + { + } }; typedef std::vector exec_argument_list_t; @@ -101,7 +107,7 @@ struct exec_function_header_t node_offset_t body_idx; // Arguments - exec_arguments_and_redirections_t arguments_and_redirections; + exec_argument_list_t arguments; }; struct exec_block_statement_t @@ -110,12 +116,24 @@ struct exec_block_statement_t exec_arguments_and_redirections_t arguments_and_redirections; }; -struct if_header_t +struct exec_if_clause_t { // Node containing the body of the if statement node_offset_t body; }; +struct exec_switch_case_t +{ + exec_argument_list_t arguments; + node_offset_t body; +}; + +struct exec_switch_statement_t +{ + exec_argument_t argument; + std::vector cases; +}; + struct parse_execution_visitor_t { node_offset_t node_idx; @@ -131,8 +149,11 @@ struct parse_execution_visitor_t virtual void visit_function(const exec_function_header_t &function) { } virtual bool enter_block_statement(const exec_block_statement_t &statement) { return true; } - virtual void enter_if_header(const if_header_t &statement) { } - virtual void exit_if_header(const if_header_t &statement) { } + virtual void enter_if_clause(const exec_if_clause_t &statement) { } + virtual void exit_if_clause(const exec_if_clause_t &statement) { } + + virtual void visit_switch_statement(const exec_switch_statement_t &header) { } + virtual void 
visit_boolean_statement(void) { } virtual void visit_basic_statement(const exec_basic_statement_t &statement) { } diff --git a/parse_tree.cpp b/parse_tree.cpp index bdf77dc55..0f5395c78 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -61,6 +61,13 @@ wcstring token_type_description(parse_token_type_t type) case symbol_else_clause: return L"else_clause"; case symbol_else_continuation: return L"else_continuation"; + case symbol_switch_statement: return L"switch_statement"; + case symbol_case_item_list: return L"case_item_list"; + case symbol_case_item: return L"case_item"; + + case symbol_argument_list_nonempty: return L"argument_list_nonempty"; + case symbol_argument_list: return L"argument_list"; + case symbol_boolean_statement: return L"boolean_statement"; case symbol_decorated_statement: return L"decorated_statement"; case symbol_plain_statement: return L"plain_statement"; @@ -263,8 +270,10 @@ class parse_ll_t void accept_token_else_clause(parse_token_t token); void accept_token_else_continuation(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); + void accept_token_case_item_list(parse_token_t token); void accept_token_decorated_statement(parse_token_t token); void accept_token_plain_statement(parse_token_t token); + void accept_token_argument_list(parse_token_t token); void accept_token_arguments_or_redirections_list(parse_token_t token); void accept_token_argument_or_redirection(parse_token_t token); bool accept_token_string(parse_token_t token); @@ -312,7 +321,7 @@ class parse_ll_t // Logging? 
if (1) { - fprintf(stderr, "Pop %ls\n", token_type_description(symbol_stack.back().type).c_str()); + fprintf(stderr, "Pop %ls (%lu)\n", token_type_description(symbol_stack.back().type).c_str(), symbol_stack.size()); if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok5.describe().c_str()); if (tok4.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok4.describe().c_str()); if (tok3.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok3.describe().c_str()); @@ -404,15 +413,18 @@ void parse_ll_t::accept_token_job_list(parse_token_t token) { case parse_token_type_string: // 'end' is special - if (token.keyword == parse_keyword_end) + switch (token.keyword) { - // End this job list - symbol_stack_pop_push(); - } - else - { - // Normal string - symbol_stack_pop_push(symbol_job, symbol_job_list); + case parse_keyword_end: + case parse_keyword_else: + // End this job list + symbol_stack_pop_push(); + break; + + default: + // Normal string + symbol_stack_pop_push(symbol_job, symbol_job_list); + break; } break; @@ -487,10 +499,11 @@ void parse_ll_t::accept_token_statement(parse_token_t token) break; case parse_keyword_else: + symbol_stack_pop_push(); + break; + case parse_keyword_switch: - symbol_stack_pop_push(symbol_block_statement); - fprintf(stderr, "Unimplemented type\n"); - PARSER_DIE(); + symbol_stack_pop_push(symbol_switch_statement); break; case parse_keyword_end: @@ -502,6 +515,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_none: case parse_keyword_command: case parse_keyword_builtin: + case parse_keyword_case: symbol_stack_pop_push(symbol_decorated_statement); break; @@ -612,6 +626,7 @@ void parse_ll_t::accept_token_boolean_statement(parse_token_t token) token_unhandled(token, __FUNCTION__); break; } + break; default: token_unhandled(token, __FUNCTION__); @@ -619,6 +634,22 @@ void parse_ll_t::accept_token_boolean_statement(parse_token_t token) } } +void 
parse_ll_t::accept_token_case_item_list(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_case_item_list); + switch (token.keyword) + { + case parse_keyword_case: + symbol_stack_pop_push(symbol_case_item, symbol_case_item_list); + break; + + default: + // empty list + symbol_stack_pop_push(); + break; + } +} + void parse_ll_t::accept_token_decorated_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); @@ -656,6 +687,20 @@ void parse_ll_t::accept_token_plain_statement(parse_token_t token) symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list); } +void parse_ll_t::accept_token_argument_list(parse_token_t token) +{ + PARSE_ASSERT(stack_top_type() == symbol_argument_list); + if (token.type == parse_token_type_string) + { + symbol_stack_pop_push(symbol_argument_list_nonempty); + } + else + { + symbol_stack_pop_push(); + } +} + + void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_arguments_or_redirections_list); @@ -822,7 +867,19 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_function_header: - symbol_stack_pop_push(parse_keyword_function, parse_token_type_string, symbol_arguments_or_redirections_list); + symbol_stack_pop_push(parse_keyword_function, parse_token_type_string, symbol_argument_list); + break; + + case symbol_switch_statement: + symbol_stack_pop_push(parse_keyword_switch, parse_token_type_string, parse_token_type_end, symbol_case_item_list, parse_keyword_end); + break; + + case symbol_case_item_list: + accept_token_case_item_list(token); + break; + + case symbol_case_item: + symbol_stack_pop_push(parse_keyword_case, symbol_argument_list, parse_token_type_end, symbol_job_list); break; case symbol_boolean_statement: @@ -837,6 +894,14 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) accept_token_plain_statement(token); break; + 
case symbol_argument_list_nonempty: + symbol_stack_pop_push(parse_token_type_string, symbol_argument_list); + break; + + case symbol_argument_list: + accept_token_argument_list(token); + break; + case symbol_arguments_or_redirections_list: accept_token_arguments_or_redirections_list(token); break; @@ -844,7 +909,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) case symbol_argument_or_redirection: accept_token_argument_or_redirection(token); break; - + /* Tokens */ case parse_token_type_string: consumed = accept_token_string(token); @@ -880,6 +945,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) {L"begin", parse_keyword_begin}, {L"function", parse_keyword_function}, {L"switch", parse_keyword_switch}, + {L"case", parse_keyword_case}, {L"end", parse_keyword_end}, {L"and", parse_keyword_and}, {L"or", parse_keyword_or}, diff --git a/parse_tree.h b/parse_tree.h index 4530a6326..525480f3a 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -68,11 +68,18 @@ enum parse_token_type_t symbol_else_clause, symbol_else_continuation, + symbol_switch_statement, + symbol_case_item_list, + symbol_case_item, + symbol_boolean_statement, symbol_decorated_statement, symbol_plain_statement, symbol_arguments_or_redirections_list, symbol_argument_or_redirection, + + symbol_argument_list_nonempty, + symbol_argument_list, // Terminal types parse_token_type_string, @@ -96,6 +103,7 @@ enum parse_keyword_t parse_keyword_begin, parse_keyword_function, parse_keyword_switch, + parse_keyword_case, parse_keyword_end, parse_keyword_and, parse_keyword_or, @@ -164,7 +172,7 @@ class parse_node_tree_t : public std::vector # A statement is a normal command, or an if / while / and etc - statement = boolean_statement | block_statement | if_statement | decorated_statement + statement = boolean_statement | block_statement | if_statement | switch_statement | decorated_statement # A block is a conditional, loop, or begin/end @@ -174,16 +182,21 @@ class 
parse_node_tree_t : public std::vector else_continuation else_continuation = if_clause else_clause | STATEMENT_TERMINATOR job_list + + switch_statement = SWITCH STATEMENT_TERMINATOR case_item_list + case_item_list = | + case_item case_item_list + case_item = CASE argument_list STATEMENT_TERMINATOR job_list + + argument_list_nonempty = argument_list + argument_list = | argument_list_nonempty block_statement = block_header STATEMENT_TERMINATOR job_list arguments_or_redirections_list block_header = for_header | while_header | function_header | begin_header for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement begin_header = BEGIN - function_header = FUNCTION function_name arguments_or_redirections_list - - -#(TODO: functions should not support taking redirections in their arguments) + function_header = FUNCTION function_name argument_list # A boolean statement is AND or OR or NOT From 66d5436789083f0c1a4d8b32a4264f38ce6fd7c1 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 4 Jul 2013 14:05:42 -0700 Subject: [PATCH 014/108] Fix for extra token_end --- parse_tree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index 0f5395c78..d3e14fd2f 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -863,7 +863,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_begin_header: - symbol_stack_pop_push(parse_keyword_begin, parse_token_type_end); + symbol_stack_pop_push(parse_keyword_begin); break; case symbol_function_header: From 93f27666db09107561c2500c7b5e2047dbbc72fb Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 10 Jul 2013 23:45:09 -0700 Subject: [PATCH 015/108] More work --- parse_tree.cpp | 34 +++++-- parse_tree.h | 263 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 289 insertions(+), 8 deletions(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index d3e14fd2f..d812c67ed 100644 --- a/parse_tree.cpp +++ 
b/parse_tree.cpp @@ -2,6 +2,7 @@ #include "tokenizer.h" #include +using namespace parse_symbols; wcstring parse_error_t::describe(const wcstring &src) const { @@ -355,6 +356,25 @@ class parse_ll_t if (tok2.type != token_type_invalid) symbol_stack.push_back(tok2); if (tok1.type != token_type_invalid) symbol_stack.push_back(tok1); } + + template + inline void symbol_stack_pop_push2() + { + symbol_stack_pop_push(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token()); + } + + template + inline void symbol_stack_pop_push_production(int which) + { + switch (which) + { + case 0: symbol_stack_pop_push2(); break; + case 1: symbol_stack_pop_push2(); break; + case 2: symbol_stack_pop_push2(); break; + case 3: symbol_stack_pop_push2(); break; + case 4: symbol_stack_pop_push2(); break; + } + } }; void parse_ll_t::dump_stack(void) const @@ -418,12 +438,12 @@ void parse_ll_t::accept_token_job_list(parse_token_t token) case parse_keyword_end: case parse_keyword_else: // End this job list - symbol_stack_pop_push(); + symbol_stack_pop_push_production(0); break; default: // Normal string - symbol_stack_pop_push(symbol_job, symbol_job_list); + symbol_stack_pop_push_production(1); break; } break; @@ -431,16 +451,17 @@ void parse_ll_t::accept_token_job_list(parse_token_t token) case parse_token_type_pipe: case parse_token_type_redirection: case parse_token_background: - symbol_stack_pop_push(symbol_job, symbol_job_list); + symbol_stack_pop_push_production(1); break; case parse_token_type_end: - symbol_stack_pop_push(parse_token_type_end, symbol_job_list); + // Empty line + symbol_stack_pop_push_production(2); break; case parse_token_type_terminate: // no more commands, just transition to empty - symbol_stack_pop_push(); + symbol_stack_pop_push_production(0); break; default: @@ -452,7 +473,8 @@ void parse_ll_t::accept_token_job_list(parse_token_t token) void parse_ll_t::accept_token_job(parse_token_t token) { PARSE_ASSERT(stack_top_type() == 
symbol_job); - symbol_stack_pop_push(symbol_statement, symbol_job_continuation); + //symbol_stack_pop_push(symbol_statement, symbol_job_continuation); + symbol_stack_pop_push2(); } void parse_ll_t::accept_token_job_continuation(parse_token_t token) diff --git a/parse_tree.h b/parse_tree.h index 525480f3a..9e3f087e0 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -46,7 +46,6 @@ class parse_t bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors); }; - enum parse_token_type_t { token_type_invalid, @@ -155,6 +154,266 @@ class parse_node_tree_t : public std::vector { }; +namespace parse_symbols +{ + + #define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } + + /* Placeholder */ + struct none + { + SYMBOL(token_type_invalid); + }; + + struct EMPTY + { + typedef none t0; + typedef none t1; + typedef none t2; + typedef none t3; + typedef none t4; + typedef none t5; + }; + + template + struct Seq + { + typedef T0 t0; + typedef T1 t1; + typedef T2 t2; + typedef T3 t3; + typedef T4 t4; + typedef T5 t5; + }; + + template + struct OR + { + typedef P0 p0; + typedef P1 p1; + typedef P2 p2; + typedef P3 p3; + typedef P4 p4; + typedef P5 p5; + }; + + template + struct Token + { + SYMBOL(WHICH); + }; + + template + struct Keyword + { + static inline parse_keyword_t get_token() { return WHICH; } + }; + + struct job; + struct statement; + struct job_continuation; + struct boolean_statement; + struct block_statement; + struct if_statement; + struct if_clause; + struct else_clause; + struct else_continuation; + struct switch_statement; + struct decorated_statement; + struct else_clause; + struct else_continuation; + struct switch_statement; + struct case_item_list; + struct case_item; + struct argument_list_nonempty; + struct argument_list; + struct block_statement; + struct block_header; + struct for_header; + struct while_header; + struct begin_header; + struct function_header; + struct boolean_statement; + struct 
decorated_statement; + struct plain_statement; + struct arguments_or_redirections_list; + struct argument_or_redirection; + struct redirection; + struct statement_terminator; + + /* A job_list is a list of jobs, separated by semicolons or newlines */ + struct job_list : OR< + EMPTY, + Seq, + Seq, job_list> + > + { + SYMBOL(symbol_job_list) + }; + + /* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ + struct job : Seq + { + SYMBOL(symbol_job); + }; + + struct job_continuation : OR< + EMPTY, + Seq, statement, job_continuation> + > + { + SYMBOL(symbol_job_continuation); + }; + + /* A statement is a normal command, or an if / while / and etc */ + struct statement : OR< + boolean_statement, + block_statement, + if_statement, + switch_statement, + decorated_statement + > + { + SYMBOL(symbol_statement); + }; + + struct if_statement : Seq > + { + SYMBOL(symbol_if_statement); + }; + + struct if_clause : Seq, job, statement_terminator, job_list> + { + SYMBOL(symbol_if_clause); + }; + + struct else_clause : OR< + EMPTY, + Keyword, else_continuation + > + { + SYMBOL(symbol_else_clause); + }; + + struct else_continuation : OR< + Seq, + Seq + > + { + SYMBOL(symbol_else_continuation); + }; + + struct switch_statement : Seq, Token, statement_terminator, case_item_list, Keyword + > + { + SYMBOL(symbol_switch_statement); + }; + + struct case_item_list : OR + < + EMPTY, + case_item, case_item_list + > + { + SYMBOL(symbol_case_item_list); + }; + + struct case_item : Seq, argument_list, statement_terminator, job_list> + { + SYMBOL(symbol_case_item); + }; + + struct argument_list_nonempty : Seq, argument_list> + { + SYMBOL(symbol_argument_list_nonempty); + }; + + struct argument_list : OR + { + SYMBOL(symbol_argument_list); + }; + + struct block_statement : Seq, arguments_or_redirections_list> + { 
+ SYMBOL(symbol_block_statement); + }; + + struct block_header : OR + { + SYMBOL(symbol_block_header); + }; + + struct for_header : Seq, Token, Keyword, arguments_or_redirections_list> + { + SYMBOL(symbol_for_header); + }; + + struct while_header : Seq, statement> + { + SYMBOL(symbol_while_header); + }; + + struct begin_header : Keyword + { + SYMBOL(symbol_begin_header); + }; + + struct function_header : Keyword + { + SYMBOL(symbol_function_header); + }; + + /* A boolean statement is AND or OR or NOT */ + struct boolean_statement : OR< + Seq, statement>, + Seq, statement>, + Seq, statement> + > + { + SYMBOL(symbol_boolean_statement); + }; + + /* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ + struct decorated_statement : OR< + Seq, plain_statement>, + Seq, plain_statement>, + plain_statement + > + { + SYMBOL(symbol_decorated_statement); + }; + + struct plain_statement : Seq, arguments_or_redirections_list> + { + SYMBOL(symbol_plain_statement); + }; + + struct arguments_or_redirections_list : OR< + EMPTY, + Seq > + { + SYMBOL(symbol_arguments_or_redirections_list); + }; + + struct argument_or_redirection : OR< + Token, + redirection + > + { + SYMBOL(symbol_argument_or_redirection); + }; + + struct redirection : Token + { + SYMBOL(parse_token_type_redirection); + }; + + struct statement_terminator : Token + { + SYMBOL(parse_token_type_end); + }; +} + /* Fish grammar: @@ -205,7 +464,7 @@ class parse_node_tree_t : public std::vector # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = command arguments_or_redirections_list + plain_statement = COMMAND arguments_or_redirections_list arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list From 77b6b0a9b2e7260884064dbac72f17bb5e86431f Mon Sep 17 
00:00:00 2001 From: ridiculousfish Date: Sun, 21 Jul 2013 15:22:11 -0700 Subject: [PATCH 016/108] Move production logic into templates --- parse_tree.cpp | 212 ++++++++++++------------------------------------- parse_tree.h | 116 ++++++++++++++++++++++++--- 2 files changed, 157 insertions(+), 171 deletions(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index d812c67ed..24bf41e7c 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -271,7 +271,6 @@ class parse_ll_t void accept_token_else_clause(parse_token_t token); void accept_token_else_continuation(parse_token_t token); void accept_token_boolean_statement(parse_token_t token); - void accept_token_case_item_list(parse_token_t token); void accept_token_decorated_statement(parse_token_t token); void accept_token_plain_statement(parse_token_t token); void accept_token_argument_list(parse_token_t token); @@ -313,10 +312,15 @@ class parse_ll_t nodes.push_back(parse_node_t(tok->type)); nodes.at(parent_node_idx).child_count += 1; } + + inline void symbol_stack_pop() + { + symbol_stack.pop_back(); + } // Pop from the top of the symbol stack, then push, updating node counts. Note that these are pushed in reverse order, so the first argument will be on the top of the stack. - inline void symbol_stack_pop_push(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) + inline void symbol_stack_pop_push_int(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) { // Logging? 
@@ -360,7 +364,7 @@ class parse_ll_t template inline void symbol_stack_pop_push2() { - symbol_stack_pop_push(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token()); + symbol_stack_pop_push_int(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token()); } template @@ -375,6 +379,12 @@ class parse_ll_t case 4: symbol_stack_pop_push2(); break; } } + + template + inline void symbol_stack_produce(parse_token_t tok) + { + symbol_stack_pop_push_production(T::production(tok.type, tok.keyword)); + } }; void parse_ll_t::dump_stack(void) const @@ -470,13 +480,6 @@ void parse_ll_t::accept_token_job_list(parse_token_t token) } } -void parse_ll_t::accept_token_job(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_job); - //symbol_stack_pop_push(symbol_statement, symbol_job_continuation); - symbol_stack_pop_push2(); -} - void parse_ll_t::accept_token_job_continuation(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_job_continuation); @@ -484,12 +487,12 @@ void parse_ll_t::accept_token_job_continuation(parse_token_t token) { case parse_token_type_pipe: // Pipe, continuation - symbol_stack_pop_push(parse_token_type_pipe, symbol_statement, symbol_job_continuation); + symbol_stack_pop_push_production(1); break; default: // Not a pipe, no job continuation - symbol_stack_pop_push(); + symbol_stack_pop_push_production(0); break; } } @@ -506,26 +509,26 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_and: case parse_keyword_or: case parse_keyword_not: - symbol_stack_pop_push(symbol_boolean_statement); + symbol_stack_pop_push_production(0); break; case parse_keyword_for: case parse_keyword_while: case parse_keyword_function: case parse_keyword_begin: - symbol_stack_pop_push(symbol_block_statement); + symbol_stack_pop_push_production(1); break; case parse_keyword_if: - symbol_stack_pop_push(symbol_if_statement); + 
symbol_stack_pop_push_production(2); break; case parse_keyword_else: - symbol_stack_pop_push(); + symbol_stack_pop(); break; case parse_keyword_switch: - symbol_stack_pop_push(symbol_switch_statement); + symbol_stack_pop_push_production(3); break; case parse_keyword_end: @@ -538,7 +541,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token) case parse_keyword_command: case parse_keyword_builtin: case parse_keyword_case: - symbol_stack_pop_push(symbol_decorated_statement); + symbol_stack_pop_push_production(4); break; } @@ -570,22 +573,22 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) break; case parse_keyword_for: - symbol_stack_pop_push(symbol_for_header); + symbol_stack_pop_push_production(0); break; case parse_keyword_while: - symbol_stack_pop_push(symbol_while_header); + symbol_stack_pop_push_production(1); + break; + + case parse_keyword_function: + symbol_stack_pop_push_production(2); break; case parse_keyword_begin: - symbol_stack_pop_push(symbol_begin_header); + symbol_stack_pop_push_production(3); break; - case parse_keyword_function: - symbol_stack_pop_push(symbol_function_header); - break; - default: token_unhandled(token, __FUNCTION__); break; @@ -602,163 +605,52 @@ void parse_ll_t::accept_token_block_header(parse_token_t token) void parse_ll_t::accept_token_else_clause(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_else_clause); - switch (token.keyword) - { - case parse_keyword_else: - symbol_stack_pop_push(parse_keyword_else, symbol_else_continuation); - break; - - default: - symbol_stack_pop_push(); - break; - } + symbol_stack_produce(token); } void parse_ll_t::accept_token_else_continuation(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_else_continuation); - switch (token.keyword) - { - case parse_keyword_if: - symbol_stack_pop_push(symbol_if_clause, symbol_else_clause); - break; - - default: - symbol_stack_pop_push(parse_token_type_end, symbol_job_list); - break; - } + 
symbol_stack_produce(token); } void parse_ll_t::accept_token_boolean_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); - switch (token.type) - { - case parse_token_type_string: - switch (token.keyword) - { - case parse_keyword_and: - case parse_keyword_or: - case parse_keyword_not: - top_node_set_tag(token.keyword); - symbol_stack_pop_push(token.keyword, symbol_statement); - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } -} - -void parse_ll_t::accept_token_case_item_list(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_case_item_list); - switch (token.keyword) - { - case parse_keyword_case: - symbol_stack_pop_push(symbol_case_item, symbol_case_item_list); - break; - - default: - // empty list - symbol_stack_pop_push(); - break; - } + top_node_set_tag(token.keyword); + symbol_stack_produce(token); } void parse_ll_t::accept_token_decorated_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); - switch (token.type) - { - case parse_token_type_string: - switch (token.keyword) - { - case parse_keyword_command: - top_node_set_tag(parse_keyword_command); - symbol_stack_pop_push(parse_keyword_command, symbol_plain_statement); - break; - - case parse_keyword_builtin: - top_node_set_tag(parse_keyword_builtin); - symbol_stack_pop_push(parse_keyword_builtin, symbol_plain_statement); - break; - - default: - top_node_set_tag(parse_keyword_none); - symbol_stack_pop_push(symbol_plain_statement); - break; - } - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } + top_node_set_tag(token.keyword); + symbol_stack_produce(token); } void parse_ll_t::accept_token_plain_statement(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_plain_statement); - symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list); + 
symbol_stack_produce(token); } void parse_ll_t::accept_token_argument_list(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_argument_list); - if (token.type == parse_token_type_string) - { - symbol_stack_pop_push(symbol_argument_list_nonempty); - } - else - { - symbol_stack_pop_push(); - } + symbol_stack_produce(token); } void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_arguments_or_redirections_list); - switch (token.type) - { - case parse_token_type_string: - case parse_token_type_redirection: - symbol_stack_pop_push(symbol_argument_or_redirection, symbol_arguments_or_redirections_list); - break; - - default: - // Some other token, end of list - symbol_stack_pop_push(); - break; - } + symbol_stack_produce(token); } void parse_ll_t::accept_token_argument_or_redirection(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_argument_or_redirection); - switch (token.type) - { - case parse_token_type_string: - symbol_stack_pop_push(parse_token_type_string); - // Got an argument - break; - - case parse_token_type_redirection: - symbol_stack_pop_push(parse_token_type_redirection); - // Got a redirection - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } + symbol_stack_produce(token); } bool parse_ll_t::accept_token_string(parse_token_t token) @@ -769,7 +661,7 @@ bool parse_ll_t::accept_token_string(parse_token_t token) { case parse_token_type_string: // Got our string - symbol_stack_pop_push(); + symbol_stack_pop(); result = true; break; @@ -841,7 +733,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_job: - accept_token_job(token); + symbol_stack_pop_push2(); break; case symbol_job_continuation: @@ -853,11 +745,11 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_if_statement: - symbol_stack_pop_push(symbol_if_clause, symbol_else_clause, parse_keyword_end); + 
symbol_stack_produce(token); break; case symbol_if_clause: - symbol_stack_pop_push(parse_keyword_if, symbol_job, parse_token_type_end, symbol_job_list); + symbol_stack_produce(token); break; case symbol_else_clause: @@ -869,39 +761,39 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_block_statement: - symbol_stack_pop_push(symbol_block_header, parse_token_type_end, symbol_job_list, parse_keyword_end, symbol_arguments_or_redirections_list); + symbol_stack_produce(token); break; case symbol_block_header: - accept_token_block_header(token); + symbol_stack_produce(token); break; case symbol_for_header: - symbol_stack_pop_push(parse_keyword_for, parse_token_type_string, parse_keyword_in, symbol_arguments_or_redirections_list, parse_token_type_end); + symbol_stack_produce(token); break; case symbol_while_header: - symbol_stack_pop_push(parse_keyword_while, symbol_statement); + symbol_stack_produce(token); break; case symbol_begin_header: - symbol_stack_pop_push(parse_keyword_begin); + symbol_stack_produce(token); break; case symbol_function_header: - symbol_stack_pop_push(parse_keyword_function, parse_token_type_string, symbol_argument_list); + symbol_stack_produce(token); break; case symbol_switch_statement: - symbol_stack_pop_push(parse_keyword_switch, parse_token_type_string, parse_token_type_end, symbol_case_item_list, parse_keyword_end); + symbol_stack_produce(token); break; case symbol_case_item_list: - accept_token_case_item_list(token); + symbol_stack_produce(token); break; case symbol_case_item: - symbol_stack_pop_push(parse_keyword_case, symbol_argument_list, parse_token_type_end, symbol_job_list); + symbol_stack_produce(token); break; case symbol_boolean_statement: @@ -917,7 +809,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; case symbol_argument_list_nonempty: - symbol_stack_pop_push(parse_token_type_string, symbol_argument_list); + symbol_stack_produce(token); break; case 
symbol_argument_list: diff --git a/parse_tree.h b/parse_tree.h index 9e3f087e0..39e370af6 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -159,12 +159,27 @@ namespace parse_symbols #define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } - /* Placeholder */ - struct none + #define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; } + + #define NO_PRODUCTION (-1) + + + template + struct Token { - SYMBOL(token_type_invalid); + SYMBOL(WHICH); + + typedef Token t0; + typedef Token t1; + typedef Token t2; + typedef Token t3; + typedef Token t4; + typedef Token t5; }; + /* Placeholder */ + typedef Token none; + struct EMPTY { typedef none t0; @@ -197,12 +212,6 @@ namespace parse_symbols typedef P5 p5; }; - template - struct Token - { - SYMBOL(WHICH); - }; - template struct Keyword { @@ -220,8 +229,6 @@ namespace parse_symbols struct else_continuation; struct switch_statement; struct decorated_statement; - struct else_clause; - struct else_continuation; struct switch_statement; struct case_item_list; struct case_item; @@ -280,11 +287,13 @@ namespace parse_symbols struct if_statement : Seq > { SYMBOL(symbol_if_statement); + PRODUCE(0) }; struct if_clause : Seq, job, statement_terminator, job_list> { SYMBOL(symbol_if_clause); + PRODUCE(0) }; struct else_clause : OR< @@ -293,6 +302,15 @@ namespace parse_symbols > { SYMBOL(symbol_else_clause); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_else: return 1; + default: return 0; + } + } }; struct else_continuation : OR< @@ -301,6 +319,15 @@ namespace parse_symbols > { SYMBOL(symbol_else_continuation); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_if: return 0; + default: return 1; + } + } }; struct switch_statement : Seq, Token, statement_terminator, case_item_list, Keyword @@ -316,6 +343,15 @@ namespace parse_symbols > { 
SYMBOL(symbol_case_item_list); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_case: return 1; + default: return 0; + } + } }; struct case_item : Seq, argument_list, statement_terminator, job_list> @@ -331,11 +367,20 @@ namespace parse_symbols struct argument_list : OR { SYMBOL(symbol_argument_list); + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: return 1; + default: return 0; + } + } }; struct block_statement : Seq, arguments_or_redirections_list> { SYMBOL(symbol_block_statement); + PRODUCE(0) }; struct block_header : OR @@ -371,6 +416,17 @@ namespace parse_symbols > { SYMBOL(symbol_boolean_statement); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_and: return 0; + case parse_keyword_or: return 1; + case parse_keyword_not: return 2; + default: return NO_PRODUCTION; + } + } }; /* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ @@ -381,11 +437,27 @@ namespace parse_symbols > { SYMBOL(symbol_decorated_statement); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_command: return 0; + case parse_keyword_builtin: return 1; + default: return 2; + } + } }; struct plain_statement : Seq, arguments_or_redirections_list> { SYMBOL(symbol_plain_statement); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + return 0; + } + }; struct arguments_or_redirections_list : OR< @@ -393,6 +465,18 @@ namespace parse_symbols Seq > { SYMBOL(symbol_arguments_or_redirections_list); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: + case parse_token_type_redirection: + return 1; + default: + return 0; + } + } }; struct argument_or_redirection : OR< @@ 
-401,6 +485,16 @@ namespace parse_symbols > { SYMBOL(symbol_argument_or_redirection); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: return 0; + case parse_token_type_redirection: return 1; + default: return NO_PRODUCTION; + } + } }; struct redirection : Token From 3e3eefc2dcb2e0e31b224703a063e05dc8c67996 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 22 Jul 2013 18:26:15 -0700 Subject: [PATCH 017/108] Improvements to new parser. All functions and completions now parse. --- builtin.cpp | 61 ++-- builtin_printf.cpp | 2 +- exec.cpp | 16 +- fish.xcodeproj/project.pbxproj | 2 + fish_tests.cpp | 6 +- parse_exec.cpp | 170 ++++----- parse_exec.h | 49 +-- parse_tree.cpp | 628 +++++++++++++++------------------ parse_tree.h | 414 ++-------------------- parse_tree_construction.h | 586 ++++++++++++++++++++++++++++++ parse_util.cpp | 10 +- tokenizer.cpp | 10 +- tokenizer.h | 5 +- 13 files changed, 1080 insertions(+), 879 deletions(-) create mode 100644 parse_tree_construction.h diff --git a/builtin.cpp b/builtin.cpp index 3b40be3c4..d2a80a8c4 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3946,53 +3946,53 @@ static int builtin_history(parser_t &parser, wchar_t **argv) struct parse_execution_simulator_t : public parse_execution_visitor_t { wcstring_list_t result; - + wcstring &back() { assert(! 
result.empty()); return result.back(); } - + void append_src(node_offset_t idx) { wcstring tmp; context->get_source(idx, &tmp); back().append(tmp); } - + void append(const wchar_t *s) { back().append(s); } - + bool enter_job_list(void) { return true; } - + bool enter_job(void) { result.resize(result.size() + 1); return true; } - + void visit_statement(void) { } - + virtual void visit_boolean_statement(void) { } - + virtual void enter_if_clause(const exec_if_clause_t &statement) { } - + virtual void exit_if_clause(const exec_if_clause_t &statement) { append_format(back(), L"\nIF successful jump to %lu", (unsigned long)statement.body); } - + void visit_basic_statement(const exec_basic_statement_t &statement) { wcstring &line = this->back(); @@ -4005,34 +4005,35 @@ struct parse_execution_simulator_t : public parse_execution_visitor_t case exec_basic_statement_t::decoration_builtin: line.append(L" "); break; - + case exec_basic_statement_t::decoration_command: line.append(L" "); break; - + default: - break; + break; } - + line.append(L"cmd:"); this->append_src(statement.command_idx); for (size_t i=0; i < statement.arguments().size(); i++) { const exec_argument_t &arg = statement.arguments().at(i); append(L" "); - append(L"arg:"); + append(L"arg:"); append_src(arg.parse_node_idx); } } - - void visit_function(const exec_function_header_t &function) { + + void visit_function(const exec_function_header_t &function) + { wcstring &line = this->back(); line.append(L"define function: "); wcstring tmp; context->get_source(function.name_idx, &tmp); line.append(tmp); } - + void exit_job_list(void) { } @@ -4074,17 +4075,19 @@ int builtin_parse(parser_t &parser, wchar_t **argv) } else { - parse_execution_context_t ctx(parse_tree, src); - parse_execution_simulator_t sim; - sim.context = &ctx; - while (ctx.visit_next_node(&sim)) - { - } - stdout_buffer.append(L"Simulating execution:\n"); - for (size_t i=0; i < sim.result.size(); i++) - { - stdout_buffer.append(sim.result.at(i)); - 
stdout_buffer.push_back(L'\n'); + if (0) { + parse_execution_context_t ctx(parse_tree, src); + parse_execution_simulator_t sim; + sim.context = &ctx; + while (ctx.visit_next_node(&sim)) + { + } + stdout_buffer.append(L"Simulating execution:\n"); + for (size_t i=0; i < sim.result.size(); i++) + { + stdout_buffer.append(sim.result.at(i)); + stdout_buffer.push_back(L'\n'); + } } } } diff --git a/builtin_printf.cpp b/builtin_printf.cpp index efe4a2118..b7df7fa82 100644 --- a/builtin_printf.cpp +++ b/builtin_printf.cpp @@ -632,7 +632,7 @@ int builtin_printf_state_t::print_formatted(const wchar_t *format, int argc, wch } break; } - + modify_allowed_format_specifiers(ok, "aAcdeEfFgGiosuxX", true); for (;; f++, direc_length++) diff --git a/exec.cpp b/exec.cpp index d5c7d4bf8..5f4676631 100644 --- a/exec.cpp +++ b/exec.cpp @@ -537,7 +537,7 @@ static bool can_use_posix_spawn_for_job(const job_t *job, const process_t *proce return false; } } - + /* Now see if we have a redirection involving a file. The only one we allow is /dev/null, which we assume will not fail. */ bool result = true; for (size_t idx = 0; idx < job->io.size(); idx++) @@ -545,8 +545,8 @@ static bool can_use_posix_spawn_for_job(const job_t *job, const process_t *proce const shared_ptr &io = job->io.at(idx); if (redirection_is_to_real_file(io.get())) { - result = false; - break; + result = false; + break; } } return result; @@ -1192,16 +1192,16 @@ void exec(parser_t &parser, job_t *j) forking is expensive, fish tries to avoid it when possible. */ - + bool fork_was_skipped = false; - + const shared_ptr stdout_io = io_chain_get(j->io, STDOUT_FILENO); const shared_ptr stderr_io = io_chain_get(j->io, STDERR_FILENO); - + /* If we are outputting to a file, we have to actually do it, even if we have no output, so that we can truncate the file. Does not apply to /dev/null. */ bool must_fork = redirection_is_to_real_file(stdout_io.get()) || redirection_is_to_real_file(stderr_io.get()); if (! 
must_fork) - { + { if (p->next == NULL) { const bool stdout_is_to_buffer = stdout_io && stdout_io->io_mode == IO_BUFFER; @@ -1250,7 +1250,7 @@ void exec(parser_t &parser, job_t *j) } } } - + if (fork_was_skipped) { diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 93293e50c..5ae10bfc4 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -465,6 +465,7 @@ D0D2693C159835CA005D9B9C /* fish */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish; sourceTree = BUILT_PRODUCTS_DIR; }; D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_test.cpp; sourceTree = ""; }; D0F5E28415A7A32D00315DFF /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = ""; }; + D0FE8EE6179CA8A5008C9F21 /* parse_tree_construction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_tree_construction.h; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -589,6 +590,7 @@ D0A0850C13B3ACEE0099B651 /* expand.h */, D0A0853D13B3ACEE0099B651 /* expand.cpp */, D0C52F361765284C00BFAB82 /* parse_tree.h */, + D0FE8EE6179CA8A5008C9F21 /* parse_tree_construction.h */, D0C52F351765284C00BFAB82 /* parse_tree.cpp */, D0C52F341765281F00BFAB82 /* parse_exec.h */, D0C52F331765281F00BFAB82 /* parse_exec.cpp */, diff --git a/fish_tests.cpp b/fish_tests.cpp index 739b47b02..6ebd3d220 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -534,7 +534,7 @@ static void test_utils() { say(L"Testing utils"); const wchar_t *a = L"echo (echo (echo hi"; - + const wchar_t *begin = NULL, *end = NULL; parse_util_cmdsubst_extent(a, 0, &begin, &end); if (begin != a || end != begin + wcslen(begin)) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__); @@ -544,7 
+544,7 @@ static void test_utils() if (begin != a || end != begin + wcslen(begin)) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__); parse_util_cmdsubst_extent(a, 3, &begin, &end); if (begin != a || end != begin + wcslen(begin)) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__); - + parse_util_cmdsubst_extent(a, 8, &begin, &end); if (begin != a + wcslen(L"echo (")) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__); @@ -1842,7 +1842,7 @@ int main(int argc, char **argv) builtin_init(); reader_init(); env_init(); - + test_new_parser(); return 0; diff --git a/parse_exec.cpp b/parse_exec.cpp index 0028d530c..3f2074f4a 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -6,11 +6,11 @@ struct exec_node_t node_offset_t parse_node_idx; node_offset_t body_parse_node_idx; bool visited; - + explicit exec_node_t(node_offset_t pni) : parse_node_idx(pni), body_parse_node_idx(NODE_OFFSET_INVALID), visited(false) { } - + explicit exec_node_t(node_offset_t pni, node_offset_t body_pni) : parse_node_idx(pni), body_parse_node_idx(body_pni), visited(false) { } @@ -18,7 +18,7 @@ struct exec_node_t exec_basic_statement_t::exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) { - + } @@ -26,21 +26,21 @@ class parse_exec_t { parse_node_tree_t parse_tree; wcstring src; - + /* The stack of nodes as we execute them */ std::vector exec_nodes; - + /* The stack of commands being built */ std::vector assembling_statements; - + /* Current visitor (very transient) */ struct parse_execution_visitor_t * visitor; - + const parse_node_t &get_child(const parse_node_t &parent, node_offset_t which) const { return parse_tree.at(parent.child_offset(which)); } - + void pop_push_specific(node_offset_t idx1, node_offset_t idx2 = NODE_OFFSET_INVALID, node_offset_t idx3 = NODE_OFFSET_INVALID, node_offset_t idx4 = NODE_OFFSET_INVALID, node_offset_t idx5 = NODE_OFFSET_INVALID) { PARSE_ASSERT(! 
exec_nodes.empty()); @@ -48,10 +48,10 @@ class parse_exec_t exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); node_offset_t child_node_idx = parse_node.child_start; - + // Remove the top node exec_nodes.pop_back(); - + // Append the given children, backwards const node_offset_t idxs[] = {idx5, idx4, idx3, idx2, idx1}; for (size_t q=0; q < sizeof idxs / sizeof *idxs; q++) @@ -65,18 +65,18 @@ class parse_exec_t } } - + void push(node_offset_t global_idx) { exec_nodes.push_back(exec_node_t(global_idx)); } - + void push(const exec_node_t &node) { exec_nodes.push_back(node); } - + void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) { PARSE_ASSERT(! exec_nodes.empty()); @@ -92,10 +92,10 @@ class parse_exec_t const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); PARSE_ASSERT(child_idx < parse_node.child_count); node_offset_t child_node_idx = parse_node.child_start + child_idx; - + // Remove the top node exec_nodes.pop_back(); - + // Append the given children, backwards node_offset_t cursor = child_count; while (cursor--) @@ -104,20 +104,20 @@ class parse_exec_t } } } - + void pop() { PARSE_ASSERT(! 
exec_nodes.empty()); exec_nodes.pop_back(); } - + void pop_push_all() { exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); pop_push(0, parse_node.child_count); } - + void assemble_1_argument_or_redirection(node_offset_t idx, exec_arguments_and_redirections_t *output) const { const parse_node_t &node = parse_tree.at(idx); @@ -130,28 +130,28 @@ class parse_exec_t { case parse_token_type_string: // Argument - { - exec_argument_t arg = exec_argument_t(); - arg.parse_node_idx = child_idx; - output->arguments.push_back(arg); - } - break; - + { + exec_argument_t arg = exec_argument_t(); + arg.parse_node_idx = child_idx; + output->arguments.push_back(arg); + } + break; + case parse_token_type_redirection: // Redirection - { - exec_redirection_t redirect = exec_redirection_t(); - redirect.parse_node_idx = child_idx; - output->redirections.push_back(redirect); - } - break; - + { + exec_redirection_t redirect = exec_redirection_t(); + redirect.parse_node_idx = child_idx; + output->redirections.push_back(redirect); + } + break; + default: PARSER_DIE(); break; } } - + void assemble_arguments_and_redirections(node_offset_t start_idx, exec_arguments_and_redirections_t *output) const { node_offset_t idx = start_idx; @@ -173,7 +173,7 @@ class parse_exec_t } } } - + void assemble_command_for_plain_statement(node_offset_t idx, parse_keyword_t decoration) { const parse_node_t &node = parse_tree.at(idx); @@ -185,10 +185,10 @@ class parse_exec_t assemble_arguments_and_redirections(node.child_offset(1), &statement.arguments_and_redirections); visitor->visit_basic_statement(statement); } - + void assemble_block_statement(node_offset_t parse_node_idx) { - + const parse_node_t &node = parse_tree.at(parse_node_idx); PARSE_ASSERT(node.type == symbol_block_statement); PARSE_ASSERT(node.child_count == 5); @@ -196,24 +196,24 @@ class parse_exec_t // Fetch arguments and redirections. 
These ought to be evaluated before the job list exec_block_statement_t statement; assemble_arguments_and_redirections(node.child_offset(4), &statement.arguments_and_redirections); - + // Generic visit visitor->enter_block_statement(statement); - + // Dig into the header to discover the type const parse_node_t &header_parent = parse_tree.at(node.child_offset(0)); PARSE_ASSERT(header_parent.type == symbol_block_header); - PARSE_ASSERT(header_parent.child_count == 1); + PARSE_ASSERT(header_parent.child_count == 1); const node_offset_t header_idx = header_parent.child_offset(0); - + // Fetch body (job list) node_offset_t body_idx = node.child_offset(2); PARSE_ASSERT(parse_tree.at(body_idx).type == symbol_job_list); - + pop(); push(exec_node_t(header_idx, body_idx)); } - + /* which: 0 -> if, 1 -> else if, 2 -> else */ void assemble_if_else_clause(exec_node_t &exec_node, const parse_node_t &node, int which) { @@ -227,7 +227,7 @@ class parse_exec_t PARSE_ASSERT(node.type == symbol_else_continuation); PARSE_ASSERT(node.child_count == 2); } - + struct exec_if_clause_t clause; if (which == 0) { @@ -252,7 +252,7 @@ class parse_exec_t pop(); } } - + void assemble_arguments(node_offset_t start_idx, exec_argument_list_t *output) const { node_offset_t idx = start_idx; @@ -282,24 +282,24 @@ class parse_exec_t } } } - + void assemble_1_case_item(exec_switch_statement_t *statement, node_offset_t node_idx) { const parse_node_t &node = parse_tree.at(node_idx); PARSE_ASSERT(node.type == symbol_case_item); - + // add a new case size_t len = statement->cases.size(); statement->cases.resize(len + 1); exec_switch_case_t &new_case = statement->cases.back(); - + // assemble it new_case.body = node.child_offset(3); assemble_arguments(node.child_offset(1), &new_case.arguments); - - + + } - + void assemble_case_item_list(exec_switch_statement_t *statement, node_offset_t node_idx) { const parse_node_t &node = parse_tree.at(node_idx); @@ -311,21 +311,21 @@ class parse_exec_t 
assemble_case_item_list(statement, node.child_offset(1)); } } - + void assemble_switch_statement(const exec_node_t &exec_node, const parse_node_t &parse_node) { PARSE_ASSERT(parse_node.type == symbol_switch_statement); exec_switch_statement_t statement; - + statement.argument.parse_node_idx = parse_node.child_offset(1); assemble_case_item_list(&statement, parse_node.child_offset(3)); - + visitor->visit_switch_statement(statement); - + // pop off the switch pop(); } - + void assemble_function_header(const exec_node_t &exec_node, const parse_node_t &header) { PARSE_ASSERT(header.type == symbol_function_header); @@ -336,17 +336,17 @@ class parse_exec_t function_info.body_idx = exec_node.body_parse_node_idx; assemble_arguments(header.child_offset(2), &function_info.arguments); visitor->visit_function(function_info); - + // Always pop pop(); } - + void enter_parse_node(size_t idx); void run_top_node(void); - - public: - + +public: + void get_node_string(node_offset_t idx, wcstring *output) const { const parse_node_t &node = parse_tree.at(idx); @@ -354,9 +354,9 @@ class parse_exec_t PARSE_ASSERT(node.source_start + node.source_length <= src.size()); output->assign(src, node.source_start, node.source_length); } - + bool visit_next_node(parse_execution_visitor_t *v); - + parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), visitor(NULL) { if (! 
parse_tree.empty()) @@ -373,7 +373,7 @@ void parse_exec_t::run_top_node() const node_offset_t parse_node_idx = exec_node.parse_node_idx; const parse_node_t &parse_node = parse_tree.at(exec_node.parse_node_idx); bool log = true; - + if (log) { wcstring tmp; @@ -381,7 +381,7 @@ void parse_exec_t::run_top_node() tmp.append(parse_node.describe()); printf("%ls\n", tmp.c_str()); } - + switch (parse_node.type) { case symbol_job_list: @@ -404,7 +404,7 @@ void parse_exec_t::run_top_node() pop_push(0, 2); } break; - + case symbol_job: { PARSE_ASSERT(parse_node.child_count == 2); @@ -412,7 +412,7 @@ void parse_exec_t::run_top_node() pop_push_all(); break; } - + case symbol_job_continuation: PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 3); if (parse_node.child_count == 0) @@ -426,51 +426,51 @@ void parse_exec_t::run_top_node() // Skip the pipe pop_push(1, 2); } - break; - + break; + case symbol_statement: { PARSE_ASSERT(parse_node.child_count == 1); pop_push_all(); break; } - + case symbol_block_statement: { PARSE_ASSERT(parse_node.child_count == 5); assemble_block_statement(parse_node_idx); break; } - + case symbol_block_header: { PARSE_ASSERT(parse_node.child_count == 1); pop_push_all(); break; } - + case symbol_function_header: { PARSE_ASSERT(parse_node.child_count == 3); assemble_function_header(exec_node, parse_node); break; } - + case symbol_if_statement: { - PARSE_ASSERT(parse_node.child_count == 3); + PARSE_ASSERT(parse_node.child_count == 4); pop_push(0, 2); break; } - + case symbol_if_clause: { PARSE_ASSERT(parse_node.child_count == 4); assemble_if_else_clause(exec_node, parse_node, 0); pop(); - break; + break; } - + case symbol_else_clause: { PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); @@ -486,7 +486,7 @@ void parse_exec_t::run_top_node() } break; } - + case symbol_else_continuation: { // Figure out if this is an else if or a terminating else @@ -505,17 +505,17 @@ void parse_exec_t::run_top_node() } break; } - + 
case symbol_switch_statement: { assemble_switch_statement(exec_node, parse_node); break; } - + case symbol_decorated_statement: { PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2); - + node_offset_t plain_statement_idx = parse_node.child_offset(parse_node.child_count - 1); parse_keyword_t decoration = static_cast(parse_node.tag); assemble_command_for_plain_statement(plain_statement_idx, decoration); @@ -528,20 +528,20 @@ void parse_exec_t::run_top_node() case symbol_plain_statement: case symbol_arguments_or_redirections_list: case symbol_argument_or_redirection: - fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); + fprintf(stderr, "Unexpected token type %ls at index %ld. This should have been handled by the parent.\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); PARSER_DIE(); break; - + case parse_token_type_end: PARSE_ASSERT(parse_node.child_count == 0); pop(); break; - + default: fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); PARSER_DIE(); break; - + } } @@ -553,7 +553,7 @@ bool parse_exec_t::visit_next_node(parse_execution_visitor_t *v) { return false; } - + visitor = v; run_top_node(); visitor = NULL; diff --git a/parse_exec.h b/parse_exec.h index 197f656d3..1eea99ab1 100644 --- a/parse_exec.h +++ b/parse_exec.h @@ -13,13 +13,13 @@ class parse_exec_t; class parse_execution_context_t { parse_exec_t *ctx; //owned - - public: + +public: parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s); ~parse_execution_context_t(); - + bool visit_next_node(parse_execution_visitor_t *visitor); - + // Gets the source for a node at a given index void get_source(node_offset_t idx, wcstring *result) const; }; @@ -53,10 +53,10 @@ struct exec_basic_statement_t { // Node containing the command node_offset_t command_idx; - + // Arguments 
exec_arguments_and_redirections_t arguments_and_redirections; - + // Decoration enum { @@ -64,9 +64,9 @@ struct exec_basic_statement_t decoration_command, decoration_builtin } decoration; - + exec_basic_statement_t(); - + void set_decoration(uint32_t k) { PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin); @@ -86,12 +86,12 @@ struct exec_basic_statement_t break; } } - + const exec_argument_list_t &arguments() const { return arguments_and_redirections.arguments; } - + const exec_redirection_list_t &redirections() const { return arguments_and_redirections.redirections; @@ -102,10 +102,10 @@ struct exec_function_header_t { // Node containing the function name node_offset_t name_idx; - + // Node containing the function body node_offset_t body_idx; - + // Arguments exec_argument_list_t arguments; }; @@ -138,23 +138,32 @@ struct parse_execution_visitor_t { node_offset_t node_idx; parse_execution_context_t *context; - + parse_execution_visitor_t() : node_idx(0), context(NULL) { } - - virtual bool enter_job_list(void) { return true; } - virtual bool enter_job(void) { return true; } + + virtual bool enter_job_list(void) + { + return true; + } + virtual bool enter_job(void) + { + return true; + } virtual void visit_statement(void) { } virtual void visit_function(const exec_function_header_t &function) { } - virtual bool enter_block_statement(const exec_block_statement_t &statement) { return true; } - + virtual bool enter_block_statement(const exec_block_statement_t &statement) + { + return true; + } + virtual void enter_if_clause(const exec_if_clause_t &statement) { } virtual void exit_if_clause(const exec_if_clause_t &statement) { } - + virtual void visit_switch_statement(const exec_switch_statement_t &header) { } - + virtual void visit_boolean_statement(void) { } virtual void visit_basic_statement(const exec_basic_statement_t &statement) { } virtual void exit_job(void) { } diff --git a/parse_tree.cpp b/parse_tree.cpp index 
24bf41e7c..aea3e729f 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1,4 +1,4 @@ -#include "parse_tree.h" +#include "parse_tree_construction.h" #include "tokenizer.h" #include @@ -11,26 +11,28 @@ wcstring parse_error_t::describe(const wcstring &src) const { // Locate the beginning of this line of source size_t line_start = 0; - + // Look for a newline prior to source_start. If we don't find one, start at the beginning of the string; otherwise start one past the newline size_t newline = src.find_last_of(L'\n', source_start); + fprintf(stderr, "newline: %lu, source_start %lu, source_length %lu\n", newline, source_start, source_length); if (newline != wcstring::npos) { - line_start = newline + 1; + line_start = newline;// + 1; } - + size_t line_end = src.find(L'\n', source_start + source_length); if (line_end == wcstring::npos) { line_end = src.size(); } assert(line_end >= line_start); + fprintf(stderr, "source start: %lu, line start %lu\n", source_start, line_start); assert(source_start >= line_start); - + // Append the line of text result.push_back(L'\n'); result.append(src, line_start, line_end - line_start); - + // Append the caret line result.push_back(L'\n'); result.append(source_start - line_start, L' '); @@ -43,68 +45,115 @@ wcstring token_type_description(parse_token_type_t type) { switch (type) { - case token_type_invalid: return L"invalid"; - - case symbol_job_list: return L"job_list"; - case symbol_job: return L"job"; - case symbol_job_continuation: return L"job_continuation"; - - case symbol_statement: return L"statement"; - case symbol_block_statement: return L"block_statement"; - case symbol_block_header: return L"block_header"; - case symbol_for_header: return L"for_header"; - case symbol_while_header: return L"while_header"; - case symbol_begin_header: return L"begin_header"; - case symbol_function_header: return L"function_header"; - - case symbol_if_statement: return L"if_statement"; - case symbol_if_clause: return L"if_clause"; - case 
symbol_else_clause: return L"else_clause"; - case symbol_else_continuation: return L"else_continuation"; - - case symbol_switch_statement: return L"switch_statement"; - case symbol_case_item_list: return L"case_item_list"; - case symbol_case_item: return L"case_item"; - - case symbol_argument_list_nonempty: return L"argument_list_nonempty"; - case symbol_argument_list: return L"argument_list"; - - case symbol_boolean_statement: return L"boolean_statement"; - case symbol_decorated_statement: return L"decorated_statement"; - case symbol_plain_statement: return L"plain_statement"; - case symbol_arguments_or_redirections_list: return L"arguments_or_redirections_list"; - case symbol_argument_or_redirection: return L"argument_or_redirection"; - - case parse_token_type_string: return L"token_string"; - case parse_token_type_pipe: return L"token_pipe"; - case parse_token_type_redirection: return L"token_redirection"; - case parse_token_background: return L"token_background"; - case parse_token_type_end: return L"token_end"; - case parse_token_type_terminate: return L"token_terminate"; - - default: return format_string(L"Unknown token type %ld", static_cast(type)); + case token_type_invalid: + return L"invalid"; + + case symbol_job_list: + return L"job_list"; + case symbol_job: + return L"job"; + case symbol_job_continuation: + return L"job_continuation"; + + case symbol_statement: + return L"statement"; + case symbol_block_statement: + return L"block_statement"; + case symbol_block_header: + return L"block_header"; + case symbol_for_header: + return L"for_header"; + case symbol_while_header: + return L"while_header"; + case symbol_begin_header: + return L"begin_header"; + case symbol_function_header: + return L"function_header"; + + case symbol_if_statement: + return L"if_statement"; + case symbol_if_clause: + return L"if_clause"; + case symbol_else_clause: + return L"else_clause"; + case symbol_else_continuation: + return L"else_continuation"; + + case 
symbol_switch_statement: + return L"switch_statement"; + case symbol_case_item_list: + return L"case_item_list"; + case symbol_case_item: + return L"case_item"; + + case symbol_argument_list_nonempty: + return L"argument_list_nonempty"; + case symbol_argument_list: + return L"argument_list"; + + case symbol_boolean_statement: + return L"boolean_statement"; + case symbol_decorated_statement: + return L"decorated_statement"; + case symbol_plain_statement: + return L"plain_statement"; + case symbol_arguments_or_redirections_list: + return L"arguments_or_redirections_list"; + case symbol_argument_or_redirection: + return L"argument_or_redirection"; + + case parse_token_type_string: + return L"token_string"; + case parse_token_type_pipe: + return L"token_pipe"; + case parse_token_type_redirection: + return L"token_redirection"; + case parse_token_type_background: + return L"token_background"; + case parse_token_type_end: + return L"token_end"; + case parse_token_type_terminate: + return L"token_terminate"; + case symbol_optional_background: + return L"optional_background"; } + return format_string(L"Unknown token type %ld", static_cast(type)); } wcstring keyword_description(parse_keyword_t k) { switch (k) { - case parse_keyword_none: return L"none"; - case parse_keyword_if: return L"if"; - case parse_keyword_else: return L"else"; - case parse_keyword_for: return L"for"; - case parse_keyword_in: return L"in"; - case parse_keyword_while: return L"while"; - case parse_keyword_begin: return L"begin"; - case parse_keyword_function: return L"function"; - case parse_keyword_switch: return L"switch"; - case parse_keyword_end: return L"end"; - case parse_keyword_and: return L"and"; - case parse_keyword_or: return L"or"; - case parse_keyword_not: return L"not"; - case parse_keyword_command: return L"command"; - case parse_keyword_builtin: return L"builtin"; + case parse_keyword_none: + return L"none"; + case parse_keyword_if: + return L"if"; + case parse_keyword_else: + return 
L"else"; + case parse_keyword_for: + return L"for"; + case parse_keyword_in: + return L"in"; + case parse_keyword_while: + return L"while"; + case parse_keyword_begin: + return L"begin"; + case parse_keyword_function: + return L"function"; + case parse_keyword_switch: + return L"switch"; + case parse_keyword_end: + return L"end"; + case parse_keyword_and: + return L"and"; + case parse_keyword_or: + return L"or"; + case parse_keyword_not: + return L"not"; + case parse_keyword_command: + return L"command"; + case parse_keyword_builtin: + return L"builtin"; default: return format_string(L"Unknown keyword type %ld", static_cast(k)); } @@ -123,7 +172,7 @@ struct parse_token_t enum parse_keyword_t keyword; // Any keyword represented by this parser size_t source_start; size_t source_length; - + wcstring describe() const; }; @@ -147,19 +196,28 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_ case TOK_STRING: result.type = parse_token_type_string; break; - + case TOK_PIPE: result.type = parse_token_type_pipe; break; - + case TOK_END: result.type = parse_token_type_end; break; - + case TOK_BACKGROUND: - result.type = parse_token_background; + result.type = parse_token_type_background; break; + case TOK_REDIRECT_OUT: + case TOK_REDIRECT_APPEND: + case TOK_REDIRECT_IN: + case TOK_REDIRECT_FD: + case TOK_REDIRECT_NOCLOB: + result.type = parse_token_type_redirection; + break; + + default: fprintf(stderr, "Bad token type %d passed to %s\n", (int)tokenizer_token_type, __FUNCTION__); assert(0); @@ -172,15 +230,15 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & { assert(start < nodes.size()); const parse_node_t &node = nodes.at(start); - + const size_t spacesPerIndent = 2; - + // unindent statement lists by 1 to flatten them if (node.type == symbol_job_list || node.type == symbol_arguments_or_redirections_list) { if (indent > 0) indent -= 1; } - + append_format(*result, L"%2lu - %l2u ", *line, start); 
result->append(indent * spacesPerIndent, L' ');; result->append(node.describe()); @@ -206,7 +264,7 @@ static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) { if (nodes.empty()) return L"(empty!)"; - + size_t line = 0; wcstring result; dump_tree_recursive(nodes, src, 0, 0, &result, &line); @@ -218,15 +276,15 @@ struct parse_stack_element_t enum parse_token_type_t type; enum parse_keyword_t keyword; node_offset_t node_idx; - + parse_stack_element_t(parse_token_type_t t) : type(t), keyword(parse_keyword_none), node_idx(-1) { } - + parse_stack_element_t(parse_keyword_t k) : type(parse_token_type_string), keyword(k), node_idx(-1) { } - + wcstring describe(void) const { wcstring result = token_type_description(type); @@ -242,13 +300,13 @@ struct parse_stack_element_t class parse_ll_t { friend class parse_t; - + std::vector symbol_stack; // LL parser stack parse_node_tree_t nodes; - + bool fatal_errored; parse_error_list_t errors; - + // Constructor parse_ll_t() : fatal_errored(false) { @@ -258,33 +316,30 @@ class parse_ll_t symbol_stack.push_back(elem); // goal token nodes.push_back(parse_node_t(symbol_job_list)); } - + bool top_node_match_token(parse_token_t token); - + // implementation of certain parser constructions void accept_token(parse_token_t token, const wcstring &src); void accept_token_job_list(parse_token_t token); void accept_token_job(parse_token_t token); void accept_token_job_continuation(parse_token_t token); - void accept_token_statement(parse_token_t token); - void accept_token_block_header(parse_token_t token); void accept_token_else_clause(parse_token_t token); void accept_token_else_continuation(parse_token_t token); - void accept_token_boolean_statement(parse_token_t token); - void accept_token_decorated_statement(parse_token_t token); void accept_token_plain_statement(parse_token_t token); void accept_token_argument_list(parse_token_t token); void accept_token_arguments_or_redirections_list(parse_token_t token); void 
accept_token_argument_or_redirection(parse_token_t token); bool accept_token_string(parse_token_t token); - + void token_unhandled(parse_token_t token, const char *function); - + void parse_error(const wchar_t *expected, parse_token_t token); + void parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); - + void dump_stack(void) const; - + // Get the node corresponding to the top element of the stack parse_node_t &node_for_top_symbol() { @@ -294,17 +349,17 @@ class parse_ll_t PARSE_ASSERT(top_symbol.node_idx < nodes.size()); return nodes.at(top_symbol.node_idx); } - + parse_token_type_t stack_top_type() const { return symbol_stack.back().type; } - + void top_node_set_tag(uint32_t tag) { this->node_for_top_symbol().tag = tag; } - + inline void add_child_to_node(size_t parent_node_idx, parse_stack_element_t *tok) { PARSE_ASSERT(tok->type != token_type_invalid); @@ -312,19 +367,19 @@ class parse_ll_t nodes.push_back(parse_node_t(tok->type)); nodes.at(parent_node_idx).child_count += 1; } - + inline void symbol_stack_pop() { symbol_stack.pop_back(); } - + // Pop from the top of the symbol stack, then push, updating node counts. Note that these are pushed in reverse order, so the first argument will be on the top of the stack. inline void symbol_stack_pop_push_int(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) { - + // Logging? 
- if (1) + if (0) { fprintf(stderr, "Pop %ls (%lu)\n", token_type_description(symbol_stack.back().type).c_str(), symbol_stack.size()); if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok5.describe().c_str()); @@ -333,17 +388,17 @@ class parse_ll_t if (tok2.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok2.describe().c_str()); if (tok1.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok1.describe().c_str()); } - + // Get the node for the top symbol and tell it about its children size_t node_idx = symbol_stack.back().node_idx; parse_node_t &node = nodes.at(node_idx); - + // Should have no children yet PARSE_ASSERT(node.child_count == 0); - + // Tell the node where its children start node.child_start = nodes.size(); - + // Add nodes for the children // Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) if (tok1.type != token_type_invalid) add_child_to_node(node_idx, &tok1); @@ -351,7 +406,7 @@ class parse_ll_t if (tok3.type != token_type_invalid) add_child_to_node(node_idx, &tok3); if (tok4.type != token_type_invalid) add_child_to_node(node_idx, &tok4); if (tok5.type != token_type_invalid) add_child_to_node(node_idx, &tok5); - + // The above set the node_idx. Now replace the top of the stack. 
symbol_stack.pop_back(); if (tok5.type != token_type_invalid) symbol_stack.push_back(tok5); @@ -360,31 +415,69 @@ class parse_ll_t if (tok2.type != token_type_invalid) symbol_stack.push_back(tok2); if (tok1.type != token_type_invalid) symbol_stack.push_back(tok1); } - + template - inline void symbol_stack_pop_push2() + inline void symbol_stack_pop_push2(typename T::magic_seq_type_t x = 0) { symbol_stack_pop_push_int(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token()); } - + template - inline void symbol_stack_pop_push_production(int which) + inline void symbol_stack_pop_push2(typename T::magic_symbol_type_t x = 0) { + symbol_stack_pop_push_int(T::get_token()); + } + + // Singular. Sole productions are always of type Seq. + template + inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_seq_type_t magic=0) + { + typedef typename T::sole_production seq; + symbol_stack_pop_push_int(seq::t0::get_token(), seq::t1::get_token(), seq::t2::get_token(), seq::t3::get_token(), seq::t4::get_token()); + } + + // Plural productions, of type Or. 
+ template + inline void symbol_stack_produce(parse_token_t tok, typename T::productions::magic_or_type_t magic=0) + { + typedef typename T::productions ors; + int which = T::production(tok.type, tok.keyword); switch (which) { - case 0: symbol_stack_pop_push2(); break; - case 1: symbol_stack_pop_push2(); break; - case 2: symbol_stack_pop_push2(); break; - case 3: symbol_stack_pop_push2(); break; - case 4: symbol_stack_pop_push2(); break; + case 0: + symbol_stack_pop_push2(); + break; + case 1: + symbol_stack_pop_push2(); + break; + case 2: + symbol_stack_pop_push2(); + break; + case 3: + symbol_stack_pop_push2(); + break; + case 4: + symbol_stack_pop_push2(); + break; + + case NO_PRODUCTION: + parse_error(tok, L"Failed to produce with stack top '%ls' for token '%ls'\n", symbol_stack.back().describe().c_str(), tok.describe().c_str()); + break; + + default: + parse_error(tok, L"Unexpected production %d for token %ls\n", which, tok.describe().c_str()); + break; } } + // Non-sequence basic productions template - inline void symbol_stack_produce(parse_token_t tok) + inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_symbol_type_t magic=0) { - symbol_stack_pop_push_production(T::production(tok.type, tok.keyword)); + symbol_stack_pop_push_int(T::sole_production::get_token()); } + + }; void parse_ll_t::dump_stack(void) const @@ -410,7 +503,7 @@ void parse_ll_t::dump_stack(void) const } } } - + fprintf(stderr, "Stack dump (%lu elements):\n", symbol_stack.size()); for (size_t idx = 0; idx < lines.size(); idx++) { @@ -422,9 +515,31 @@ void parse_ll_t::token_unhandled(parse_token_t token, const char *function) { fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function); this->dump_stack(); - PARSER_DIE(); + parse_error_t err; + err.text = format_string(L"Unhandled token with type %ls in function %s", token_type_description(token.type).c_str(), function); + err.source_start = 
token.source_start; + err.source_length = token.source_length; + this->errors.push_back(err); + this->fatal_errored = true; } +void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) +{ + this->dump_stack(); + parse_error_t err; + + va_list va; + va_start(va, fmt); + err.text = vformat_string(fmt, va); + va_end(va); + + err.source_start = token.source_start; + err.source_length = token.source_length; + this->errors.push_back(err); + this->fatal_errored = true; +} + + void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) { wcstring desc = token_type_description(token.type); @@ -436,172 +551,6 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } -void parse_ll_t::accept_token_job_list(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_job_list); - switch (token.type) - { - case parse_token_type_string: - // 'end' is special - switch (token.keyword) - { - case parse_keyword_end: - case parse_keyword_else: - // End this job list - symbol_stack_pop_push_production(0); - break; - - default: - // Normal string - symbol_stack_pop_push_production(1); - break; - } - break; - - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_background: - symbol_stack_pop_push_production(1); - break; - - case parse_token_type_end: - // Empty line - symbol_stack_pop_push_production(2); - break; - - case parse_token_type_terminate: - // no more commands, just transition to empty - symbol_stack_pop_push_production(0); - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } -} - -void parse_ll_t::accept_token_job_continuation(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_job_continuation); - switch (token.type) - { - case parse_token_type_pipe: - // Pipe, continuation - symbol_stack_pop_push_production(1); - break; - - default: - // Not a pipe, no job continuation - symbol_stack_pop_push_production(0); - break; - } -} - 
- -void parse_ll_t::accept_token_statement(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_statement); - switch (token.type) - { - case parse_token_type_string: - switch (token.keyword) - { - case parse_keyword_and: - case parse_keyword_or: - case parse_keyword_not: - symbol_stack_pop_push_production(0); - break; - - case parse_keyword_for: - case parse_keyword_while: - case parse_keyword_function: - case parse_keyword_begin: - symbol_stack_pop_push_production(1); - break; - - case parse_keyword_if: - symbol_stack_pop_push_production(2); - break; - - case parse_keyword_else: - symbol_stack_pop(); - break; - - case parse_keyword_switch: - symbol_stack_pop_push_production(3); - break; - - case parse_keyword_end: - PARSER_DIE(); //todo - break; - - // 'in' is only special within a for_header - case parse_keyword_in: - case parse_keyword_none: - case parse_keyword_command: - case parse_keyword_builtin: - case parse_keyword_case: - symbol_stack_pop_push_production(4); - break; - - } - break; - - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_background: - case parse_token_type_terminate: - parse_error(L"statement", token); - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } -} - -void parse_ll_t::accept_token_block_header(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_block_header); - switch (token.type) - { - case parse_token_type_string: - switch (token.keyword) - { - case parse_keyword_else: - PARSER_DIE(); //todo - break; - - case parse_keyword_for: - symbol_stack_pop_push_production(0); - break; - - - case parse_keyword_while: - symbol_stack_pop_push_production(1); - break; - - case parse_keyword_function: - symbol_stack_pop_push_production(2); - break; - - case parse_keyword_begin: - symbol_stack_pop_push_production(3); - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - - } - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } 
-} - void parse_ll_t::accept_token_else_clause(parse_token_t token) { PARSE_ASSERT(stack_top_type() == symbol_else_clause); @@ -614,25 +563,6 @@ void parse_ll_t::accept_token_else_continuation(parse_token_t token) symbol_stack_produce(token); } -void parse_ll_t::accept_token_boolean_statement(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_boolean_statement); - top_node_set_tag(token.keyword); - symbol_stack_produce(token); -} - -void parse_ll_t::accept_token_decorated_statement(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_decorated_statement); - top_node_set_tag(token.keyword); - symbol_stack_produce(token); -} - -void parse_ll_t::accept_token_plain_statement(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_plain_statement); - symbol_stack_produce(token); -} void parse_ll_t::accept_token_argument_list(parse_token_t token) { @@ -664,7 +594,7 @@ bool parse_ll_t::accept_token_string(parse_token_t token) symbol_stack_pop(); result = true; break; - + default: token_unhandled(token, __FUNCTION__); break; @@ -687,7 +617,7 @@ bool parse_ll_t::top_node_match_token(parse_token_t token) parse_node_t &node = node_for_top_symbol(); node.source_start = token.source_start; node.source_length = token.source_length; - + // We consumed this symbol symbol_stack.pop_back(); result = true; @@ -704,7 +634,7 @@ bool parse_ll_t::top_node_match_token(parse_token_t token) void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) { - bool logit = true; + bool logit = false; if (logit) { const wcstring txt = wcstring(src, token.source_start, token.source_length); @@ -724,107 +654,113 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) consumed = true; break; } - + switch (stack_top_type()) { - /* Symbols */ + /* Symbols */ case symbol_job_list: - accept_token_job_list(token); + symbol_stack_produce(token); break; - + case symbol_job: - symbol_stack_pop_push2(); + symbol_stack_produce(token); break; - 
+ case symbol_job_continuation: - accept_token_job_continuation(token); + symbol_stack_produce(token); break; case symbol_statement: - accept_token_statement(token); + symbol_stack_produce(token); break; - + case symbol_if_statement: symbol_stack_produce(token); break; - + case symbol_if_clause: symbol_stack_produce(token); break; - + case symbol_else_clause: accept_token_else_clause(token); break; - + case symbol_else_continuation: accept_token_else_continuation(token); break; - + case symbol_block_statement: symbol_stack_produce(token); break; - + case symbol_block_header: symbol_stack_produce(token); break; - + case symbol_for_header: symbol_stack_produce(token); break; - + case symbol_while_header: symbol_stack_produce(token); break; - + case symbol_begin_header: symbol_stack_produce(token); break; - + case symbol_function_header: symbol_stack_produce(token); break; - + case symbol_switch_statement: symbol_stack_produce(token); break; - + case symbol_case_item_list: symbol_stack_produce(token); break; - + case symbol_case_item: symbol_stack_produce(token); break; - + case symbol_boolean_statement: - accept_token_boolean_statement(token); + top_node_set_tag(token.keyword); + symbol_stack_produce(token); break; - + case symbol_decorated_statement: - accept_token_decorated_statement(token); + top_node_set_tag(token.keyword); + symbol_stack_produce(token); break; - + case symbol_plain_statement: - accept_token_plain_statement(token); + symbol_stack_produce(token); break; - + case symbol_argument_list_nonempty: symbol_stack_produce(token); break; - + case symbol_argument_list: accept_token_argument_list(token); break; - + case symbol_arguments_or_redirections_list: accept_token_arguments_or_redirections_list(token); break; - + case symbol_argument_or_redirection: accept_token_argument_or_redirection(token); break; - - /* Tokens */ + + case symbol_optional_background: + symbol_stack_produce(token); + break; + + /* Tokens */ case parse_token_type_string: consumed = 
accept_token_string(token); break; @@ -846,11 +782,13 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) parse_keyword_t result = parse_keyword_none; if (tok == TOK_STRING) { - - const struct { + + const struct + { const wchar_t *txt; parse_keyword_t keyword; - } keywords[] = { + } keywords[] = + { {L"if", parse_keyword_if}, {L"else", parse_keyword_else}, {L"for", parse_keyword_for}, @@ -867,7 +805,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) {L"command", parse_keyword_command}, {L"builtin", parse_keyword_builtin} }; - + for (size_t i=0; i < sizeof keywords / sizeof *keywords; i++) { if (! wcscmp(keywords[i].txt, tok_txt)) @@ -888,36 +826,40 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_ token_type tok_type = static_cast(tok_last_type(&tok)); const wchar_t *tok_txt = tok_last(&tok); int tok_start = tok_get_pos(&tok); - + size_t tok_extent = tok_get_extent(&tok); + if (tok_type == TOK_ERROR) { fprintf(stderr, "Tokenizer error\n"); break; } - + parse_token_t token = parse_token_from_tokenizer_token(tok_type); token.tokenizer_type = tok_type; token.source_start = (size_t)tok_start; - token.source_length = wcslen(tok_txt); + token.source_length = tok_extent; token.keyword = keyword_for_token(tok_type, tok_txt); this->parser->accept_token(token, str); + + if (this->parser->fatal_errored) + break; } - wcstring result = dump_tree(this->parser->nodes, str); + wcstring result = L"";//dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); - + if (output != NULL) { output->swap(this->parser->nodes); this->parser->nodes.clear(); } - + if (errors != NULL) { errors->swap(this->parser->errors); this->parser->errors.clear(); } - + return ! 
this->parser->fatal_errored; } diff --git a/parse_tree.h b/parse_tree.h index 39e370af6..6b1fc0d19 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -26,11 +26,11 @@ struct parse_error_t { /** Text of the error */ wcstring text; - + /** Offset and length of the token in the source code that triggered this error */ size_t source_start; size_t source_length; - + /** Return a string describing the error, suitable for presentation to the user */ wcstring describe(const wcstring &src) const; }; @@ -40,8 +40,8 @@ class parse_ll_t; class parse_t { parse_ll_t * const parser; - - public: + +public: parse_t(); bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors); }; @@ -49,7 +49,7 @@ class parse_t enum parse_token_type_t { token_type_invalid, - + // Non-terminal tokens symbol_job_list, symbol_job, @@ -61,33 +61,35 @@ enum parse_token_type_t symbol_while_header, symbol_begin_header, symbol_function_header, - + symbol_if_statement, symbol_if_clause, symbol_else_clause, symbol_else_continuation, - + symbol_switch_statement, symbol_case_item_list, symbol_case_item, - + symbol_boolean_statement, symbol_decorated_statement, symbol_plain_statement, symbol_arguments_or_redirections_list, symbol_argument_or_redirection, - + symbol_argument_list_nonempty, symbol_argument_list, + + symbol_optional_background, // Terminal types parse_token_type_string, parse_token_type_pipe, parse_token_type_redirection, - parse_token_background, + parse_token_type_background, parse_token_type_end, parse_token_type_terminate, - + FIRST_PARSE_TOKEN_TYPE = parse_token_type_string }; @@ -117,32 +119,32 @@ wcstring keyword_description(parse_keyword_t type); /** Base class for nodes of a parse tree */ class parse_node_t { - public: - +public: + /* Type of the node */ enum parse_token_type_t type; - + /* Start in the source code */ size_t source_start; - + /* Length of our range in the source code */ size_t source_length; /* Children */ node_offset_t child_start; node_offset_t 
child_count; - + /* Type-dependent data */ uint32_t tag; - + /* Description */ wcstring describe(void) const; - + /* Constructor */ explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) { } - + node_offset_t child_offset(node_offset_t which) const { PARSE_ASSERT(which < child_count); @@ -154,360 +156,6 @@ class parse_node_tree_t : public std::vector { }; -namespace parse_symbols -{ - - #define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } - - #define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; } - - #define NO_PRODUCTION (-1) - - - template - struct Token - { - SYMBOL(WHICH); - - typedef Token t0; - typedef Token t1; - typedef Token t2; - typedef Token t3; - typedef Token t4; - typedef Token t5; - }; - - /* Placeholder */ - typedef Token none; - - struct EMPTY - { - typedef none t0; - typedef none t1; - typedef none t2; - typedef none t3; - typedef none t4; - typedef none t5; - }; - - template - struct Seq - { - typedef T0 t0; - typedef T1 t1; - typedef T2 t2; - typedef T3 t3; - typedef T4 t4; - typedef T5 t5; - }; - - template - struct OR - { - typedef P0 p0; - typedef P1 p1; - typedef P2 p2; - typedef P3 p3; - typedef P4 p4; - typedef P5 p5; - }; - - template - struct Keyword - { - static inline parse_keyword_t get_token() { return WHICH; } - }; - - struct job; - struct statement; - struct job_continuation; - struct boolean_statement; - struct block_statement; - struct if_statement; - struct if_clause; - struct else_clause; - struct else_continuation; - struct switch_statement; - struct decorated_statement; - struct switch_statement; - struct case_item_list; - struct case_item; - struct argument_list_nonempty; - struct argument_list; - struct block_statement; - struct block_header; - struct for_header; - struct while_header; - struct begin_header; - struct function_header; - struct boolean_statement; - struct 
decorated_statement; - struct plain_statement; - struct arguments_or_redirections_list; - struct argument_or_redirection; - struct redirection; - struct statement_terminator; - - /* A job_list is a list of jobs, separated by semicolons or newlines */ - struct job_list : OR< - EMPTY, - Seq, - Seq, job_list> - > - { - SYMBOL(symbol_job_list) - }; - - /* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ - struct job : Seq - { - SYMBOL(symbol_job); - }; - - struct job_continuation : OR< - EMPTY, - Seq, statement, job_continuation> - > - { - SYMBOL(symbol_job_continuation); - }; - - /* A statement is a normal command, or an if / while / and etc */ - struct statement : OR< - boolean_statement, - block_statement, - if_statement, - switch_statement, - decorated_statement - > - { - SYMBOL(symbol_statement); - }; - - struct if_statement : Seq > - { - SYMBOL(symbol_if_statement); - PRODUCE(0) - }; - - struct if_clause : Seq, job, statement_terminator, job_list> - { - SYMBOL(symbol_if_clause); - PRODUCE(0) - }; - - struct else_clause : OR< - EMPTY, - Keyword, else_continuation - > - { - SYMBOL(symbol_else_clause); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_else: return 1; - default: return 0; - } - } - }; - - struct else_continuation : OR< - Seq, - Seq - > - { - SYMBOL(symbol_else_continuation); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_if: return 0; - default: return 1; - } - } - }; - - struct switch_statement : Seq, Token, statement_terminator, case_item_list, Keyword - > - { - SYMBOL(symbol_switch_statement); - }; - - struct case_item_list : OR - < - EMPTY, - case_item, case_item_list - > - { - SYMBOL(symbol_case_item_list); - - static int 
production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_case: return 1; - default: return 0; - } - } - }; - - struct case_item : Seq, argument_list, statement_terminator, job_list> - { - SYMBOL(symbol_case_item); - }; - - struct argument_list_nonempty : Seq, argument_list> - { - SYMBOL(symbol_argument_list_nonempty); - }; - - struct argument_list : OR - { - SYMBOL(symbol_argument_list); - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: return 1; - default: return 0; - } - } - }; - - struct block_statement : Seq, arguments_or_redirections_list> - { - SYMBOL(symbol_block_statement); - PRODUCE(0) - }; - - struct block_header : OR - { - SYMBOL(symbol_block_header); - }; - - struct for_header : Seq, Token, Keyword, arguments_or_redirections_list> - { - SYMBOL(symbol_for_header); - }; - - struct while_header : Seq, statement> - { - SYMBOL(symbol_while_header); - }; - - struct begin_header : Keyword - { - SYMBOL(symbol_begin_header); - }; - - struct function_header : Keyword - { - SYMBOL(symbol_function_header); - }; - - /* A boolean statement is AND or OR or NOT */ - struct boolean_statement : OR< - Seq, statement>, - Seq, statement>, - Seq, statement> - > - { - SYMBOL(symbol_boolean_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_and: return 0; - case parse_keyword_or: return 1; - case parse_keyword_not: return 2; - default: return NO_PRODUCTION; - } - } - }; - - /* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ - struct decorated_statement : OR< - Seq, plain_statement>, - Seq, plain_statement>, - plain_statement - > - { - SYMBOL(symbol_decorated_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_command: return 0; - case 
parse_keyword_builtin: return 1; - default: return 2; - } - } - }; - - struct plain_statement : Seq, arguments_or_redirections_list> - { - SYMBOL(symbol_plain_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - return 0; - } - - }; - - struct arguments_or_redirections_list : OR< - EMPTY, - Seq > - { - SYMBOL(symbol_arguments_or_redirections_list); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: - case parse_token_type_redirection: - return 1; - default: - return 0; - } - } - }; - - struct argument_or_redirection : OR< - Token, - redirection - > - { - SYMBOL(symbol_argument_or_redirection); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: return 0; - case parse_token_type_redirection: return 1; - default: return NO_PRODUCTION; - } - } - }; - - struct redirection : Token - { - SYMBOL(parse_token_type_redirection); - }; - - struct statement_terminator : Token - { - SYMBOL(parse_token_type_end); - }; -} - /* Fish grammar: @@ -520,45 +168,45 @@ namespace parse_symbols # A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). 
To represent "non-empty", we require a statement, followed by a possibly empty job_continuation job = statement job_continuation - job_continuation = | + job_continuation = | statement job_continuation # A statement is a normal command, or an if / while / and etc statement = boolean_statement | block_statement | if_statement | switch_statement | decorated_statement - + # A block is a conditional, loop, or begin/end - if_statement = if_clause else_clause + if_statement = if_clause else_clause arguments_or_redirections_list if_clause = job STATEMENT_TERMINATOR job_list else_clause = | else_continuation else_continuation = if_clause else_clause | STATEMENT_TERMINATOR job_list - + switch_statement = SWITCH STATEMENT_TERMINATOR case_item_list case_item_list = | case_item case_item_list case_item = CASE argument_list STATEMENT_TERMINATOR job_list - + argument_list_nonempty = argument_list argument_list = | argument_list_nonempty - block_statement = block_header STATEMENT_TERMINATOR job_list arguments_or_redirections_list + block_statement = block_header job_list arguments_or_redirections_list block_header = for_header | while_header | function_header | begin_header for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement begin_header = BEGIN function_header = FUNCTION function_name argument_list - + # A boolean statement is AND or OR or NOT boolean_statement = AND statement | OR statement | NOT statement - + # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = COMMAND arguments_or_redirections_list + plain_statement = COMMAND arguments_or_redirections_list optional_background arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list @@ -567,6 +215,8 @@ namespace parse_symbols terminator = | + optional_background = | + */ #endif diff 
--git a/parse_tree_construction.h b/parse_tree_construction.h new file mode 100644 index 000000000..fb9e8dfbf --- /dev/null +++ b/parse_tree_construction.h @@ -0,0 +1,586 @@ +/**\file parse_tree.h + + Programmatic representation of fish code. +*/ + +#ifndef FISH_PARSE_TREE_CONSTRUCTION_H +#define FISH_PARSE_TREE_CONSTRUCTION_H + +#include "parse_tree.h" + +/* Terrifying template black magic. */ + +namespace parse_symbols +{ + +#define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } + +#define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; } + +#define NO_PRODUCTION (-1) + +struct Symbol +{ + typedef int magic_symbol_type_t; +}; + +template +struct Token : public Symbol +{ + SYMBOL(WHICH); +}; + +/* Placeholder */ +typedef Token none; + +typedef Token EMPTY; + +template +struct Seq +{ + typedef T0 t0; + typedef T1 t1; + typedef T2 t2; + typedef T3 t3; + typedef T4 t4; + typedef T5 t5; + + typedef int magic_seq_type_t; +}; + +template +struct OR +{ + typedef P0 p0; + typedef P1 p1; + typedef P2 p2; + typedef P3 p3; + typedef P4 p4; + typedef P5 p5; + + typedef int magic_or_type_t; +}; + +template +struct Keyword : public Symbol +{ + static inline parse_keyword_t get_token() + { + return WHICH; + } +}; + +struct job; +struct statement; +struct job_continuation; +struct boolean_statement; +struct block_statement; +struct if_statement; +struct if_clause; +struct else_clause; +struct else_continuation; +struct switch_statement; +struct decorated_statement; +struct switch_statement; +struct case_item_list; +struct case_item; +struct argument_list_nonempty; +struct argument_list; +struct block_statement; +struct block_header; +struct for_header; +struct while_header; +struct begin_header; +struct function_header; +struct boolean_statement; +struct decorated_statement; +struct plain_statement; +struct arguments_or_redirections_list; +struct argument_or_redirection; +struct redirection; +struct 
statement_terminator; +struct optional_background; + +/* A job_list is a list of jobs, separated by semicolons or newlines */ +struct job_list : public Symbol +{ + typedef OR< + EMPTY, + Seq, + Seq, job_list> + > productions; + + SYMBOL(symbol_job_list) + + static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) + { + switch (token_type) + { + case parse_token_type_string: + // 'end' is special + switch (token_keyword) + { + case parse_keyword_end: + case parse_keyword_else: + // End this job list + return 0; + + default: + // Normal string + return 1; + } + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + return 1; + + case parse_token_type_end: + // Empty line + return 2; + + case parse_token_type_terminate: + // no more commands, just transition to empty + return 0; + break; + + default: + return NO_PRODUCTION; + } + } + +}; + +/* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). 
To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ +struct job : public Symbol +{ + typedef Seq sole_production; + SYMBOL(symbol_job); +}; + +struct job_continuation : public Symbol +{ + typedef OR< + EMPTY, + Seq, statement, job_continuation> + > productions; + + SYMBOL(symbol_job_continuation); + + static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) + { + switch (token_type) + { + case parse_token_type_pipe: + // Pipe, continuation + return 1; + + default: + // Not a pipe, no job continuation + return 0; + } + + } +}; + +/* A statement is a normal command, or an if / while / and etc */ +struct statement : public Symbol +{ + typedef OR< + boolean_statement, + block_statement, + if_statement, + switch_statement, + decorated_statement + > productions; + + SYMBOL(symbol_statement); + + static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) + { + switch (token_type) + { + case parse_token_type_string: + switch (token_keyword) + { + case parse_keyword_and: + case parse_keyword_or: + case parse_keyword_not: + return 0; + + case parse_keyword_for: + case parse_keyword_while: + case parse_keyword_function: + case parse_keyword_begin: + return 1; + + case parse_keyword_if: + return 2; + + case parse_keyword_else: + //symbol_stack_pop(); + return NO_PRODUCTION; + + case parse_keyword_switch: + return 3; + + case parse_keyword_end: + PARSER_DIE(); //todo + return NO_PRODUCTION; + + // 'in' is only special within a for_header + case parse_keyword_in: + case parse_keyword_none: + case parse_keyword_command: + case parse_keyword_builtin: + case parse_keyword_case: + return 4; + } + break; + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + case parse_token_type_terminate: + return NO_PRODUCTION; + //parse_error(L"statement", token); + + default: + return NO_PRODUCTION; + } + } + +}; + +struct if_statement : public 
Symbol +{ + typedef Seq, arguments_or_redirections_list> sole_production; + SYMBOL(symbol_if_statement); +}; + +struct if_clause : public Symbol +{ + typedef Seq, job, statement_terminator, job_list> sole_production; + SYMBOL(symbol_if_clause); +}; + +struct else_clause : public Symbol +{ + typedef OR< + EMPTY, + Seq, else_continuation> + > productions; + + SYMBOL(symbol_else_clause); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_else: + return 1; + default: + return 0; + } + } +}; + +struct else_continuation : public Symbol +{ + typedef OR< + Seq, + Seq + > productions; + + SYMBOL(symbol_else_continuation); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_if: + return 0; + default: + return 1; + } + } +}; + +struct switch_statement : public Symbol +{ + typedef Seq, + Token, + statement_terminator, + case_item_list, + Keyword + > sole_production; + + SYMBOL(symbol_switch_statement); +}; + +struct case_item_list : public Symbol +{ + typedef OR + < + EMPTY, + Seq, + Seq, case_item_list> + > productions; + + SYMBOL(symbol_case_item_list); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_case: return 1; + + default: + if (tok == parse_token_type_end) + { + /* empty line */ + return 2; + } + else + { + return 0; + } + + } + } +}; + +struct case_item : public Symbol +{ + typedef Seq, argument_list, statement_terminator, job_list> sole_production; + + SYMBOL(symbol_case_item); +}; + +struct argument_list_nonempty : public Symbol +{ + typedef Seq, argument_list> sole_production; + SYMBOL(symbol_argument_list_nonempty); +}; + +struct argument_list : public Symbol +{ + typedef OR productions; + + SYMBOL(symbol_argument_list); + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: + return 1; + default: + 
return 0; + } + } +}; + +struct block_statement : public Symbol +{ + typedef Seq, arguments_or_redirections_list> sole_production; + + SYMBOL(symbol_block_statement); +}; + +struct block_header : public Symbol +{ + typedef OR productions; + + SYMBOL(symbol_block_header); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + // todo + case parse_keyword_else: + return NO_PRODUCTION; + case parse_keyword_for: + return 0; + case parse_keyword_while: + return 1; + case parse_keyword_function: + return 2; + case parse_keyword_begin: + return 3; + default: + return NO_PRODUCTION; + } + } +}; + +struct for_header : public Symbol +{ + typedef Seq, Token, Keyword, arguments_or_redirections_list> sole_production; + + SYMBOL(symbol_for_header); +}; + +struct while_header : public Symbol +{ + typedef Seq, statement> sole_production; + + SYMBOL(symbol_while_header); +}; + +struct begin_header : public Symbol +{ + typedef Keyword sole_production; + SYMBOL(symbol_begin_header); +}; + +struct function_header : public Symbol +{ + typedef Seq< Keyword, Token, argument_list> sole_production; + SYMBOL(symbol_function_header); +}; + +/* A boolean statement is AND or OR or NOT */ +struct boolean_statement : public Symbol +{ + typedef OR< + Seq, statement>, + Seq, statement>, + Seq, statement> + > productions; + + SYMBOL(symbol_boolean_statement); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_and: + return 0; + case parse_keyword_or: + return 1; + case parse_keyword_not: + return 2; + default: + return NO_PRODUCTION; + } + } +}; + +/* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ +struct decorated_statement : public Symbol +{ + + typedef OR< + Seq, plain_statement>, + Seq, plain_statement>, + plain_statement + > productions; + + SYMBOL(symbol_decorated_statement); + + static int production(parse_token_type_t 
tok, parse_keyword_t key) + { + switch (key) + { + case parse_keyword_command: + return 0; + case parse_keyword_builtin: + return 1; + default: + return 2; + } + } +}; + +struct plain_statement : public Symbol +{ + + typedef Seq, arguments_or_redirections_list, optional_background> sole_production; + + SYMBOL(symbol_plain_statement); + +}; + +struct arguments_or_redirections_list : public Symbol +{ + typedef OR< + EMPTY, + Seq > + productions; + + SYMBOL(symbol_arguments_or_redirections_list); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: + case parse_token_type_redirection: + return 1; + default: + return 0; + } + } +}; + +struct argument_or_redirection : public Symbol +{ + typedef OR< + Token, + redirection + > productions; + + + SYMBOL(symbol_argument_or_redirection); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_string: + return 0; + case parse_token_type_redirection: + return 1; + default: + return NO_PRODUCTION; + } + } +}; + +struct redirection : public Symbol +{ + typedef Token production; + SYMBOL(parse_token_type_redirection); +}; + +struct statement_terminator : public Symbol +{ + typedef Token production; + SYMBOL(parse_token_type_end); +}; + +struct optional_background : public Symbol +{ + typedef OR< + EMPTY, + Token + > productions; + + SYMBOL(symbol_optional_background); + + static int production(parse_token_type_t tok, parse_keyword_t key) + { + switch (tok) + { + case parse_token_type_background: + return 1; + default: + return 0; + } + } +}; + +} + +#endif diff --git a/parse_util.cpp b/parse_util.cpp index 5e6f4459b..6f2913455 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -245,10 +245,10 @@ void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wc const wchar_t * const cursor = buff + cursor_pos; CHECK(buff,); - + const size_t bufflen = wcslen(buff); assert(cursor_pos 
<= bufflen); - + /* ap and bp are the beginning and end of the tightest command substitition found so far */ const wchar_t *ap = buff, *bp = buff + bufflen; const wchar_t *pos = buff; @@ -260,13 +260,13 @@ void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wc /* No subshell found, all done */ break; } - + /* Intrepret NULL to mean the end */ if (end == NULL) { end = const_cast(buff) + bufflen; } - + if (begin < cursor && end >= cursor) { /* This command substitution surrounds the cursor, so it's a tighter fit */ @@ -288,7 +288,7 @@ void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wc assert(pos <= buff + bufflen); } } - + if (a != NULL) *a = ap; if (b != NULL) *b = bp; } diff --git a/tokenizer.cpp b/tokenizer.cpp index 831197ee5..6d99b46cd 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -651,13 +651,19 @@ wcstring tok_first(const wchar_t *str) return result; } -int tok_get_pos(tokenizer_t *tok) +int tok_get_pos(const tokenizer_t *tok) { CHECK(tok, 0); - return (int)tok->last_pos; } +size_t tok_get_extent(const tokenizer_t *tok) +{ + CHECK(tok, 0); + size_t current_pos = tok->buff - tok->orig_buff; + return current_pos > tok->last_pos ? 
current_pos - tok->last_pos : 0; +} + void tok_set_pos(tokenizer_t *tok, int pos) { diff --git a/tokenizer.h b/tokenizer.h index f2d6c0c0b..0f3ff3693 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -142,7 +142,10 @@ int tok_has_next(tokenizer_t *tok); /** Returns the position of the beginning of the current token in the original string */ -int tok_get_pos(tokenizer_t *tok); +int tok_get_pos(const tokenizer_t *tok); + +/** Returns the extent of the current token */ +size_t tok_get_extent(const tokenizer_t *tok); /** Returns the original string to tokenizer From 4f8d4f378cffa71b4e80bfa2049e2152b429615c Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 25 Jul 2013 15:24:22 -0700 Subject: [PATCH 018/108] AST no templates --- fish.xcodeproj/project.pbxproj | 8 ++- parse_productions.cpp | 63 +++++++++++++++++++ ...tree_construction.h => parse_productions.h | 61 ++++++++++++++++++ parse_tree.cpp | 2 +- parse_tree.h | 4 +- 5 files changed, 133 insertions(+), 5 deletions(-) create mode 100644 parse_productions.cpp rename parse_tree_construction.h => parse_productions.h (93%) diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 5ae10bfc4..708e65e30 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -152,6 +152,7 @@ D0F019FD15A977CA0034B3B1 /* config.fish in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0C4FD9415A7D7EE00212EF1 /* config.fish */; }; D0F01A0315A978910034B3B1 /* osx_fish_launcher.m in Sources */ = {isa = PBXBuildFile; fileRef = D0D02AFA159871B2008E62BD /* osx_fish_launcher.m */; }; D0F01A0515A978A10034B3B1 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D0CBD583159EEE010024809C /* Foundation.framework */; }; + D0FE8EE8179FB760008C9F21 /* parse_productions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -465,7 +466,8 @@ 
D0D2693C159835CA005D9B9C /* fish */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish; sourceTree = BUILT_PRODUCTS_DIR; }; D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_test.cpp; sourceTree = ""; }; D0F5E28415A7A32D00315DFF /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = ""; }; - D0FE8EE6179CA8A5008C9F21 /* parse_tree_construction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_tree_construction.h; sourceTree = ""; }; + D0FE8EE6179CA8A5008C9F21 /* parse_productions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_productions.h; sourceTree = ""; }; + D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_productions.cpp; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -589,8 +591,9 @@ D0A0853C13B3ACEE0099B651 /* exec.cpp */, D0A0850C13B3ACEE0099B651 /* expand.h */, D0A0853D13B3ACEE0099B651 /* expand.cpp */, + D0FE8EE6179CA8A5008C9F21 /* parse_productions.h */, + D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */, D0C52F361765284C00BFAB82 /* parse_tree.h */, - D0FE8EE6179CA8A5008C9F21 /* parse_tree_construction.h */, D0C52F351765284C00BFAB82 /* parse_tree.cpp */, D0C52F341765281F00BFAB82 /* parse_exec.h */, D0C52F331765281F00BFAB82 /* parse_exec.cpp */, @@ -1116,6 +1119,7 @@ D0D02A89159839DF008E62BD /* fish.cpp in Sources */, D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */, D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */, + D0FE8EE8179FB760008C9F21 /* parse_productions.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/parse_productions.cpp 
b/parse_productions.cpp new file mode 100644 index 000000000..82bdd0b91 --- /dev/null +++ b/parse_productions.cpp @@ -0,0 +1,63 @@ +#include "parse_productions.h" + +using namespace parse_productions; + +#define PRODUCTIONS(sym) static const Production_t sym##_productions + +PRODUCTIONS(job_list) = + { + {}, + {symbol_job, symbol_job_list}, + {parse_token_type_end, symbol_job_list} + }; + + + +/* A job_list is a list of jobs, separated by semicolons or newlines */ + +DEC(job_list) { + symbol_job_list, + { + {}, + {symbol_job, symbol_job_list}, + {parse_token_type_end, symbol_job_list} + }, + resolve_job_list +}; + +static int resolve_job_list(parse_token_type_t token_type, parse_keyword_t token_keyword) + { + switch (token_type) + { + case parse_token_type_string: + // 'end' is special + switch (token_keyword) + { + case parse_keyword_end: + case parse_keyword_else: + // End this job list + return 0; + + default: + // Normal string + return 1; + } + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + return 1; + + case parse_token_type_end: + // Empty line + return 2; + + case parse_token_type_terminate: + // no more commands, just transition to empty + return 0; + break; + + default: + return NO_PRODUCTION; + } + } \ No newline at end of file diff --git a/parse_tree_construction.h b/parse_productions.h similarity index 93% rename from parse_tree_construction.h rename to parse_productions.h index fb9e8dfbf..5ded6af00 100644 --- a/parse_tree_construction.h +++ b/parse_productions.h @@ -10,6 +10,67 @@ /* Terrifying template black magic. */ +/* + +- Get info for symbol +- Resolve production from info +- Get productions for children +- Get symbols for productions + +Production may be: + +1. Single value +2. Sequence of values (possibly empty) +3. Options of Single / Sequence + +Info to specify: + +1. Number of different productions +2. Resolver function +3. 
Symbols for associated productions + +Choice: should info be a class or a data? + +data: + +struct Symbol_t +{ + enum parse_token_type_t token_type; + int (*resolver)(parse_token_type_t tok, parse_keyword_t key); //may be trivial + production productions[5]; +} + +struct Production_t +{ + enum parse_token_type_t symbols[5]; +} + +*/ + +namespace parse_productions +{ + +#define MAX_PRODUCTIONS 5 +#define MAX_SYMBOLS_PER_PRODUCTION 5 + + + +struct Production_t +{ + enum parse_token_type_t symbols[MAX_SYMBOLS_PER_PRODUCTION]; +}; + +struct Symbol_t +{ + enum parse_token_type_t token_type; + int (*resolver)(parse_token_type_t tok, parse_keyword_t key); + Production_t productions[MAX_PRODUCTIONS]; +}; + + + +} + namespace parse_symbols { diff --git a/parse_tree.cpp b/parse_tree.cpp index aea3e729f..4df277d48 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1,4 +1,4 @@ -#include "parse_tree_construction.h" +#include "parse_productions.h" #include "tokenizer.h" #include diff --git a/parse_tree.h b/parse_tree.h index 6b1fc0d19..dfe9f24e9 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -3,8 +3,8 @@ Programmatic representation of fish code. 
*/ -#ifndef FISH_PARSE_TREE_H -#define FISH_PARSE_TREE_H +#ifndef FISH_PARSE_PRODUCTIONS_H +#define FISH_PARSE_PRODUCTIONS_H #include From 9dc91925e7bf4dc43936f7657a1a85cbd1ec4909 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 26 Jul 2013 23:59:12 -0700 Subject: [PATCH 019/108] Fewer templates --- parse_productions.cpp | 387 ++++++++++++++++++++++++++++++++++++------ parse_productions.h | 10 +- parse_tree.h | 3 +- 3 files changed, 346 insertions(+), 54 deletions(-) diff --git a/parse_productions.cpp b/parse_productions.cpp index 82bdd0b91..fba24c597 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -2,62 +2,351 @@ using namespace parse_productions; -#define PRODUCTIONS(sym) static const Production_t sym##_productions - -PRODUCTIONS(job_list) = - { - {}, - {symbol_job, symbol_job_list}, - {parse_token_type_end, symbol_job_list} - }; - - +#define PRODUCTIONS(sym) static const ProductionList_t sym##_productions +#define RESOLVE(sym) static int resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) +#define RESOLVE_ONLY(sym) static int resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) { return 0; } /* A job_list is a list of jobs, separated by semicolons or newlines */ - -DEC(job_list) { - symbol_job_list, - { - {}, - {symbol_job, symbol_job_list}, - {parse_token_type_end, symbol_job_list} - }, - resolve_job_list +PRODUCTIONS(job_list) = +{ + {}, + {symbol_job, symbol_job_list}, + {parse_token_type_end, symbol_job_list} }; -static int resolve_job_list(parse_token_type_t token_type, parse_keyword_t token_keyword) +RESOLVE(job_list) +{ + switch (token_type) { - switch (token_type) - { - case parse_token_type_string: - // 'end' is special - switch (token_keyword) - { - case parse_keyword_end: - case parse_keyword_else: - // End this job list - return 0; + case parse_token_type_string: + // 'end' is special + switch (token_keyword) + { + case parse_keyword_end: + case parse_keyword_else: + // End 
this job list + return 0; - default: - // Normal string - return 1; - } + default: + // Normal string + return 1; + } - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_type_background: - return 1; + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + return 1; - case parse_token_type_end: - // Empty line - return 2; + case parse_token_type_end: + // Empty line + return 2; - case parse_token_type_terminate: - // no more commands, just transition to empty - return 0; - break; + case parse_token_type_terminate: + // no more commands, just transition to empty + return 0; + break; + + default: + return NO_PRODUCTION; + } +} + +/* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ + +PRODUCTIONS(job) = +{ + {symbol_statement, symbol_job_continuation} +}; +RESOLVE_ONLY(job) + +PRODUCTIONS(job_continuation) = +{ + {}, + {parse_token_type_pipe, symbol_statement, symbol_job_continuation} +}; +RESOLVE(job_continuation) +{ + switch (token_type) + { + case parse_token_type_pipe: + // Pipe, continuation + return 1; + + default: + // Not a pipe, no job continuation + return 0; + } +} + +/* A statement is a normal command, or an if / while / and etc */ +PRODUCTIONS(statement) = +{ + {symbol_boolean_statement}, + {symbol_block_statement}, + {symbol_if_statement}, + {symbol_switch_statement}, + {symbol_decorated_statement} +}; +RESOLVE(statement) +{ + switch (token_type) + { + case parse_token_type_string: + switch (token_keyword) + { + case parse_keyword_and: + case parse_keyword_or: + case parse_keyword_not: + return 0; + + case parse_keyword_for: + case parse_keyword_while: + case parse_keyword_function: + case parse_keyword_begin: + return 1; + + case parse_keyword_if: + return 2; + + case 
parse_keyword_else: + //symbol_stack_pop(); + return NO_PRODUCTION; + + case parse_keyword_switch: + return 3; + + case parse_keyword_end: + PARSER_DIE(); //todo + return NO_PRODUCTION; + + // 'in' is only special within a for_header + case parse_keyword_in: + case parse_keyword_none: + case parse_keyword_command: + case parse_keyword_builtin: + case parse_keyword_case: + return 4; + } + break; + + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + case parse_token_type_terminate: + return NO_PRODUCTION; + //parse_error(L"statement", token); + + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(if_statement) = +{ + {symbol_if_clause, symbol_else_clause, PRODUCE_KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} +}; +RESOLVE_ONLY(if_statement) + +PRODUCTIONS(if_clause) = +{ + { PRODUCE_KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list } +}; +RESOLVE_ONLY(if_clause) + +PRODUCTIONS(else_clause) = +{ + { }, + { PRODUCE_KEYWORD(parse_keyword_else), symbol_else_continuation } +}; +RESOLVE(else_clause) +{ + switch (token_keyword) + { + case parse_keyword_else: + return 1; + default: + return 0; + } +} + +PRODUCTIONS(else_continuation) = +{ + {symbol_if_clause, symbol_else_clause}, + {parse_token_type_end, symbol_job_list} +}; +RESOLVE(else_continuation) +{ + switch (token_keyword) + { + case parse_keyword_if: + return 0; + default: + return 1; + } +} + +PRODUCTIONS(switch_statement) = +{ + { PRODUCE_KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, PRODUCE_KEYWORD(parse_keyword_end)} +}; +RESOLVE_ONLY(switch_statement) + +PRODUCTIONS(case_item_list) = +{ + {}, + {symbol_case_item, symbol_case_item_list}, + {parse_token_type_end, symbol_case_item_list} +}; +RESOLVE(case_item_list) +{ + if (token_keyword == parse_keyword_case) return 1; + else if (token_type == parse_token_type_end) return 2; //empty line + else return 0; +} 
+ +PRODUCTIONS(case_item) = +{ + {PRODUCE_KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list} +}; +RESOLVE_ONLY(case_item) + +PRODUCTIONS(argument_list_nonempty) = +{ + {parse_token_type_string, symbol_argument_list} +}; +RESOLVE_ONLY(argument_list_nonempty) + +PRODUCTIONS(argument_list) = +{ + {}, + {symbol_argument_list_nonempty} +}; +RESOLVE(argument_list) +{ + switch (token_type) + { + case parse_token_type_string: return 1; + default: return 0; + } +} + +PRODUCTIONS(block_statement) = +{ + {symbol_block_header, parse_token_type_end, symbol_job_list, PRODUCE_KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} +}; +RESOLVE_ONLY(block_statement) + +PRODUCTIONS(block_header) = +{ + {symbol_for_header}, + {symbol_while_header}, + {symbol_function_header}, + {symbol_begin_header} +}; +RESOLVE(block_header) +{ + switch (token_keyword) + { + case parse_keyword_else: + return NO_PRODUCTION; + case parse_keyword_for: + return 0; + case parse_keyword_while: + return 1; + case parse_keyword_function: + return 2; + case parse_keyword_begin: + return 3; + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(for_header) = +{ + {PRODUCE_KEYWORD(parse_keyword_for), parse_token_type_string, PRODUCE_KEYWORD(parse_keyword_in), symbol_arguments_or_redirections_list} +}; +RESOLVE_ONLY(for_header) + +PRODUCTIONS(while_header) = +{ + {PRODUCE_KEYWORD(parse_keyword_while), symbol_statement} +}; +RESOLVE_ONLY(while_header) + +PRODUCTIONS(begin_header) = +{ + {PRODUCE_KEYWORD(parse_keyword_begin)} +}; +RESOLVE_ONLY(begin_header) + +PRODUCTIONS(function_header) = +{ + {PRODUCE_KEYWORD(parse_keyword_function), parse_token_type_string, symbol_argument_list} +}; +RESOLVE_ONLY(function_header) + +/* A boolean statement is AND or OR or NOT */ +PRODUCTIONS(boolean_statement) = +{ + {PRODUCE_KEYWORD(parse_keyword_and), symbol_statement}, + {PRODUCE_KEYWORD(parse_keyword_or), symbol_statement}, + {PRODUCE_KEYWORD(parse_keyword_not), 
symbol_statement} +}; +RESOLVE(boolean_statement) +{ + switch (token_keyword) + { + case parse_keyword_and: + return 0; + case parse_keyword_or: + return 1; + case parse_keyword_not: + return 2; + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(decorated_statement) = +{ + {PRODUCE_KEYWORD(parse_keyword_command), symbol_plain_statement}, + {PRODUCE_KEYWORD(parse_keyword_builtin), symbol_plain_statement}, + {symbol_plain_statement} +}; +RESOLVE(decorated_statement) +{ + switch (token_keyword) + { + case parse_keyword_command: + return 0; + case parse_keyword_builtin: + return 1; + default: + return 2; + } +} + +PRODUCTIONS(plain_statement) = +{ + {parse_token_type_string, symbol_arguments_or_redirections_list, symbol_optional_background} +}; +RESOLVE_ONLY(plain_statement) + +PRODUCTIONS(arguments_or_redirections_list) = +{ + {}, + {symbol_argument_or_redirection, symbol_arguments_or_redirections_list} +}; +RESOLVE(arguments_or_redirections_list) +{ + switch (token_type) + { + case parse_token_type_string: + case parse_token_type_redirection: + return 1; + default: + return 0; + } +} - default: - return NO_PRODUCTION; - } - } \ No newline at end of file diff --git a/parse_productions.h b/parse_productions.h index 5ded6af00..d3743014b 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -55,10 +55,12 @@ namespace parse_productions -struct Production_t -{ - enum parse_token_type_t symbols[MAX_SYMBOLS_PER_PRODUCTION]; -}; +/* A production is an array of unsigned char. Symbols are encoded directly as their symbol value. Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together keywords and symbols. 
*/ +typedef unsigned char Production_t[MAX_SYMBOLS_PER_PRODUCTION]; + +typedef Production_t ProductionList_t[MAX_PRODUCTIONS]; + +#define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) struct Symbol_t { diff --git a/parse_tree.h b/parse_tree.h index dfe9f24e9..ef7678f5c 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -89,7 +89,8 @@ enum parse_token_type_t parse_token_type_background, parse_token_type_end, parse_token_type_terminate, - + + LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, FIRST_PARSE_TOKEN_TYPE = parse_token_type_string }; From b133137a1f0341f9e21b622448bf5d5056c53046 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 28 Jul 2013 15:19:38 -0700 Subject: [PATCH 020/108] Removed templates (yay) --- parse_productions.cpp | 147 +++++++++- parse_productions.h | 661 +++--------------------------------------- parse_tree.cpp | 345 +++++----------------- parse_tree.h | 3 + 4 files changed, 252 insertions(+), 904 deletions(-) diff --git a/parse_productions.cpp b/parse_productions.cpp index fba24c597..e63f56023 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -1,10 +1,34 @@ #include "parse_productions.h" using namespace parse_productions; +#define NO_PRODUCTION ((production_option_idx_t)(-1)) -#define PRODUCTIONS(sym) static const ProductionList_t sym##_productions -#define RESOLVE(sym) static int resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) -#define RESOLVE_ONLY(sym) static int resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) { return 0; } +static bool production_is_empty(const production_t production) +{ + return production[0] == token_type_invalid; +} + +// Empty productions are allowed but must be first. Validate that the given production is in the valid range, i.e. 
it is either not empty or there is a non-empty production after it +static bool production_is_valid(const production_options_t production_list, production_option_idx_t which) +{ + if (which < 0 || which >= MAX_PRODUCTIONS) + return false; + + bool nonempty_found = false; + for (int i=which; i < MAX_PRODUCTIONS; i++) + { + if (! production_is_empty(production_list[i])) + { + nonempty_found = true; + break; + } + } + return nonempty_found; +} + +#define PRODUCTIONS(sym) static const production_options_t productions_##sym +#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) +#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) { return 0; } /* A job_list is a list of jobs, separated by semicolons or newlines */ PRODUCTIONS(job_list) = @@ -44,7 +68,6 @@ RESOLVE(job_list) case parse_token_type_terminate: // no more commands, just transition to empty return 0; - break; default: return NO_PRODUCTION; @@ -350,3 +373,119 @@ RESOLVE(arguments_or_redirections_list) } } +PRODUCTIONS(argument_or_redirection) = +{ + {parse_token_type_string}, + {parse_token_type_redirection} +}; +RESOLVE(argument_or_redirection) +{ + switch (token_type) + { + case parse_token_type_string: + return 0; + case parse_token_type_redirection: + return 1; + default: + return NO_PRODUCTION; + } +} + +PRODUCTIONS(optional_background) = +{ + {}, + { parse_token_type_background } +}; + +RESOLVE(optional_background) +{ + switch (token_type) + { + case parse_token_type_background: + return 1; + default: + return 0; + } +} + +#define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, 
production_option_idx_t *out_which_production, production_tag_t *out_tag) +{ + bool log_it = false; + if (log_it) + { + fprintf(stderr, "Resolving production for %ls with input type %ls <%ls>\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str()); + } + + /* Fetch the list of productions and the function to resolve them */ + const production_options_t *production_list = NULL; + production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) = NULL; + switch (node_type) + { + TEST(job_list) + TEST(job) + TEST(statement) + TEST(job_continuation) + TEST(boolean_statement) + TEST(block_statement) + TEST(if_statement) + TEST(if_clause) + TEST(else_clause) + TEST(else_continuation) + TEST(switch_statement) + TEST(decorated_statement) + TEST(case_item_list) + TEST(case_item) + TEST(argument_list_nonempty) + TEST(argument_list) + TEST(block_header) + TEST(for_header) + TEST(while_header) + TEST(begin_header) + TEST(function_header) + TEST(plain_statement) + TEST(arguments_or_redirections_list) + TEST(argument_or_redirection) + TEST(optional_background) + + case parse_token_type_string: + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + case parse_token_type_end: + case parse_token_type_terminate: + fprintf(stderr, "Terminal token type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); + PARSER_DIE(); + break; + + case token_type_invalid: + fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__); + PARSER_DIE(); + break; + + } + PARSE_ASSERT(production_list != NULL); + PARSE_ASSERT(resolver != NULL); + + const production_t *result = NULL; + production_option_idx_t which = resolver(input_type, input_keyword, out_tag); + + if (log_it) + { + fprintf(stderr, "\tresolved to %u\n", (unsigned)which); + } + + + if (which == NO_PRODUCTION) + { + 
fprintf(stderr, "Token type '%ls' has no production for input type '%ls', keyword '%ls' (in %s)\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str(), __FUNCTION__); + result = NULL; + } + else + { + PARSE_ASSERT(production_is_valid(*production_list, which)); + result = &((*production_list)[which]); + } + *out_which_production = which; + return result; +} diff --git a/parse_productions.h b/parse_productions.h index d3743014b..d7b7c19d4 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -8,45 +8,6 @@ #include "parse_tree.h" -/* Terrifying template black magic. */ - -/* - -- Get info for symbol -- Resolve production from info -- Get productions for children -- Get symbols for productions - -Production may be: - -1. Single value -2. Sequence of values (possibly empty) -3. Options of Single / Sequence - -Info to specify: - -1. Number of different productions -2. Resolver function -3. Symbols for associated productions - -Choice: should info be a class or a data? - -data: - -struct Symbol_t -{ - enum parse_token_type_t token_type; - int (*resolver)(parse_token_type_t tok, parse_keyword_t key); //may be trivial - production productions[5]; -} - -struct Production_t -{ - enum parse_token_type_t symbols[5]; -} - -*/ - namespace parse_productions { @@ -54,596 +15,54 @@ namespace parse_productions #define MAX_SYMBOLS_PER_PRODUCTION 5 +typedef uint32_t production_tag_t; /* A production is an array of unsigned char. Symbols are encoded directly as their symbol value. Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together keywords and symbols. 
*/ -typedef unsigned char Production_t[MAX_SYMBOLS_PER_PRODUCTION]; +typedef uint8_t production_element_t; -typedef Production_t ProductionList_t[MAX_PRODUCTIONS]; +/* An index into a production option list */ +typedef uint8_t production_option_idx_t; + +inline parse_token_type_t production_element_type(production_element_t elem) +{ + if (elem > LAST_TOKEN_OR_SYMBOL) + { + return parse_token_type_string; + } + else + { + return static_cast(elem); + } +} + +inline parse_keyword_t production_element_keyword(production_element_t elem) +{ + if (elem > LAST_TOKEN_OR_SYMBOL) + { + // First keyword is LAST_TOKEN_OR_SYMBOL + 1 + return static_cast(elem - LAST_TOKEN_OR_SYMBOL - 1); + } + else + { + return parse_keyword_none; + } +} + + +inline bool production_element_is_valid(production_element_t elem) +{ + return elem != token_type_invalid; +} + +typedef production_element_t const production_t[MAX_SYMBOLS_PER_PRODUCTION]; + +typedef production_t production_options_t[MAX_PRODUCTIONS]; #define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) -struct Symbol_t -{ - enum parse_token_type_t token_type; - int (*resolver)(parse_token_type_t tok, parse_keyword_t key); - Production_t productions[MAX_PRODUCTIONS]; -}; - - +const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, production_tag_t *out_tag); } -namespace parse_symbols -{ - -#define SYMBOL(x) static inline parse_token_type_t get_token() { return x; } - -#define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; } - -#define NO_PRODUCTION (-1) - -struct Symbol -{ - typedef int magic_symbol_type_t; -}; - -template -struct Token : public Symbol -{ - SYMBOL(WHICH); -}; - -/* Placeholder */ -typedef Token none; - -typedef Token EMPTY; - -template -struct Seq -{ - typedef T0 t0; - typedef T1 t1; - typedef T2 t2; - typedef T3 t3; - typedef T4 t4; - typedef T5 t5; - - typedef 
int magic_seq_type_t; -}; - -template -struct OR -{ - typedef P0 p0; - typedef P1 p1; - typedef P2 p2; - typedef P3 p3; - typedef P4 p4; - typedef P5 p5; - - typedef int magic_or_type_t; -}; - -template -struct Keyword : public Symbol -{ - static inline parse_keyword_t get_token() - { - return WHICH; - } -}; - -struct job; -struct statement; -struct job_continuation; -struct boolean_statement; -struct block_statement; -struct if_statement; -struct if_clause; -struct else_clause; -struct else_continuation; -struct switch_statement; -struct decorated_statement; -struct switch_statement; -struct case_item_list; -struct case_item; -struct argument_list_nonempty; -struct argument_list; -struct block_statement; -struct block_header; -struct for_header; -struct while_header; -struct begin_header; -struct function_header; -struct boolean_statement; -struct decorated_statement; -struct plain_statement; -struct arguments_or_redirections_list; -struct argument_or_redirection; -struct redirection; -struct statement_terminator; -struct optional_background; - -/* A job_list is a list of jobs, separated by semicolons or newlines */ -struct job_list : public Symbol -{ - typedef OR< - EMPTY, - Seq, - Seq, job_list> - > productions; - - SYMBOL(symbol_job_list) - - static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) - { - switch (token_type) - { - case parse_token_type_string: - // 'end' is special - switch (token_keyword) - { - case parse_keyword_end: - case parse_keyword_else: - // End this job list - return 0; - - default: - // Normal string - return 1; - } - - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_type_background: - return 1; - - case parse_token_type_end: - // Empty line - return 2; - - case parse_token_type_terminate: - // no more commands, just transition to empty - return 0; - break; - - default: - return NO_PRODUCTION; - } - } - -}; - -/* A job is a non-empty list of statements, separated by pipes. 
(Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */ -struct job : public Symbol -{ - typedef Seq sole_production; - SYMBOL(symbol_job); -}; - -struct job_continuation : public Symbol -{ - typedef OR< - EMPTY, - Seq, statement, job_continuation> - > productions; - - SYMBOL(symbol_job_continuation); - - static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) - { - switch (token_type) - { - case parse_token_type_pipe: - // Pipe, continuation - return 1; - - default: - // Not a pipe, no job continuation - return 0; - } - - } -}; - -/* A statement is a normal command, or an if / while / and etc */ -struct statement : public Symbol -{ - typedef OR< - boolean_statement, - block_statement, - if_statement, - switch_statement, - decorated_statement - > productions; - - SYMBOL(symbol_statement); - - static int production(parse_token_type_t token_type, parse_keyword_t token_keyword) - { - switch (token_type) - { - case parse_token_type_string: - switch (token_keyword) - { - case parse_keyword_and: - case parse_keyword_or: - case parse_keyword_not: - return 0; - - case parse_keyword_for: - case parse_keyword_while: - case parse_keyword_function: - case parse_keyword_begin: - return 1; - - case parse_keyword_if: - return 2; - - case parse_keyword_else: - //symbol_stack_pop(); - return NO_PRODUCTION; - - case parse_keyword_switch: - return 3; - - case parse_keyword_end: - PARSER_DIE(); //todo - return NO_PRODUCTION; - - // 'in' is only special within a for_header - case parse_keyword_in: - case parse_keyword_none: - case parse_keyword_command: - case parse_keyword_builtin: - case parse_keyword_case: - return 4; - } - break; - - case parse_token_type_pipe: - case parse_token_type_redirection: - case parse_token_type_background: - case parse_token_type_terminate: - return NO_PRODUCTION; - //parse_error(L"statement", token); - - 
default: - return NO_PRODUCTION; - } - } - -}; - -struct if_statement : public Symbol -{ - typedef Seq, arguments_or_redirections_list> sole_production; - SYMBOL(symbol_if_statement); -}; - -struct if_clause : public Symbol -{ - typedef Seq, job, statement_terminator, job_list> sole_production; - SYMBOL(symbol_if_clause); -}; - -struct else_clause : public Symbol -{ - typedef OR< - EMPTY, - Seq, else_continuation> - > productions; - - SYMBOL(symbol_else_clause); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_else: - return 1; - default: - return 0; - } - } -}; - -struct else_continuation : public Symbol -{ - typedef OR< - Seq, - Seq - > productions; - - SYMBOL(symbol_else_continuation); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_if: - return 0; - default: - return 1; - } - } -}; - -struct switch_statement : public Symbol -{ - typedef Seq, - Token, - statement_terminator, - case_item_list, - Keyword - > sole_production; - - SYMBOL(symbol_switch_statement); -}; - -struct case_item_list : public Symbol -{ - typedef OR - < - EMPTY, - Seq, - Seq, case_item_list> - > productions; - - SYMBOL(symbol_case_item_list); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_case: return 1; - - default: - if (tok == parse_token_type_end) - { - /* empty line */ - return 2; - } - else - { - return 0; - } - - } - } -}; - -struct case_item : public Symbol -{ - typedef Seq, argument_list, statement_terminator, job_list> sole_production; - - SYMBOL(symbol_case_item); -}; - -struct argument_list_nonempty : public Symbol -{ - typedef Seq, argument_list> sole_production; - SYMBOL(symbol_argument_list_nonempty); -}; - -struct argument_list : public Symbol -{ - typedef OR productions; - - SYMBOL(symbol_argument_list); - static int production(parse_token_type_t tok, parse_keyword_t key) - { - 
switch (tok) - { - case parse_token_type_string: - return 1; - default: - return 0; - } - } -}; - -struct block_statement : public Symbol -{ - typedef Seq, arguments_or_redirections_list> sole_production; - - SYMBOL(symbol_block_statement); -}; - -struct block_header : public Symbol -{ - typedef OR productions; - - SYMBOL(symbol_block_header); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - // todo - case parse_keyword_else: - return NO_PRODUCTION; - case parse_keyword_for: - return 0; - case parse_keyword_while: - return 1; - case parse_keyword_function: - return 2; - case parse_keyword_begin: - return 3; - default: - return NO_PRODUCTION; - } - } -}; - -struct for_header : public Symbol -{ - typedef Seq, Token, Keyword, arguments_or_redirections_list> sole_production; - - SYMBOL(symbol_for_header); -}; - -struct while_header : public Symbol -{ - typedef Seq, statement> sole_production; - - SYMBOL(symbol_while_header); -}; - -struct begin_header : public Symbol -{ - typedef Keyword sole_production; - SYMBOL(symbol_begin_header); -}; - -struct function_header : public Symbol -{ - typedef Seq< Keyword, Token, argument_list> sole_production; - SYMBOL(symbol_function_header); -}; - -/* A boolean statement is AND or OR or NOT */ -struct boolean_statement : public Symbol -{ - typedef OR< - Seq, statement>, - Seq, statement>, - Seq, statement> - > productions; - - SYMBOL(symbol_boolean_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_and: - return 0; - case parse_keyword_or: - return 1; - case parse_keyword_not: - return 2; - default: - return NO_PRODUCTION; - } - } -}; - -/* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */ -struct decorated_statement : public Symbol -{ - - typedef OR< - Seq, plain_statement>, - Seq, plain_statement>, - plain_statement - > productions; - - 
SYMBOL(symbol_decorated_statement); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (key) - { - case parse_keyword_command: - return 0; - case parse_keyword_builtin: - return 1; - default: - return 2; - } - } -}; - -struct plain_statement : public Symbol -{ - - typedef Seq, arguments_or_redirections_list, optional_background> sole_production; - - SYMBOL(symbol_plain_statement); - -}; - -struct arguments_or_redirections_list : public Symbol -{ - typedef OR< - EMPTY, - Seq > - productions; - - SYMBOL(symbol_arguments_or_redirections_list); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: - case parse_token_type_redirection: - return 1; - default: - return 0; - } - } -}; - -struct argument_or_redirection : public Symbol -{ - typedef OR< - Token, - redirection - > productions; - - - SYMBOL(symbol_argument_or_redirection); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_string: - return 0; - case parse_token_type_redirection: - return 1; - default: - return NO_PRODUCTION; - } - } -}; - -struct redirection : public Symbol -{ - typedef Token production; - SYMBOL(parse_token_type_redirection); -}; - -struct statement_terminator : public Symbol -{ - typedef Token production; - SYMBOL(parse_token_type_end); -}; - -struct optional_background : public Symbol -{ - typedef OR< - EMPTY, - Token - > productions; - - SYMBOL(symbol_optional_background); - - static int production(parse_token_type_t tok, parse_keyword_t key) - { - switch (tok) - { - case parse_token_type_background: - return 1; - default: - return 0; - } - } -}; - -} #endif diff --git a/parse_tree.cpp b/parse_tree.cpp index 4df277d48..a25c549f6 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -2,7 +2,7 @@ #include "tokenizer.h" #include -using namespace parse_symbols; +using namespace parse_productions; wcstring 
parse_error_t::describe(const wcstring &src) const { @@ -260,6 +260,7 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & } } +__attribute__((unused)) static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) { if (nodes.empty()) @@ -277,11 +278,11 @@ struct parse_stack_element_t enum parse_keyword_t keyword; node_offset_t node_idx; - parse_stack_element_t(parse_token_type_t t) : type(t), keyword(parse_keyword_none), node_idx(-1) + explicit parse_stack_element_t(parse_token_type_t t, node_offset_t idx) : type(t), keyword(parse_keyword_none), node_idx(idx) { } - - parse_stack_element_t(parse_keyword_t k) : type(parse_token_type_string), keyword(k), node_idx(-1) + + explicit parse_stack_element_t(production_element_t e, node_offset_t idx) : type(production_element_type(e)), keyword(production_element_keyword(e)), node_idx(idx) { } @@ -311,26 +312,13 @@ class parse_ll_t parse_ll_t() : fatal_errored(false) { // initial node - parse_stack_element_t elem = symbol_job_list; - elem.node_idx = 0; - symbol_stack.push_back(elem); // goal token + symbol_stack.push_back(parse_stack_element_t(symbol_job_list, 0)); // goal token nodes.push_back(parse_node_t(symbol_job_list)); } bool top_node_match_token(parse_token_t token); - // implementation of certain parser constructions void accept_token(parse_token_t token, const wcstring &src); - void accept_token_job_list(parse_token_t token); - void accept_token_job(parse_token_t token); - void accept_token_job_continuation(parse_token_t token); - void accept_token_else_clause(parse_token_t token); - void accept_token_else_continuation(parse_token_t token); - void accept_token_plain_statement(parse_token_t token); - void accept_token_argument_list(parse_token_t token); - void accept_token_arguments_or_redirections_list(parse_token_t token); - void accept_token_argument_or_redirection(parse_token_t token); - bool accept_token_string(parse_token_t token); void 
token_unhandled(parse_token_t token, const char *function); @@ -373,111 +361,67 @@ class parse_ll_t symbol_stack.pop_back(); } - - // Pop from the top of the symbol stack, then push, updating node counts. Note that these are pushed in reverse order, so the first argument will be on the top of the stack. - inline void symbol_stack_pop_push_int(parse_stack_element_t tok1 = token_type_invalid, parse_stack_element_t tok2 = token_type_invalid, parse_stack_element_t tok3 = token_type_invalid, parse_stack_element_t tok4 = token_type_invalid, parse_stack_element_t tok5 = token_type_invalid) + // Pop from the top of the symbol stack, then push the given production, updating node counts. Note that production_t has type "pointer to array" so some care is required. + inline void symbol_stack_pop_push_production(const production_t *production) { - - // Logging? - if (0) + bool logit = false; + if (logit) { - fprintf(stderr, "Pop %ls (%lu)\n", token_type_description(symbol_stack.back().type).c_str(), symbol_stack.size()); - if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok5.describe().c_str()); - if (tok4.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok4.describe().c_str()); - if (tok3.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok3.describe().c_str()); - if (tok2.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok2.describe().c_str()); - if (tok1.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok1.describe().c_str()); + size_t count = 0; + fprintf(stderr, "Applying production:\n"); + for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) + { + production_element_t elem = (*production)[i]; + if (production_element_is_valid(elem)) + { + parse_token_type_t type = production_element_type(elem); + parse_keyword_t keyword = production_element_keyword(elem); + fprintf(stderr, "\t%ls <%ls>\n", token_type_description(type).c_str(), keyword_description(keyword).c_str()); + count++; + } + } + if (! 
count) fprintf(stderr, "\t\n"); } - // Get the node for the top symbol and tell it about its children - size_t node_idx = symbol_stack.back().node_idx; - parse_node_t &node = nodes.at(node_idx); - + + // Add the children. Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) + const size_t child_start = nodes.size(); + size_t child_count = 0; + for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) + { + production_element_t elem = (*production)[i]; + if (production_element_is_valid(elem)) + { + // Generate the parse node. Note that this push_back may invalidate node. + parse_token_type_t child_type = production_element_type(elem); + nodes.push_back(parse_node_t(child_type)); + child_count++; + } + } + + // Update the parent + const size_t parent_node_idx = symbol_stack.back().node_idx; + parse_node_t &parent_node = nodes.at(parent_node_idx); + // Should have no children yet - PARSE_ASSERT(node.child_count == 0); + PARSE_ASSERT(parent_node.child_count == 0); - // Tell the node where its children start - node.child_start = nodes.size(); - - // Add nodes for the children - // Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) - if (tok1.type != token_type_invalid) add_child_to_node(node_idx, &tok1); - if (tok2.type != token_type_invalid) add_child_to_node(node_idx, &tok2); - if (tok3.type != token_type_invalid) add_child_to_node(node_idx, &tok3); - if (tok4.type != token_type_invalid) add_child_to_node(node_idx, &tok4); - if (tok5.type != token_type_invalid) add_child_to_node(node_idx, &tok5); - - // The above set the node_idx. Now replace the top of the stack. 
+ // Tell the node about its children + parent_node.child_start = child_start; + parent_node.child_count = child_count; + + // Replace the top of the stack with new stack elements corresponding to our new nodes. Note that these go in reverse order. symbol_stack.pop_back(); - if (tok5.type != token_type_invalid) symbol_stack.push_back(tok5); - if (tok4.type != token_type_invalid) symbol_stack.push_back(tok4); - if (tok3.type != token_type_invalid) symbol_stack.push_back(tok3); - if (tok2.type != token_type_invalid) symbol_stack.push_back(tok2); - if (tok1.type != token_type_invalid) symbol_stack.push_back(tok1); - } - - template - inline void symbol_stack_pop_push2(typename T::magic_seq_type_t x = 0) - { - symbol_stack_pop_push_int(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token()); - } - - template - inline void symbol_stack_pop_push2(typename T::magic_symbol_type_t x = 0) - { - symbol_stack_pop_push_int(T::get_token()); - } - - // Singular. Sole productions are always of type Seq. - template - inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_seq_type_t magic=0) - { - typedef typename T::sole_production seq; - symbol_stack_pop_push_int(seq::t0::get_token(), seq::t1::get_token(), seq::t2::get_token(), seq::t3::get_token(), seq::t4::get_token()); - } - - // Plural productions, of type Or. 
- template - inline void symbol_stack_produce(parse_token_t tok, typename T::productions::magic_or_type_t magic=0) - { - typedef typename T::productions ors; - int which = T::production(tok.type, tok.keyword); - switch (which) + symbol_stack.reserve(symbol_stack.size() + child_count); + size_t idx = child_count; + while (idx--) { - case 0: - symbol_stack_pop_push2(); - break; - case 1: - symbol_stack_pop_push2(); - break; - case 2: - symbol_stack_pop_push2(); - break; - case 3: - symbol_stack_pop_push2(); - break; - case 4: - symbol_stack_pop_push2(); - break; - - case NO_PRODUCTION: - parse_error(tok, L"Failed to produce with stack top '%ls' for token '%ls'\n", symbol_stack.back().describe().c_str(), tok.describe().c_str()); - break; - - default: - parse_error(tok, L"Unexpected production %d for token %ls\n", which, tok.describe().c_str()); - break; + production_element_t elem = (*production)[idx]; + PARSE_ASSERT(production_element_is_valid(elem)); + symbol_stack.push_back(parse_stack_element_t(elem, child_start + idx)); } } - // Non-sequence basic productions - template - inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_symbol_type_t magic=0) - { - symbol_stack_pop_push_int(T::sole_production::get_token()); - } - - }; void parse_ll_t::dump_stack(void) const @@ -551,57 +495,6 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } -void parse_ll_t::accept_token_else_clause(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_else_clause); - symbol_stack_produce(token); -} - -void parse_ll_t::accept_token_else_continuation(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_else_continuation); - symbol_stack_produce(token); -} - - -void parse_ll_t::accept_token_argument_list(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_argument_list); - symbol_stack_produce(token); -} - - -void 
parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_arguments_or_redirections_list); - symbol_stack_produce(token); -} - -void parse_ll_t::accept_token_argument_or_redirection(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == symbol_argument_or_redirection); - symbol_stack_produce(token); -} - -bool parse_ll_t::accept_token_string(parse_token_t token) -{ - PARSE_ASSERT(stack_top_type() == parse_token_type_string); - bool result = false; - switch (token.type) - { - case parse_token_type_string: - // Got our string - symbol_stack_pop(); - result = true; - break; - - default: - token_unhandled(token, __FUNCTION__); - break; - } - return result; -} - bool parse_ll_t::top_node_match_token(parse_token_t token) { PARSE_ASSERT(! symbol_stack.empty()); @@ -654,122 +547,16 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) consumed = true; break; } - - switch (stack_top_type()) - { - /* Symbols */ - case symbol_job_list: - symbol_stack_produce(token); - break; - - case symbol_job: - symbol_stack_produce(token); - break; - - case symbol_job_continuation: - symbol_stack_produce(token); - break; - - case symbol_statement: - symbol_stack_produce(token); - break; - - case symbol_if_statement: - symbol_stack_produce(token); - break; - - case symbol_if_clause: - symbol_stack_produce(token); - break; - - case symbol_else_clause: - accept_token_else_clause(token); - break; - - case symbol_else_continuation: - accept_token_else_continuation(token); - break; - - case symbol_block_statement: - symbol_stack_produce(token); - break; - - case symbol_block_header: - symbol_stack_produce(token); - break; - - case symbol_for_header: - symbol_stack_produce(token); - break; - - case symbol_while_header: - symbol_stack_produce(token); - break; - - case symbol_begin_header: - symbol_stack_produce(token); - break; - - case symbol_function_header: - symbol_stack_produce(token); - break; - - case 
symbol_switch_statement: - symbol_stack_produce(token); - break; - - case symbol_case_item_list: - symbol_stack_produce(token); - break; - - case symbol_case_item: - symbol_stack_produce(token); - break; - - case symbol_boolean_statement: - top_node_set_tag(token.keyword); - symbol_stack_produce(token); - break; - - case symbol_decorated_statement: - top_node_set_tag(token.keyword); - symbol_stack_produce(token); - break; - - case symbol_plain_statement: - symbol_stack_produce(token); - break; - - case symbol_argument_list_nonempty: - symbol_stack_produce(token); - break; - - case symbol_argument_list: - accept_token_argument_list(token); - break; - - case symbol_arguments_or_redirections_list: - accept_token_arguments_or_redirections_list(token); - break; - - case symbol_argument_or_redirection: - accept_token_argument_or_redirection(token); - break; - - case symbol_optional_background: - symbol_stack_produce(token); - break; - - /* Tokens */ - case parse_token_type_string: - consumed = accept_token_string(token); - break; - - default: - fprintf(stderr, "Bailing with token type %ls and stack top %ls\n", token_type_description(token.type).c_str(), token_type_description(stack_top_type()).c_str()); - exit_without_destructors(EXIT_FAILURE); - break; - } + + // Get the production for the top of the stack + parse_stack_element_t &stack_elem = symbol_stack.back(); + parse_node_t &node = nodes.at(stack_elem.node_idx); + const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag); + PARSE_ASSERT(production != NULL); + + // Manipulate the symbol stack. + // Note that stack_elem is invalidated by popping the stack. 
+ symbol_stack_pop_push_production(production); } } diff --git a/parse_tree.h b/parse_tree.h index ef7678f5c..c53864258 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -137,6 +137,9 @@ public: /* Type-dependent data */ uint32_t tag; + + /* Which production was used */ + uint8_t production_idx; /* Description */ wcstring describe(void) const; From 680ac41bb1ee83e708e7309f2141a76203d90c7c Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 28 Jul 2013 15:44:09 -0700 Subject: [PATCH 021/108] Clean up some error handling --- parse_productions.cpp | 10 ++++++++-- parse_productions.h | 19 +++++++++++-------- parse_tree.cpp | 38 +++++++++++++++----------------------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/parse_productions.cpp b/parse_productions.cpp index e63f56023..61f7636de 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -30,6 +30,9 @@ static bool production_is_valid(const production_options_t production_list, prod #define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) #define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) { return 0; } +#define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) + + /* A job_list is a list of jobs, separated by semicolons or newlines */ PRODUCTIONS(job_list) = { @@ -409,7 +412,7 @@ RESOLVE(optional_background) } #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; -const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_which_production, production_tag_t *out_tag) +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t 
input_keyword, production_option_idx_t *out_which_production, production_tag_t *out_tag, wcstring *out_error_text) { bool log_it = false; if (log_it) @@ -478,7 +481,10 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n if (which == NO_PRODUCTION) { - fprintf(stderr, "Token type '%ls' has no production for input type '%ls', keyword '%ls' (in %s)\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str(), __FUNCTION__); + if (log_it) + { + fprintf(stderr, "Token type '%ls' has no production for input type '%ls', keyword '%ls' (in %s)\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str(), __FUNCTION__); + } result = NULL; } else diff --git a/parse_productions.h b/parse_productions.h index d7b7c19d4..a0d43f629 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -23,6 +23,13 @@ typedef uint8_t production_element_t; /* An index into a production option list */ typedef uint8_t production_option_idx_t; +/* A production is an array of production elements */ +typedef production_element_t const production_t[MAX_SYMBOLS_PER_PRODUCTION]; + +/* A production options is an array of (possible) productions */ +typedef production_t production_options_t[MAX_PRODUCTIONS]; + +/* Resolve the type from a production element */ inline parse_token_type_t production_element_type(production_element_t elem) { if (elem > LAST_TOKEN_OR_SYMBOL) @@ -35,6 +42,7 @@ inline parse_token_type_t production_element_type(production_element_t elem) } } +/* Resolve the keyword from a production element */ inline parse_keyword_t production_element_keyword(production_element_t elem) { if (elem > LAST_TOKEN_OR_SYMBOL) @@ -48,19 +56,14 @@ inline parse_keyword_t production_element_keyword(production_element_t elem) } } - +/* Check if an element is valid */ inline bool production_element_is_valid(production_element_t 
elem) { return elem != token_type_invalid; } -typedef production_element_t const production_t[MAX_SYMBOLS_PER_PRODUCTION]; - -typedef production_t production_options_t[MAX_PRODUCTIONS]; - -#define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) - -const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, production_tag_t *out_tag); +/* Fetch a production */ +const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, production_tag_t *out_tag, wcstring *out_error_text); } diff --git a/parse_tree.cpp b/parse_tree.cpp index a25c549f6..7a809167e 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -14,7 +14,7 @@ wcstring parse_error_t::describe(const wcstring &src) const // Look for a newline prior to source_start. If we don't find one, start at the beginning of the string; otherwise start one past the newline size_t newline = src.find_last_of(L'\n', source_start); - fprintf(stderr, "newline: %lu, source_start %lu, source_length %lu\n", newline, source_start, source_length); + //fprintf(stderr, "newline: %lu, source_start %lu, source_length %lu\n", newline, source_start, source_length); if (newline != wcstring::npos) { line_start = newline;// + 1; @@ -26,7 +26,7 @@ wcstring parse_error_t::describe(const wcstring &src) const line_end = src.size(); } assert(line_end >= line_start); - fprintf(stderr, "source start: %lu, line start %lu\n", source_start, line_start); + //fprintf(stderr, "source start: %lu, line start %lu\n", source_start, line_start); assert(source_start >= line_start); // Append the line of text @@ -320,8 +320,6 @@ class parse_ll_t void accept_token(parse_token_t token, const wcstring &src); - void token_unhandled(parse_token_t token, const char *function); - void parse_error(const wchar_t *expected, parse_token_t token); void 
parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); @@ -455,18 +453,6 @@ void parse_ll_t::dump_stack(void) const } } -void parse_ll_t::token_unhandled(parse_token_t token, const char *function) -{ - fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function); - this->dump_stack(); - parse_error_t err; - err.text = format_string(L"Unhandled token with type %ls in function %s", token_type_description(token.type).c_str(), function); - err.source_start = token.source_start; - err.source_length = token.source_length; - this->errors.push_back(err); - this->fatal_errored = true; -} - void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) { this->dump_stack(); @@ -551,12 +537,18 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); - const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag); - PARSE_ASSERT(production != NULL); - - // Manipulate the symbol stack. - // Note that stack_elem is invalidated by popping the stack. - symbol_stack_pop_push_production(production); + const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag, NULL /* error text */); + if (production == NULL) + { + this->parse_error(token, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token.describe().c_str()); + // parse_error sets fatal_errored, which ends the loop + } + else + { + // Manipulate the symbol stack. + // Note that stack_elem is invalidated by popping the stack. 
+ symbol_stack_pop_push_production(production); + } } } @@ -632,7 +624,7 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_ break; } - wcstring result = L"";//dump_tree(this->parser->nodes, str); + wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); From 876b9d49b4ae89c02275ad7c3c2e26f27e38f298 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 8 Aug 2013 14:44:52 -0700 Subject: [PATCH 022/108] Added fish_tests target to fish.xcodeproj --- fish.xcodeproj/project.pbxproj | 215 +++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 708e65e30..c397f3216 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -73,6 +73,48 @@ D07D266E15E33B86009E43F6 /* tools in Copy Files */ = {isa = PBXBuildFile; fileRef = D025C02915D1FEA100B9DB63 /* tools */; }; D07D267215E34171009E43F6 /* config.fish in Copy Files */ = {isa = PBXBuildFile; fileRef = D0CBD580159EE48F0024809C /* config.fish */; }; D0879AC816BF9AAB00E98E56 /* fish_term_icon.icns in Resources */ = {isa = PBXBuildFile; fileRef = D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */; }; + D08A329417B4458D00F3A533 /* fish_tests.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D08A329317B4458D00F3A533 /* fish_tests.cpp */; }; + D08A329517B445C200F3A533 /* function.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854413B3ACEE0099B651 /* function.cpp */; }; + D08A329617B445FD00F3A533 /* builtin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853513B3ACEE0099B651 /* builtin.cpp */; }; + D08A329717B4463B00F3A533 /* complete.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853713B3ACEE0099B651 /* 
complete.cpp */; }; + D08A329817B4463B00F3A533 /* env.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853A13B3ACEE0099B651 /* env.cpp */; }; + D08A329917B4463B00F3A533 /* exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853C13B3ACEE0099B651 /* exec.cpp */; }; + D08A329A17B4463B00F3A533 /* expand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853D13B3ACEE0099B651 /* expand.cpp */; }; + D08A329B17B4463B00F3A533 /* highlight.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854713B3ACEE0099B651 /* highlight.cpp */; }; + D08A329C17B4463B00F3A533 /* history.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854813B3ACEE0099B651 /* history.cpp */; }; + D08A329D17B4463B00F3A533 /* kill.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854F13B3ACEE0099B651 /* kill.cpp */; }; + D08A329E17B4463B00F3A533 /* parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855413B3ACEE0099B651 /* parser.cpp */; }; + D08A329F17B4463B00F3A533 /* proc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855713B3ACEE0099B651 /* proc.cpp */; }; + D08A32A017B4463B00F3A533 /* reader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855813B3ACEE0099B651 /* reader.cpp */; }; + D08A32A117B4463B00F3A533 /* sanity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855913B3ACEE0099B651 /* sanity.cpp */; }; + D08A32A217B4463B00F3A533 /* tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855D13B3ACEE0099B651 /* tokenizer.cpp */; }; + D08A32A317B4463B00F3A533 /* wgetopt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855F13B3ACEE0099B651 /* wgetopt.cpp */; }; + D08A32A417B4463B00F3A533 /* wildcard.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0856013B3ACEE0099B651 /* wildcard.cpp */; }; + D08A32A517B4463B00F3A533 /* wutil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0856113B3ACEE0099B651 /* wutil.cpp */; }; + D08A32A617B4464300F3A533 /* input.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
D0A0854A13B3ACEE0099B651 /* input.cpp */; }; + D08A32A717B446A300F3A533 /* autoload.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C6FCC914CFA4B0004CE8AD /* autoload.cpp */; }; + D08A32A817B446A300F3A533 /* builtin_test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */; }; + D08A32A917B446A300F3A533 /* color.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0B6B0FE14E88BA400AD6C10 /* color.cpp */; }; + D08A32AA17B446A300F3A533 /* common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853613B3ACEE0099B651 /* common.cpp */; }; + D08A32AB17B446A300F3A533 /* env_universal_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853813B3ACEE0099B651 /* env_universal_common.cpp */; }; + D08A32AC17B446A300F3A533 /* env_universal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853913B3ACEE0099B651 /* env_universal.cpp */; }; + D08A32AD17B446A300F3A533 /* event.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853B13B3ACEE0099B651 /* event.cpp */; }; + D08A32AE17B446A300F3A533 /* input_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854913B3ACEE0099B651 /* input_common.cpp */; }; + D08A32AF17B446A300F3A533 /* intern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854B13B3ACEE0099B651 /* intern.cpp */; }; + D08A32B017B446A300F3A533 /* io.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854C13B3ACEE0099B651 /* io.cpp */; }; + D08A32B117B446A300F3A533 /* iothread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0854D13B3ACEE0099B651 /* iothread.cpp */; }; + D08A32B217B446A300F3A533 /* output.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855113B3ACEE0099B651 /* output.cpp */; }; + D08A32B317B446A300F3A533 /* parse_util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855213B3ACEE0099B651 /* parse_util.cpp */; }; + D08A32B417B446A300F3A533 /* parser_keywords.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855313B3ACEE0099B651 /* 
parser_keywords.cpp */; }; + D08A32B517B446A300F3A533 /* path.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855513B3ACEE0099B651 /* path.cpp */; }; + D08A32B617B446A300F3A533 /* postfork.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D09B1C1914FC7B5B00F91077 /* postfork.cpp */; }; + D08A32B717B446A300F3A533 /* screen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855A13B3ACEE0099B651 /* screen.cpp */; }; + D08A32B817B446A300F3A533 /* signal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855C13B3ACEE0099B651 /* signal.cpp */; }; + D08A32B917B446B100F3A533 /* parse_productions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */; }; + D08A32BA17B446B100F3A533 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; + D08A32BB17B446B100F3A533 /* parse_exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F331765281F00BFAB82 /* parse_exec.cpp */; }; + D08A32BC17B4473B00F3A533 /* libncurses.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = D0D02A8C15983CFA008E62BD /* libncurses.dylib */; }; + D08A32BD17B4474000F3A533 /* libiconv.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = D0D02A8A15983CDF008E62BD /* libiconv.dylib */; }; D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; @@ -295,6 +337,15 @@ name = "Copy Files"; runOnlyForDeploymentPostprocessing = 1; }; + D08A328B17B4455100F3A533 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; 
D0F019F015A977010034B3B1 /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 2147483647; @@ -340,6 +391,8 @@ D07B247215BCC15700D4ADB4 /* add-shell */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "add-shell"; path = "build_tools/osx_package_scripts/add-shell"; sourceTree = ""; }; D07B247515BCC4BE00D4ADB4 /* install.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = install.sh; path = osx/install.sh; sourceTree = ""; }; D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; name = fish_term_icon.icns; path = osx/fish_term_icon.icns; sourceTree = ""; }; + D08A328D17B4455100F3A533 /* fish_tests */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_tests; sourceTree = BUILT_PRODUCTS_DIR; }; + D08A329317B4458D00F3A533 /* fish_tests.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fish_tests.cpp; sourceTree = ""; }; D09B1C1914FC7B5B00F91077 /* postfork.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = postfork.cpp; sourceTree = ""; }; D09B1C1A14FC7B5B00F91077 /* postfork.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = postfork.h; sourceTree = ""; }; D0A0850313B3ACEE0099B651 /* builtin.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = builtin.h; sourceTree = ""; }; @@ -471,6 +524,15 @@ /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ + D08A328A17B4455100F3A533 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + D08A32BD17B4474000F3A533 /* libiconv.dylib in Frameworks */, + D08A32BC17B4473B00F3A533 /* libncurses.dylib in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; D0D02AB915985EF9008E62BD /* Frameworks */ = { isa = 
PBXFrameworksBuildPhase; buildActionMask = 2147483647; @@ -534,6 +596,13 @@ name = "Other Build Products"; sourceTree = ""; }; + D08A328E17B4455100F3A533 /* fish_tests */ = { + isa = PBXGroup; + children = ( + ); + path = fish_tests; + sourceTree = ""; + }; D0A084F013B3AC130099B651 = { isa = PBXGroup; children = ( @@ -543,6 +612,7 @@ D0D02A8E15983D5F008E62BD /* Libraries */, D0D02AAB15985C14008E62BD /* Resources */, D031890A15E36DB500D9CC39 /* Other Build Products */, + D08A328E17B4455100F3A533 /* fish_tests */, D0D2693215983562005D9B9C /* Products */, ); sourceTree = ""; @@ -672,6 +742,7 @@ D0A0856613B3ACEE0099B651 /* xdgmimemagic.cpp */, D0A0852F13B3ACEE0099B651 /* xdgmimeparent.h */, D0A0856713B3ACEE0099B651 /* xdgmimeparent.cpp */, + D08A329317B4458D00F3A533 /* fish_tests.cpp */, ); name = Sources; sourceTree = ""; @@ -713,6 +784,7 @@ D0D02ABC15985EF9008E62BD /* fishd */, D0D02AD01598642A008E62BD /* fish_indent */, D0D02AE415986537008E62BD /* fish_pager */, + D08A328D17B4455100F3A533 /* fish_tests */, ); name = Products; sourceTree = ""; @@ -745,6 +817,23 @@ /* End PBXLegacyTarget section */ /* Begin PBXNativeTarget section */ + D08A328C17B4455100F3A533 /* fish_tests */ = { + isa = PBXNativeTarget; + buildConfigurationList = D08A329217B4455100F3A533 /* Build configuration list for PBXNativeTarget "fish_tests" */; + buildPhases = ( + D08A328917B4455100F3A533 /* Sources */, + D08A328A17B4455100F3A533 /* Frameworks */, + D08A328B17B4455100F3A533 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = fish_tests; + productName = fish_tests; + productReference = D08A328D17B4455100F3A533 /* fish_tests */; + productType = "com.apple.product-type.tool"; + }; D0D02A9915985A75008E62BD /* fish.app */ = { isa = PBXNativeTarget; buildConfigurationList = D0D02AA415985A75008E62BD /* Build configuration list for PBXNativeTarget "fish.app" */; @@ -854,6 +943,7 @@ D0D02ABB15985EF9008E62BD /* fishd */, D0D02ACF1598642A008E62BD /* fish_indent */, 
D0D02AE315986537008E62BD /* fish_pager */, + D08A328C17B4455100F3A533 /* fish_tests */, D0A564E6168CFDD800AF6161 /* man_pages */, D0A084F713B3AC130099B651 /* Makefile */, ); @@ -1034,6 +1124,53 @@ /* End PBXShellScriptBuildPhase section */ /* Begin PBXSourcesBuildPhase section */ + D08A328917B4455100F3A533 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + D08A32B917B446B100F3A533 /* parse_productions.cpp in Sources */, + D08A32BA17B446B100F3A533 /* parse_tree.cpp in Sources */, + D08A32BB17B446B100F3A533 /* parse_exec.cpp in Sources */, + D08A32A717B446A300F3A533 /* autoload.cpp in Sources */, + D08A32A817B446A300F3A533 /* builtin_test.cpp in Sources */, + D08A32A917B446A300F3A533 /* color.cpp in Sources */, + D08A32AA17B446A300F3A533 /* common.cpp in Sources */, + D08A32AB17B446A300F3A533 /* env_universal_common.cpp in Sources */, + D08A32AC17B446A300F3A533 /* env_universal.cpp in Sources */, + D08A32AD17B446A300F3A533 /* event.cpp in Sources */, + D08A32AE17B446A300F3A533 /* input_common.cpp in Sources */, + D08A32AF17B446A300F3A533 /* intern.cpp in Sources */, + D08A32B017B446A300F3A533 /* io.cpp in Sources */, + D08A32B117B446A300F3A533 /* iothread.cpp in Sources */, + D08A32B217B446A300F3A533 /* output.cpp in Sources */, + D08A32B317B446A300F3A533 /* parse_util.cpp in Sources */, + D08A32B417B446A300F3A533 /* parser_keywords.cpp in Sources */, + D08A32B517B446A300F3A533 /* path.cpp in Sources */, + D08A32B617B446A300F3A533 /* postfork.cpp in Sources */, + D08A32B717B446A300F3A533 /* screen.cpp in Sources */, + D08A32B817B446A300F3A533 /* signal.cpp in Sources */, + D08A32A617B4464300F3A533 /* input.cpp in Sources */, + D08A329717B4463B00F3A533 /* complete.cpp in Sources */, + D08A329817B4463B00F3A533 /* env.cpp in Sources */, + D08A329917B4463B00F3A533 /* exec.cpp in Sources */, + D08A329A17B4463B00F3A533 /* expand.cpp in Sources */, + D08A329B17B4463B00F3A533 /* highlight.cpp in Sources */, + 
D08A329C17B4463B00F3A533 /* history.cpp in Sources */, + D08A329D17B4463B00F3A533 /* kill.cpp in Sources */, + D08A329E17B4463B00F3A533 /* parser.cpp in Sources */, + D08A329F17B4463B00F3A533 /* proc.cpp in Sources */, + D08A32A017B4463B00F3A533 /* reader.cpp in Sources */, + D08A32A117B4463B00F3A533 /* sanity.cpp in Sources */, + D08A32A217B4463B00F3A533 /* tokenizer.cpp in Sources */, + D08A32A317B4463B00F3A533 /* wgetopt.cpp in Sources */, + D08A32A417B4463B00F3A533 /* wildcard.cpp in Sources */, + D08A32A517B4463B00F3A533 /* wutil.cpp in Sources */, + D08A329617B445FD00F3A533 /* builtin.cpp in Sources */, + D08A329417B4458D00F3A533 /* fish_tests.cpp in Sources */, + D08A329517B445C200F3A533 /* function.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; D0D02AB815985EF9008E62BD /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -1345,6 +1482,74 @@ }; name = Release; }; + D08A328F17B4455100F3A533 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_UNINITIALIZED_AUTOS = YES; + MACOSX_DEPLOYMENT_TARGET = 10.8; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + D08A329017B4455100F3A533 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_EMPTY_BODY = YES; + 
CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + MACOSX_DEPLOYMENT_TARGET = 10.8; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; + D08A329117B4455100F3A533 /* Release_C++11 */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_64_BIT)"; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = YES; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + MACOSX_DEPLOYMENT_TARGET = 10.8; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = "Release_C++11"; + }; D0A084F813B3AC130099B651 /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { @@ -1617,6 +1822,16 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; + D08A329217B4455100F3A533 /* Build configuration list for PBXNativeTarget "fish_tests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + D08A328F17B4455100F3A533 /* Debug */, + D08A329017B4455100F3A533 /* Release */, + D08A329117B4455100F3A533 /* Release_C++11 */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; D0A084F513B3AC130099B651 /* Build configuration list for PBXProject "fish" */ = { isa = XCConfigurationList; buildConfigurations = ( From 6a6593335d2311432ca5d05d2081cb30c4fec34f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 8 Aug 2013 15:06:32 -0700 Subject: [PATCH 023/108] Teach fish_tests about new parsing files in Makefile --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/Makefile.in b/Makefile.in index b3e745a6a..c36cbe7d6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -100,7 +100,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \ env_universal.o env_universal_common.o input_common.o event.o \ signal.o io.o parse_util.o common.o screen.o path.o autoload.o \ parser_keywords.o iothread.o color.o postfork.o \ - builtin_test.o parse_tree.o parse_exec.o + builtin_test.o parse_tree.o parse_productions.o parse_exec.o FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \ parser_keywords.o wutil.o tokenizer.o From 8e07e55c1f928ed8a0da6360e00b2b8139594a63 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 8 Aug 2013 15:06:46 -0700 Subject: [PATCH 024/108] More work on new parser --- builtin.cpp | 2 +- common.cpp | 2 +- common.h | 4 +- fish_tests.cpp | 50 ++++- highlight.cpp | 412 ++++++++++++++++++++++++++++++++++++++++++ highlight.h | 1 + parse_productions.cpp | 26 ++- parse_tree.cpp | 200 ++++++++++++++++++-- parse_tree.h | 43 ++++- 9 files changed, 708 insertions(+), 32 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index d2a80a8c4..4ae9e5b51 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -4063,7 +4063,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) parse_node_tree_t parse_tree; parse_error_list_t errors; parse_t parser; - bool success = parser.parse(src, &parse_tree, &errors); + bool success = parser.parse(src, parse_flag_none, &parse_tree, &errors, true); if (! 
success) { stdout_buffer.append(L"Parsing failed:\n"); diff --git a/common.cpp b/common.cpp index 7a9f7a514..c9a6b2279 100644 --- a/common.cpp +++ b/common.cpp @@ -507,7 +507,7 @@ const wchar_t *wcsfuncname(const wchar_t *str) } -int wcsvarchr(wchar_t chr) +bool wcsvarchr(wchar_t chr) { return iswalnum(chr) || chr == L'_'; } diff --git a/common.h b/common.h index 57fe7fa1a..abbf12f34 100644 --- a/common.h +++ b/common.h @@ -608,10 +608,10 @@ const wchar_t *wcsfuncname(const wchar_t *str); /** Test if the given string is valid in a variable name - \return 1 if this is a valid name, 0 otherwise + \return true if this is a valid name, false otherwise */ -int wcsvarchr(wchar_t chr); +bool wcsvarchr(wchar_t chr); /** diff --git a/fish_tests.cpp b/fish_tests.cpp index 6ebd3d220..dd16deb99 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1801,23 +1801,65 @@ void history_tests_t::test_history_speed(void) delete hist; } +static void test_new_parser_correctness(void) +{ + say(L"Testing new parser!"); + const struct parser_test_t + { + const wchar_t *src; + bool ok; + } + parser_tests[] = + { + {L"; ; ; ", true}, + {L"if ; end", false}, + {L"if true ; end", true}, + {L"if true; end ; end", false}, + {L"if end; end ; end", false}, + {L"end", false} + }; + + for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++) + { + const parser_test_t *test = &parser_tests[i]; + + parse_node_tree_t parse_tree; + parse_t parser; + bool success = parser.parse(test->src, parse_flag_none, &parse_tree, NULL); + say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no"); + if (success && ! test->ok) + { + err(L"\"%ls\" should NOT have parsed, but did", test->src); + } + else if (! 
success && test->ok) + { + err(L"\"%ls\" should have parsed, but failed", test->src); + } + } + say(L"Parse tests complete"); + +} + +__attribute__((unused)) static void test_new_parser(void) { say(L"Testing new parser!"); const wcstring src = L"echo hello world"; parse_node_tree_t parse_tree; parse_t parser; - bool success = parser.parse(src, &parse_tree, NULL); + bool success = parser.parse(src, parse_flag_none, &parse_tree, NULL); if (! success) { say(L"Parsing failed"); } else { +#if 0 parse_execution_context_t ctx(parse_tree, src); say(L"Simulating execution:"); wcstring simulation = ctx.simulate(); say(simulation.c_str()); +#endif } } @@ -1827,13 +1869,12 @@ static void test_new_parser(void) int main(int argc, char **argv) { setlocale(LC_ALL, ""); - srand(time(0)); + //srand(time(0)); configure_thread_assertions_for_testing(); program_name=L"(ignore)"; say(L"Testing low-level functionality"); - say(L"Lines beginning with '(ignore):' are not errors, they are warning messages\ngenerated by the fish parser library when given broken input, and can be\nignored. 
All actual errors begin with 'Error:'."); set_main_thread(); setup_fork_guards(); //proc_init(); @@ -1843,7 +1884,8 @@ int main(int argc, char **argv) reader_init(); env_init(); - test_new_parser(); + test_new_parser_correctness(); + //test_new_parser(); return 0; test_format(); diff --git a/highlight.cpp b/highlight.cpp index 606604386..3c0838902 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -34,6 +34,7 @@ #include "wildcard.h" #include "path.h" #include "history.h" +#include "parse_tree.h" /** Number of elements in the highlight_var array @@ -1307,11 +1308,16 @@ static void tokenize(const wchar_t * const buff, std::vector &color, const } } +void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); // PCA This function does I/O, (calls is_potential_path, path_get_path, maybe others) and so ought to only run on a background thread void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { ASSERT_IS_BACKGROUND_THREAD(); + if (1) { + highlight_shell_magic(buff, color, pos, error, vars); + return; + } const size_t length = buff.size(); assert(buff.size() == color.size()); @@ -1440,7 +1446,413 @@ void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, } } +static void color_node(const parse_node_t &node, int color, std::vector &color_array) +{ + // Can only color nodes with valid source ranges + if (! 
node.has_source()) + return; + + // Fill the color array with our color in the corresponding range + size_t source_end = node.source_start + node.source_length; + assert(source_end >= node.source_start); + assert(source_end <= color_array.size()); + + std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color); +} +static void color_argument(const wcstring &buffstr, std::vector::iterator colors, int normal_status) +{ + const size_t buff_len = buffstr.size(); + std::fill(colors, colors + buff_len, normal_status); + + enum {e_unquoted, e_single_quoted, e_double_quoted} mode = e_unquoted; + int bracket_count=0; + for (size_t in_pos=0; in_pos < buff_len; in_pos++) + { + const wchar_t c = buffstr.at(in_pos); + switch (mode) + { + case e_unquoted: + { + if (c == L'\\') + { + int fill_color = HIGHLIGHT_ESCAPE; //may be set to HIGHLIGHT_ERROR + const size_t backslash_pos = in_pos; + size_t fill_end = backslash_pos; + + // Move to the escaped character + in_pos++; + const wchar_t escaped_char = (in_pos < buff_len ? buffstr.at(in_pos) : L'\0'); + + if (escaped_char == L'\0') + { + fill_end = in_pos; + fill_color = HIGHLIGHT_ERROR; + } + else if (wcschr(L"~%", escaped_char)) + { + if (in_pos == 1) + { + fill_end = in_pos + 1; + } + } + else if (escaped_char == L',') + { + if (bracket_count) + { + fill_end = in_pos + 1; + } + } + else if (wcschr(L"abefnrtv*?$(){}[]'\"<>^ \\#;|&", escaped_char)) + { + fill_end = in_pos + 1; + } + else if (wcschr(L"c", escaped_char)) + { + // Like \ci. 
So highlight three characters + fill_end = in_pos + 1; + } + else if (wcschr(L"uUxX01234567", escaped_char)) + { + long long res=0; + int chars=2; + int base=16; + + wchar_t max_val = ASCII_MAX; + + switch (escaped_char) + { + case L'u': + { + chars=4; + max_val = UCS2_MAX; + in_pos++; + break; + } + + case L'U': + { + chars=8; + max_val = WCHAR_MAX; + in_pos++; + break; + } + + case L'x': + { + in_pos++; + break; + } + + case L'X': + { + max_val = BYTE_MAX; + in_pos++; + break; + } + + default: + { + // a digit like \12 + base=8; + chars=3; + break; + } + } + + // Consume + for (int i=0; i < chars && in_pos < buff_len; i++) + { + long d = convert_digit(buffstr.at(in_pos), base); + if (d < 0) + break; + res = (res * base) + d; + in_pos++; + } + //in_pos is now at the first character that could not be converted (or buff_len) + assert(in_pos >= backslash_pos && in_pos <= buff_len); + fill_end = in_pos; + + // It's an error if we exceeded the max value + if (res > max_val) + fill_color = HIGHLIGHT_ERROR; + + // Subtract one from in_pos, so that the increment in the loop will move to the next character + in_pos--; + } + assert(fill_end >= backslash_pos); + std::fill(colors + backslash_pos, colors + fill_end, fill_color); + } + else + { + // Not a backslash + switch (c) + { + case L'~': + case L'%': + { + if (in_pos == 0) + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + } + break; + } + + case L'$': + { + assert(in_pos < buff_len); + int dollar_color = HIGHLIGHT_ERROR; + if (in_pos + 1 < buff_len) + { + wchar_t next = buffstr.at(in_pos + 1); + if (next == L'$' || wcsvarchr(next)) + dollar_color = HIGHLIGHT_OPERATOR; + } + colors[in_pos] = dollar_color; + break; + } + + + case L'*': + case L'?': + case L'(': + case L')': + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + break; + } + + case L'{': + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + bracket_count++; + break; + } + + case L'}': + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + bracket_count--; + break; + } + + case L',': + { + 
if (bracket_count > 0) + { + colors[in_pos] = HIGHLIGHT_OPERATOR; + } + + break; + } + + case L'\'': + { + colors[in_pos] = HIGHLIGHT_QUOTE; + mode = e_single_quoted; + break; + } + + case L'\"': + { + colors[in_pos] = HIGHLIGHT_QUOTE; + mode = e_double_quoted; + break; + } + + } + } + break; + } + + /* + Mode 1 means single quoted string, i.e 'foo' + */ + case e_single_quoted: + { + colors[in_pos] = HIGHLIGHT_QUOTE; + if (c == L'\\') + { + // backslash + if (in_pos + 1 < buff_len) + { + const wchar_t escaped_char = buffstr.at(in_pos + 1); + if (escaped_char == L'\\' || escaped_char == L'\'') + { + colors[in_pos] = HIGHLIGHT_ESCAPE; //backslash + colors[in_pos + 1] = HIGHLIGHT_ESCAPE; //escaped char + in_pos += 1; //skip over backslash + } + } + } + else if (c == L'\'') + { + mode = e_unquoted; + } + break; + } + + /* + Mode 2 means double quoted string, i.e. "foo" + */ + case e_double_quoted: + { + colors[in_pos] = HIGHLIGHT_QUOTE; + switch (c) + { + case L'"': + { + mode = e_unquoted; + break; + } + + case L'\\': + { + // backslash + if (in_pos + 1 < buff_len) + { + const wchar_t escaped_char = buffstr.at(in_pos + 1); + if (escaped_char == L'\\' || escaped_char == L'\'' || escaped_char == L'$') + { + colors[in_pos] = HIGHLIGHT_ESCAPE; //backslash + colors[in_pos + 1] = HIGHLIGHT_ESCAPE; //escaped char + in_pos += 1; //skip over backslash + } + } + break; + } + + case L'$': + { + int dollar_color = HIGHLIGHT_ERROR; + if (in_pos + 1 < buff_len) + { + wchar_t next = buffstr.at(in_pos + 1); + if (next == L'$' || wcsvarchr(next)) + dollar_color = HIGHLIGHT_OPERATOR; + } + colors[in_pos] = dollar_color; + break; + } + + } + break; + } + } + } +} + +// Color all of the arguments of the given command +static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &parent, std::vector &color_array) +{ + const parse_node_tree_t::parse_node_list_t nodes = tree.find_nodes(parent, symbol_argument); + + wcstring param; + for (node_offset_t 
i=0; i < nodes.size(); i++) + { + const parse_node_t *child = nodes.at(i); + assert(child != NULL && child->type == symbol_argument); + param.assign(src, child->source_start, child->source_length); + color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_NORMAL); + } +} + +static void color_children(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, int color, std::vector &color_array) +{ + for (node_offset_t idx=0; idx < parent.child_count; idx++) + { + const parse_node_t *child = tree.get_child(parent, idx); + if (child != NULL && child->type == type && child->has_source()) + { + color_node(*child, color, color_array); + } + } +} + +void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +{ + ASSERT_IS_BACKGROUND_THREAD(); + + const size_t length = buff.size(); + assert(buff.size() == color.size()); + + if (length == 0) + return; + + std::fill(color.begin(), color.end(), -1); + + /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */ + const wcstring working_directory = env_get_pwd_slash(); + + /* Parse the buffer */ + parse_node_tree_t parse_tree; + parse_t parser; + parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); + + /* Walk the node tree */ + for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) + { + const parse_node_t &node = *iter; + + switch (node.type) + { + // Color direct string descendants, e.g. 'for' and 'in'. 
+ case symbol_for_header: + case symbol_while_header: + case symbol_begin_header: + case symbol_function_header: + case symbol_if_clause: + case symbol_else_clause: + case symbol_case_item: + case symbol_switch_statement: + case symbol_boolean_statement: + case symbol_decorated_statement: + color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); + break; + + case symbol_redirection: + color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color); + break; + + case parse_token_type_background: + case parse_token_type_end: + color_node(node, HIGHLIGHT_END, color); + break; + + case symbol_plain_statement: + { + // Color the command + color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); + + // Color arguments + const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list); + if (arguments != NULL) + { + color_arguments(buff, parse_tree, *arguments, color); + } + } + break; + + + case symbol_arguments_or_redirections_list: + case symbol_argument_list: + /* Nothing, these are handled by their parents */ + break; + + case parse_special_type_parse_error: + case parse_special_type_tokenizer_error: + color_node(node, HIGHLIGHT_ERROR, color); + break; + + case parse_special_type_comment: + color_node(node, HIGHLIGHT_COMMENT, color); + break; + + default: + break; + } + } +} /** Perform quote and parenthesis highlighting on the specified string. diff --git a/highlight.h b/highlight.h index 6747bba51..ea8557918 100644 --- a/highlight.h +++ b/highlight.h @@ -84,6 +84,7 @@ struct file_detection_context_t; \param error a list in which a description of each error will be inserted. May be 0, in whcich case no error descriptions will be generated. 
*/ void highlight_shell(const wcstring &buffstr, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); +void highlight_shell_magic(const wcstring &buffstr, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); /** Perform syntax highlighting for the text in buff. Matching quotes and paranthesis are highlighted. The result is diff --git a/parse_productions.cpp b/parse_productions.cpp index 61f7636de..b5efa11ca 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -135,14 +135,12 @@ RESOLVE(statement) return 2; case parse_keyword_else: - //symbol_stack_pop(); return NO_PRODUCTION; case parse_keyword_switch: return 3; case parse_keyword_end: - PARSER_DIE(); //todo return NO_PRODUCTION; // 'in' is only special within a for_header @@ -378,7 +376,7 @@ RESOLVE(arguments_or_redirections_list) PRODUCTIONS(argument_or_redirection) = { - {parse_token_type_string}, + {symbol_argument}, {parse_token_type_redirection} }; RESOLVE(argument_or_redirection) @@ -394,6 +392,18 @@ RESOLVE(argument_or_redirection) } } +PRODUCTIONS(argument) = +{ + {parse_token_type_string} +}; +RESOLVE_ONLY(argument) + +PRODUCTIONS(redirection) = +{ + {parse_token_type_redirection} +}; +RESOLVE_ONLY(redirection) + PRODUCTIONS(optional_background) = { {}, @@ -449,6 +459,8 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n TEST(plain_statement) TEST(arguments_or_redirections_list) TEST(argument_or_redirection) + TEST(argument) + TEST(redirection) TEST(optional_background) case parse_token_type_string: @@ -461,6 +473,14 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n PARSER_DIE(); break; + case parse_special_type_parse_error: + case parse_special_type_tokenizer_error: + case parse_special_type_comment: + fprintf(stderr, "Special type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); + PARSER_DIE(); + break; + + case 
token_type_invalid: fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__); PARSER_DIE(); diff --git a/parse_tree.cpp b/parse_tree.cpp index 7a809167e..bab295042 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -101,6 +101,11 @@ wcstring token_type_description(parse_token_type_t type) return L"arguments_or_redirections_list"; case symbol_argument_or_redirection: return L"argument_or_redirection"; + case symbol_argument: + return L"symbol_argument"; + case symbol_redirection: + return L"symbol_redirection"; + case parse_token_type_string: return L"token_string"; @@ -116,6 +121,14 @@ wcstring token_type_description(parse_token_type_t type) return L"token_terminate"; case symbol_optional_background: return L"optional_background"; + + case parse_special_type_parse_error: + return L"parse_error"; + case parse_special_type_tokenizer_error: + return L"tokenizer_error"; + case parse_special_type_comment: + return L"comment"; + } return format_string(L"Unknown token type %ld", static_cast(type)); } @@ -216,6 +229,14 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_ case TOK_REDIRECT_NOCLOB: result.type = parse_token_type_redirection; break; + + case TOK_ERROR: + result.type = parse_special_type_tokenizer_error; + break; + + case TOK_COMMENT: + result.type = parse_special_type_comment; + break; default: @@ -248,9 +269,16 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & } if (node.type == parse_token_type_string) { - result->append(L": \""); - result->append(src, node.source_start, node.source_length); - result->append(L"\""); + if (node.source_start == -1) + { + append_format(*result, L" (no source)"); + } + else + { + result->append(L": \""); + result->append(src, node.source_start, node.source_length); + result->append(L"\""); + } } result->push_back(L'\n'); ++*line; @@ -311,20 +339,24 @@ class parse_ll_t // Constructor parse_ll_t() : fatal_errored(false) { - // initial node - 
symbol_stack.push_back(parse_stack_element_t(symbol_job_list, 0)); // goal token - nodes.push_back(parse_node_t(symbol_job_list)); + this->reset(); } bool top_node_match_token(parse_token_t token); void accept_token(parse_token_t token, const wcstring &src); + + // Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. + void reset(void); void parse_error(const wchar_t *expected, parse_token_t token); void parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); void dump_stack(void) const; + + // Figure out the ranges of intermediate nodes + void determine_node_ranges(); // Get the node corresponding to the top element of the stack parse_node_t &node_for_top_symbol() @@ -453,9 +485,41 @@ void parse_ll_t::dump_stack(void) const } } +// Give each node a source range equal to the union of the ranges of its children +// Terminal nodes already have source ranges (and no children) +// Since children always appear after their parents, we can implement this very simply by walking backwards +void parse_ll_t::determine_node_ranges(void) +{ + const size_t source_start_invalid = -1; + size_t idx = nodes.size(); + while (idx--) + { + parse_node_t *parent = &nodes.at(idx); + + // Skip nodes that already have a source range. These are terminal nodes. + if (parent->source_start != source_start_invalid) + continue; + + // Ok, this node needs a source range. Get all of its children, and then set its range. 
+ size_t min_start = source_start_invalid, max_end = 0; //note source_start_invalid is huge + for (node_offset_t i=0; i < parent->child_count; i++) + { + const parse_node_t &child = nodes.at(parent->child_offset(i)); + min_start = std::min(min_start, child.source_start); + max_end = std::max(max_end, child.source_start + child.source_length); + } + + if (min_start != source_start_invalid) { + assert(max_end >= min_start); + parent->source_start = min_start; + parent->source_length = max_end - min_start; + } + } +} + void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) { - this->dump_stack(); + //this->dump_stack(); parse_error_t err; va_list va; @@ -481,8 +545,27 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } +void parse_ll_t::reset(void) +{ + // add a new job_list node and then reset our symbol list to point at it + node_offset_t where = nodes.size(); + nodes.push_back(parse_node_t(symbol_job_list)); + + symbol_stack.clear(); + symbol_stack.push_back(parse_stack_element_t(symbol_job_list, where)); // goal token + this->fatal_errored = false; +} + + bool parse_ll_t::top_node_match_token(parse_token_t token) { + if (symbol_stack.empty()) + { + // This can come about with an unbalanced 'end' or 'else', which causes us to terminate the outermost job list. + this->fatal_errored = true; + return false; + } + PARSE_ASSERT(! symbol_stack.empty()); PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); bool result = false; @@ -520,10 +603,23 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) fprintf(stderr, "Accept token %ls\n", token.describe().c_str()); } PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); - PARSE_ASSERT(! symbol_stack.empty()); + bool consumed = false; + + // Handle special types specially. Note that these are the only types that can be pushed if the symbol stack is empty. 
+ if (token.type == parse_special_type_parse_error || token.type == parse_special_type_tokenizer_error || token.type == parse_special_type_comment) + { + parse_node_t err_node(token.type); + err_node.source_start = token.source_start; + err_node.source_length = token.source_length; + nodes.push_back(err_node); + consumed = true; + } + while (! consumed && ! this->fatal_errored) { + PARSE_ASSERT(! symbol_stack.empty()); + if (top_node_match_token(token)) { if (logit) @@ -534,6 +630,10 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) break; } + // top_node_match_token may indicate an error if our stack is empty + if (this->fatal_errored) + break; + // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); @@ -548,6 +648,12 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) // Manipulate the symbol stack. // Note that stack_elem is invalidated by popping the stack. symbol_stack_pop_push_production(production); + + // If we end up with an empty stack, something bad happened, like an unbalanced end + if (symbol_stack.empty()) + { + this->parse_error(token, L"All symbols removed from symbol stack. 
Likely unbalanced else or end?", stack_elem.describe().c_str(), token.describe().c_str()); + } } } } @@ -556,6 +662,11 @@ parse_t::parse_t() : parser(new parse_ll_t()) { } +parse_t::~parse_t() +{ + delete parser; +} + static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) { parse_keyword_t result = parse_keyword_none; @@ -597,21 +708,20 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } -bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors) +bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { - tokenizer_t tok = tokenizer_t(str.c_str(), 0); + tok_flags_t tok_options = TOK_SQUASH_ERRORS; + if (parse_flags & parse_flag_include_comments) + tok_options |= TOK_SHOW_COMMENTS; + + tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); for (; tok_has_next(&tok) && ! this->parser->fatal_errored; tok_next(&tok)) { token_type tok_type = static_cast(tok_last_type(&tok)); const wchar_t *tok_txt = tok_last(&tok); int tok_start = tok_get_pos(&tok); size_t tok_extent = tok_get_extent(&tok); - - if (tok_type == TOK_ERROR) - { - fprintf(stderr, "Tokenizer error\n"); - break; - } + assert(tok_extent < 10000000); //paranoia parse_token_t token = parse_token_from_tokenizer_token(tok_type); token.tokenizer_type = tok_type; @@ -621,12 +731,31 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_ this->parser->accept_token(token, str); if (this->parser->fatal_errored) - break; + { + if (parse_flags & parse_flag_continue_after_error) + { + /* Mark an error and then keep going */ + token.type = parse_special_type_parse_error; + token.keyword = parse_keyword_none; + this->parser->accept_token(token, str); + this->parser->reset(); + } + else + { + /* Bail out */ + break; + } + } } + // Teach each node where its source range is + 
this->parser->determine_node_ranges(); + +#if 0 wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); +#endif if (output != NULL) { @@ -642,3 +771,40 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_ return ! this->parser->fatal_errored; } + +const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const +{ + const parse_node_t *result = NULL; + PARSE_ASSERT(which < parent.child_count); + node_offset_t child_offset = parent.child_offset(which); + if (child_offset < this->size()) + { + result = &this->at(child_offset); + } + + // If we are given an expected type, then the node must be null or that type + if (result != NULL) + { + assert(expected_type == token_type_invalid || expected_type == result->type); + } + + return result; +} + +static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result) +{ + if (parent.type == type) result->push_back(&parent); + for (size_t i=0; i < parent.child_count; i++) + { + const parse_node_t *child = tree.get_child(parent, i); + assert(child != NULL); + find_nodes_recursive(tree, *child, type, result); + } +} + +parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type) const +{ + parse_node_list_t result; + find_nodes_recursive(*this, parent, type, &result); + return result; +} diff --git a/parse_tree.h b/parse_tree.h index c53864258..25b63a0ca 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -15,7 +15,7 @@ #include #define PARSE_ASSERT(a) assert(a) -#define PARSER_DIE() exit_without_destructors(-1) +#define PARSER_DIE() do { 
fprintf(stderr, "Parser dying!\n"); exit_without_destructors(-1); } while (0) class parse_node_t; class parse_node_tree_t; @@ -36,6 +36,18 @@ struct parse_error_t }; typedef std::vector parse_error_list_t; +enum +{ + parse_flag_none = 0, + + /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */ + parse_flag_continue_after_error = 1 << 0, + + /* Include comment tokens */ + parse_flag_include_comments = 1 << 1 +}; +typedef unsigned int parse_tree_flags_t; + class parse_ll_t; class parse_t { @@ -43,7 +55,8 @@ class parse_t public: parse_t(); - bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors); + ~parse_t(); + bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); }; enum parse_token_type_t @@ -80,6 +93,9 @@ enum parse_token_type_t symbol_argument_list_nonempty, symbol_argument_list, + symbol_argument, + symbol_redirection, + symbol_optional_background, // Terminal types @@ -90,6 +106,11 @@ enum parse_token_type_t parse_token_type_end, parse_token_type_terminate, + // Very special terminal types that don't appear in the production list + parse_special_type_parse_error, + parse_special_type_tokenizer_error, + parse_special_type_comment, + LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, FIRST_PARSE_TOKEN_TYPE = parse_token_type_string }; @@ -145,7 +166,7 @@ public: wcstring describe(void) const; /* Constructor */ - explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), child_start(0), child_count(0), tag(0) { } @@ -154,10 +175,23 @@ public: PARSE_ASSERT(which < child_count); return child_start + which; } + + bool has_source() const + { + return source_start != (size_t)(-1); + } }; 
class parse_node_tree_t : public std::vector { + public: + + /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */ + const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + + /* Find all the nodes of a given type underneath a given node */ + typedef std::vector parse_node_list_t; + parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; }; @@ -214,7 +248,8 @@ class parse_node_tree_t : public std::vector arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list - argument_or_redirection = redirection | + argument_or_redirection = argument | redirection + argument = redirection = terminator = | From e58b73179f4727c79465c6f273aef377b9bb8bee Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 11 Aug 2013 00:35:00 -0700 Subject: [PATCH 025/108] More work on new parser --- builtin.cpp | 5 +- fish_tests.cpp | 192 ++++++++++++++++++++++++- highlight.cpp | 61 +++++--- parse_productions.cpp | 80 ++++++----- parse_tree.cpp | 320 +++++++++++++++++++++++++++--------------- parse_tree.h | 89 +++++++----- 6 files changed, 532 insertions(+), 215 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 4ae9e5b51..90fb099bb 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -4075,7 +4075,10 @@ int builtin_parse(parser_t &parser, wchar_t **argv) } else { - if (0) { + const wcstring dump = parse_dump_tree(parse_tree, src); + fprintf(stderr, "%ls", dump.c_str()); + if (0) + { parse_execution_context_t ctx(parse_tree, src); parse_execution_simulator_t sim; sim.context = &ctx; diff --git a/fish_tests.cpp b/fish_tests.cpp index dd16deb99..9d8f2b803 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1816,13 +1816,16 @@ static void test_new_parser_correctness(void) {L"if true ; end", true}, {L"if true; end ; end", false}, {L"if 
end; end ; end", false}, - {L"end", false} + {L"if end", false}, + {L"end", false}, + {L"for i i", false}, + {L"for i in a b c ; end", true} }; - + for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++) { const parser_test_t *test = &parser_tests[i]; - + parse_node_tree_t parse_tree; parse_t parser; bool success = parser.parse(test->src, parse_flag_none, &parse_tree, NULL); @@ -1837,7 +1840,87 @@ static void test_new_parser_correctness(void) } } say(L"Parse tests complete"); +} +struct parser_fuzz_token_t +{ + parse_token_type_t token_type; + parse_keyword_t keyword; + + parser_fuzz_token_t() : token_type(FIRST_TERMINAL_TYPE), keyword(parse_keyword_none) + { + } +}; + +static bool increment(std::vector &tokens) +{ + size_t i, end = tokens.size(); + for (i=0; i < end; i++) + { + bool wrapped = false; + + struct parser_fuzz_token_t &token = tokens[i]; + bool incremented_in_keyword = false; + if (token.token_type == parse_token_type_string) + { + // try incrementing the keyword + token.keyword++; + if (token.keyword <= LAST_KEYWORD) + { + incremented_in_keyword = true; + } + else + { + token.keyword = parse_keyword_none; + incremented_in_keyword = false; + } + } + + if (! incremented_in_keyword) + { + token.token_type++; + if (token.token_type > LAST_TERMINAL_TYPE) + { + token.token_type = FIRST_TERMINAL_TYPE; + wrapped = true; + } + } + + if (! 
wrapped) + { + break; + } + } + return i == end; +} + +static void test_new_parser_fuzzing(void) +{ + say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t)); + double start = timef(); + // ensure nothing crashes + size_t max = 5; + for (size_t len=1; len <= max; len++) + { + fprintf(stderr, "%lu / %lu\n", len, max); + std::vector tokens(len); + do + { + parse_t parser; + parse_node_tree_t parse_tree; + parse_error_list_t errors; + for (size_t i=0; i < len; i++) + { + const parser_fuzz_token_t &token = tokens[i]; + parser.parse_1_token(token.token_type, token.keyword, &parse_tree, &errors); + } + + // keep going until we wrap + } + while (! increment(tokens)); + } + double end = timef(); + say(L"All fuzzed in %f seconds!", end - start); } __attribute__((unused)) @@ -1863,6 +1946,104 @@ static void test_new_parser(void) } } +static void test_highlighting(void) +{ + say(L"Testing syntax highlighting"); + if (system("mkdir -p /tmp/fish_highlight_test/")) err(L"mkdir failed"); + if (system("touch /tmp/fish_highlight_test/foo")) err(L"touch failed"); + if (system("touch /tmp/fish_highlight_test/bar")) err(L"touch failed"); + + // Here are the components of our source and the colors we expect those to be + struct highlight_component_t { + const wchar_t *txt; + int color; + }; + + const highlight_component_t components1[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH}, + {L"&", HIGHLIGHT_END}, + {NULL, -1} + }; + + const highlight_component_t components2[] = + { + {L"command", HIGHLIGHT_COMMAND}, + {L"echo", HIGHLIGHT_COMMAND}, + {L"abc", HIGHLIGHT_PARAM}, + {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH}, + {L"&", HIGHLIGHT_END}, + {NULL, -1} + }; + + const highlight_component_t components3[] = + { + {L"if command ls", HIGHLIGHT_COMMAND}, + {L"; ", HIGHLIGHT_END}, + {L"echo", HIGHLIGHT_COMMAND}, + {L"abc", HIGHLIGHT_PARAM}, + {L"; ", HIGHLIGHT_END}, + 
{L"/bin/definitely_not_a_command", HIGHLIGHT_ERROR}, + {L"; ", HIGHLIGHT_END}, + {L"end", HIGHLIGHT_COMMAND}, + {NULL, -1} + }; + + const highlight_component_t *tests[] = {components1, components2, components3}; + for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) + { + const highlight_component_t *components = tests[which]; + // Count how many we have + size_t component_count = 0; + while (components[component_count].txt != NULL) + { + component_count++; + } + + // Generate the text + wcstring text; + std::vector expected_colors; + for (size_t i=0; i < component_count; i++) + { + if (i > 0) + { + text.push_back(L' '); + expected_colors.push_back(0); + } + text.append(components[i].txt); + + // hackish space handling + const size_t text_len = wcslen(components[i].txt); + for (size_t j=0; j < text_len; j++) + { + bool is_space = (components[i].txt[j] == L' '); + expected_colors.push_back(is_space ? 0 : components[i].color); + } + } + assert(expected_colors.size() == text.size()); + + std::vector colors(text.size()); + highlight_shell(text, colors, 20, NULL, env_vars_snapshot_t()); + + if (expected_colors.size() != colors.size()) + { + err(L"Color vector has wrong size! 
Expected %lu, actual %lu", expected_colors.size(), colors.size()); + } + assert(expected_colors.size() == colors.size()); + for (size_t i=0; i < text.size(); i++) + { + if (expected_colors.at(i) != colors.at(i)) + { + const wcstring spaces(i, L' '); + err(L"Wrong color at index %lu in text (expected %d, actual %d):\n%ls\n%ls^", i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str()); + } + } + } + + system("rm -Rf /tmp/fish_highlight_test"); +} + /** Main test */ @@ -1884,9 +2065,10 @@ int main(int argc, char **argv) reader_init(); env_init(); - test_new_parser_correctness(); + //test_new_parser_fuzzing(); + //test_new_parser_correctness(); + //test_highlighting(); //test_new_parser(); - return 0; test_format(); test_escape(); diff --git a/highlight.cpp b/highlight.cpp index 3c0838902..f2a7d6e0c 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -1314,7 +1314,8 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { ASSERT_IS_BACKGROUND_THREAD(); - if (1) { + if (0) + { highlight_shell_magic(buff, color, pos, error, vars); return; } @@ -1451,12 +1452,12 @@ static void color_node(const parse_node_t &node, int color, std::vector &co // Can only color nodes with valid source ranges if (! 
node.has_source()) return; - + // Fill the color array with our color in the corresponding range size_t source_end = node.source_start + node.source_length; assert(source_end >= node.source_start); assert(source_end <= color_array.size()); - + std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color); } @@ -1464,7 +1465,7 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c { const size_t buff_len = buffstr.size(); std::fill(colors, colors + buff_len, normal_status); - + enum {e_unquoted, e_single_quoted, e_double_quoted} mode = e_unquoted; int bracket_count=0; for (size_t in_pos=0; in_pos < buff_len; in_pos++) @@ -1479,11 +1480,11 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c int fill_color = HIGHLIGHT_ESCAPE; //may be set to HIGHLIGHT_ERROR const size_t backslash_pos = in_pos; size_t fill_end = backslash_pos; - + // Move to the escaped character in_pos++; const wchar_t escaped_char = (in_pos < buff_len ? 
buffstr.at(in_pos) : L'\0'); - + if (escaped_char == L'\0') { fill_end = in_pos; @@ -1559,7 +1560,7 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c break; } } - + // Consume for (int i=0; i < chars && in_pos < buff_len; i++) { @@ -1572,11 +1573,11 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c //in_pos is now at the first character that could not be converted (or buff_len) assert(in_pos >= backslash_pos && in_pos <= buff_len); fill_end = in_pos; - + // It's an error if we exceeded the max value if (res > max_val) fill_color = HIGHLIGHT_ERROR; - + // Subtract one from in_pos, so that the increment in the loop will move to the next character in_pos--; } @@ -1746,7 +1747,7 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &parent, std::vector &color_array) { const parse_node_tree_t::parse_node_list_t nodes = tree.find_nodes(parent, symbol_argument); - + wcstring param; for (node_offset_t i=0; i < nodes.size(); i++) { @@ -1783,20 +1784,20 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */ const wcstring working_directory = env_get_pwd_slash(); - + /* Parse the buffer */ parse_node_tree_t parse_tree; parse_t parser; parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); - + /* Walk the node tree */ for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) { const parse_node_t &node = *iter; - + switch (node.type) { - // Color direct string descendants, e.g. 'for' and 'in'. + // Color direct string descendants, e.g. 'for' and 'in'. 
case symbol_for_header: case symbol_while_header: case symbol_begin_header: @@ -1809,21 +1810,35 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t case symbol_decorated_statement: color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); break; - + + case symbol_if_statement: + { + // Color the 'end' + color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); + + // Color arguments and redirections + const parse_node_t *arguments = parse_tree.get_child(node, 3, symbol_arguments_or_redirections_list); + if (arguments != NULL) + { + color_arguments(buff, parse_tree, *arguments, color); + } + } + break; + case symbol_redirection: color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color); break; - + case parse_token_type_background: case parse_token_type_end: color_node(node, HIGHLIGHT_END, color); break; - + case symbol_plain_statement: { // Color the command color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); - + // Color arguments const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list); if (arguments != NULL) @@ -1832,22 +1847,22 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t } } break; - - + + case symbol_arguments_or_redirections_list: case symbol_argument_list: /* Nothing, these are handled by their parents */ break; - + case parse_special_type_parse_error: case parse_special_type_tokenizer_error: color_node(node, HIGHLIGHT_ERROR, color); break; - + case parse_special_type_comment: color_node(node, HIGHLIGHT_COMMENT, color); break; - + default: break; } diff --git a/parse_productions.cpp b/parse_productions.cpp index b5efa11ca..3165a2f0d 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -13,7 +13,7 @@ static bool production_is_valid(const production_options_t production_list, prod { if (which < 0 || which >= MAX_PRODUCTIONS) 
return false; - + bool nonempty_found = false; for (int i=which; i < MAX_PRODUCTIONS; i++) { @@ -249,8 +249,10 @@ RESOLVE(argument_list) { switch (token_type) { - case parse_token_type_string: return 1; - default: return 0; + case parse_token_type_string: + return 1; + default: + return 0; } } @@ -429,40 +431,40 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n { fprintf(stderr, "Resolving production for %ls with input type %ls <%ls>\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str()); } - + /* Fetch the list of productions and the function to resolve them */ const production_options_t *production_list = NULL; production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) = NULL; switch (node_type) { - TEST(job_list) - TEST(job) - TEST(statement) - TEST(job_continuation) - TEST(boolean_statement) - TEST(block_statement) - TEST(if_statement) - TEST(if_clause) - TEST(else_clause) - TEST(else_continuation) - TEST(switch_statement) - TEST(decorated_statement) - TEST(case_item_list) - TEST(case_item) - TEST(argument_list_nonempty) - TEST(argument_list) - TEST(block_header) - TEST(for_header) - TEST(while_header) - TEST(begin_header) - TEST(function_header) - TEST(plain_statement) - TEST(arguments_or_redirections_list) - TEST(argument_or_redirection) - TEST(argument) - TEST(redirection) - TEST(optional_background) - + TEST(job_list) + TEST(job) + TEST(statement) + TEST(job_continuation) + TEST(boolean_statement) + TEST(block_statement) + TEST(if_statement) + TEST(if_clause) + TEST(else_clause) + TEST(else_continuation) + TEST(switch_statement) + TEST(decorated_statement) + TEST(case_item_list) + TEST(case_item) + TEST(argument_list_nonempty) + TEST(argument_list) + TEST(block_header) + TEST(for_header) + TEST(while_header) + TEST(begin_header) + TEST(function_header) + TEST(plain_statement) + 
TEST(arguments_or_redirections_list) + TEST(argument_or_redirection) + TEST(argument) + TEST(redirection) + TEST(optional_background) + case parse_token_type_string: case parse_token_type_pipe: case parse_token_type_redirection: @@ -472,33 +474,33 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n fprintf(stderr, "Terminal token type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); PARSER_DIE(); break; - + case parse_special_type_parse_error: case parse_special_type_tokenizer_error: case parse_special_type_comment: fprintf(stderr, "Special type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__); PARSER_DIE(); break; - - + + case token_type_invalid: fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__); PARSER_DIE(); break; - + } PARSE_ASSERT(production_list != NULL); PARSE_ASSERT(resolver != NULL); - + const production_t *result = NULL; production_option_idx_t which = resolver(input_type, input_keyword, out_tag); - + if (log_it) { fprintf(stderr, "\tresolved to %u\n", (unsigned)which); } - + if (which == NO_PRODUCTION) { if (log_it) diff --git a/parse_tree.cpp b/parse_tree.cpp index bab295042..b64c6d9f3 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -4,6 +4,7 @@ using namespace parse_productions; +/** Returns a string description of this parse error */ wcstring parse_error_t::describe(const wcstring &src) const { wcstring result = text; @@ -41,6 +42,7 @@ wcstring parse_error_t::describe(const wcstring &src) const return result; } +/** Returns a string description of the given token type */ wcstring token_type_description(parse_token_type_t type) { switch (type) @@ -121,7 +123,7 @@ wcstring token_type_description(parse_token_type_t type) return L"token_terminate"; case symbol_optional_background: return L"optional_background"; - + case parse_special_type_parse_error: return L"parse_error"; case parse_special_type_tokenizer_error: @@ -172,70 +174,68 @@ wcstring 
keyword_description(parse_keyword_t k) } } +/** Returns a string description of the given parse node */ wcstring parse_node_t::describe(void) const { wcstring result = token_type_description(type); return result; } +/** A struct representing a token passed to the parser (see parse_ll_t::accept_token) */ struct parse_token_t { enum parse_token_type_t type; // The type of the token as represented by the parser - enum token_type tokenizer_type; // The type of the token as represented by the tokenizer enum parse_keyword_t keyword; // Any keyword represented by this parser size_t source_start; size_t source_length; - wcstring describe() const; + wcstring describe() const + { + wcstring result = token_type_description(type); + if (keyword != parse_keyword_none) + { + append_format(result, L" <%ls>", keyword_description(keyword).c_str()); + } + return result; + } }; -wcstring parse_token_t::describe(void) const +/* Convert from tokenizer_t's token type to the parser's parse_token_type_t */ +static parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type) { - wcstring result = token_type_description(type); - if (keyword != parse_keyword_none) - { - append_format(result, L" <%ls>", keyword_description(keyword).c_str()); - } - return result; -} - -// Convert from tokenizer_t's token type to our token -static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type) -{ - parse_token_t result = {}; - result.tokenizer_type = tokenizer_token_type; + parse_token_type_t result = token_type_invalid; switch (tokenizer_token_type) { case TOK_STRING: - result.type = parse_token_type_string; + result = parse_token_type_string; break; case TOK_PIPE: - result.type = parse_token_type_pipe; + result = parse_token_type_pipe; break; case TOK_END: - result.type = parse_token_type_end; + result = parse_token_type_end; break; case TOK_BACKGROUND: - result.type = parse_token_type_background; + result = parse_token_type_background; break; - + case TOK_REDIRECT_OUT: case 
TOK_REDIRECT_APPEND: case TOK_REDIRECT_IN: case TOK_REDIRECT_FD: case TOK_REDIRECT_NOCLOB: - result.type = parse_token_type_redirection; + result = parse_token_type_redirection; break; - + case TOK_ERROR: - result.type = parse_special_type_tokenizer_error; + result = parse_special_type_tokenizer_error; break; - + case TOK_COMMENT: - result.type = parse_special_type_comment; + result = parse_special_type_comment; break; @@ -247,6 +247,7 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_ return result; } +/* Helper function for parse_dump_tree */ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) { assert(start < nodes.size()); @@ -288,8 +289,8 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & } } -__attribute__((unused)) -static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) +/* Gives a debugging textual description of a parse tree */ +wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src) { if (nodes.empty()) return L"(empty!)"; @@ -300,6 +301,7 @@ static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) return result; } +/* Struct representing elements of the symbol stack, used in the internal state of the LL parser */ struct parse_stack_element_t { enum parse_token_type_t type; @@ -309,7 +311,7 @@ struct parse_stack_element_t explicit parse_stack_element_t(parse_token_type_t t, node_offset_t idx) : type(t), keyword(parse_keyword_none), node_idx(idx) { } - + explicit parse_stack_element_t(production_element_t e, node_offset_t idx) : type(production_element_type(e)), keyword(production_element_keyword(e)), node_idx(idx) { } @@ -323,40 +325,31 @@ struct parse_stack_element_t } return result; } - }; +/* The parser itself, private implementation of class parse_t. This is a hand-coded table-driven LL parser. 
Most hand-coded LL parsers are recursive descent, but recursive descent parsers are difficult to "pause", unlike table-driven parsers. */ class parse_ll_t { - friend class parse_t; - - std::vector symbol_stack; // LL parser stack + /* Traditional symbol stack of the LL parser */ + std::vector symbol_stack; + + /* Parser output. This is a parse tree, but stored in an array. */ parse_node_tree_t nodes; + /* Whether we ran into a fatal error, including parse errors or tokenizer errors */ bool fatal_errored; + + /* List of errors we have encountered */ parse_error_list_t errors; - // Constructor - parse_ll_t() : fatal_errored(false) - { - this->reset(); - } - - bool top_node_match_token(parse_token_t token); - - void accept_token(parse_token_t token, const wcstring &src); - - // Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. - void reset(void); + /* The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but terminal types are just matched against input tokens. */ + bool top_node_handle_terminal_types(parse_token_t token); void parse_error(const wchar_t *expected, parse_token_t token); void parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); void dump_stack(void) const; - - // Figure out the ranges of intermediate nodes - void determine_node_ranges(); // Get the node corresponding to the top element of the stack parse_node_t &node_for_top_symbol() @@ -413,33 +406,38 @@ class parse_ll_t if (! count) fprintf(stderr, "\t\n"); } - + // Add the children. 
Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) const size_t child_start = nodes.size(); size_t child_count = 0; for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) { production_element_t elem = (*production)[i]; - if (production_element_is_valid(elem)) + if (!production_element_is_valid(elem)) + { + // All done, bail out + break; + } + else { // Generate the parse node. Note that this push_back may invalidate node. - parse_token_type_t child_type = production_element_type(elem); - nodes.push_back(parse_node_t(child_type)); - child_count++; + parse_token_type_t child_type = production_element_type(elem); + nodes.push_back(parse_node_t(child_type)); + child_count++; } } - + // Update the parent const size_t parent_node_idx = symbol_stack.back().node_idx; parse_node_t &parent_node = nodes.at(parent_node_idx); - + // Should have no children yet PARSE_ASSERT(parent_node.child_count == 0); // Tell the node about its children parent_node.child_start = child_start; parent_node.child_count = child_count; - + // Replace the top of the stack with new stack elements corresponding to our new nodes. Note that these go in reverse order. symbol_stack.pop_back(); symbol_stack.reserve(symbol_stack.size() + child_count); @@ -452,6 +450,36 @@ class parse_ll_t } } + public: + + /* Constructor */ + parse_ll_t() : fatal_errored(false) + { + this->symbol_stack.reserve(16); + this->nodes.reserve(64); + this->reset_symbols_and_nodes(); + } + + /* Input */ + void accept_token(parse_token_t token); + + /* Indicate if we hit a fatal error */ + bool has_fatal_error(void) const + { + return this->fatal_errored; + } + + /* Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. This is called from the constructor */ + void reset_symbols(void); + + /* Clear the parse symbol stack and the node tree. Add a new job_list_t goal node. 
This is called from the constructor. */ + void reset_symbols_and_nodes(void); + + /* Once parsing is complete, determine the ranges of intermediate nodes */ + void determine_node_ranges(); + + /* Acquire output after parsing. This transfers directly from within self */ + void acquire_output(parse_node_tree_t *output, parse_error_list_t *errors); }; void parse_ll_t::dump_stack(void) const @@ -495,11 +523,11 @@ void parse_ll_t::determine_node_ranges(void) while (idx--) { parse_node_t *parent = &nodes.at(idx); - + // Skip nodes that already have a source range. These are terminal nodes. if (parent->source_start != source_start_invalid) continue; - + // Ok, this node needs a source range. Get all of its children, and then set its range. size_t min_start = source_start_invalid, max_end = 0; //note source_start_invalid is huge for (node_offset_t i=0; i < parent->child_count; i++) @@ -508,8 +536,9 @@ void parse_ll_t::determine_node_ranges(void) min_start = std::min(min_start, child.source_start); max_end = std::max(max_end, child.source_start + child.source_length); } - - if (min_start != source_start_invalid) { + + if (min_start != source_start_invalid) + { assert(max_end >= min_start); parent->source_start = min_start; parent->source_length = max_end - min_start; @@ -517,11 +546,27 @@ void parse_ll_t::determine_node_ranges(void) } } +void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *errors) +{ + if (output != NULL) + { + std::swap(*output, this->nodes); + } + this->nodes.clear(); + + if (errors != NULL) + { + std::swap(*errors, this->errors); + } + this->errors.clear(); + this->symbol_stack.clear(); +} + void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) 
{ //this->dump_stack(); parse_error_t err; - + va_list va; va_start(va, fmt); err.text = vformat_string(fmt, va); @@ -545,19 +590,42 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; } -void parse_ll_t::reset(void) +void parse_ll_t::reset_symbols(void) { - // add a new job_list node and then reset our symbol list to point at it + /* Add a new job_list node, and then reset our symbol list to point at it */ node_offset_t where = nodes.size(); nodes.push_back(parse_node_t(symbol_job_list)); - + symbol_stack.clear(); symbol_stack.push_back(parse_stack_element_t(symbol_job_list, where)); // goal token this->fatal_errored = false; } +/* Reset both symbols and nodes */ +void parse_ll_t::reset_symbols_and_nodes(void) +{ + nodes.clear(); + this->reset_symbols(); +} -bool parse_ll_t::top_node_match_token(parse_token_t token) +static bool type_is_terminal_type(parse_token_type_t type) +{ + switch (type) + { + case parse_token_type_string: + case parse_token_type_pipe: + case parse_token_type_redirection: + case parse_token_type_background: + case parse_token_type_end: + case parse_token_type_terminate: + return true; + + default: + return false; + } +} + +bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token) { if (symbol_stack.empty()) { @@ -565,47 +633,64 @@ bool parse_ll_t::top_node_match_token(parse_token_t token) this->fatal_errored = true; return false; } - + PARSE_ASSERT(! symbol_stack.empty()); PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); - bool result = false; + bool handled = false; parse_stack_element_t &stack_top = symbol_stack.back(); - if (stack_top.type == token.type) + if (type_is_terminal_type(stack_top.type)) { - // So far so good. See if we need a particular keyword. - if (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword) + // The top of the stack is terminal. 
We are going to handle this (because we can't produce from a terminal type) + handled = true; + + // Now see if we actually matched + bool matched = false; + if (stack_top.type == token.type) + { + switch (stack_top.type) + { + case parse_token_type_string: + // We matched if the keywords match, or no keyword was required + matched = (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword); + break; + + default: + // For other types, we only require that the types match + matched = true; + break; + } + } + + if (matched) { // Success. Tell the node that it matched this token parse_node_t &node = node_for_top_symbol(); node.source_start = token.source_start; node.source_length = token.source_length; - - // We consumed this symbol - symbol_stack.pop_back(); - result = true; } - else if (token.type == parse_token_type_pipe) + else { - // Pipes are primitive - symbol_stack.pop_back(); - result = true; + // Failure + this->fatal_errored = true; } + + // We handled the token, so pop the symbol stack + symbol_stack.pop_back(); } - return result; + return handled; } -void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) +void parse_ll_t::accept_token(parse_token_t token) { bool logit = false; if (logit) { - const wcstring txt = wcstring(src, token.source_start, token.source_length); fprintf(stderr, "Accept token %ls\n", token.describe().c_str()); } PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); - + bool consumed = false; - + // Handle special types specially. Note that these are the only types that can be pushed if the symbol stack is empty. if (token.type == parse_special_type_parse_error || token.type == parse_special_type_tokenizer_error || token.type == parse_special_type_comment) { @@ -619,8 +704,8 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) while (! consumed && ! this->fatal_errored) { PARSE_ASSERT(! 
symbol_stack.empty()); - - if (top_node_match_token(token)) + + if (top_node_handle_terminal_types(token)) { if (logit) { @@ -629,11 +714,11 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) consumed = true; break; } - + // top_node_match_token may indicate an error if our stack is empty if (this->fatal_errored) break; - + // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); @@ -648,7 +733,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) // Manipulate the symbol stack. // Note that stack_elem is invalidated by popping the stack. symbol_stack_pop_push_production(production); - + // If we end up with an empty stack, something bad happened, like an unbalanced end if (symbol_stack.empty()) { @@ -713,9 +798,9 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n tok_flags_t tok_options = TOK_SQUASH_ERRORS; if (parse_flags & parse_flag_include_comments) tok_options |= TOK_SHOW_COMMENTS; - + tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); - for (; tok_has_next(&tok) && ! this->parser->fatal_errored; tok_next(&tok)) + for (; tok_has_next(&tok) && ! 
this->parser->has_fatal_error(); tok_next(&tok)) { token_type tok_type = static_cast(tok_last_type(&tok)); const wchar_t *tok_txt = tok_last(&tok); @@ -723,22 +808,22 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n size_t tok_extent = tok_get_extent(&tok); assert(tok_extent < 10000000); //paranoia - parse_token_t token = parse_token_from_tokenizer_token(tok_type); - token.tokenizer_type = tok_type; + parse_token_t token; + token.type = parse_token_type_from_tokenizer_token(tok_type); token.source_start = (size_t)tok_start; token.source_length = tok_extent; token.keyword = keyword_for_token(tok_type, tok_txt); - this->parser->accept_token(token, str); - - if (this->parser->fatal_errored) + this->parser->accept_token(token); + + if (this->parser->has_fatal_error()) { if (parse_flags & parse_flag_continue_after_error) { /* Mark an error and then keep going */ token.type = parse_special_type_parse_error; token.keyword = parse_keyword_none; - this->parser->accept_token(token, str); - this->parser->reset(); + this->parser->accept_token(token); + this->parser->reset_symbols(); } else { @@ -757,19 +842,32 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); #endif - if (output != NULL) - { - output->swap(this->parser->nodes); - this->parser->nodes.clear(); - } + // Acquire the output from the parser + this->parser->acquire_output(output, errors); + + // Indicate if we had a fatal error + return ! this->parser->has_fatal_error(); +} - if (errors != NULL) - { - errors->swap(this->parser->errors); - this->parser->errors.clear(); - } +bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors) +{ + // Only strings can have keywords. 
So if we have a keyword, the type must be a string + assert(keyword == parse_keyword_none || token_type == parse_token_type_string); - return ! this->parser->fatal_errored; + parse_token_t token; + token.type = token_type; + token.keyword = keyword; + token.source_start = -1; + token.source_length = 0; + + this->parser->accept_token(token); + + return ! this->parser->has_fatal_error(); +} + +void parse_t::clear() +{ + this->parser->reset_symbols_and_nodes(); } const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const @@ -781,13 +879,13 @@ const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, nod { result = &this->at(child_offset); } - + // If we are given an expected type, then the node must be null or that type if (result != NULL) { assert(expected_type == token_type_invalid || expected_type == result->type); } - + return result; } diff --git a/parse_tree.h b/parse_tree.h index 25b63a0ca..18e3cffa2 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -36,29 +36,6 @@ struct parse_error_t }; typedef std::vector parse_error_list_t; -enum -{ - parse_flag_none = 0, - - /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. 
*/ - parse_flag_continue_after_error = 1 << 0, - - /* Include comment tokens */ - parse_flag_include_comments = 1 << 1 -}; -typedef unsigned int parse_tree_flags_t; - -class parse_ll_t; -class parse_t -{ - parse_ll_t * const parser; - -public: - parse_t(); - ~parse_t(); - bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); -}; - enum parse_token_type_t { token_type_invalid, @@ -92,10 +69,10 @@ enum parse_token_type_t symbol_argument_list_nonempty, symbol_argument_list, - + symbol_argument, symbol_redirection, - + symbol_optional_background, // Terminal types @@ -105,12 +82,15 @@ enum parse_token_type_t parse_token_type_background, parse_token_type_end, parse_token_type_terminate, - + // Very special terminal types that don't appear in the production list parse_special_type_parse_error, parse_special_type_tokenizer_error, parse_special_type_comment, - + + FIRST_TERMINAL_TYPE = parse_token_type_string, + LAST_TERMINAL_TYPE = parse_token_type_terminate, + LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, FIRST_PARSE_TOKEN_TYPE = parse_token_type_string }; @@ -132,9 +112,46 @@ enum parse_keyword_t parse_keyword_or, parse_keyword_not, parse_keyword_command, - parse_keyword_builtin + parse_keyword_builtin, + + LAST_KEYWORD = parse_keyword_builtin }; + +enum +{ + parse_flag_none = 0, + + /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. 
*/ + parse_flag_continue_after_error = 1 << 0, + + /* Include comment tokens */ + parse_flag_include_comments = 1 << 1 +}; +typedef unsigned int parse_tree_flags_t; + +class parse_ll_t; +class parse_t +{ + parse_ll_t * const parser; + +public: + parse_t(); + ~parse_t(); + + /* Parse a string */ + bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); + + /* Parse a single token */ + bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors); + + /* Reset, ready to parse something else */ + void clear(); + +}; + +wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src); + wcstring token_type_description(parse_token_type_t type); wcstring keyword_description(parse_keyword_t type); @@ -158,7 +175,7 @@ public: /* Type-dependent data */ uint32_t tag; - + /* Which production was used */ uint8_t production_idx; @@ -175,7 +192,7 @@ public: PARSE_ASSERT(which < child_count); return child_start + which; } - + bool has_source() const { return source_start != (size_t)(-1); @@ -184,11 +201,11 @@ public: class parse_node_tree_t : public std::vector { - public: - +public: + /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */ const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; - + /* Find all the nodes of a given type underneath a given node */ typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; @@ -200,8 +217,8 @@ class parse_node_tree_t : public std::vector # A job_list is a list of jobs, separated by semicolons or newlines job_list = | - job_list | job job_list + job_list # A job is a non-empty list of statements, separated by pipes. 
(Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation @@ -251,9 +268,9 @@ class parse_node_tree_t : public std::vector argument_or_redirection = argument | redirection argument = redirection = - + terminator = | - + optional_background = | */ From 14741518a7fc52f110dcd5ca71216b423520b789 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 6 Oct 2013 16:23:45 -0700 Subject: [PATCH 026/108] Command highlighting now works --- fish_tests.cpp | 17 +++-- highlight.cpp | 168 +++++++++++++++++++++++++++++++++++++----- highlight.h | 4 + parse_productions.cpp | 42 +++++------ parse_tree.cpp | 117 +++++++++++++++++++++++------ parse_tree.h | 25 ++++++- 6 files changed, 299 insertions(+), 74 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 010303191..894408591 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1922,11 +1922,10 @@ static void test_new_parser_fuzzing(void) { parse_t parser; parse_node_tree_t parse_tree; - parse_error_list_t errors; for (size_t i=0; i < len; i++) { const parser_fuzz_token_t &token = tokens[i]; - parser.parse_1_token(token.token_type, token.keyword, &parse_tree, &errors); + parser.parse_1_token(token.token_type, token.keyword, &parse_tree, NULL); } // keep going until we wrap @@ -1940,7 +1939,7 @@ static void test_new_parser_fuzzing(void) __attribute__((unused)) static void test_new_parser(void) { - say(L"Testing new parser!"); + say(L"Testing new parser"); const wcstring src = L"echo hello world"; parse_node_tree_t parse_tree; parse_t parser; @@ -2050,7 +2049,7 @@ static void test_highlighting(void) if (expected_colors.at(i) != colors.at(i)) { const wcstring spaces(i, L' '); - err(L"Wrong color at index %lu in text (expected %d, actual %d):\n%ls\n%ls^", i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str()); + err(L"Wrong color at index %lu in text (expected %#x, actual %#x):\n%ls\n%ls^", 
i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str()); } } } @@ -2079,10 +2078,12 @@ int main(int argc, char **argv) reader_init(); env_init(); - //test_new_parser_fuzzing(); - //test_new_parser_correctness(); - //test_highlighting(); - //test_new_parser(); + test_highlighting(); + return 0; + test_new_parser_fuzzing(); + test_new_parser_correctness(); + test_highlighting(); + test_new_parser(); test_format(); test_escape(); diff --git a/highlight.cpp b/highlight.cpp index dc221a2f2..9837d95cc 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -667,18 +667,19 @@ static void highlight_param(const wcstring &buffstr, std::vector &colors, w } } -static int has_expand_reserved(const wchar_t *str) +static bool has_expand_reserved(const wcstring &str) { - while (*str) + bool result = false; + for (size_t i=0; i < str.size(); i++) { - if (*str >= EXPAND_RESERVED && - *str <= EXPAND_RESERVED_END) + wchar_t wc = str.at(i); + if (wc >= EXPAND_RESERVED && wc <= EXPAND_RESERVED_END) { - return 1; + result = true; + break; } - str++; } - return 0; + return result; } /* Parse a command line. Return by reference the last command, its arguments, and the offset in the string of the beginning of the last argument. This is used by autosuggestions */ @@ -712,7 +713,7 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command /* Command. First check that the command actually exists. */ wcstring local_cmd = tok_last(&tok); bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); - if (! expanded || has_expand_reserved(cmd.c_str())) + if (! expanded || has_expand_reserved(cmd)) { /* We can't expand this cmd, ignore it */ } @@ -1027,7 +1028,7 @@ static void tokenize(const wchar_t * const buff, std::vector &color, const */ cmd = tok_last(&tok); bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); - if (! expanded || has_expand_reserved(cmd.c_str())) + if (! 
expanded || has_expand_reserved(cmd)) { color.at(tok_get_pos(&tok)) = HIGHLIGHT_ERROR; } @@ -1308,17 +1309,22 @@ static void tokenize(const wchar_t * const buff, std::vector &color, const } } -void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); - -// PCA This function does I/O, (calls is_potential_path, path_get_path, maybe others) and so ought to only run on a background thread void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { - ASSERT_IS_BACKGROUND_THREAD(); - if (0) + if (1) { highlight_shell_magic(buff, color, pos, error, vars); - return; } + else + { + highlight_shell_classic(buff, color, pos, error, vars); + } +} + +// PCA This function does I/O, (calls is_potential_path, path_get_path, maybe others) and so ought to only run on a background thread +void highlight_shell_classic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +{ + ASSERT_IS_BACKGROUND_THREAD(); const size_t length = buff.size(); assert(buff.size() == color.size()); @@ -1461,6 +1467,7 @@ static void color_node(const parse_node_t &node, int color, std::vector &co std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color); } +/* This function is a disaster badly in need of refactoring */ static void color_argument(const wcstring &buffstr, std::vector::iterator colors, int normal_status) { const size_t buff_len = buffstr.size(); @@ -1743,6 +1750,28 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c } } +// Indicates whether the source range of the given node forms a valid path in the given working_directory +static bool node_is_potential_path(const wcstring &src, const parse_node_t &node, const wcstring &working_directory) +{ + if (! 
node.has_source()) + return false; + + + /* Get the node source, unescape it, and then pass it to is_potential_path along with the working directory (as a one element list) */ + bool result = false; + wcstring token(src, node.source_start, node.source_length); + if (unescape_string(token, 1)) + { + /* Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY. Put it back. */ + if (! token.empty() && token.at(0) == HOME_DIRECTORY) + token.at(0) = L'~'; + + const wcstring_list_t working_directory_list(1, working_directory); + result = is_potential_path(token, working_directory_list, PATH_EXPAND_TILDE); + } + return result; +} + // Color all of the arguments of the given command static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &parent, std::vector &color_array) { @@ -1754,22 +1783,87 @@ static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t *child = nodes.at(i); assert(child != NULL && child->type == symbol_argument); param.assign(src, child->source_start, child->source_length); - color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_NORMAL); + color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_PARAM); } } +/* Color all the children of the command with the given type */ static void color_children(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, int color, std::vector &color_array) { for (node_offset_t idx=0; idx < parent.child_count; idx++) { const parse_node_t *child = tree.get_child(parent, idx); - if (child != NULL && child->type == type && child->has_source()) + if (child != NULL && child->type == type) { color_node(*child, color, color_array); } } } +/* Color a possibly decorated command */ +static void color_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &cmd_node, enum parse_statement_decoration_t decoration, std::vector 
&color_array, const wcstring &working_directory, const env_vars_snapshot_t &vars) +{ + if (! cmd_node.has_source()) + return; + + /* Get the source of the command */ + wcstring cmd(src, cmd_node.source_start, cmd_node.source_length); + + /* Try expanding it. If we cannot, it's an error. */ + bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); + if (! expanded || has_expand_reserved(cmd)) + { + color_node(cmd_node, HIGHLIGHT_ERROR, color_array); + return; + } + + /* Determine which types we check, based on the decoration */ + bool builtin_ok = true, function_ok = true, abbreviation_ok = true, command_ok = true, implicit_cd_ok = true; + if (decoration == parse_statement_decoration_command) + { + builtin_ok = false; + function_ok = false; + abbreviation_ok = false; + command_ok = true; + implicit_cd_ok = false; + } + else if (decoration == parse_statement_decoration_builtin) + { + builtin_ok = true; + function_ok = false; + abbreviation_ok = false; + command_ok = false; + implicit_cd_ok = false; + } + + /* Check them */ + bool is_valid = false; + + /* Builtins */ + if (! is_valid && builtin_ok) + is_valid = builtin_exists(cmd); + + /* Functions */ + if (! is_valid && function_ok) + is_valid = function_exists_no_autoload(cmd, vars); + + /* Abbreviations */ + if (! is_valid && abbreviation_ok) + is_valid = expand_abbreviation(cmd, NULL); + + /* Regular commands */ + if (! is_valid && command_ok) + is_valid = path_get_path(cmd, NULL, vars); + + /* Implicit cd */ + if (! is_valid && implicit_cd_ok) + is_valid = path_can_be_implicit_cd(cmd, NULL, working_directory.c_str(), vars); + + /* Color the node */ + int color = is_valid ? 
HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR; + color_node(cmd_node, color, color_array); +} + void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { ASSERT_IS_BACKGROUND_THREAD(); @@ -1780,7 +1874,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t if (length == 0) return; - std::fill(color.begin(), color.end(), -1); + std::fill(color.begin(), color.end(), 0); /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */ const wcstring working_directory = env_get_pwd_slash(); @@ -1790,6 +1884,11 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t parse_t parser; parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); +#if 0 + const wcstring dump = parse_dump_tree(parse_tree, buff); + fprintf(stderr, "%ls\n", dump.c_str()); +#endif + /* Walk the node tree */ for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) { @@ -1837,7 +1936,12 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t case symbol_plain_statement: { // Color the command - color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); + const parse_node_t *cmd = parse_tree.get_child(node, 0, parse_token_type_string); + if (cmd != NULL) + { + enum parse_statement_decoration_t decoration = static_cast(node.tag); + color_command(buff, parse_tree, *cmd, decoration, color, working_directory, vars); + } // Color arguments const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list); @@ -1867,6 +1971,32 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t break; } } + + if (pos <= buff.size()) + { + /* If the cursor is over an argument, and that argument is a valid path, underline it */ + for 
(parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) + { + const parse_node_t &node = *iter; + /* See if this node contains the cursor */ + if (node.type == symbol_argument && node.source_contains_location(pos)) + { + /* See if this is a valid path */ + if (node_is_potential_path(buff, node, working_directory)) + { + /* It is, underline it. */ + for (size_t i=node.source_start; i < node.source_start + node.source_length; i++) + { + /* Don't color HIGHLIGHT_ERROR because it looks dorky. For example, trying to cd into a non-directory would show an underline and also red. */ + if (! (color.at(i) & HIGHLIGHT_ERROR)) + { + color.at(i) |= HIGHLIGHT_VALID_PATH; + } + } + } + } + } + } } /** diff --git a/highlight.h b/highlight.h index ea8557918..eb123258c 100644 --- a/highlight.h +++ b/highlight.h @@ -134,5 +134,9 @@ enum typedef unsigned int path_flags_t; bool is_potential_path(const wcstring &const_path, const wcstring_list_t &directories, path_flags_t flags, wcstring *out_path = NULL); +/* For testing */ +void highlight_shell_classic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); +void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); + #endif diff --git a/parse_productions.cpp b/parse_productions.cpp index 3165a2f0d..4876ba58e 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -30,7 +30,7 @@ static bool production_is_valid(const production_options_t production_list, prod #define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) #define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) { return 0; } -#define PRODUCE_KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) +#define 
KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) /* A job_list is a list of jobs, separated by semicolons or newlines */ @@ -167,20 +167,20 @@ RESOLVE(statement) PRODUCTIONS(if_statement) = { - {symbol_if_clause, symbol_else_clause, PRODUCE_KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} + {symbol_if_clause, symbol_else_clause, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(if_statement) PRODUCTIONS(if_clause) = { - { PRODUCE_KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list } + { KEYWORD(parse_keyword_if), symbol_job, parse_token_type_end, symbol_job_list } }; RESOLVE_ONLY(if_clause) PRODUCTIONS(else_clause) = { { }, - { PRODUCE_KEYWORD(parse_keyword_else), symbol_else_continuation } + { KEYWORD(parse_keyword_else), symbol_else_continuation } }; RESOLVE(else_clause) { @@ -211,7 +211,7 @@ RESOLVE(else_continuation) PRODUCTIONS(switch_statement) = { - { PRODUCE_KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, PRODUCE_KEYWORD(parse_keyword_end)} + { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, KEYWORD(parse_keyword_end)} }; RESOLVE_ONLY(switch_statement) @@ -230,7 +230,7 @@ RESOLVE(case_item_list) PRODUCTIONS(case_item) = { - {PRODUCE_KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list} + {KEYWORD(parse_keyword_case), symbol_argument_list, parse_token_type_end, symbol_job_list} }; RESOLVE_ONLY(case_item) @@ -258,7 +258,7 @@ RESOLVE(argument_list) PRODUCTIONS(block_statement) = { - {symbol_block_header, parse_token_type_end, symbol_job_list, PRODUCE_KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} + {symbol_block_header, parse_token_type_end, symbol_job_list, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(block_statement) @@ -290,34 +290,34 @@ RESOLVE(block_header) PRODUCTIONS(for_header) = { - 
{PRODUCE_KEYWORD(parse_keyword_for), parse_token_type_string, PRODUCE_KEYWORD(parse_keyword_in), symbol_arguments_or_redirections_list} + {KEYWORD(parse_keyword_for), parse_token_type_string, KEYWORD(parse_keyword_in), symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(for_header) PRODUCTIONS(while_header) = { - {PRODUCE_KEYWORD(parse_keyword_while), symbol_statement} + {KEYWORD(parse_keyword_while), symbol_statement} }; RESOLVE_ONLY(while_header) PRODUCTIONS(begin_header) = { - {PRODUCE_KEYWORD(parse_keyword_begin)} + {KEYWORD(parse_keyword_begin)} }; RESOLVE_ONLY(begin_header) PRODUCTIONS(function_header) = { - {PRODUCE_KEYWORD(parse_keyword_function), parse_token_type_string, symbol_argument_list} + {KEYWORD(parse_keyword_function), parse_token_type_string, symbol_argument_list} }; RESOLVE_ONLY(function_header) /* A boolean statement is AND or OR or NOT */ PRODUCTIONS(boolean_statement) = { - {PRODUCE_KEYWORD(parse_keyword_and), symbol_statement}, - {PRODUCE_KEYWORD(parse_keyword_or), symbol_statement}, - {PRODUCE_KEYWORD(parse_keyword_not), symbol_statement} + {KEYWORD(parse_keyword_and), symbol_statement}, + {KEYWORD(parse_keyword_or), symbol_statement}, + {KEYWORD(parse_keyword_not), symbol_statement} }; RESOLVE(boolean_statement) { @@ -336,19 +336,19 @@ RESOLVE(boolean_statement) PRODUCTIONS(decorated_statement) = { - {PRODUCE_KEYWORD(parse_keyword_command), symbol_plain_statement}, - {PRODUCE_KEYWORD(parse_keyword_builtin), symbol_plain_statement}, - {symbol_plain_statement} + {symbol_plain_statement}, + {KEYWORD(parse_keyword_command), symbol_plain_statement}, + {KEYWORD(parse_keyword_builtin), symbol_plain_statement}, }; RESOLVE(decorated_statement) { switch (token_keyword) { - case parse_keyword_command: - return 0; - case parse_keyword_builtin: - return 1; default: + return 0; + case parse_keyword_command: + return 1; + case parse_keyword_builtin: return 2; } } diff --git a/parse_tree.cpp b/parse_tree.cpp index b64c6d9f3..698297160 100644 --- 
a/parse_tree.cpp +++ b/parse_tree.cpp @@ -339,9 +339,12 @@ class parse_ll_t /* Whether we ran into a fatal error, including parse errors or tokenizer errors */ bool fatal_errored; + /* Whether we should collect error messages or not */ + bool should_generate_error_messages; + /* List of errors we have encountered */ parse_error_list_t errors; - + /* The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but terminal types are just matched against input tokens. */ bool top_node_handle_terminal_types(parse_token_t token); @@ -453,7 +456,7 @@ class parse_ll_t public: /* Constructor */ - parse_ll_t() : fatal_errored(false) + parse_ll_t() : fatal_errored(false), should_generate_error_messages(true) { this->symbol_stack.reserve(16); this->nodes.reserve(64); @@ -469,6 +472,12 @@ class parse_ll_t return this->fatal_errored; } + /* Indicate whether we want to generate error messages */ + void set_should_generate_error_messages(bool flag) + { + this->should_generate_error_messages = flag; + } + /* Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. This is called from the constructor */ void reset_symbols(void); @@ -564,30 +573,36 @@ void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *e void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) 
{ - //this->dump_stack(); - parse_error_t err; - - va_list va; - va_start(va, fmt); - err.text = vformat_string(fmt, va); - va_end(va); - - err.source_start = token.source_start; - err.source_length = token.source_length; - this->errors.push_back(err); this->fatal_errored = true; + if (this->should_generate_error_messages) + { + //this->dump_stack(); + parse_error_t err; + + va_list va; + va_start(va, fmt); + err.text = vformat_string(fmt, va); + va_end(va); + + err.source_start = token.source_start; + err.source_length = token.source_length; + this->errors.push_back(err); + } } void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) { - wcstring desc = token_type_description(token.type); - parse_error_t error; - error.text = format_string(L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); - error.source_start = token.source_start; - error.source_start = token.source_length; - errors.push_back(error); fatal_errored = true; + if (this->should_generate_error_messages) + { + wcstring desc = token_type_description(token.type); + parse_error_t error; + error.text = format_string(L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); + error.source_start = token.source_start; + error.source_start = token.source_length; + errors.push_back(error); + } } void parse_ll_t::reset_symbols(void) @@ -725,7 +740,14 @@ void parse_ll_t::accept_token(parse_token_t token) const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag, NULL /* error text */); if (production == NULL) { - this->parse_error(token, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token.describe().c_str()); + if (should_generate_error_messages) + { + this->parse_error(token, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token.describe().c_str()); + } + else + { + this->parse_error(token, NULL); + } // parse_error 
sets fatal_errored, which ends the loop } else @@ -737,7 +759,7 @@ void parse_ll_t::accept_token(parse_token_t token) // If we end up with an empty stack, something bad happened, like an unbalanced end if (symbol_stack.empty()) { - this->parse_error(token, L"All symbols removed from symbol stack. Likely unbalanced else or end?", stack_elem.describe().c_str(), token.describe().c_str()); + this->parse_error(token, L"All symbols removed from symbol stack. Likely unbalanced else or end?"); } } } @@ -793,11 +815,46 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } +// Set type-specific tags for nodes +// This is not in parse_ll_t because it knows about different node types +static void tag_nodes(const wcstring &src, parse_node_tree_t *tree) +{ + size_t count = tree->size(); + for (size_t i=0; i < count; i++) + { + const parse_node_t &node = tree->at(i); + switch (node.type) + { + case symbol_decorated_statement: + { + // Set a tag on the plain statement to indicate the decoration type + // The decoration types matches the production + bool is_decorated = (node.production_idx > 0); + + // Get the plain statement and set the tag equal to the production index we used + // This is an enum parse_statement_decoration_t + node_offset_t statement_idx = (is_decorated ? 
1 : 0); + parse_node_t *plain_statement = tree->get_child(node, statement_idx, symbol_plain_statement); + if (plain_statement != NULL) + { + plain_statement->tag = static_cast(node.production_idx); + } + } + break; + + default: + break; + } + } +} + bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { tok_flags_t tok_options = TOK_SQUASH_ERRORS; if (parse_flags & parse_flag_include_comments) tok_options |= TOK_SHOW_COMMENTS; + + this->parser->set_should_generate_error_messages(errors != NULL); tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); for (; tok_has_next(&tok) && ! this->parser->has_fatal_error(); tok_next(&tok)) @@ -835,7 +892,9 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n // Teach each node where its source range is this->parser->determine_node_ranges(); - + + // Tag nodes + #if 0 wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); @@ -845,6 +904,9 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n // Acquire the output from the parser this->parser->acquire_output(output, errors); + // Set node tags + tag_nodes(str, output); + // Indicate if we had a fatal error return ! 
this->parser->has_fatal_error(); } @@ -859,6 +921,9 @@ bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keywo token.keyword = keyword; token.source_start = -1; token.source_length = 0; + + bool wants_errors = (errors != NULL); + this->parser->set_should_generate_error_messages(wants_errors); this->parser->accept_token(token); @@ -889,6 +954,14 @@ const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, nod return result; } +/* Hackish non-const version of get_child */ +parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) +{ + const parse_node_tree_t *const_this = this; + const parse_node_t *result = const_this->get_child(parent, which, expected_type); + return const_cast(result); +} + static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result) { if (parent.type == type) result->push_back(&parent); diff --git a/parse_tree.h b/parse_tree.h index 18e3cffa2..f577a7def 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -124,7 +124,7 @@ enum /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. 
*/ parse_flag_continue_after_error = 1 << 0, - + /* Include comment tokens */ parse_flag_include_comments = 1 << 1 }; @@ -155,7 +155,7 @@ wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src); wcstring token_type_description(parse_token_type_t type); wcstring keyword_description(parse_keyword_t type); -/** Base class for nodes of a parse tree */ +/** Class for nodes of a parse tree */ class parse_node_t { public: @@ -193,24 +193,40 @@ public: return child_start + which; } + /* Indicate if this node has a range of source code associated with it */ bool has_source() const { return source_start != (size_t)(-1); } + + /* Indicate if this node's source range contains a given location. The funny math makes this modulo-overflow safe, though overflow is not expected. */ + bool source_contains_location(size_t where) const + { + return this->has_source() && where >= source_start && where - source_start < source_length; + } }; +/* The parse tree itself */ class parse_node_tree_t : public std::vector { public: /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */ const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid); /* Find all the nodes of a given type underneath a given node */ typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; }; +/* Statement decorations, stored in the tag of plain_statement. 
This matches the order of productions in decorated_statement */ +enum parse_statement_decoration_t +{ + parse_statement_decoration_none, + parse_statement_decoration_command, + parse_statement_decoration_builtin +}; /* Fish grammar: @@ -259,9 +275,10 @@ public: boolean_statement = AND statement | OR statement | NOT statement # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" +# The tag of a plain statement indicates which mode to use - decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement - plain_statement = COMMAND arguments_or_redirections_list optional_background + decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement + plain_statement = arguments_or_redirections_list optional_background arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list From 20ccda69f4319cadbfb242f139e48a84699b503d Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 7 Oct 2013 01:04:37 -0700 Subject: [PATCH 027/108] Command highlighting works --- highlight.cpp | 69 +++++++++++++++------------ parse_exec.cpp | 2 +- parse_productions.cpp | 9 +--- parse_tree.cpp | 105 +++++++++++++----------------------------- parse_tree.h | 38 +++++++++------ 5 files changed, 98 insertions(+), 125 deletions(-) diff --git a/highlight.cpp b/highlight.cpp index 9837d95cc..a8e8326e1 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -1800,23 +1800,9 @@ static void color_children(const parse_node_tree_t &tree, const parse_node_t &pa } } -/* Color a possibly decorated command */ -static void color_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &cmd_node, enum parse_statement_decoration_t decoration, std::vector &color_array, const wcstring &working_directory, const env_vars_snapshot_t &vars) +/* Determine if a command is valid */ +static bool command_is_valid(const wcstring &cmd, enum 
parse_statement_decoration_t decoration, const wcstring &working_directory, const env_vars_snapshot_t &vars) { - if (! cmd_node.has_source()) - return; - - /* Get the source of the command */ - wcstring cmd(src, cmd_node.source_start, cmd_node.source_length); - - /* Try expanding it. If we cannot, it's an error. */ - bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); - if (! expanded || has_expand_reserved(cmd)) - { - color_node(cmd_node, HIGHLIGHT_ERROR, color_array); - return; - } - /* Determine which types we check, based on the decoration */ bool builtin_ok = true, function_ok = true, abbreviation_ok = true, command_ok = true, implicit_cd_ok = true; if (decoration == parse_statement_decoration_command) @@ -1859,9 +1845,8 @@ static void color_command(const wcstring &src, const parse_node_tree_t &tree, co if (! is_valid && implicit_cd_ok) is_valid = path_can_be_implicit_cd(cmd, NULL, working_directory.c_str(), vars); - /* Color the node */ - int color = is_valid ? HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR; - color_node(cmd_node, color, color_array); + /* Return what we got */ + return is_valid; } void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) @@ -1874,6 +1859,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t if (length == 0) return; + /* Start out at zero */ std::fill(color.begin(), color.end(), 0); /* Do something sucky and get the current working directory on this background thread. This should really be passed in. 
*/ @@ -1925,25 +1911,45 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t break; case symbol_redirection: + { color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color); - break; + } + break; case parse_token_type_background: case parse_token_type_end: + { color_node(node, HIGHLIGHT_END, color); - break; + } + break; case symbol_plain_statement: { - // Color the command - const parse_node_t *cmd = parse_tree.get_child(node, 0, parse_token_type_string); - if (cmd != NULL) + // Get the decoration from the parent + enum parse_statement_decoration_t decoration = parse_statement_decoration_none; + const parse_node_t *decorated_statement = parse_tree.get_parent(node, symbol_decorated_statement); + if (decorated_statement != NULL) { - enum parse_statement_decoration_t decoration = static_cast(node.tag); - color_command(buff, parse_tree, *cmd, decoration, color, working_directory, vars); + decoration = static_cast(decorated_statement->production_idx); } - // Color arguments + /* Color the command */ + const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string); + if (cmd_node != NULL && cmd_node->has_source()) + { + bool is_valid_cmd = false; + wcstring cmd(buff, cmd_node->source_start, cmd_node->source_length); + + /* Try expanding it. If we cannot, it's an error. */ + bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS); + if (expanded && ! has_expand_reserved(cmd)) + { + is_valid_cmd = command_is_valid(cmd, decoration, working_directory, vars); + } + color_node(*cmd_node, is_valid_cmd ? 
HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR, color); + } + + /* Color arguments */ const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list); if (arguments != NULL) { @@ -1978,8 +1984,13 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) { const parse_node_t &node = *iter; - /* See if this node contains the cursor */ - if (node.type == symbol_argument && node.source_contains_location(pos)) + + /* Must be an argument with source */ + if (node.type != symbol_argument || ! node.has_source()) + continue; + + /* See if this node contains the cursor. We check <= source_length so that, when backspacing (and the cursor is just beyond the last token), we may still underline it */ + if (pos >= node.source_start && pos - node.source_start <= node.source_length) { /* See if this is a valid path */ if (node_is_potential_path(buff, node, working_directory)) diff --git a/parse_exec.cpp b/parse_exec.cpp index 3f2074f4a..c424ad957 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -259,7 +259,7 @@ class parse_exec_t for (;;) { const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.type == symbol_argument_list || node.type == symbol_argument_list_nonempty); + PARSE_ASSERT(node.type == symbol_argument_list); if (node.type == symbol_argument_list) { // argument list, may be empty diff --git a/parse_productions.cpp b/parse_productions.cpp index 4876ba58e..0900977f7 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -234,16 +234,10 @@ PRODUCTIONS(case_item) = }; RESOLVE_ONLY(case_item) -PRODUCTIONS(argument_list_nonempty) = -{ - {parse_token_type_string, symbol_argument_list} -}; -RESOLVE_ONLY(argument_list_nonempty) - PRODUCTIONS(argument_list) = { {}, - {symbol_argument_list_nonempty} + {symbol_argument, symbol_argument_list} }; RESOLVE(argument_list) { @@ -451,7 +445,6 @@ const production_t 
*parse_productions::production_for_token(parse_token_type_t n TEST(decorated_statement) TEST(case_item_list) TEST(case_item) - TEST(argument_list_nonempty) TEST(argument_list) TEST(block_header) TEST(for_header) diff --git a/parse_tree.cpp b/parse_tree.cpp index 698297160..0a85a1d95 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -88,8 +88,6 @@ wcstring token_type_description(parse_token_type_t type) case symbol_case_item: return L"case_item"; - case symbol_argument_list_nonempty: - return L"argument_list_nonempty"; case symbol_argument_list: return L"argument_list"; @@ -369,24 +367,6 @@ class parse_ll_t return symbol_stack.back().type; } - void top_node_set_tag(uint32_t tag) - { - this->node_for_top_symbol().tag = tag; - } - - inline void add_child_to_node(size_t parent_node_idx, parse_stack_element_t *tok) - { - PARSE_ASSERT(tok->type != token_type_invalid); - tok->node_idx = nodes.size(); - nodes.push_back(parse_node_t(tok->type)); - nodes.at(parent_node_idx).child_count += 1; - } - - inline void symbol_stack_pop() - { - symbol_stack.pop_back(); - } - // Pop from the top of the symbol stack, then push the given production, updating node counts. Note that production_t has type "pointer to array" so some care is required. inline void symbol_stack_pop_push_production(const production_t *production) { @@ -408,7 +388,9 @@ class parse_ll_t } if (! count) fprintf(stderr, "\t\n"); } - + + // Get the parent index. But we can't get the parent parse node yet, since it may be made invalid by adding children + const size_t parent_node_idx = symbol_stack.back().node_idx; // Add the children. Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) const size_t child_start = nodes.size(); @@ -425,13 +407,14 @@ class parse_ll_t { // Generate the parse node. Note that this push_back may invalidate node. 
parse_token_type_t child_type = production_element_type(elem); - nodes.push_back(parse_node_t(child_type)); + parse_node_t child = parse_node_t(child_type); + child.parent = parent_node_idx; + nodes.push_back(child); child_count++; } } // Update the parent - const size_t parent_node_idx = symbol_stack.back().node_idx; parse_node_t &parent_node = nodes.at(parent_node_idx); // Should have no children yet @@ -815,39 +798,6 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } -// Set type-specific tags for nodes -// This is not in parse_ll_t because it knows about different node types -static void tag_nodes(const wcstring &src, parse_node_tree_t *tree) -{ - size_t count = tree->size(); - for (size_t i=0; i < count; i++) - { - const parse_node_t &node = tree->at(i); - switch (node.type) - { - case symbol_decorated_statement: - { - // Set a tag on the plain statement to indicate the decoration type - // The decoration types matches the production - bool is_decorated = (node.production_idx > 0); - - // Get the plain statement and set the tag equal to the production index we used - // This is an enum parse_statement_decoration_t - node_offset_t statement_idx = (is_decorated ? 1 : 0); - parse_node_t *plain_statement = tree->get_child(node, statement_idx, symbol_plain_statement); - if (plain_statement != NULL) - { - plain_statement->tag = static_cast(node.production_idx); - } - } - break; - - default: - break; - } - } -} - bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { tok_flags_t tok_options = TOK_SQUASH_ERRORS; @@ -904,9 +854,6 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n // Acquire the output from the parser this->parser->acquire_output(output, errors); - // Set node tags - tag_nodes(str, output); - // Indicate if we had a fatal error return ! 
this->parser->has_fatal_error(); } @@ -938,28 +885,38 @@ void parse_t::clear() const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const { const parse_node_t *result = NULL; - PARSE_ASSERT(which < parent.child_count); - node_offset_t child_offset = parent.child_offset(which); - if (child_offset < this->size()) + + /* We may get nodes with no children if we had an imcomplete parse. Don't consider than an error */ + if (parent.child_count > 0) { - result = &this->at(child_offset); - } - - // If we are given an expected type, then the node must be null or that type - if (result != NULL) - { - assert(expected_type == token_type_invalid || expected_type == result->type); + PARSE_ASSERT(which < parent.child_count); + node_offset_t child_offset = parent.child_offset(which); + if (child_offset < this->size()) + { + result = &this->at(child_offset); + + /* If we are given an expected type, then the node must be null or that type */ + assert(expected_type == token_type_invalid || expected_type == result->type); + } } return result; } -/* Hackish non-const version of get_child */ -parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) +const parse_node_t *parse_node_tree_t::get_parent(const parse_node_t &node, parse_token_type_t expected_type) const { - const parse_node_tree_t *const_this = this; - const parse_node_t *result = const_this->get_child(parent, which, expected_type); - return const_cast(result); + const parse_node_t *result = NULL; + if (node.parent != NODE_OFFSET_INVALID) + { + PARSE_ASSERT(node.parent < this->size()); + const parse_node_t &parent = this->at(node.parent); + if (expected_type == token_type_invalid || expected_type == parent.type) + { + // The type matches (or no type was requested) + result = &parent; + } + } + return result; } static void find_nodes_recursive(const parse_node_tree_t &tree, const 
parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result) diff --git a/parse_tree.h b/parse_tree.h index f577a7def..6fcbde0dc 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -67,7 +67,6 @@ enum parse_token_type_t symbol_arguments_or_redirections_list, symbol_argument_or_redirection, - symbol_argument_list_nonempty, symbol_argument_list, symbol_argument, @@ -168,6 +167,9 @@ public: /* Length of our range in the source code */ size_t source_length; + + /* Parent */ + node_offset_t parent; /* Children */ node_offset_t child_start; @@ -183,7 +185,7 @@ public: wcstring describe(void) const; /* Constructor */ - explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), child_start(0), child_count(0), tag(0) + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), tag(0) { } @@ -198,12 +200,6 @@ public: { return source_start != (size_t)(-1); } - - /* Indicate if this node's source range contains a given location. The funny math makes this modulo-overflow safe, though overflow is not expected. */ - bool source_contains_location(size_t where) const - { - return this->has_source() && where >= source_start && where - source_start < source_length; - } }; /* The parse tree itself */ @@ -212,14 +208,20 @@ class parse_node_tree_t : public std::vector public: /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. 
*/ - const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; - parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid); + const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + + /* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */ + const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; + /* Find all the nodes of a given type underneath a given node */ typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; }; + +/* Node type specific data, stored in the tag field */ + /* Statement decorations, stored in the tag of plain_statement. 
This matches the order of productions in decorated_statement */ enum parse_statement_decoration_t { @@ -228,6 +230,16 @@ enum parse_statement_decoration_t parse_statement_decoration_builtin }; +/* Argument flags as a bitmask, stored in the tag of argument */ +enum parse_argument_flags_t +{ + /* Indicates that this or a prior argument was --, so this should not be treated as an option */ + parse_argument_no_options = 1 << 0, + + /* Indicates that the argument is for a cd command */ + parse_argument_is_for_cd = 1 << 1 +}; + /* Fish grammar: # A job_list is a list of jobs, separated by semicolons or newlines @@ -260,9 +272,6 @@ enum parse_statement_decoration_t case_item case_item_list case_item = CASE argument_list STATEMENT_TERMINATOR job_list - argument_list_nonempty = argument_list - argument_list = | argument_list_nonempty - block_statement = block_header job_list arguments_or_redirections_list block_header = for_header | while_header | function_header | begin_header for_header = FOR var_name IN arguments_or_redirections_list @@ -280,6 +289,9 @@ enum parse_statement_decoration_t decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement plain_statement = arguments_or_redirections_list optional_background + argument_list = | argument argument_list + + arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list argument_or_redirection = argument | redirection From 4f718e83b343cd2cf49c801968dd36cbce84a772 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 7 Oct 2013 03:56:09 -0700 Subject: [PATCH 028/108] Syntax highlighting now correctly handles cd --- fish_tests.cpp | 27 +++++++++++++++- highlight.cpp | 83 ++++++++++++++++++++++++++++++++++++-------------- parse_tree.cpp | 14 +++++++++ parse_tree.h | 3 ++ 4 files changed, 103 insertions(+), 24 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 894408591..bc631bf32 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2003,7 +2003,32 @@ 
static void test_highlighting(void) {NULL, -1} }; - const highlight_component_t *tests[] = {components1, components2, components3}; + /* Verify that cd shows errors for non-directories */ + const highlight_component_t components4[] = + { + {L"cd", HIGHLIGHT_COMMAND}, + {L"/tmp/fish_highlight_test", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH}, + {NULL, -1} + }; + + const highlight_component_t components5[] = + { + {L"cd", HIGHLIGHT_COMMAND}, + {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_ERROR}, + {NULL, -1} + }; + + const highlight_component_t components6[] = + { + {L"cd", HIGHLIGHT_COMMAND}, + {L"--help", HIGHLIGHT_PARAM}, + {L"-h", HIGHLIGHT_PARAM}, + {L"definitely_not_a_directory", HIGHLIGHT_ERROR}, + {NULL, -1} + }; + + + const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6}; for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { const highlight_component_t *components = tests[which]; diff --git a/highlight.cpp b/highlight.cpp index a8e8326e1..71dba3dcf 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -1467,7 +1467,7 @@ static void color_node(const parse_node_t &node, int color, std::vector &co std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color); } -/* This function is a disaster badly in need of refactoring */ +/* This function is a disaster badly in need of refactoring. 
However, note that it does NOT do any I/O */ static void color_argument(const wcstring &buffstr, std::vector::iterator colors, int normal_status) { const size_t buff_len = buffstr.size(); @@ -1772,10 +1772,45 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node return result; } -// Color all of the arguments of the given command -static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &parent, std::vector &color_array) +// Gets the expanded command from a plain statement node +static bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd) { - const parse_node_tree_t::parse_node_list_t nodes = tree.find_nodes(parent, symbol_argument); + assert(plain_statement.type == symbol_plain_statement); + bool result = false; + + // Get the command + const parse_node_t *cmd_node = tree.get_child(plain_statement, 0, parse_token_type_string); + if (cmd_node != NULL && cmd_node->has_source()) + { + wcstring cmd(src, cmd_node->source_start, cmd_node->source_length); + + /* Try expanding it. If we cannot, it's an error. */ + if (expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) + { + /* Success, return the expanded string by reference */ + std::swap(cmd, *out_cmd); + result = true; + } + } + return result; +} + +// Color all of the arguments of the given command +static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &list_node, const wcstring &working_directory, std::vector &color_array) +{ + /* Hack: determine whether the parent is the cd command. 
*/ + bool cmd_is_cd = false; + const parse_node_t *parent = tree.get_parent(list_node, symbol_plain_statement); + if (parent != NULL) + { + wcstring cmd_str; + if (plain_statement_get_expanded_command(src, tree, *parent, &cmd_str)) + { + cmd_is_cd = (cmd_str == L"cd"); + } + } + + const parse_node_tree_t::parse_node_list_t nodes = tree.find_nodes(list_node, symbol_argument); wcstring param; for (node_offset_t i=0; i < nodes.size(); i++) @@ -1784,6 +1819,19 @@ static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, assert(child != NULL && child->type == symbol_argument); param.assign(src, child->source_start, child->source_length); color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_PARAM); + + if (cmd_is_cd) + { + /* Mark this as an error if it's not 'help' and not a valid cd path */ + if (expand_one(param, EXPAND_SKIP_CMDSUBST)) + { + bool is_help = string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h"); + if (!is_help && ! is_potential_cd_path(param, working_directory, PATH_EXPAND_TILDE, NULL)) + { + color_node(*child, HIGHLIGHT_ERROR, color_array); + } + } + } } } @@ -1893,20 +1941,10 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t case symbol_switch_statement: case symbol_boolean_statement: case symbol_decorated_statement: - color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); - break; - case symbol_if_statement: { // Color the 'end' color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); - - // Color arguments and redirections - const parse_node_t *arguments = parse_tree.get_child(node, 3, symbol_arguments_or_redirections_list); - if (arguments != NULL) - { - color_arguments(buff, parse_tree, *arguments, color); - } } break; @@ -1948,21 +1986,20 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t } color_node(*cmd_node, is_valid_cmd ? 
HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR, color); } - - /* Color arguments */ - const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list); - if (arguments != NULL) - { - color_arguments(buff, parse_tree, *arguments, color); - } } break; case symbol_arguments_or_redirections_list: case symbol_argument_list: - /* Nothing, these are handled by their parents */ - break; + { + /* Only work on root lists, so that we don't re-color child lists */ + if (parse_tree.argument_list_is_root(node)) + { + color_arguments(buff, parse_tree, node, working_directory, color); + } + } + break; case parse_special_type_parse_error: case parse_special_type_tokenizer_error: diff --git a/parse_tree.cpp b/parse_tree.cpp index 0a85a1d95..5baef1c01 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -936,3 +936,17 @@ parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_n find_nodes_recursive(*this, parent, type, &result); return result; } + + +bool parse_node_tree_t::argument_list_is_root(const parse_node_t &node) const +{ + bool result = true; + assert(node.type == symbol_argument_list || node.type == symbol_arguments_or_redirections_list); + const parse_node_t *parent = this->get_parent(node); + if (parent != NULL) + { + /* We have a parent - check to make sure it's not another list! 
*/ + result = parent->type != symbol_arguments_or_redirections_list && parent->type != symbol_argument_list; + } + return result; +} diff --git a/parse_tree.h b/parse_tree.h index 6fcbde0dc..0355117fc 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -217,6 +217,9 @@ public: /* Find all the nodes of a given type underneath a given node */ typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; + + /* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */ + bool argument_list_is_root(const parse_node_t &node) const; }; From c6eef166646b17e409dda043dee311ad074ff4c9 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 8 Oct 2013 15:05:30 -0700 Subject: [PATCH 029/108] Autosuggestion adoption of new parser --- highlight.cpp | 208 ++++++++++++++----------------------------------- history.h | 1 + parse_tree.cpp | 42 ++++++++++ parse_tree.h | 14 +++- 4 files changed, 113 insertions(+), 152 deletions(-) diff --git a/highlight.cpp b/highlight.cpp index 71dba3dcf..5080367cd 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -329,6 +329,30 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d return result; } +/* Given a plain statement node in a parse tree, get the command and return it, expanded appropriately for commands. If we succeed, return true. */ +static bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd) +{ + assert(plain_statement.type == symbol_plain_statement); + bool result = false; + + // Get the command + const parse_node_t *cmd_node = tree.get_child(plain_statement, 0, parse_token_type_string); + if (cmd_node != NULL && cmd_node->has_source()) + { + wcstring cmd(src, cmd_node->source_start, cmd_node->source_length); + + /* Try expanding it. If we cannot, it's an error. 
*/ + if (expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) + { + /* Success, return the expanded string by reference */ + std::swap(cmd, *out_cmd); + result = true; + } + } + return result; +} + + rgb_color_t highlight_get_color(int highlight, bool is_background) { size_t idx=0; @@ -683,124 +707,33 @@ static bool has_expand_reserved(const wcstring &str) } /* Parse a command line. Return by reference the last command, its arguments, and the offset in the string of the beginning of the last argument. This is used by autosuggestions */ -static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command, wcstring_list_t *out_arguments, int *out_last_arg_pos) +static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expanded_command, const parse_node_t **out_last_arg) { - if (str.empty()) - return false; - - wcstring cmd; - wcstring_list_t args; - int arg_pos = -1; - - bool had_cmd = false; - tokenizer_t tok(str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - for (; tok_has_next(&tok); tok_next(&tok)) + bool result = false; + + /* Parse the buffer */ + parse_node_tree_t parse_tree; + parse_t parser; + parser.parse(buff, parse_flag_continue_after_error, &parse_tree, NULL); + + /* Find the last statement */ + const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL); + if (last_statement != NULL) { - int last_type = tok_last_type(&tok); - - switch (last_type) + if (plain_statement_get_expanded_command(buff, parse_tree, *last_statement, out_expanded_command)) { - case TOK_STRING: - { - if (had_cmd) - { - /* Parameter to the command. We store these escaped. */ - args.push_back(tok_last(&tok)); - arg_pos = tok_get_pos(&tok); - } - else - { - /* Command. First check that the command actually exists. */ - wcstring local_cmd = tok_last(&tok); - bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); - if (! 
expanded || has_expand_reserved(cmd)) - { - /* We can't expand this cmd, ignore it */ - } - else - { - bool is_subcommand = false; - int mark = tok_get_pos(&tok); - - if (parser_keywords_is_subcommand(cmd)) - { - int sw; - tok_next(&tok); - - sw = parser_keywords_is_switch(tok_last(&tok)); - if (!parser_keywords_is_block(cmd) && - sw == ARG_SWITCH) - { - /* It's an argument to the subcommand itself */ - } - else - { - if (sw == ARG_SKIP) - mark = tok_get_pos(&tok); - is_subcommand = true; - } - tok_set_pos(&tok, mark); - } - - if (!is_subcommand) - { - /* It's really a command */ - had_cmd = true; - cmd = local_cmd; - } - } - - } - break; - } - - case TOK_REDIRECT_NOCLOB: - case TOK_REDIRECT_OUT: - case TOK_REDIRECT_IN: - case TOK_REDIRECT_APPEND: - case TOK_REDIRECT_FD: - { - if (!had_cmd) - { - break; - } - tok_next(&tok); - break; - } - - case TOK_PIPE: - case TOK_BACKGROUND: - case TOK_END: - { - had_cmd = false; - cmd.clear(); - args.clear(); - arg_pos = -1; - break; - } - - case TOK_COMMENT: - case TOK_ERROR: - default: - { - break; - } + /* We got it */ + result = true; + + /* Find the last argument */ + *out_last_arg = parse_tree.find_last_node_of_type(symbol_plain_statement, last_statement); } } - - /* Remember our command if we have one */ - if (had_cmd) - { - if (out_command) out_command->swap(cmd); - if (out_arguments) out_arguments->swap(args); - if (out_last_arg_pos) *out_last_arg_pos = arg_pos; - } - return had_cmd; + return result; } - /* We have to return an escaped string here */ -bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_directory, wcstring &outSuggestion) +bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_directory, wcstring &out_suggestion) { if (str.empty()) return false; @@ -809,23 +742,20 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di /* Parse the string */ wcstring parsed_command; - wcstring_list_t parsed_arguments; - int parsed_last_arg_pos 
= -1; - if (! autosuggest_parse_command(str, &parsed_command, &parsed_arguments, &parsed_last_arg_pos)) - { + const parse_node_t *last_arg_node = NULL; + if (! autosuggest_parse_command(str, &parsed_command, &last_arg_node)) return false; - } bool result = false; - if (parsed_command == L"cd" && ! parsed_arguments.empty()) + if (parsed_command == L"cd" && last_arg_node != NULL && last_arg_node->has_source()) { /* We can possibly handle this specially */ - const wcstring escaped_dir = parsed_arguments.back(); + const wcstring escaped_dir = last_arg_node->get_source(str); wcstring suggested_path; /* We always return true because we recognized the command. This prevents us from falling back to dumber algorithms; for example we won't suggest a non-directory for the cd command. */ result = true; - outSuggestion.clear(); + out_suggestion.clear(); /* Unescape the parameter */ wcstring unescaped_dir = escaped_dir; @@ -844,11 +774,11 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di wcstring escaped_suggested_path = parse_util_escape_string_with_quote(suggested_path, quote); /* Return it */ - outSuggestion = str; - outSuggestion.erase(parsed_last_arg_pos); - if (quote != L'\0') outSuggestion.push_back(quote); - outSuggestion.append(escaped_suggested_path); - if (quote != L'\0') outSuggestion.push_back(quote); + out_suggestion = str; + out_suggestion.erase(last_arg_node->source_start); + if (quote != L'\0') out_suggestion.push_back(quote); + out_suggestion.append(escaped_suggested_path); + if (quote != L'\0') out_suggestion.push_back(quote); } } else @@ -866,15 +796,14 @@ bool autosuggest_validate_from_history(const history_item_t &item, file_detectio /* Parse the string */ wcstring parsed_command; - wcstring_list_t parsed_arguments; - int parsed_last_arg_pos = -1; - if (! autosuggest_parse_command(item.str(), &parsed_command, &parsed_arguments, &parsed_last_arg_pos)) + const parse_node_t *last_arg_node = NULL; + if (! 
autosuggest_parse_command(item.str(), &parsed_command, &last_arg_node)) return false; - if (parsed_command == L"cd" && ! parsed_arguments.empty()) + if (parsed_command == L"cd" && last_arg_node != NULL && last_arg_node->has_source()) { /* We can possibly handle this specially */ - wcstring dir = parsed_arguments.back(); + wcstring dir = last_arg_node->get_source(item.str()); if (expand_one(dir, EXPAND_SKIP_CMDSUBST)) { handled = true; @@ -1772,29 +1701,6 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node return result; } -// Gets the expanded command from a plain statement node -static bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd) -{ - assert(plain_statement.type == symbol_plain_statement); - bool result = false; - - // Get the command - const parse_node_t *cmd_node = tree.get_child(plain_statement, 0, parse_token_type_string); - if (cmd_node != NULL && cmd_node->has_source()) - { - wcstring cmd(src, cmd_node->source_start, cmd_node->source_length); - - /* Try expanding it. If we cannot, it's an error. 
*/ - if (expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) - { - /* Success, return the expanded string by reference */ - std::swap(cmd, *out_cmd); - result = true; - } - } - return result; -} - // Color all of the arguments of the given command static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &list_node, const wcstring &working_directory, std::vector &color_array) { diff --git a/history.h b/history.h index a19c88440..b9cfc85b0 100644 --- a/history.h +++ b/history.h @@ -61,6 +61,7 @@ public: { return contents; } + bool empty() const { return contents.empty(); diff --git a/parse_tree.cpp b/parse_tree.cpp index 5baef1c01..30ee6856b 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -937,6 +937,48 @@ parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_n return result; } +/* Return true if the given node has the proposed ancestor as an ancestor (or is itself that ancestor) */ +static bool node_has_ancestor(const parse_node_tree_t &tree, const parse_node_t &node, const parse_node_t &proposed_ancestor) +{ + if (&node == &proposed_ancestor) + { + /* Found it */ + return true; + } + else if (node.parent == NODE_OFFSET_INVALID) + { + /* No more parents */ + return false; + } + else + { + /* Recurse to the parent */ + return node_has_ancestor(tree, tree.at(node.parent), proposed_ancestor); + } +} + +const parse_node_t *parse_node_tree_t::find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent) const +{ + const parse_node_t *result = NULL; + // Find nodes of the given type in the tree, working backwards + size_t idx = this->size(); + while (idx--) + { + const parse_node_t &node = this->at(idx); + if (node.type == type) + { + // Types match. 
Check if it has the right parent + if (parent == NULL || node_has_ancestor(*this, node, *parent)) + { + // Success + result = &node; + break; + } + } + } + return result; +} + bool parse_node_tree_t::argument_list_is_root(const parse_node_t &node) const { diff --git a/parse_tree.h b/parse_tree.h index 0355117fc..b2059914c 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -200,6 +200,15 @@ public: { return source_start != (size_t)(-1); } + + /* Gets source for the node, or the empty string if it has no source */ + wcstring get_source(const wcstring &str) const + { + if (! has_source()) + return wcstring(); + else + return wcstring(str, this->source_start, this->source_length); + } }; /* The parse tree itself */ @@ -218,6 +227,9 @@ public: typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; + /* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */ + const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const; + /* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */ bool argument_list_is_root(const parse_node_t &node) const; }; @@ -287,7 +299,7 @@ enum parse_argument_flags_t boolean_statement = AND statement | OR statement | NOT statement # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" -# The tag of a plain statement indicates which mode to use +# TODO: we should be able to construct plain_statements out of e.g. 
'command --help' or even just 'command' decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement plain_statement = arguments_or_redirections_list optional_background From 54d7c29221e066ed4150e53f3bcca90013e5d795 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 8 Oct 2013 18:41:35 -0700 Subject: [PATCH 030/108] Syntax highlighting for command substitutions --- fish_tests.cpp | 14 +++- highlight.cpp | 210 +++++++++++++++++++++++++++++++++++++------------ parse_util.cpp | 38 ++++++++- parse_util.h | 19 +++++ 4 files changed, 229 insertions(+), 52 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index bc631bf32..6c77ec08a 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2026,9 +2026,21 @@ static void test_highlighting(void) {L"definitely_not_a_directory", HIGHLIGHT_ERROR}, {NULL, -1} }; + + // Command substitutions + const highlight_component_t components7[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"param1", HIGHLIGHT_PARAM}, + {L"(", HIGHLIGHT_OPERATOR}, + {L"ls", HIGHLIGHT_COMMAND}, + {L"param2", HIGHLIGHT_PARAM}, + {L")", HIGHLIGHT_OPERATOR}, + {NULL, -1} + }; - const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6}; + const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7}; for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { const highlight_component_t *components = tests[which]; diff --git a/highlight.cpp b/highlight.cpp index 5080367cd..28e32b7a1 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -36,6 +36,8 @@ #include "history.h" #include "parse_tree.h" +#define CURSOR_POSITION_INVALID ((size_t)(-1)) + /** Number of elements in the highlight_var array */ @@ -1382,25 +1384,11 @@ void highlight_shell_classic(const wcstring &buff, std::vector &color, size } } -static void color_node(const parse_node_t &node, int color, std::vector &color_array) -{ - // Can 
only color nodes with valid source ranges - if (! node.has_source()) - return; - - // Fill the color array with our color in the corresponding range - size_t source_end = node.source_start + node.source_length; - assert(source_end >= node.source_start); - assert(source_end <= color_array.size()); - - std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color); -} - -/* This function is a disaster badly in need of refactoring. However, note that it does NOT do any I/O */ -static void color_argument(const wcstring &buffstr, std::vector::iterator colors, int normal_status) +/* This function is a disaster badly in need of refactoring. */ +static void color_argument_internal(const wcstring &buffstr, std::vector::iterator colors) { const size_t buff_len = buffstr.size(); - std::fill(colors, colors + buff_len, normal_status); + std::fill(colors, colors + buff_len, HIGHLIGHT_PARAM); enum {e_unquoted, e_single_quoted, e_double_quoted} mode = e_unquoted; int bracket_count=0; @@ -1679,6 +1667,119 @@ static void color_argument(const wcstring &buffstr, std::vector::iterator c } } +/* Syntax highlighter helper */ +class highlighter_t +{ + /* The string we're highlighting. Note this is a reference memmber variable (to avoid copying)! We must not outlive this! */ + const wcstring &buff; + + /* Cursor position */ + const size_t cursor_pos; + + /* Environment variables. Again, a reference member variable! 
*/ + const env_vars_snapshot_t &vars; + + /* Working directory */ + const wcstring working_directory; + + /* The resulting colors */ + typedef std::vector color_array_t; + color_array_t color_array; + + /* The parse tree of the buff */ + parse_node_tree_t parse_tree; + + /* Color an argument */ + void color_argument(const parse_node_t &node); + + /* Color the arguments of the given node */ + void color_arguments(const parse_node_t &list_node); + + /* Color all the children of the command with the given type */ + void color_children(const parse_node_t &parent, parse_token_type_t type, int color); + + /* Colors the source range of a node with a given color */ + void color_node(const parse_node_t &node, int color); + + public: + + /* Constructor */ + highlighter_t(const wcstring &str, size_t pos, const env_vars_snapshot_t &ev, const wcstring &wd) : buff(str), cursor_pos(pos), vars(ev), working_directory(wd), color_array(str.size()) + { + /* Parse the tree */ + this->parse_tree.clear(); + parse_t parser; + parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL); + } + + /* Perform highlighting, returning an array of colors */ + const color_array_t &highlight(); +}; + +void highlighter_t::color_node(const parse_node_t &node, int color) +{ + // Can only color nodes with valid source ranges + if (! node.has_source()) + return; + + // Fill the color array with our color in the corresponding range + size_t source_end = node.source_start + node.source_length; + assert(source_end >= node.source_start); + assert(source_end <= color_array.size()); + + std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end, color); +} + +void highlighter_t::color_argument(const parse_node_t &node) +{ + if (! 
node.has_source()) + return; + + const wcstring arg_str = node.get_source(this->buff); + + /* Get an iterator to the colors associated with the argument */ + const size_t arg_start = node.source_start; + const color_array_t::iterator arg_colors = color_array.begin() + arg_start; + + /* Color this argument without concern for command substitutions */ + color_argument_internal(arg_str, arg_colors); + + /* Now do command substitutions */ + size_t cmdsub_cursor = 0, cmdsub_start = 0, cmdsub_end = 0; + wcstring cmdsub_contents; + while (parse_util_locate_cmdsubst_range(arg_str, &cmdsub_cursor, &cmdsub_contents, &cmdsub_start, &cmdsub_end, true /* accept incomplete */) > 0) + { + /* The cmdsub_start is the open paren. cmdsub_end is either the close paren or the end of the string. cmdsub_contents extends from one past cmdsub_start to cmdsub_end */ + assert(cmdsub_end > cmdsub_start); + assert(cmdsub_end - cmdsub_start - 1 == cmdsub_contents.size()); + + /* Found a command substitution. Compute the position of the start and end of the cmdsub contents, within our overall src. */ + const size_t arg_subcmd_start = arg_start + cmdsub_start, arg_subcmd_end = arg_start + cmdsub_end; + + /* Highlight the parens. The open paren must exist; the closed paren may not if it was incomplete. */ + assert(cmdsub_start < arg_str.size()); + this->color_array.at(arg_subcmd_start) = HIGHLIGHT_OPERATOR; + if (arg_subcmd_end < this->buff.size()) + this->color_array.at(arg_subcmd_end) = HIGHLIGHT_OPERATOR; + + /* Compute the cursor's position within the cmdsub. We must be past the open paren (hence >) but can be at the end of the string or closed paren (hence <=) */ + size_t cursor_subpos = CURSOR_POSITION_INVALID; + if (cursor_pos != CURSOR_POSITION_INVALID && cursor_pos > arg_subcmd_start && cursor_pos <= arg_subcmd_end) + { + /* The -1 because the cmdsub_contents does not include the open paren */ + cursor_subpos = cursor_pos - arg_subcmd_start - 1; + } + + /* Highlight it recursively. 
*/ + highlighter_t cmdsub_highlighter(cmdsub_contents, cursor_subpos, this->vars, this->working_directory); + const color_array_t &subcolors = cmdsub_highlighter.highlight(); + + /* Copy out the subcolors back into our array */ + assert(subcolors.size() == cmdsub_contents.size()); + std::copy(subcolors.begin(), subcolors.end(), this->color_array.begin() + arg_subcmd_start + 1); + } +} + // Indicates whether the source range of the given node forms a valid path in the given working_directory static bool node_is_potential_path(const wcstring &src, const parse_node_t &node, const wcstring &working_directory) { @@ -1702,39 +1803,39 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node } // Color all of the arguments of the given command -static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &list_node, const wcstring &working_directory, std::vector &color_array) +void highlighter_t::color_arguments(const parse_node_t &list_node) { - /* Hack: determine whether the parent is the cd command. 
*/ + /* Hack: determine whether the parent is the cd command, so we can show errors for non-directories */ bool cmd_is_cd = false; - const parse_node_t *parent = tree.get_parent(list_node, symbol_plain_statement); + const parse_node_t *parent = this->parse_tree.get_parent(list_node, symbol_plain_statement); if (parent != NULL) { wcstring cmd_str; - if (plain_statement_get_expanded_command(src, tree, *parent, &cmd_str)) + if (plain_statement_get_expanded_command(this->buff, this->parse_tree, *parent, &cmd_str)) { cmd_is_cd = (cmd_str == L"cd"); } } - const parse_node_tree_t::parse_node_list_t nodes = tree.find_nodes(list_node, symbol_argument); + /* Find all the arguments of this list */ + const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_argument); - wcstring param; for (node_offset_t i=0; i < nodes.size(); i++) { const parse_node_t *child = nodes.at(i); assert(child != NULL && child->type == symbol_argument); - param.assign(src, child->source_start, child->source_length); - color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_PARAM); + this->color_argument(*child); if (cmd_is_cd) { /* Mark this as an error if it's not 'help' and not a valid cd path */ + wcstring param = child->get_source(this->buff); if (expand_one(param, EXPAND_SKIP_CMDSUBST)) { bool is_help = string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h"); if (!is_help && ! 
is_potential_cd_path(param, working_directory, PATH_EXPAND_TILDE, NULL)) { - color_node(*child, HIGHLIGHT_ERROR, color_array); + this->color_node(*child, HIGHLIGHT_ERROR); } } } @@ -1742,14 +1843,14 @@ static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, } /* Color all the children of the command with the given type */ -static void color_children(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, int color, std::vector &color_array) +void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type, int color) { for (node_offset_t idx=0; idx < parent.child_count; idx++) { - const parse_node_t *child = tree.get_child(parent, idx); + const parse_node_t *child = this->parse_tree.get_child(parent, idx); if (child != NULL && child->type == type) { - color_node(*child, color, color_array); + this->color_node(*child, color); } } } @@ -1803,22 +1904,19 @@ static bool command_is_valid(const wcstring &cmd, enum parse_statement_decoratio return is_valid; } -void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +const highlighter_t::color_array_t & highlighter_t::highlight() { ASSERT_IS_BACKGROUND_THREAD(); - + const size_t length = buff.size(); - assert(buff.size() == color.size()); - + assert(this->buff.size() == this->color_array.size()); + if (length == 0) - return; + return color_array; /* Start out at zero */ - std::fill(color.begin(), color.end(), 0); - - /* Do something sucky and get the current working directory on this background thread. This should really be passed in. 
*/ - const wcstring working_directory = env_get_pwd_slash(); - + std::fill(this->color_array.begin(), this->color_array.end(), 0); + /* Parse the buffer */ parse_node_tree_t parse_tree; parse_t parser; @@ -1850,20 +1948,20 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t case symbol_if_statement: { // Color the 'end' - color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); + this->color_children(node, parse_token_type_string, HIGHLIGHT_COMMAND); } break; case symbol_redirection: { - color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color); + this->color_children(node, parse_token_type_string, HIGHLIGHT_REDIRECTION); } break; case parse_token_type_background: case parse_token_type_end: { - color_node(node, HIGHLIGHT_END, color); + this->color_node(node, HIGHLIGHT_END); } break; @@ -1890,7 +1988,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t { is_valid_cmd = command_is_valid(cmd, decoration, working_directory, vars); } - color_node(*cmd_node, is_valid_cmd ? HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR, color); + this->color_node(*cmd_node, is_valid_cmd ? 
HIGHLIGHT_COMMAND : HIGHLIGHT_ERROR); } } break; @@ -1902,18 +2000,18 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t /* Only work on root lists, so that we don't re-color child lists */ if (parse_tree.argument_list_is_root(node)) { - color_arguments(buff, parse_tree, node, working_directory, color); + this->color_arguments(node); } } break; case parse_special_type_parse_error: case parse_special_type_tokenizer_error: - color_node(node, HIGHLIGHT_ERROR, color); + this->color_node(node, HIGHLIGHT_ERROR); break; case parse_special_type_comment: - color_node(node, HIGHLIGHT_COMMENT, color); + this->color_node(node, HIGHLIGHT_COMMENT); break; default: @@ -1921,7 +2019,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t } } - if (pos <= buff.size()) + if (this->cursor_pos <= this->buff.size()) { /* If the cursor is over an argument, and that argument is a valid path, underline it */ for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter) @@ -1933,7 +2031,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t continue; /* See if this node contains the cursor. We check <= source_length so that, when backspacing (and the cursor is just beyond the last token), we may still underline it */ - if (pos >= node.source_start && pos - node.source_start <= node.source_length) + if (this->cursor_pos >= node.source_start && this->cursor_pos - node.source_start <= node.source_length) { /* See if this is a valid path */ if (node_is_potential_path(buff, node, working_directory)) @@ -1942,15 +2040,27 @@ void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t for (size_t i=node.source_start; i < node.source_start + node.source_length; i++) { /* Don't color HIGHLIGHT_ERROR because it looks dorky. For example, trying to cd into a non-directory would show an underline and also red. */ - if (! (color.at(i) & HIGHLIGHT_ERROR)) + if (! 
(this->color_array.at(i) & HIGHLIGHT_ERROR)) { - color.at(i) |= HIGHLIGHT_VALID_PATH; + this->color_array.at(i) |= HIGHLIGHT_VALID_PATH; } } } } } } + + return color_array; +} + +void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +{ + /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */ + const wcstring working_directory = env_get_pwd_slash(); + + /* Highlight it! */ + highlighter_t highlighter(buff, pos, vars, working_directory); + color = highlighter.highlight(); } /** diff --git a/parse_util.cpp b/parse_util.cpp index 5b807059e..6427e249f 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -164,7 +164,7 @@ int parse_util_locate_cmdsubst(const wchar_t *in, wchar_t **begin, wchar_t **end CHECK(in, 0); - for (pos = (wchar_t *)in; *pos; pos++) + for (pos = const_cast(in); *pos; pos++) { if (prev != '\\') { @@ -240,6 +240,42 @@ int parse_util_locate_cmdsubst(const wchar_t *in, wchar_t **begin, wchar_t **end return 1; } +int parse_util_locate_cmdsubst_range(const wcstring &str, size_t *inout_cursor_offset, wcstring *out_contents, size_t *out_start, size_t *out_end, bool accept_incomplete) +{ + /* Clear the return values */ + out_contents->clear(); + *out_start = 0; + *out_end = str.size(); + + /* Nothing to do if the offset is at or past the end of the string. 
*/ + if (*inout_cursor_offset >= str.size()) + return 0; + + /* Defer to the wonky version */ + const wchar_t * const buff = str.c_str(); + const wchar_t * const valid_range_start = buff + *inout_cursor_offset, *valid_range_end = buff + str.size(); + wchar_t *cmdsub_begin = NULL, *cmdsub_end = NULL; + int ret = parse_util_locate_cmdsubst(valid_range_start, &cmdsub_begin, &cmdsub_end, accept_incomplete); + if (ret > 0) + { + /* The command substitutions must not be NULL and must be in the valid pointer range, and the end must be bigger than the beginning */ + assert(cmdsub_begin != NULL && cmdsub_begin >= valid_range_start && cmdsub_begin <= valid_range_end); + assert(cmdsub_end != NULL && cmdsub_end > cmdsub_begin && cmdsub_end >= valid_range_start && cmdsub_end <= valid_range_end); + + /* Assign the substring to the out_contents */ + const wchar_t *interior_begin = cmdsub_begin + 1; + out_contents->assign(interior_begin, cmdsub_end - interior_begin); + + /* Return the start and end */ + *out_start = cmdsub_begin - buff; + *out_end = cmdsub_end - buff; + + /* Update the inout_cursor_offset. Note this may cause it to exceed str.size(), though overflow is not likely */ + *inout_cursor_offset = 1 + *out_end; + } + return ret; +} + void parse_util_cmdsubst_extent(const wchar_t *buff, size_t cursor_pos, const wchar_t **a, const wchar_t **b) { const wchar_t * const cursor = buff + cursor_pos; diff --git a/parse_util.h b/parse_util.h index 24147e180..76b33450e 100644 --- a/parse_util.h +++ b/parse_util.h @@ -27,6 +27,25 @@ int parse_util_locate_cmdsubst(const wchar_t *in, wchar_t **end, bool accept_incomplete); +/** + Alternative API. Iterate over command substitutions. + + \param str the string to search for subshells + \param inout_cursor_offset On input, the location to begin the search. On output, either the end of the string, or just after the closed-paren. 
+ \param out_contents On output, the contents of the command substitution + \param out_start On output, the offset of the start of the command substitution (open paren) + \param out_end On output, the offset of the end of the command substitution (close paren), or the end of the string if it was incomplete + \param accept_incomplete whether to permit missing closing parenthesis + \return -1 on syntax error, 0 if no subshells exist and 1 on sucess +*/ + +int parse_util_locate_cmdsubst_range(const wcstring &str, + size_t *inout_cursor_offset, + wcstring *out_contents, + size_t *out_start, + size_t *out_end, + bool accept_incomplete); + /** Find the beginning and end of the command substitution under the cursor. If no subshell is found, the entire string is returned. If From a51bd03a5c86d5532063610cc185f3d377daa4e1 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 8 Oct 2013 18:48:01 -0700 Subject: [PATCH 031/108] Remove parse_exec stuff --- Makefile.in | 2 +- builtin.cpp | 114 ------- fish.xcodeproj/project.pbxproj | 8 - parse_exec.cpp | 593 --------------------------------- parse_exec.h | 173 ---------- 5 files changed, 1 insertion(+), 889 deletions(-) delete mode 100644 parse_exec.cpp delete mode 100644 parse_exec.h diff --git a/Makefile.in b/Makefile.in index 07dec023c..23e24b389 100644 --- a/Makefile.in +++ b/Makefile.in @@ -100,7 +100,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \ env_universal.o env_universal_common.o input_common.o event.o \ signal.o io.o parse_util.o common.o screen.o path.o autoload.o \ parser_keywords.o iothread.o color.o postfork.o \ - builtin_test.o parse_tree.o parse_productions.o parse_exec.o + builtin_test.o parse_tree.o parse_productions.o FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \ parser_keywords.o wutil.o tokenizer.o diff --git a/builtin.cpp b/builtin.cpp index 06c9a9b89..b84d78e73 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -65,7 +65,6 @@ #include "path.h" #include "history.h" 
#include "parse_tree.h" -#include "parse_exec.h" /** The default prompt for the read command @@ -3956,104 +3955,6 @@ static int builtin_history(parser_t &parser, wchar_t **argv) #pragma mark Simulator -struct parse_execution_simulator_t : public parse_execution_visitor_t -{ - wcstring_list_t result; - - wcstring &back() - { - assert(! result.empty()); - return result.back(); - } - - void append_src(node_offset_t idx) - { - wcstring tmp; - context->get_source(idx, &tmp); - back().append(tmp); - } - - void append(const wchar_t *s) - { - back().append(s); - } - - bool enter_job_list(void) - { - return true; - } - - bool enter_job(void) - { - result.resize(result.size() + 1); - return true; - } - - void visit_statement(void) - { - } - - virtual void visit_boolean_statement(void) - { - } - - virtual void enter_if_clause(const exec_if_clause_t &statement) - { - } - - virtual void exit_if_clause(const exec_if_clause_t &statement) - { - append_format(back(), L"\nIF successful jump to %lu", (unsigned long)statement.body); - } - - void visit_basic_statement(const exec_basic_statement_t &statement) - { - wcstring &line = this->back(); - if (! 
line.empty()) - { - line.append(L" "); - } - switch (statement.decoration) - { - case exec_basic_statement_t::decoration_builtin: - line.append(L" "); - break; - - case exec_basic_statement_t::decoration_command: - line.append(L" "); - break; - - default: - break; - } - - line.append(L"cmd:"); - this->append_src(statement.command_idx); - for (size_t i=0; i < statement.arguments().size(); i++) - { - const exec_argument_t &arg = statement.arguments().at(i); - append(L" "); - append(L"arg:"); - append_src(arg.parse_node_idx); - } - } - - void visit_function(const exec_function_header_t &function) - { - wcstring &line = this->back(); - line.append(L"define function: "); - wcstring tmp; - context->get_source(function.name_idx, &tmp); - line.append(tmp); - } - - void exit_job_list(void) - { - } -}; - - - int builtin_parse(parser_t &parser, wchar_t **argv) { struct sigaction act; @@ -4090,21 +3991,6 @@ int builtin_parse(parser_t &parser, wchar_t **argv) { const wcstring dump = parse_dump_tree(parse_tree, src); fprintf(stderr, "%ls", dump.c_str()); - if (0) - { - parse_execution_context_t ctx(parse_tree, src); - parse_execution_simulator_t sim; - sim.context = &ctx; - while (ctx.visit_next_node(&sim)) - { - } - stdout_buffer.append(L"Simulating execution:\n"); - for (size_t i=0; i < sim.result.size(); i++) - { - stdout_buffer.append(sim.result.at(i)); - stdout_buffer.push_back(L'\n'); - } - } } } return STATUS_BUILTIN_OK; diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 3b85e4bca..04d0c7cb7 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -112,13 +112,11 @@ D08A32B817B446A300F3A533 /* signal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0855C13B3ACEE0099B651 /* signal.cpp */; }; D08A32B917B446B100F3A533 /* parse_productions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */; }; D08A32BA17B446B100F3A533 /* parse_tree.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; - D08A32BB17B446B100F3A533 /* parse_exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F331765281F00BFAB82 /* parse_exec.cpp */; }; D08A32BC17B4473B00F3A533 /* libncurses.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = D0D02A8C15983CFA008E62BD /* libncurses.dylib */; }; D08A32BD17B4474000F3A533 /* libiconv.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = D0D02A8A15983CDF008E62BD /* libiconv.dylib */; }; D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; }; D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; }; - D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F331765281F00BFAB82 /* parse_exec.cpp */; }; D0CBD587159EF0E10024809C /* launch_fish.scpt in Resources */ = {isa = PBXBuildFile; fileRef = D0CBD586159EF0E10024809C /* launch_fish.scpt */; }; D0D02A67159837AD008E62BD /* complete.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853713B3ACEE0099B651 /* complete.cpp */; }; D0D02A69159837B2008E62BD /* env.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853A13B3ACEE0099B651 /* env.cpp */; }; @@ -497,8 +495,6 @@ D0B6B0FE14E88BA400AD6C10 /* color.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = color.cpp; sourceTree = ""; }; D0B6B0FF14E88BA400AD6C10 /* color.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = color.h; sourceTree = ""; }; D0C4FD9415A7D7EE00212EF1 /* config.fish */ = {isa = PBXFileReference; lastKnownFileType = text; name = config.fish; path = etc/config.fish; sourceTree = ""; }; - D0C52F331765281F00BFAB82 /* parse_exec.cpp */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_exec.cpp; sourceTree = ""; }; - D0C52F341765281F00BFAB82 /* parse_exec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_exec.h; sourceTree = ""; }; D0C52F351765284C00BFAB82 /* parse_tree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_tree.cpp; sourceTree = ""; }; D0C52F361765284C00BFAB82 /* parse_tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_tree.h; sourceTree = ""; }; D0C6FCC914CFA4B0004CE8AD /* autoload.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = autoload.cpp; sourceTree = ""; }; @@ -665,8 +661,6 @@ D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */, D0C52F361765284C00BFAB82 /* parse_tree.h */, D0C52F351765284C00BFAB82 /* parse_tree.cpp */, - D0C52F341765281F00BFAB82 /* parse_exec.h */, - D0C52F331765281F00BFAB82 /* parse_exec.cpp */, D0A0850D13B3ACEE0099B651 /* fallback.h */, D0A0853E13B3ACEE0099B651 /* fallback.cpp */, D0A0850E13B3ACEE0099B651 /* function.h */, @@ -1130,7 +1124,6 @@ files = ( D08A32B917B446B100F3A533 /* parse_productions.cpp in Sources */, D08A32BA17B446B100F3A533 /* parse_tree.cpp in Sources */, - D08A32BB17B446B100F3A533 /* parse_exec.cpp in Sources */, D08A32A717B446A300F3A533 /* autoload.cpp in Sources */, D08A32A817B446A300F3A533 /* builtin_test.cpp in Sources */, D08A32A917B446A300F3A533 /* color.cpp in Sources */, @@ -1255,7 +1248,6 @@ D0D02A7B15983928008E62BD /* env_universal_common.cpp in Sources */, D0D02A89159839DF008E62BD /* fish.cpp in Sources */, D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */, - D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */, D0FE8EE8179FB760008C9F21 /* parse_productions.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; diff --git a/parse_exec.cpp b/parse_exec.cpp deleted file 
mode 100644 index c424ad957..000000000 --- a/parse_exec.cpp +++ /dev/null @@ -1,593 +0,0 @@ -#include "parse_exec.h" -#include - -struct exec_node_t -{ - node_offset_t parse_node_idx; - node_offset_t body_parse_node_idx; - bool visited; - - explicit exec_node_t(node_offset_t pni) : parse_node_idx(pni), body_parse_node_idx(NODE_OFFSET_INVALID), visited(false) - { - } - - explicit exec_node_t(node_offset_t pni, node_offset_t body_pni) : parse_node_idx(pni), body_parse_node_idx(body_pni), visited(false) - { - } -}; - -exec_basic_statement_t::exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) -{ - -} - - -class parse_exec_t -{ - parse_node_tree_t parse_tree; - wcstring src; - - /* The stack of nodes as we execute them */ - std::vector exec_nodes; - - /* The stack of commands being built */ - std::vector assembling_statements; - - /* Current visitor (very transient) */ - struct parse_execution_visitor_t * visitor; - - const parse_node_t &get_child(const parse_node_t &parent, node_offset_t which) const - { - return parse_tree.at(parent.child_offset(which)); - } - - void pop_push_specific(node_offset_t idx1, node_offset_t idx2 = NODE_OFFSET_INVALID, node_offset_t idx3 = NODE_OFFSET_INVALID, node_offset_t idx4 = NODE_OFFSET_INVALID, node_offset_t idx5 = NODE_OFFSET_INVALID) - { - PARSE_ASSERT(! 
exec_nodes.empty()); - // Figure out the offset of the children - exec_node_t &top = exec_nodes.back(); - const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - node_offset_t child_node_idx = parse_node.child_start; - - // Remove the top node - exec_nodes.pop_back(); - - // Append the given children, backwards - const node_offset_t idxs[] = {idx5, idx4, idx3, idx2, idx1}; - for (size_t q=0; q < sizeof idxs / sizeof *idxs; q++) - { - node_offset_t idx = idxs[q]; - if (idx != (node_offset_t)(-1)) - { - PARSE_ASSERT(idx < parse_node.child_count); - exec_nodes.push_back(exec_node_t(child_node_idx + idx)); - } - } - - } - - void push(node_offset_t global_idx) - { - exec_nodes.push_back(exec_node_t(global_idx)); - } - - void push(const exec_node_t &node) - { - exec_nodes.push_back(node); - } - - - void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) - { - PARSE_ASSERT(! exec_nodes.empty()); - if (child_count == 0) - { - // No children, just remove the top node - exec_nodes.pop_back(); - } - else - { - // Figure out the offset of the children - exec_node_t &top = exec_nodes.back(); - const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - PARSE_ASSERT(child_idx < parse_node.child_count); - node_offset_t child_node_idx = parse_node.child_start + child_idx; - - // Remove the top node - exec_nodes.pop_back(); - - // Append the given children, backwards - node_offset_t cursor = child_count; - while (cursor--) - { - exec_nodes.push_back(exec_node_t(child_node_idx + cursor)); - } - } - } - - void pop() - { - PARSE_ASSERT(! 
exec_nodes.empty()); - exec_nodes.pop_back(); - } - - void pop_push_all() - { - exec_node_t &top = exec_nodes.back(); - const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - pop_push(0, parse_node.child_count); - } - - void assemble_1_argument_or_redirection(node_offset_t idx, exec_arguments_and_redirections_t *output) const - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(output != NULL); - PARSE_ASSERT(node.type == symbol_argument_or_redirection); - PARSE_ASSERT(node.child_count == 1); - node_offset_t child_idx = node.child_offset(0); - const parse_node_t &child = parse_tree.at(child_idx); - switch (child.type) - { - case parse_token_type_string: - // Argument - { - exec_argument_t arg = exec_argument_t(); - arg.parse_node_idx = child_idx; - output->arguments.push_back(arg); - } - break; - - case parse_token_type_redirection: - // Redirection - { - exec_redirection_t redirect = exec_redirection_t(); - redirect.parse_node_idx = child_idx; - output->redirections.push_back(redirect); - } - break; - - default: - PARSER_DIE(); - break; - } - } - - void assemble_arguments_and_redirections(node_offset_t start_idx, exec_arguments_and_redirections_t *output) const - { - node_offset_t idx = start_idx; - for (;;) - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.type == symbol_arguments_or_redirections_list); - PARSE_ASSERT(node.child_count == 0 || node.child_count == 2); - if (node.child_count == 0) - { - // No more children - break; - } - else - { - // Skip to next child - assemble_1_argument_or_redirection(node.child_offset(0), output); - idx = node.child_offset(1); - } - } - } - - void assemble_command_for_plain_statement(node_offset_t idx, parse_keyword_t decoration) - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.type == symbol_plain_statement); - PARSE_ASSERT(node.child_count == 2); - exec_basic_statement_t statement; - statement.set_decoration(decoration); - statement.command_idx = 
node.child_offset(0); - assemble_arguments_and_redirections(node.child_offset(1), &statement.arguments_and_redirections); - visitor->visit_basic_statement(statement); - } - - void assemble_block_statement(node_offset_t parse_node_idx) - { - - const parse_node_t &node = parse_tree.at(parse_node_idx); - PARSE_ASSERT(node.type == symbol_block_statement); - PARSE_ASSERT(node.child_count == 5); - - // Fetch arguments and redirections. These ought to be evaluated before the job list - exec_block_statement_t statement; - assemble_arguments_and_redirections(node.child_offset(4), &statement.arguments_and_redirections); - - // Generic visit - visitor->enter_block_statement(statement); - - // Dig into the header to discover the type - const parse_node_t &header_parent = parse_tree.at(node.child_offset(0)); - PARSE_ASSERT(header_parent.type == symbol_block_header); - PARSE_ASSERT(header_parent.child_count == 1); - const node_offset_t header_idx = header_parent.child_offset(0); - - // Fetch body (job list) - node_offset_t body_idx = node.child_offset(2); - PARSE_ASSERT(parse_tree.at(body_idx).type == symbol_job_list); - - pop(); - push(exec_node_t(header_idx, body_idx)); - } - - /* which: 0 -> if, 1 -> else if, 2 -> else */ - void assemble_if_else_clause(exec_node_t &exec_node, const parse_node_t &node, int which) - { - if (which == 0) - { - PARSE_ASSERT(node.type == symbol_if_clause); - PARSE_ASSERT(node.child_count == 4); - } - else if (which == 2) - { - PARSE_ASSERT(node.type == symbol_else_continuation); - PARSE_ASSERT(node.child_count == 2); - } - - struct exec_if_clause_t clause; - if (which == 0) - { - clause.body = node.child_offset(3); - } - else - { - clause.body = node.child_offset(1); - } - if (! 
exec_node.visited) - { - visitor->enter_if_clause(clause); - exec_node.visited = true; - if (which == 0) - { - push(node.child_offset(1)); - } - } - else - { - visitor->exit_if_clause(clause); - pop(); - } - } - - void assemble_arguments(node_offset_t start_idx, exec_argument_list_t *output) const - { - node_offset_t idx = start_idx; - for (;;) - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.type == symbol_argument_list); - if (node.type == symbol_argument_list) - { - // argument list, may be empty - PARSE_ASSERT(node.child_count == 0 || node.child_count == 1); - if (node.child_count == 0) - { - break; - } - else - { - idx = node.child_offset(0); - } - } - else - { - // nonempty argument list - PARSE_ASSERT(node.child_count == 2); - output->push_back(exec_argument_t(node.child_offset(0))); - idx = node.child_offset(1); - } - } - } - - void assemble_1_case_item(exec_switch_statement_t *statement, node_offset_t node_idx) - { - const parse_node_t &node = parse_tree.at(node_idx); - PARSE_ASSERT(node.type == symbol_case_item); - - // add a new case - size_t len = statement->cases.size(); - statement->cases.resize(len + 1); - exec_switch_case_t &new_case = statement->cases.back(); - - // assemble it - new_case.body = node.child_offset(3); - assemble_arguments(node.child_offset(1), &new_case.arguments); - - - } - - void assemble_case_item_list(exec_switch_statement_t *statement, node_offset_t node_idx) - { - const parse_node_t &node = parse_tree.at(node_idx); - PARSE_ASSERT(node.type == symbol_case_item_list); - PARSE_ASSERT(node.child_count == 0 || node.child_count == 2); - if (node.child_count == 2) - { - assemble_1_case_item(statement, node.child_offset(0)); - assemble_case_item_list(statement, node.child_offset(1)); - } - } - - void assemble_switch_statement(const exec_node_t &exec_node, const parse_node_t &parse_node) - { - PARSE_ASSERT(parse_node.type == symbol_switch_statement); - exec_switch_statement_t statement; - - 
statement.argument.parse_node_idx = parse_node.child_offset(1); - assemble_case_item_list(&statement, parse_node.child_offset(3)); - - visitor->visit_switch_statement(statement); - - // pop off the switch - pop(); - } - - void assemble_function_header(const exec_node_t &exec_node, const parse_node_t &header) - { - PARSE_ASSERT(header.type == symbol_function_header); - PARSE_ASSERT(&header == &parse_tree.at(exec_node.parse_node_idx)); - PARSE_ASSERT(exec_node.body_parse_node_idx != NODE_OFFSET_INVALID); - exec_function_header_t function_info; - function_info.name_idx = header.child_offset(1); - function_info.body_idx = exec_node.body_parse_node_idx; - assemble_arguments(header.child_offset(2), &function_info.arguments); - visitor->visit_function(function_info); - - // Always pop - pop(); - } - - - void enter_parse_node(size_t idx); - void run_top_node(void); - -public: - - void get_node_string(node_offset_t idx, wcstring *output) const - { - const parse_node_t &node = parse_tree.at(idx); - PARSE_ASSERT(node.source_start <= src.size()); - PARSE_ASSERT(node.source_start + node.source_length <= src.size()); - output->assign(src, node.source_start, node.source_length); - } - - bool visit_next_node(parse_execution_visitor_t *v); - - parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), visitor(NULL) - { - if (! parse_tree.empty()) - { - exec_nodes.push_back(exec_node_t(0)); - } - } -}; - -void parse_exec_t::run_top_node() -{ - PARSE_ASSERT(! 
exec_nodes.empty()); - exec_node_t &exec_node = exec_nodes.back(); - const node_offset_t parse_node_idx = exec_node.parse_node_idx; - const parse_node_t &parse_node = parse_tree.at(exec_node.parse_node_idx); - bool log = true; - - if (log) - { - wcstring tmp; - tmp.append(exec_nodes.size(), L' '); - tmp.append(parse_node.describe()); - printf("%ls\n", tmp.c_str()); - } - - switch (parse_node.type) - { - case symbol_job_list: - PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); - if (parse_node.child_count == 0) - { - // No more jobs, done - visitor->exit_job_list(); - pop(); - } - else if (parse_tree.at(parse_node.child_start + 0).type == parse_token_type_end) - { - // Empty job, so just skip it - pop_push(1, 1); - } - else - { - // Normal job - visitor->enter_job_list(); - pop_push(0, 2); - } - break; - - case symbol_job: - { - PARSE_ASSERT(parse_node.child_count == 2); - visitor->enter_job(); - pop_push_all(); - break; - } - - case symbol_job_continuation: - PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 3); - if (parse_node.child_count == 0) - { - // All done with this job - visitor->exit_job(); - pop(); - } - else - { - // Skip the pipe - pop_push(1, 2); - } - break; - - case symbol_statement: - { - PARSE_ASSERT(parse_node.child_count == 1); - pop_push_all(); - break; - } - - case symbol_block_statement: - { - PARSE_ASSERT(parse_node.child_count == 5); - assemble_block_statement(parse_node_idx); - break; - } - - case symbol_block_header: - { - PARSE_ASSERT(parse_node.child_count == 1); - pop_push_all(); - break; - } - - case symbol_function_header: - { - PARSE_ASSERT(parse_node.child_count == 3); - assemble_function_header(exec_node, parse_node); - break; - } - - case symbol_if_statement: - { - PARSE_ASSERT(parse_node.child_count == 4); - pop_push(0, 2); - break; - } - - case symbol_if_clause: - { - PARSE_ASSERT(parse_node.child_count == 4); - assemble_if_else_clause(exec_node, parse_node, 0); - pop(); - break; - } 
- - case symbol_else_clause: - { - PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); - if (parse_node.child_count == 0) - { - // No else - pop(); - } - else - { - // We have an else - pop_push(1); - } - break; - } - - case symbol_else_continuation: - { - // Figure out if this is an else if or a terminating else - PARSE_ASSERT(parse_node.child_count == 2); - const parse_node_t &first_child = get_child(parse_node, 1); - PARSE_ASSERT(first_child.type == symbol_if_clause || first_child.type == parse_token_type_end); - if (first_child.type == symbol_if_clause) - { - pop_push_all(); - } - else - { - // else - assemble_if_else_clause(exec_node, parse_node, 2); - pop(); - } - break; - } - - case symbol_switch_statement: - { - assemble_switch_statement(exec_node, parse_node); - break; - } - - case symbol_decorated_statement: - { - PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2); - - node_offset_t plain_statement_idx = parse_node.child_offset(parse_node.child_count - 1); - parse_keyword_t decoration = static_cast(parse_node.tag); - assemble_command_for_plain_statement(plain_statement_idx, decoration); - pop(); - break; - } - - // The following symbols should be handled by their parents, i.e. never pushed on our stack - case symbol_case_item_list: - case symbol_plain_statement: - case symbol_arguments_or_redirections_list: - case symbol_argument_or_redirection: - fprintf(stderr, "Unexpected token type %ls at index %ld. 
This should have been handled by the parent.\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); - PARSER_DIE(); - break; - - case parse_token_type_end: - PARSE_ASSERT(parse_node.child_count == 0); - pop(); - break; - - default: - fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx); - PARSER_DIE(); - break; - - } -} - -bool parse_exec_t::visit_next_node(parse_execution_visitor_t *v) -{ - PARSE_ASSERT(v != NULL); - PARSE_ASSERT(visitor == NULL); - if (exec_nodes.empty()) - { - return false; - } - - visitor = v; - run_top_node(); - visitor = NULL; - return true; -} - -void parse_exec_t::enter_parse_node(size_t idx) -{ - PARSE_ASSERT(idx < parse_tree.size()); - exec_node_t exec(idx); - exec_nodes.push_back(exec); -} - - -parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) -{ - ctx = new parse_exec_t(n, s); -} - -parse_execution_context_t::~parse_execution_context_t() -{ - delete ctx; -} - -bool parse_execution_context_t::visit_next_node(parse_execution_visitor_t *visitor) -{ - return ctx->visit_next_node(visitor); -} - -void parse_execution_context_t::get_source(node_offset_t idx, wcstring *result) const -{ - return ctx->get_node_string(idx, result); -} - - - - diff --git a/parse_exec.h b/parse_exec.h deleted file mode 100644 index 1eea99ab1..000000000 --- a/parse_exec.h +++ /dev/null @@ -1,173 +0,0 @@ -/**\file parse_exec.h - - Programmatic execution of a parse tree -*/ - -#ifndef FISH_PARSE_EXEC_H -#define FISH_PARSE_EXEC_H - -#include "parse_tree.h" - -struct parse_execution_visitor_t; -class parse_exec_t; -class parse_execution_context_t -{ - parse_exec_t *ctx; //owned - -public: - parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s); - ~parse_execution_context_t(); - - bool visit_next_node(parse_execution_visitor_t *visitor); - - // Gets the source for a node at a given index - void 
get_source(node_offset_t idx, wcstring *result) const; -}; - - -struct exec_argument_t -{ - node_offset_t parse_node_idx; - exec_argument_t(node_offset_t p) : parse_node_idx(p) - { - } - exec_argument_t() - { - } -}; -typedef std::vector exec_argument_list_t; - -struct exec_redirection_t -{ - node_offset_t parse_node_idx; -}; -typedef std::vector exec_redirection_list_t; - -struct exec_arguments_and_redirections_t -{ - exec_argument_list_t arguments; - exec_redirection_list_t redirections; -}; - -struct exec_basic_statement_t -{ - // Node containing the command - node_offset_t command_idx; - - // Arguments - exec_arguments_and_redirections_t arguments_and_redirections; - - // Decoration - enum - { - decoration_plain, - decoration_command, - decoration_builtin - } decoration; - - exec_basic_statement_t(); - - void set_decoration(uint32_t k) - { - PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin); - switch (k) - { - case parse_keyword_none: - decoration = decoration_plain; - break; - case parse_keyword_command: - decoration = decoration_command; - break; - case parse_keyword_builtin: - decoration = decoration_builtin; - break; - default: - PARSER_DIE(); - break; - } - } - - const exec_argument_list_t &arguments() const - { - return arguments_and_redirections.arguments; - } - - const exec_redirection_list_t &redirections() const - { - return arguments_and_redirections.redirections; - } -}; - -struct exec_function_header_t -{ - // Node containing the function name - node_offset_t name_idx; - - // Node containing the function body - node_offset_t body_idx; - - // Arguments - exec_argument_list_t arguments; -}; - -struct exec_block_statement_t -{ - // Arguments - exec_arguments_and_redirections_t arguments_and_redirections; -}; - -struct exec_if_clause_t -{ - // Node containing the body of the if statement - node_offset_t body; -}; - -struct exec_switch_case_t -{ - exec_argument_list_t arguments; - node_offset_t body; -}; - 
-struct exec_switch_statement_t -{ - exec_argument_t argument; - std::vector cases; -}; - -struct parse_execution_visitor_t -{ - node_offset_t node_idx; - parse_execution_context_t *context; - - parse_execution_visitor_t() : node_idx(0), context(NULL) - { - } - - virtual bool enter_job_list(void) - { - return true; - } - virtual bool enter_job(void) - { - return true; - } - virtual void visit_statement(void) { } - virtual void visit_function(const exec_function_header_t &function) { } - virtual bool enter_block_statement(const exec_block_statement_t &statement) - { - return true; - } - - virtual void enter_if_clause(const exec_if_clause_t &statement) { } - virtual void exit_if_clause(const exec_if_clause_t &statement) { } - - virtual void visit_switch_statement(const exec_switch_statement_t &header) { } - - - virtual void visit_boolean_statement(void) { } - virtual void visit_basic_statement(const exec_basic_statement_t &statement) { } - virtual void exit_job(void) { } - virtual void exit_job_list(void) { } -}; - -#endif From 7b86b2e05a011e37bf11bba2675ef5db684bca24 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 9 Oct 2013 02:03:50 -0700 Subject: [PATCH 032/108] Adoption of new parser in abbreviations --- fish_tests.cpp | 20 +++++-- highlight.cpp | 40 ++++++------- parse_productions.cpp | 10 ++-- parse_productions.h | 2 +- parse_tree.cpp | 29 ++++++--- parse_tree.h | 42 ++++++------- reader.cpp | 134 ++++++++++++------------------------------ 7 files changed, 115 insertions(+), 162 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 6c77ec08a..99ed6cd34 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -61,7 +61,6 @@ #include "signal.h" #include "highlight.h" #include "parse_tree.h" -#include "parse_exec.h" #include "parse_util.h" /** @@ -769,6 +768,11 @@ static void test_abbreviations(void) expanded = reader_expand_abbreviation_in_command(L"of gc", wcslen(L"of gc"), &result); if (expanded) err(L"gc incorrectly expanded on line %ld", 
(long)__LINE__); + /* others should not be */ + expanded = reader_expand_abbreviation_in_command(L"command gc", wcslen(L"command gc"), &result); + if (expanded) err(L"gc incorrectly expanded on line %ld", (long)__LINE__); + + env_pop(); } @@ -1916,12 +1920,16 @@ static void test_new_parser_fuzzing(void) size_t max = 5; for (size_t len=1; len <= max; len++) { - fprintf(stderr, "%lu / %lu\n", len, max); + fprintf(stderr, "%lu / %lu...", len, max); std::vector tokens(len); + size_t count = 0; + parse_t parser; + parse_node_tree_t parse_tree; do { - parse_t parser; - parse_node_tree_t parse_tree; + parser.clear(); + parse_tree.clear(); + count++; for (size_t i=0; i < len; i++) { const parser_fuzz_token_t &token = tokens[i]; @@ -1931,6 +1939,7 @@ static void test_new_parser_fuzzing(void) // keep going until we wrap } while (! increment(tokens)); + fprintf(stderr, "done (%lu)\n", count); } double end = timef(); say(L"All fuzzed in %f seconds!", end - start); @@ -2108,7 +2117,7 @@ int main(int argc, char **argv) say(L"Testing low-level functionality"); set_main_thread(); setup_fork_guards(); - //proc_init(); + //proc_init(); //disabling this prevents catching SIGINT event_init(); function_init(); builtin_init(); @@ -2116,7 +2125,6 @@ int main(int argc, char **argv) env_init(); test_highlighting(); - return 0; test_new_parser_fuzzing(); test_new_parser_correctness(); test_highlighting(); diff --git a/highlight.cpp b/highlight.cpp index 28e32b7a1..8fe9989b9 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -332,7 +332,7 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d } /* Given a plain statement node in a parse tree, get the command and return it, expanded appropriately for commands. If we succeed, return true. 
*/ -static bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd) +bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd) { assert(plain_statement.type == symbol_plain_statement); bool result = false; @@ -708,15 +708,15 @@ static bool has_expand_reserved(const wcstring &str) return result; } -/* Parse a command line. Return by reference the last command, its arguments, and the offset in the string of the beginning of the last argument. This is used by autosuggestions */ -static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expanded_command, const parse_node_t **out_last_arg) +/* Parse a command line. Return by reference the last command, and the last argument to that command (as a copied node), if any. This is used by autosuggestions */ +static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expanded_command, parse_node_t *out_last_arg) { bool result = false; /* Parse the buffer */ parse_node_tree_t parse_tree; parse_t parser; - parser.parse(buff, parse_flag_continue_after_error, &parse_tree, NULL); + parser.parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); /* Find the last statement */ const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL); @@ -727,8 +727,12 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand /* We got it */ result = true; - /* Find the last argument */ - *out_last_arg = parse_tree.find_last_node_of_type(symbol_plain_statement, last_statement); + /* Find the last argument. If we don't get one, return an invalid node. 
*/ + const parse_node_t *last_arg = parse_tree.find_last_node_of_type(symbol_argument, last_statement); + if (last_arg != NULL) + { + *out_last_arg = *last_arg; + } } } return result; @@ -739,20 +743,20 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di { if (str.empty()) return false; - + ASSERT_IS_BACKGROUND_THREAD(); /* Parse the string */ wcstring parsed_command; - const parse_node_t *last_arg_node = NULL; + parse_node_t last_arg_node(token_type_invalid); if (! autosuggest_parse_command(str, &parsed_command, &last_arg_node)) return false; bool result = false; - if (parsed_command == L"cd" && last_arg_node != NULL && last_arg_node->has_source()) + if (parsed_command == L"cd" && last_arg_node.type == symbol_argument && last_arg_node.has_source()) { /* We can possibly handle this specially */ - const wcstring escaped_dir = last_arg_node->get_source(str); + const wcstring escaped_dir = last_arg_node.get_source(str); wcstring suggested_path; /* We always return true because we recognized the command. This prevents us from falling back to dumber algorithms; for example we won't suggest a non-directory for the cd command. */ @@ -771,13 +775,12 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di path_flags_t path_flags = (quote == L'\0') ? PATH_EXPAND_TILDE : 0; if (unescaped && is_potential_cd_path(unescaped_dir, working_directory, path_flags, &suggested_path)) { - /* Note: this looks really wrong for strings that have an "unescapable" character in them, e.g. 
a \t, because parse_util_escape_string_with_quote will insert that character */ wcstring escaped_suggested_path = parse_util_escape_string_with_quote(suggested_path, quote); /* Return it */ out_suggestion = str; - out_suggestion.erase(last_arg_node->source_start); + out_suggestion.erase(last_arg_node.source_start); if (quote != L'\0') out_suggestion.push_back(quote); out_suggestion.append(escaped_suggested_path); if (quote != L'\0') out_suggestion.push_back(quote); @@ -798,14 +801,14 @@ bool autosuggest_validate_from_history(const history_item_t &item, file_detectio /* Parse the string */ wcstring parsed_command; - const parse_node_t *last_arg_node = NULL; + parse_node_t last_arg_node(token_type_invalid); if (! autosuggest_parse_command(item.str(), &parsed_command, &last_arg_node)) return false; - if (parsed_command == L"cd" && last_arg_node != NULL && last_arg_node->has_source()) + if (parsed_command == L"cd" && last_arg_node.type == symbol_argument && last_arg_node.has_source()) { /* We can possibly handle this specially */ - wcstring dir = last_arg_node->get_source(item.str()); + wcstring dir = last_arg_node.get_source(item.str()); if (expand_one(dir, EXPAND_SKIP_CMDSUBST)) { handled = true; @@ -1968,12 +1971,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight() case symbol_plain_statement: { // Get the decoration from the parent - enum parse_statement_decoration_t decoration = parse_statement_decoration_none; - const parse_node_t *decorated_statement = parse_tree.get_parent(node, symbol_decorated_statement); - if (decorated_statement != NULL) - { - decoration = static_cast(decorated_statement->production_idx); - } + enum parse_statement_decoration_t decoration = parse_tree.decoration_for_plain_statement(node); /* Color the command */ const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string); diff --git a/parse_productions.cpp b/parse_productions.cpp index 0900977f7..38d57ebab 100644 --- a/parse_productions.cpp +++ 
b/parse_productions.cpp @@ -27,8 +27,8 @@ static bool production_is_valid(const production_options_t production_list, prod } #define PRODUCTIONS(sym) static const production_options_t productions_##sym -#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) -#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) { return 0; } +#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) +#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) { return 0; } #define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) @@ -418,7 +418,7 @@ RESOLVE(optional_background) } #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; -const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_which_production, production_tag_t *out_tag, wcstring *out_error_text) +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_which_production, wcstring *out_error_text) { bool log_it = false; if (log_it) @@ -428,7 +428,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n /* Fetch the list of productions and the function to resolve them */ const production_options_t *production_list = NULL; - production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) = NULL; + production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword) = 
NULL; switch (node_type) { TEST(job_list) @@ -486,7 +486,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n PARSE_ASSERT(resolver != NULL); const production_t *result = NULL; - production_option_idx_t which = resolver(input_type, input_keyword, out_tag); + production_option_idx_t which = resolver(input_type, input_keyword); if (log_it) { diff --git a/parse_productions.h b/parse_productions.h index a0d43f629..7e132d0c4 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -63,7 +63,7 @@ inline bool production_element_is_valid(production_element_t elem) } /* Fetch a production */ -const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, production_tag_t *out_tag, wcstring *out_error_text); +const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, wcstring *out_error_text); } diff --git a/parse_tree.cpp b/parse_tree.cpp index 30ee6856b..900513f50 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -720,7 +720,7 @@ void parse_ll_t::accept_token(parse_token_t token) // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); - const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag, NULL /* error text */); + const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, NULL /* error text */); if (production == NULL) { if (should_generate_error_messages) @@ -804,6 +804,9 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n if (parse_flags & parse_flag_include_comments) tok_options |= TOK_SHOW_COMMENTS; + if (parse_flags & parse_flag_accept_incomplete_tokens) 
+ tok_options |= TOK_ACCEPT_UNFINISHED; + this->parser->set_should_generate_error_messages(errors != NULL); tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); @@ -845,14 +848,14 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n // Tag nodes -#if 0 - wcstring result = dump_tree(this->parser->nodes, str); - fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); - fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); -#endif - // Acquire the output from the parser this->parser->acquire_output(output, errors); + +#if 0 + //wcstring result = dump_tree(this->parser->nodes, str); + //fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); + fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", output->size(), sizeof(parse_node_t), output->size() * sizeof(parse_node_t)); +#endif // Indicate if we had a fatal error return ! 
this->parser->has_fatal_error(); @@ -992,3 +995,15 @@ bool parse_node_tree_t::argument_list_is_root(const parse_node_t &node) const } return result; } + +enum parse_statement_decoration_t parse_node_tree_t::decoration_for_plain_statement(const parse_node_t &node) const +{ + assert(node.type == symbol_plain_statement); + enum parse_statement_decoration_t decoration = parse_statement_decoration_none; + const parse_node_t *decorated_statement = this->get_parent(node, symbol_decorated_statement); + if (decorated_statement != NULL) + { + decoration = static_cast(decorated_statement->production_idx); + } + return decoration; +} diff --git a/parse_tree.h b/parse_tree.h index b2059914c..945d550c4 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -125,7 +125,10 @@ enum parse_flag_continue_after_error = 1 << 0, /* Include comment tokens */ - parse_flag_include_comments = 1 << 1 + parse_flag_include_comments = 1 << 1, + + /* Indicate that the tokenizer should accept incomplete tokens */ + parse_flag_accept_incomplete_tokens = 1 << 2 }; typedef unsigned int parse_tree_flags_t; @@ -175,9 +178,6 @@ public: node_offset_t child_start; node_offset_t child_count; - /* Type-dependent data */ - uint32_t tag; - /* Which production was used */ uint8_t production_idx; @@ -185,7 +185,7 @@ public: wcstring describe(void) const; /* Constructor */ - explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), tag(0) + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0) { } @@ -211,6 +211,15 @@ public: } }; +/* Statement decorations. 
This matches the order of productions in decorated_statement */ +enum parse_statement_decoration_t +{ + parse_statement_decoration_none, + parse_statement_decoration_command, + parse_statement_decoration_builtin +}; + + /* The parse tree itself */ class parse_node_tree_t : public std::vector { @@ -232,27 +241,10 @@ public: /* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */ bool argument_list_is_root(const parse_node_t &node) const; -}; - - -/* Node type specific data, stored in the tag field */ - -/* Statement decorations, stored in the tag of plain_statement. This matches the order of productions in decorated_statement */ -enum parse_statement_decoration_t -{ - parse_statement_decoration_none, - parse_statement_decoration_command, - parse_statement_decoration_builtin -}; - -/* Argument flags as a bitmask, stored in the tag of argument */ -enum parse_argument_flags_t -{ - /* Indicates that this or a prior argument was --, so this should not be treated as an option */ - parse_argument_no_options = 1 << 0, - /* Indicates that the argument is for a cd command */ - parse_argument_is_for_cd = 1 << 1 + /* Utilities */ + enum parse_statement_decoration_t decoration_for_plain_statement(const parse_node_t &node) const; + }; /* Fish grammar: diff --git a/reader.cpp b/reader.cpp index 228fa9183..0f022c279 100644 --- a/reader.cpp +++ b/reader.cpp @@ -99,6 +99,7 @@ commence. 
#include "path.h" #include "parse_util.h" #include "parser_keywords.h" +#include "parse_tree.h" /** Maximum length of prefix string when printing completion @@ -659,117 +660,56 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso const size_t subcmd_offset = cmdsub_begin - buff; const wcstring subcmd = wcstring(cmdsub_begin, cmdsub_end - cmdsub_begin); - const wchar_t *subcmd_cstr = subcmd.c_str(); - - /* Get the token containing the cursor */ - const wchar_t *subcmd_tok_begin = NULL, *subcmd_tok_end = NULL; - assert(cursor_pos >= subcmd_offset); - size_t subcmd_cursor_pos = cursor_pos - subcmd_offset; - parse_util_token_extent(subcmd_cstr, subcmd_cursor_pos, &subcmd_tok_begin, &subcmd_tok_end, NULL, NULL); - - /* Compute the offset of the token before the cursor within the subcmd */ - assert(subcmd_tok_begin >= subcmd_cstr); - assert(subcmd_tok_end >= subcmd_tok_begin); - const size_t subcmd_tok_begin_offset = subcmd_tok_begin - subcmd_cstr; - const size_t subcmd_tok_length = subcmd_tok_end - subcmd_tok_begin; - - /* Now parse the subcmd, looking for commands */ - bool had_cmd = false, previous_token_is_cmd = false; - tokenizer_t tok(subcmd_cstr, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - for (; tok_has_next(&tok); tok_next(&tok)) + const size_t subcmd_cursor_pos = cursor_pos - subcmd_offset; + + /* Parse this subcmd */ + parse_node_tree_t parse_tree; + parse_t parser; + parser.parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + + /* Look for plain statements where the cursor is at the end of the command */ + const parse_node_t *matching_cmd_node = NULL; + const size_t len = parse_tree.size(); + for (size_t i=0; i < len; i++) { - size_t tok_pos = static_cast(tok_get_pos(&tok)); - if (tok_pos > subcmd_tok_begin_offset) + const parse_node_t &node = parse_tree.at(i); + + /* Only interested in plain statements with source */ + if (node.type != symbol_plain_statement || ! 
node.has_source()) + continue; + + /* Skip decorated statements */ + if (parse_tree.decoration_for_plain_statement(node) != parse_statement_decoration_none) + continue; + + /* Get the command node. Skip it if we can't or it has no source */ + const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string); + if (cmd_node == NULL || ! cmd_node->has_source()) + continue; + + /* Now see if its source range contains our cursor, including at the end */ + if (subcmd_cursor_pos >= cmd_node->source_start && subcmd_cursor_pos <= cmd_node->source_start + cmd_node->source_length) { - /* We've passed the token we're interested in */ + /* Success! */ + matching_cmd_node = cmd_node; break; } - - int last_type = tok_last_type(&tok); - - switch (last_type) - { - case TOK_STRING: - { - if (had_cmd) - { - /* Parameter to the command. */ - } - else - { - const wcstring potential_cmd = tok_last(&tok); - if (parser_keywords_is_subcommand(potential_cmd)) - { - if (potential_cmd == L"command" || potential_cmd == L"builtin") - { - /* 'command' and 'builtin' defeat abbreviation expansion. Skip this command. */ - had_cmd = true; - } - else - { - /* Other subcommand. Pretend it doesn't exist so that we can expand the following command */ - had_cmd = false; - } - } - else - { - /* It's a normal command */ - had_cmd = true; - if (tok_pos == subcmd_tok_begin_offset) - { - /* This is the token we care about! 
*/ - previous_token_is_cmd = true; - } - } - } - break; - } - - case TOK_REDIRECT_NOCLOB: - case TOK_REDIRECT_OUT: - case TOK_REDIRECT_IN: - case TOK_REDIRECT_APPEND: - case TOK_REDIRECT_FD: - { - if (!had_cmd) - { - break; - } - tok_next(&tok); - break; - } - - case TOK_PIPE: - case TOK_BACKGROUND: - case TOK_END: - { - had_cmd = false; - break; - } - - case TOK_COMMENT: - case TOK_ERROR: - default: - { - break; - } - } } - + + /* Now if we found a command node, expand it */ bool result = false; - if (previous_token_is_cmd) + if (matching_cmd_node != NULL) { - /* The token is a command. Try expanding it as an abbreviation. */ - const wcstring token = wcstring(subcmd, subcmd_tok_begin_offset, subcmd_tok_length); + assert(matching_cmd_node->type == parse_token_type_string); + const wcstring token = matching_cmd_node->get_source(subcmd); wcstring abbreviation; if (expand_abbreviation(token, &abbreviation)) { /* There was an abbreviation! Replace the token in the full command. Maintain the relative position of the cursor. 
*/ if (output != NULL) { - size_t cmd_tok_begin_offset = subcmd_tok_begin_offset + subcmd_offset; output->assign(cmdline); - output->replace(cmd_tok_begin_offset, subcmd_tok_length, abbreviation); + output->replace(subcmd_offset + matching_cmd_node->source_start, matching_cmd_node->source_length, abbreviation); } result = true; } From e763345f25f1ddc6b4f149da12fe947a9c2c8a71 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 9 Oct 2013 03:45:58 -0700 Subject: [PATCH 033/108] Reduce child_count in node structure to 8 bits --- parse_tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse_tree.h b/parse_tree.h index 945d550c4..b83e47abc 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -176,7 +176,7 @@ public: /* Children */ node_offset_t child_start; - node_offset_t child_count; + uint8_t child_count; /* Which production was used */ uint8_t production_idx; From 58447c147f20d55555ed4035e3add1ccafec2998 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 9 Oct 2013 15:57:10 -0700 Subject: [PATCH 034/108] Make the new parser LL(2). Support for correct handling of e.g. 'command --help' --- fish_tests.cpp | 80 +++++++++++++++++++++++++++ highlight.cpp | 8 +-- parse_productions.cpp | 44 +++++++++++++-- parse_productions.h | 4 +- parse_tree.cpp | 125 +++++++++++++++++++++++++++++------------- parse_tree.h | 11 +++- 6 files changed, 219 insertions(+), 53 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 99ed6cd34..40a8d7db4 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1945,6 +1945,85 @@ static void test_new_parser_fuzzing(void) say(L"All fuzzed in %f seconds!", end - start); } +// Parse a statement, returning the command, args (joined by spaces), and the decoration. Returns true if successful. 
+static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *out_joined_args, enum parse_statement_decoration_t *out_deco) +{ + out_cmd->clear(); + out_joined_args->clear(); + *out_deco = parse_statement_decoration_none; + + bool result = false; + parse_node_tree_t tree; + parse_t parser; + if (parser.parse(src, parse_flag_none, &tree, NULL)) + { + /* Get the statement. Should only have one */ + const parse_node_tree_t::parse_node_list_t stmt_nodes = tree.find_nodes(tree.at(0), symbol_plain_statement); + if (stmt_nodes.size() != 1) + { + say(L"Unexpected number of statements (%lu) found in '%ls'", stmt_nodes.size(), src.c_str()); + return false; + } + const parse_node_t &stmt = *stmt_nodes.at(0); + + /* Return its decoration */ + *out_deco = tree.decoration_for_plain_statement(stmt); + + /* Return its command */ + tree.command_for_plain_statement(stmt, src, out_cmd); + + /* Return arguments separated by spaces */ + const parse_node_tree_t::parse_node_list_t arg_nodes = tree.find_nodes(stmt, symbol_argument); + for (size_t i=0; i < arg_nodes.size(); i++) + { + if (i > 0) out_joined_args->push_back(L' '); + out_joined_args->append(arg_nodes.at(i)->get_source(src)); + } + result = true; + } + return result; +} + +/* Test the LL2 (two token lookahead) nature of the parser by exercising the special builtin and command handling. 
In particular, 'command foo' should be a decorated statement 'foo' but 'command --help' should be an undecorated statement 'command' with argument '--help', and NOT attempt to run a command called '--help' */ +static void test_new_parser_ll2(void) +{ + say(L"Testing parser two-token lookahead"); + + const struct + { + wcstring src; + wcstring cmd; + wcstring args; + enum parse_statement_decoration_t deco; + } tests[] = + { + {L"echo hello", L"echo", L"hello", parse_statement_decoration_none}, + {L"command echo hello", L"echo", L"hello", parse_statement_decoration_command}, + {L"command command hello", L"command", L"hello", parse_statement_decoration_command}, + {L"builtin command hello", L"command", L"hello", parse_statement_decoration_builtin}, + {L"command --help", L"command", L"--help", parse_statement_decoration_none}, + {L"command -h", L"command", L"-h", parse_statement_decoration_none}, + {L"command", L"command", L"", parse_statement_decoration_none}, + {L"function", L"function", L"", parse_statement_decoration_none}, + {L"function --help", L"function", L"--help", parse_statement_decoration_none} + }; + + for (size_t i=0; i < sizeof tests / sizeof *tests; i++) + { + wcstring cmd, args; + enum parse_statement_decoration_t deco = parse_statement_decoration_none; + bool success = test_1_parse_ll2(tests[i].src, &cmd, &args, &deco); + if (! 
success) + err(L"Parse of '%ls' failed on line %ld", tests[i].cmd.c_str(), (long)__LINE__); + if (cmd != tests[i].cmd) + err(L"When parsing '%ls', expected command '%ls' but got '%ls' on line %ld", tests[i].src.c_str(), tests[i].cmd.c_str(), cmd.c_str(), (long)__LINE__); + if (args != tests[i].args) + err(L"When parsing '%ls', expected args '%ls' but got '%ls' on line %ld", tests[i].src.c_str(), tests[i].args.c_str(), args.c_str(), (long)__LINE__); + if (deco != tests[i].deco) + err(L"When parsing '%ls', expected decoration %d but got %d on line %ld", tests[i].src.c_str(), (int)tests[i].deco, (int)deco, (long)__LINE__); + } +} + __attribute__((unused)) static void test_new_parser(void) { @@ -2125,6 +2204,7 @@ int main(int argc, char **argv) env_init(); test_highlighting(); + test_new_parser_ll2(); test_new_parser_fuzzing(); test_new_parser_correctness(); test_highlighting(); diff --git a/highlight.cpp b/highlight.cpp index 8fe9989b9..ffd5953c6 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -337,12 +337,10 @@ bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_ assert(plain_statement.type == symbol_plain_statement); bool result = false; - // Get the command - const parse_node_t *cmd_node = tree.get_child(plain_statement, 0, parse_token_type_string); - if (cmd_node != NULL && cmd_node->has_source()) + /* Get the command */ + wcstring cmd; + if (tree.command_for_plain_statement(plain_statement, src, &cmd)) { - wcstring cmd(src, cmd_node->source_start, cmd_node->source_length); - /* Try expanding it. If we cannot, it's an error. */ if (expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) { diff --git a/parse_productions.cpp b/parse_productions.cpp index 38d57ebab..90e4a99b8 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -8,7 +8,7 @@ static bool production_is_empty(const production_t production) return production[0] == token_type_invalid; } -// Empty productions are allowed but must be first. 
Validate that the given production is in the valid range, i.e. it is either not empty or there is a non-empty production after it +/* Empty productions are allowed but must be first. Validate that the given production is in the valid range, i.e. it is either not empty or there is a non-empty production after it */ static bool production_is_valid(const production_options_t production_list, production_option_idx_t which) { if (which < 0 || which >= MAX_PRODUCTIONS) @@ -26,9 +26,24 @@ static bool production_is_valid(const production_options_t production_list, prod return nonempty_found; } +/* Helper function that indicates whether a token (typically second token) means 'help'. This is so we can treat e.g. 'command --help' as "invoke the 'command' builtin with --help" instead of "run the --help command". + + If naked_invocation_invokes_help is true, then we treat an invalid type or something other than a string as indicating help; this means that the user ran e.g. 'command' with no arguments. +*/ +static inline bool token_means_help(parse_token_type_t type, parse_keyword_t keyword, bool naked_invocation_invokes_help) +{ + if (keyword == parse_keyword_dash_h || keyword == parse_keyword_dashdash_help) + return true; + + if (naked_invocation_invokes_help && type != parse_token_type_string) + return true; + + return false; +} + #define PRODUCTIONS(sym) static const production_options_t productions_##sym -#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) -#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) { return 0; } +#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) +#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type,
parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) { return 0; } #define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) @@ -115,6 +130,17 @@ PRODUCTIONS(statement) = }; RESOLVE(statement) { + // Go to decorated statements if the subsequent token looks like '--help' + // If we are 'begin', then we expect to be invoked with no arguments. But if we are anything else, we require an argument, so do the same thing if the subsequent token is a line end. + if (token_type == parse_token_type_string) + { + bool naked_invocation_invokes_help = (token_keyword != parse_keyword_begin && token_keyword != parse_keyword_end); + if (token_means_help(token_type2, token_keyword2, naked_invocation_invokes_help)) + { + return 4; //decorated statement + } + } + switch (token_type) { case parse_token_type_string: @@ -149,6 +175,8 @@ RESOLVE(statement) case parse_keyword_command: case parse_keyword_builtin: case parse_keyword_case: + case parse_keyword_dash_h: + case parse_keyword_dashdash_help: return 4; } break; @@ -336,6 +364,10 @@ PRODUCTIONS(decorated_statement) = }; RESOLVE(decorated_statement) { + /* If this is e.g. 
'command --help' then the command is 'command' and not a decoration */ + if (token_means_help(token_type2, token_keyword2, true /* naked_invocation_invokes_help */)) + return 0; + switch (token_keyword) { default: @@ -418,7 +450,7 @@ RESOLVE(optional_background) } #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; -const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_which_production, wcstring *out_error_text) +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, parse_token_type_t input_type2, parse_keyword_t input_keyword2, production_option_idx_t *out_which_production, wcstring *out_error_text) { bool log_it = false; if (log_it) @@ -428,7 +460,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n /* Fetch the list of productions and the function to resolve them */ const production_options_t *production_list = NULL; - production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword) = NULL; + production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) = NULL; switch (node_type) { TEST(job_list) @@ -486,7 +518,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n PARSE_ASSERT(resolver != NULL); const production_t *result = NULL; - production_option_idx_t which = resolver(input_type, input_keyword); + production_option_idx_t which = resolver(input_type, input_keyword, input_type2, input_keyword2); if (log_it) { diff --git a/parse_productions.h b/parse_productions.h index 7e132d0c4..298be0b1c 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -62,8 +62,8 @@ inline bool
production_element_is_valid(production_element_t elem) return elem != token_type_invalid; } -/* Fetch a production */ -const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, wcstring *out_error_text); +/* Fetch a production. We are passed two input tokens. The first input token is guaranteed to be valid; the second token may be invalid if there are no more tokens. */ +const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, parse_token_type_t input_type2, parse_keyword_t input_keyword2, production_option_idx_t *out_idx, wcstring *out_error_text); } diff --git a/parse_tree.cpp b/parse_tree.cpp index 900513f50..2066b8246 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -199,7 +199,7 @@ struct parse_token_t }; /* Convert from tokenizer_t's token type to a parse_token_t type */ -static parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type) +static inline parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type) { parse_token_type_t result = token_type_invalid; switch (tokenizer_token_type) @@ -447,7 +447,7 @@ class parse_ll_t } /* Input */ - void accept_token(parse_token_t token); + void accept_tokens(parse_token_t token1, parse_token_t token2); /* Indicate if we hit a fatal error */ bool has_fatal_error(void) const @@ -678,23 +678,23 @@ bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token) return handled; } -void parse_ll_t::accept_token(parse_token_t token) +void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) { bool logit = false; if (logit) { - fprintf(stderr, "Accept token %ls\n", token.describe().c_str()); + fprintf(stderr, "Accept token %ls\n", token1.describe().c_str()); } - PARSE_ASSERT(token1.type >=
FIRST_PARSE_TOKEN_TYPE); bool consumed = false; // Handle special types specially. Note that these are the only types that can be pushed if the symbol stack is empty. - if (token.type == parse_special_type_parse_error || token.type == parse_special_type_tokenizer_error || token.type == parse_special_type_comment) + if (token1.type == parse_special_type_parse_error || token1.type == parse_special_type_tokenizer_error || token1.type == parse_special_type_comment) { - parse_node_t err_node(token.type); - err_node.source_start = token.source_start; - err_node.source_length = token.source_length; + parse_node_t err_node(token1.type); + err_node.source_start = token1.source_start; + err_node.source_length = token1.source_length; nodes.push_back(err_node); consumed = true; } @@ -703,11 +703,11 @@ void parse_ll_t::accept_token(parse_token_t token) { PARSE_ASSERT(! symbol_stack.empty()); - if (top_node_handle_terminal_types(token)) + if (top_node_handle_terminal_types(token1)) { if (logit) { - fprintf(stderr, "Consumed token %ls\n", token.describe().c_str()); + fprintf(stderr, "Consumed token %ls\n", token1.describe().c_str()); } consumed = true; break; @@ -720,16 +720,16 @@ void parse_ll_t::accept_token(parse_token_t token) // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = nodes.at(stack_elem.node_idx); - const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, NULL /* error text */); + const production_t *production = production_for_token(stack_elem.type, token1.type, token1.keyword, token2.type, token2.keyword, &node.production_idx, NULL /* error text */); if (production == NULL) { if (should_generate_error_messages) { - this->parse_error(token, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token.describe().c_str()); + this->parse_error(token1, L"Unable to produce a '%ls' from input '%ls'", 
stack_elem.describe().c_str(), token1.describe().c_str()); } else { - this->parse_error(token, NULL); + this->parse_error(token1, NULL); } // parse_error sets fatal_errored, which ends the loop } @@ -742,7 +742,7 @@ void parse_ll_t::accept_token(parse_token_t token) // If we end up with an empty stack, something bad happened, like an unbalanced end if (symbol_stack.empty()) { - this->parse_error(token, L"All symbols removed from symbol stack. Likely unbalanced else or end?"); + this->parse_error(token1, L"All symbols removed from symbol stack. Likely unbalanced else or end?"); } } } @@ -783,7 +783,9 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) {L"or", parse_keyword_or}, {L"not", parse_keyword_not}, {L"command", parse_keyword_command}, - {L"builtin", parse_keyword_builtin} + {L"builtin", parse_keyword_builtin}, + {L"-h", parse_keyword_dash_h}, + {L"--help", parse_keyword_dashdash_help} }; for (size_t i=0; i < sizeof keywords / sizeof *keywords; i++) @@ -798,8 +800,38 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } +/* Placeholder invalid token */ +static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_none, -1, -1}; + +/* Return a new parse token, advancing the tokenizer */ +static inline parse_token_t next_parse_token(tokenizer_t *tok) +{ + if (! 
tok_has_next(tok)) + { + return kInvalidToken; + } + + token_type tok_type = static_cast(tok_last_type(tok)); + int tok_start = tok_get_pos(tok); + size_t tok_extent = tok_get_extent(tok); + assert(tok_extent < 10000000); //paranoia + const wchar_t *tok_txt = tok_last(tok); + + parse_token_t result; + result.type = parse_token_type_from_tokenizer_token(tok_type); + result.source_start = (size_t)tok_start; + result.source_length = tok_extent; + result.keyword = keyword_for_token(tok_type, tok_txt); + + tok_next(tok); + return result; +} + bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { + this->parser->set_should_generate_error_messages(errors != NULL); + + /* Construct the tokenizer */ tok_flags_t tok_options = TOK_SQUASH_ERRORS; if (parse_flags & parse_flag_include_comments) tok_options |= TOK_SHOW_COMMENTS; @@ -807,32 +839,29 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n if (parse_flags & parse_flag_accept_incomplete_tokens) tok_options |= TOK_ACCEPT_UNFINISHED; - this->parser->set_should_generate_error_messages(errors != NULL); - tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); - for (; tok_has_next(&tok) && ! this->parser->has_fatal_error(); tok_next(&tok)) + + /* We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our queue with an initial token at index 1. */ + parse_token_t queue[2] = {kInvalidToken, next_parse_token(&tok)}; + + /* Go until the most recently added token is invalid. Note this may mean we don't process anything if there were no tokens. 
*/ + while (queue[1].type != token_type_invalid) { - token_type tok_type = static_cast(tok_last_type(&tok)); - const wchar_t *tok_txt = tok_last(&tok); - int tok_start = tok_get_pos(&tok); - size_t tok_extent = tok_get_extent(&tok); - assert(tok_extent < 10000000); //paranoia - - parse_token_t token; - token.type = parse_token_type_from_tokenizer_token(tok_type); - token.source_start = (size_t)tok_start; - token.source_length = tok_extent; - token.keyword = keyword_for_token(tok_type, tok_txt); - this->parser->accept_token(token); - + /* Push a new token onto the queue */ + queue[0] = queue[1]; + queue[1] = next_parse_token(&tok); + + /* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */ + this->parser->accept_tokens(queue[0], queue[1]); + + /* Handle errors */ if (this->parser->has_fatal_error()) { if (parse_flags & parse_flag_continue_after_error) { - /* Mark an error and then keep going */ - token.type = parse_special_type_parse_error; - token.keyword = parse_keyword_none; - this->parser->accept_token(token); + /* Mark a special error token, and then keep going */ + const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, -1, -1}; + this->parser->accept_tokens(token, kInvalidToken); this->parser->reset_symbols(); } else @@ -843,11 +872,10 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n } } + // Teach each node where its source range is this->parser->determine_node_ranges(); - // Tag nodes - // Acquire the output from the parser this->parser->acquire_output(output, errors); @@ -863,6 +891,8 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors) { + const parse_token_t invalid_token = {token_type_invalid, parse_keyword_none, -1, -1}; + // Only strings can have keywords. 
So if we have a keyword, the type must be a string assert(keyword == parse_keyword_none || token_type == parse_token_type_string); @@ -875,7 +905,7 @@ bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keywo bool wants_errors = (errors != NULL); this->parser->set_should_generate_error_messages(wants_errors); - this->parser->accept_token(token); + this->parser->accept_tokens(token, invalid_token); return ! this->parser->has_fatal_error(); } @@ -1007,3 +1037,20 @@ enum parse_statement_decoration_t parse_node_tree_t::decoration_for_plain_statem } return decoration; } + +bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const +{ + bool result = false; + assert(node.type == symbol_plain_statement); + const parse_node_t *cmd_node = this->get_child(node, 0, parse_token_type_string); + if (cmd_node != NULL && cmd_node->has_source()) + { + out_cmd->assign(src, cmd_node->source_start, cmd_node->source_length); + result = true; + } + else + { + out_cmd->clear(); + } + return result; +} diff --git a/parse_tree.h b/parse_tree.h index b83e47abc..941ddd4e2 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -112,8 +112,12 @@ enum parse_keyword_t parse_keyword_not, parse_keyword_command, parse_keyword_builtin, + + /* The following are not really keywords but are necessary for e.g. "command --help" to work */ + parse_keyword_dash_h, + parse_keyword_dashdash_help, - LAST_KEYWORD = parse_keyword_builtin + LAST_KEYWORD = parse_keyword_dashdash_help }; @@ -243,7 +247,12 @@ public: bool argument_list_is_root(const parse_node_t &node) const; /* Utilities */ + + /* Given a plain statement, get the decoration (from the parent node), or none if there is no decoration */ enum parse_statement_decoration_t decoration_for_plain_statement(const parse_node_t &node) const; + + /* Given a plain statement, get the command by reference (from the child node). Returns true if successful. 
Clears the command on failure. */ + bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const; }; From 5490f54d005dcc9b35409315aefc698203b81ff1 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 01:17:55 -0700 Subject: [PATCH 035/108] Make parse_t::parse a static method so we don't have to create lots of useless parse_t objects --- builtin.cpp | 3 +-- fish_tests.cpp | 9 +++------ parse_tree.cpp | 8 +++++++- parse_tree.h | 6 ++++-- reader.cpp | 3 +-- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index b84d78e73..6e669e697 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3976,8 +3976,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) const wcstring src = str2wcstring(&txt.at(0), txt.size()); parse_node_tree_t parse_tree; parse_error_list_t errors; - parse_t parser; - bool success = parser.parse(src, parse_flag_none, &parse_tree, &errors, true); + bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors, true); if (! success) { stdout_buffer.append(L"Parsing failed:\n"); diff --git a/fish_tests.cpp b/fish_tests.cpp index 40a8d7db4..0cd7dbf08 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1845,8 +1845,7 @@ static void test_new_parser_correctness(void) const parser_test_t *test = &parser_tests[i]; parse_node_tree_t parse_tree; - parse_t parser; - bool success = parser.parse(test->src, parse_flag_none, &parse_tree, NULL); + bool success = parse_t::parse(test->src, parse_flag_none, &parse_tree, NULL); say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no"); if (success && ! 
test->ok) { @@ -1954,8 +1953,7 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o bool result = false; parse_node_tree_t tree; - parse_t parser; - if (parser.parse(src, parse_flag_none, &tree, NULL)) + if (parse_t::parse(src, parse_flag_none, &tree, NULL)) { /* Get the statement. Should only have one */ const parse_node_tree_t::parse_node_list_t stmt_nodes = tree.find_nodes(tree.at(0), symbol_plain_statement); @@ -2030,8 +2028,7 @@ static void test_new_parser(void) say(L"Testing new parser"); const wcstring src = L"echo hello world"; parse_node_tree_t parse_tree; - parse_t parser; - bool success = parser.parse(src, parse_flag_none, &parse_tree, NULL); + bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL); if (! success) { say(L"Parsing failed"); diff --git a/parse_tree.cpp b/parse_tree.cpp index 2066b8246..a1acdb13b 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -827,7 +827,7 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok) return result; } -bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) +bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { this->parser->set_should_generate_error_messages(errors != NULL); @@ -889,6 +889,12 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n return ! 
this->parser->has_fatal_error(); } +bool parse_t::parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) +{ + parse_t parse; + return parse.parse_internal(str, flags, output, errors, log_it); +} + bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors) { const parse_token_t invalid_token = {token_type_invalid, parse_keyword_none, -1, -1}; diff --git a/parse_tree.h b/parse_tree.h index 941ddd4e2..f6e913c25 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -141,12 +141,14 @@ class parse_t { parse_ll_t * const parser; + bool parse_internal(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); + public: parse_t(); ~parse_t(); - /* Parse a string */ - bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); + /* Parse a string all at once */ + static bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); /* Parse a single token */ bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors); diff --git a/reader.cpp b/reader.cpp index 0f022c279..b59f8086c 100644 --- a/reader.cpp +++ b/reader.cpp @@ -664,8 +664,7 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso /* Parse this subcmd */ parse_node_tree_t parse_tree; - parse_t parser; - parser.parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + parse_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); /* Look for plain statements where the cursor is at the end of the command */ const parse_node_t *matching_cmd_node = NULL; From 
77e358a001eea9a36e0c466b0af253d992c370bd Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 01:46:22 -0700 Subject: [PATCH 036/108] Support for parsing e.g. 'command --' as a plain statement, instead of executing the command '--'. --- fish_tests.cpp | 2 ++ parse_productions.cpp | 33 ++++++++++++++++++++++++--------- parse_tree.cpp | 2 ++ parse_tree.h | 2 ++ 4 files changed, 30 insertions(+), 9 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 0cd7dbf08..3884e470e 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2002,6 +2002,8 @@ static void test_new_parser_ll2(void) {L"command --help", L"command", L"--help", parse_statement_decoration_none}, {L"command -h", L"command", L"-h", parse_statement_decoration_none}, {L"command", L"command", L"", parse_statement_decoration_none}, + {L"command -", L"command", L"-", parse_statement_decoration_none}, + {L"command --", L"command", L"--", parse_statement_decoration_none}, {L"function", L"function", L"", parse_statement_decoration_none}, {L"function --help", L"function", L"--help", parse_statement_decoration_none} }; diff --git a/parse_productions.cpp b/parse_productions.cpp index 90e4a99b8..bb415e603 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -26,19 +26,32 @@ static bool production_is_valid(const production_options_t production_list, prod return nonempty_found; } -/* Helper function indicates whether a token (typically second token) means 'help'. This is so we can treat e.g. 'command --help' as "invoke the 'command' builtin with --help' instead of 'run the --help command'. +/* Helper function indicates whether a token (typically second token) causes the preceding token to be treated as a command instead of giving it a special role. This is so we can treat e.g. 'command --help' as "invoke the 'command' builtin with --help' instead of 'run the --help command'. 
if naked_invocation_invokes_help is true, then we treat an invalid type or something other than a string as indicating help; this means that the user ran e.g. 'command' with no arguments. */ -static inline bool token_means_help(parse_token_type_t type, parse_keyword_t keyword, bool naked_invocation_invokes_help) +static inline bool token_implies_previous_keyword_is_command(parse_token_type_t type, parse_keyword_t keyword, bool naked_invocation_invokes_help) { - if (keyword == parse_keyword_dash_h || keyword == parse_keyword_dashdash_help) - return true; + bool result = false; + switch (keyword) + { + case parse_keyword_dash: + case parse_keyword_dashdash: + case parse_keyword_dash_h: + case parse_keyword_dashdash_help: + result = true; + break; + + default: + break; + } - if (naked_invocation_invokes_help && type != parse_token_type_string) - return true; + if (! result) + { + result = naked_invocation_invokes_help && type != parse_token_type_string; + } - return false; + return result; } #define PRODUCTIONS(sym) static const production_options_t productions_##sym @@ -135,7 +148,7 @@ RESOLVE(statement) if (token_type == parse_token_type_string) { bool naked_invocation_invokes_help = (token_keyword != parse_keyword_begin && token_keyword != parse_keyword_end); - if (token_means_help(token_type2, token_keyword2, naked_invocation_invokes_help)) + if (token_implies_previous_keyword_is_command(token_type2, token_keyword2, naked_invocation_invokes_help)) { return 4; //decorated statement } @@ -175,6 +188,8 @@ RESOLVE(statement) case parse_keyword_command: case parse_keyword_builtin: case parse_keyword_case: + case parse_keyword_dash: + case parse_keyword_dashdash: case parse_keyword_dash_h: case parse_keyword_dashdash_help: return 4; @@ -365,7 +380,7 @@ PRODUCTIONS(decorated_statement) = RESOLVE(decorated_statement) { /* If this is e.g. 
'command --help' then the command is 'command' and not a decoration */ - if (token_means_help(token_type2, token_keyword2, true /* naked_invocation_is_help */)) + if (token_implies_previous_keyword_is_command(token_type2, token_keyword2, true /* naked_invocation_is_help */)) return 0; switch (token_keyword) diff --git a/parse_tree.cpp b/parse_tree.cpp index a1acdb13b..793715ecb 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -784,6 +784,8 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) {L"not", parse_keyword_not}, {L"command", parse_keyword_command}, {L"builtin", parse_keyword_builtin}, + {L"-", parse_keyword_dash}, + {L"--", parse_keyword_dashdash}, {L"-h", parse_keyword_dash_h}, {L"--help", parse_keyword_dashdash_help} }; diff --git a/parse_tree.h b/parse_tree.h index f6e913c25..c46eb9116 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -114,6 +114,8 @@ enum parse_keyword_t parse_keyword_builtin, /* The following are not really keywords but are necessary for e.g. "command --help" to work */ + parse_keyword_dash, + parse_keyword_dashdash, parse_keyword_dash_h, parse_keyword_dashdash_help, From ddec870d252c0ae84fd80f8b4b75d91a97e99395 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 02:46:49 -0700 Subject: [PATCH 037/108] Rework decision process for whether to interpret keywords as structural or as commands (for LL parser). 
Will allow 'builtin --' to parse as a plain statement, instead of a decorated statement '--' --- fish_tests.cpp | 1 + parse_productions.cpp | 103 +++++++++++++++++------------------------- parse_productions.h | 3 +- parse_tree.cpp | 39 +++++++--------- parse_tree.h | 19 +++++--- 5 files changed, 72 insertions(+), 93 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 3884e470e..f91f9d3b7 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2004,6 +2004,7 @@ static void test_new_parser_ll2(void) {L"command", L"command", L"", parse_statement_decoration_none}, {L"command -", L"command", L"-", parse_statement_decoration_none}, {L"command --", L"command", L"--", parse_statement_decoration_none}, + {L"builtin --names", L"builtin", L"--names", parse_statement_decoration_none}, {L"function", L"function", L"", parse_statement_decoration_none}, {L"function --help", L"function", L"--help", parse_statement_decoration_none} }; diff --git a/parse_productions.cpp b/parse_productions.cpp index bb415e603..528ca3cea 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -26,37 +26,9 @@ static bool production_is_valid(const production_options_t production_list, prod return nonempty_found; } -/* Helper function indicates whether a token (typically second token) causes the preceding token to be treated as a command instead of giving it a special role. This is so we can treat e.g. 'command --help' as "invoke the 'command' builtin with --help' instead of 'run the --help command'. - - if naked_invocation_invokes_help is true, then we treat an invalid type or something other than a string as indicating help; this means that the user ran e.g. 'command' with no arguments. 
-*/ -static inline bool token_implies_previous_keyword_is_command(parse_token_type_t type, parse_keyword_t keyword, bool naked_invocation_invokes_help) -{ - bool result = false; - switch (keyword) - { - case parse_keyword_dash: - case parse_keyword_dashdash: - case parse_keyword_dash_h: - case parse_keyword_dashdash_help: - result = true; - break; - - default: - break; - } - - if (! result) - { - result = naked_invocation_invokes_help && type != parse_token_type_string; - } - - return result; -} - #define PRODUCTIONS(sym) static const production_options_t productions_##sym -#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) -#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) { return 0; } +#define RESOLVE(sym) static production_option_idx_t resolve_##sym (const parse_token_t &token1, const parse_token_t &token2) +#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (const parse_token_t &input1, const parse_token_t &input2) { return 0; } #define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1) @@ -71,11 +43,11 @@ PRODUCTIONS(job_list) = RESOLVE(job_list) { - switch (token_type) + switch (token1.type) { case parse_token_type_string: // 'end' is special - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_end: case parse_keyword_else: @@ -120,7 +92,7 @@ PRODUCTIONS(job_continuation) = }; RESOLVE(job_continuation) { - switch (token_type) + switch (token1.type) { case parse_token_type_pipe: // Pipe, continuation @@ -143,21 +115,29 @@ PRODUCTIONS(statement) = }; RESOLVE(statement) { - // Go to decorated statements if the subsequent token looks like '--help' + // Go to decorated statements if the subsequent token looks like '--' // If we are 'begin', then 
we expect to be invoked with no arguments. But if we are anything else, we require an argument, so do the same thing if the subsequent token is a line end. - if (token_type == parse_token_type_string) + if (token1.type == parse_token_type_string) { - bool naked_invocation_invokes_help = (token_keyword != parse_keyword_begin && token_keyword != parse_keyword_end); - if (token_implies_previous_keyword_is_command(token_type2, token_keyword2, naked_invocation_invokes_help)) + // If the next token looks like an option (starts with a dash), then parse it as a decorated statement + if (token2.has_dash_prefix) { - return 4; //decorated statement + return 4; } + + // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g. a "naked if". + bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end); + if (naked_invocation_invokes_help && token2.type != parse_token_type_string) + { + return 4; + } + } - switch (token_type) + switch (token1.type) { case parse_token_type_string: - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_and: case parse_keyword_or: @@ -188,10 +168,6 @@ RESOLVE(statement) case parse_keyword_command: case parse_keyword_builtin: case parse_keyword_case: - case parse_keyword_dash: - case parse_keyword_dashdash: - case parse_keyword_dash_h: - case parse_keyword_dashdash_help: return 4; } break; @@ -227,7 +203,7 @@ PRODUCTIONS(else_clause) = }; RESOLVE(else_clause) { - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_else: return 1; @@ -243,7 +219,7 @@ PRODUCTIONS(else_continuation) = }; RESOLVE(else_continuation) { - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_if: return 0; @@ -266,8 +242,8 @@ PRODUCTIONS(case_item_list) = }; RESOLVE(case_item_list) { - if (token_keyword == parse_keyword_case) return 1; - else if (token_type == parse_token_type_end) return 2; //empty line + if (token1.keyword == 
parse_keyword_case) return 1; + else if (token1.type == parse_token_type_end) return 2; //empty line else return 0; } @@ -284,7 +260,7 @@ PRODUCTIONS(argument_list) = }; RESOLVE(argument_list) { - switch (token_type) + switch (token1.type) { case parse_token_type_string: return 1; @@ -308,7 +284,7 @@ PRODUCTIONS(block_header) = }; RESOLVE(block_header) { - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_else: return NO_PRODUCTION; @@ -358,7 +334,7 @@ PRODUCTIONS(boolean_statement) = }; RESOLVE(boolean_statement) { - switch (token_keyword) + switch (token1.keyword) { case parse_keyword_and: return 0; @@ -379,11 +355,13 @@ PRODUCTIONS(decorated_statement) = }; RESOLVE(decorated_statement) { - /* If this is e.g. 'command --help' then the command is 'command' and not a decoration */ - if (token_implies_previous_keyword_is_command(token_type2, token_keyword2, true /* naked_invocation_is_help */)) + /* If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the second token is not a string, then this is a naked 'command' and we should execute it as undecorated. 
*/ + if (token2.type != parse_token_type_string || token2.has_dash_prefix) + { return 0; + } - switch (token_keyword) + switch (token1.keyword) { default: return 0; @@ -407,7 +385,7 @@ PRODUCTIONS(arguments_or_redirections_list) = }; RESOLVE(arguments_or_redirections_list) { - switch (token_type) + switch (token1.type) { case parse_token_type_string: case parse_token_type_redirection: @@ -424,7 +402,7 @@ PRODUCTIONS(argument_or_redirection) = }; RESOLVE(argument_or_redirection) { - switch (token_type) + switch (token1.type) { case parse_token_type_string: return 0; @@ -455,7 +433,7 @@ PRODUCTIONS(optional_background) = RESOLVE(optional_background) { - switch (token_type) + switch (token1.type) { case parse_token_type_background: return 1; @@ -465,17 +443,17 @@ RESOLVE(optional_background) } #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; -const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, parse_token_type_t input_type2, parse_keyword_t input_keyword2, production_option_idx_t *out_which_production, wcstring *out_error_text) +const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text) { bool log_it = false; if (log_it) { - fprintf(stderr, "Resolving production for %ls with input type %ls <%ls>\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str()); + fprintf(stderr, "Resolving production for %ls with input token <%ls>\n", token_type_description(node_type).c_str(), input1.describe().c_str()); } /* Fetch the list of productions and the function to resolve them */ const production_options_t *production_list = NULL; - production_option_idx_t 
(*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, parse_token_type_t token_type2, parse_keyword_t token_keyword2) = NULL; + production_option_idx_t (*resolver)(const parse_token_t &input1, const parse_token_t &input2) = NULL; switch (node_type) { TEST(job_list) @@ -533,7 +511,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n PARSE_ASSERT(resolver != NULL); const production_t *result = NULL; - production_option_idx_t which = resolver(input_type, input_keyword, input_type2, input_keyword2); + production_option_idx_t which = resolver(input1, input2); if (log_it) { @@ -545,7 +523,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n { if (log_it) { - fprintf(stderr, "Token type '%ls' has no production for input type '%ls', keyword '%ls' (in %s)\n", token_type_description(node_type).c_str(), token_type_description(input_type).c_str(), keyword_description(input_keyword).c_str(), __FUNCTION__); + fprintf(stderr, "Node type '%ls' has no production for input '%ls' (in %s)\n", token_type_description(node_type).c_str(), input1.describe().c_str(), __FUNCTION__); } result = NULL; } @@ -557,3 +535,4 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n *out_which_production = which; return result; } + diff --git a/parse_productions.h b/parse_productions.h index 298be0b1c..18894ca78 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -14,7 +14,6 @@ namespace parse_productions #define MAX_PRODUCTIONS 5 #define MAX_SYMBOLS_PER_PRODUCTION 5 - typedef uint32_t production_tag_t; /* A production is an array of unsigned char. Symbols are encoded directly as their symbol value. Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together keywords and symbols. */ @@ -63,7 +62,7 @@ inline bool production_element_is_valid(production_element_t elem) } /* Fetch a production. We are passed two input tokens. 
The first input token is guaranteed to not be invalid; the second token may be invalid if there's no more tokens. */ -const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, parse_token_type_t input_type2, parse_keyword_t input_keyword2, production_option_idx_t *out_idx, wcstring *out_error_text); +const production_t *production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text); } diff --git a/parse_tree.cpp b/parse_tree.cpp index 793715ecb..207458ef6 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -179,24 +179,18 @@ wcstring parse_node_t::describe(void) const return result; } -/** A struct representing the token type passed to */ -struct parse_token_t -{ - enum parse_token_type_t type; // The type of the token as represented by the parser - enum parse_keyword_t keyword; // Any keyword represented by this parser - size_t source_start; - size_t source_length; - wcstring describe() const +/** Returns a string description of the given parse token */ +wcstring parse_token_t::describe() const +{ + wcstring result = token_type_description(type); + if (keyword != parse_keyword_none) { - wcstring result = token_type_description(type); - if (keyword != parse_keyword_none) - { - append_format(result, L" <%ls>", keyword_description(keyword).c_str()); - } - return result; + append_format(result, L" <%ls>", keyword_description(keyword).c_str()); } -}; + return result; +} + /* Convert from tokenizer_t's token type to a parse_token_t type */ static inline parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type) @@ -720,7 +714,7 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) // Get the production for the top of the stack parse_stack_element_t &stack_elem = symbol_stack.back(); parse_node_t &node = 
nodes.at(stack_elem.node_idx); - const production_t *production = production_for_token(stack_elem.type, token1.type, token1.keyword, token2.type, token2.keyword, &node.production_idx, NULL /* error text */); + const production_t *production = production_for_token(stack_elem.type, token1, token2, &node.production_idx, NULL /* error text */); if (production == NULL) { if (should_generate_error_messages) @@ -783,11 +777,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) {L"or", parse_keyword_or}, {L"not", parse_keyword_not}, {L"command", parse_keyword_command}, - {L"builtin", parse_keyword_builtin}, - {L"-", parse_keyword_dash}, - {L"--", parse_keyword_dashdash}, - {L"-h", parse_keyword_dash_h}, - {L"--help", parse_keyword_dashdash_help} + {L"builtin", parse_keyword_builtin} }; for (size_t i=0; i < sizeof keywords / sizeof *keywords; i++) @@ -803,7 +793,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) } /* Placeholder invalid token */ -static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_none, -1, -1}; +static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_none, false, -1, -1}; /* Return a new parse token, advancing the tokenizer */ static inline parse_token_t next_parse_token(tokenizer_t *tok) @@ -820,10 +810,13 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok) const wchar_t *tok_txt = tok_last(tok); parse_token_t result; + + /* Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy, because it ignores quotes. This is the historical behavior. For example, `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard ant it even starts to look like a feature. 
*/ result.type = parse_token_type_from_tokenizer_token(tok_type); + result.keyword = keyword_for_token(tok_type, tok_txt); + result.has_dash_prefix = (tok_txt[0] == L'-'); result.source_start = (size_t)tok_start; result.source_length = tok_extent; - result.keyword = keyword_for_token(tok_type, tok_txt); tok_next(tok); return result; diff --git a/parse_tree.h b/parse_tree.h index c46eb9116..c1bcbab96 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -113,13 +113,20 @@ enum parse_keyword_t parse_keyword_command, parse_keyword_builtin, - /* The following are not really keywords but are necessary for e.g. "command --help" to work */ - parse_keyword_dash, - parse_keyword_dashdash, - parse_keyword_dash_h, - parse_keyword_dashdash_help, + LAST_KEYWORD = parse_keyword_builtin +}; - LAST_KEYWORD = parse_keyword_dashdash_help + +/** A struct representing the token type that we use internally */ +struct parse_token_t +{ + enum parse_token_type_t type; // The type of the token as represented by the parser + enum parse_keyword_t keyword; // Any keyword represented by this token + bool has_dash_prefix; // Hackish: whether the source contains a dash prefix + size_t source_start; + size_t source_length; + + wcstring describe() const; }; From b60db798669faeaf87ccd4a8611b4c2b77ff0990 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 11:32:34 -0700 Subject: [PATCH 038/108] Modify fish_tests to support specifying the tests to run via arguments --- fish_tests.cpp | 90 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index f91f9d3b7..bb13bd2ee 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -63,6 +63,26 @@ #include "parse_tree.h" #include "parse_util.h" +static const char * const * s_arguments; + +/* Indicate if we should test the given function. 
Either we test everything (all arguments) or we run only tests that have a prefix in s_arguments */ +static bool should_test_function(const char *func_name) +{ + /* No args, test everything */ + if (! s_arguments || ! s_arguments[0]) + return true; + + for (size_t i=0; s_arguments[i] != NULL; i++) + { + if (! strncmp(func_name, s_arguments[i], strlen(s_arguments[i]))) + { + /* Prefix match */ + return true; + } + } + return false; +} + /** The number of tests to run */ @@ -1100,6 +1120,12 @@ static void test_complete(void) assert(completions.size() == 2); assert(completions.at(0).completion == L"$Foo1"); assert(completions.at(1).completion == L"$Bar1"); + + completions.clear(); + complete(L"echo (/bin/ech", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 1); + assert(completions.at(0).completion == L"o"); + complete_set_variable_names(NULL); @@ -2192,6 +2218,7 @@ int main(int argc, char **argv) configure_thread_assertions_for_testing(); program_name=L"(ignore)"; + s_arguments = argv; say(L"Testing low-level functionality"); set_main_thread(); @@ -2203,39 +2230,38 @@ int main(int argc, char **argv) reader_init(); env_init(); - test_highlighting(); - test_new_parser_ll2(); - test_new_parser_fuzzing(); - test_new_parser_correctness(); - test_highlighting(); - test_new_parser(); + if (should_test_function("highlighting")) test_highlighting(); + if (should_test_function("new_parser_ll2")) test_new_parser_ll2(); + if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); + if (should_test_function("new_parser_correctness")) test_new_parser_correctness(); + if (should_test_function("new_parser")) test_new_parser(); - test_format(); - test_escape(); - test_convert(); - test_convert_nulls(); - test_tok(); - test_fork(); - test_parser(); - test_utils(); - test_escape_sequences(); - test_lru(); - test_expand(); - test_fuzzy_match(); - test_abbreviations(); - test_test(); - test_path(); - test_word_motion(); - test_is_potential_path(); 
- test_colors(); - test_complete(); - test_completion_insertions(); - test_autosuggestion_combining(); - test_autosuggest_suggest_special(); - history_tests_t::test_history(); - history_tests_t::test_history_merge(); - history_tests_t::test_history_races(); - history_tests_t::test_history_formats(); + if (should_test_function("format")) test_format(); + if (should_test_function("escape")) test_escape(); + if (should_test_function("convert")) test_convert(); + if (should_test_function("convert_nulls")) test_convert_nulls(); + if (should_test_function("tok")) test_tok(); + if (should_test_function("fork")) test_fork(); + if (should_test_function("parser")) test_parser(); + if (should_test_function("utils")) test_utils(); + if (should_test_function("escape_sequences")) test_escape_sequences(); + if (should_test_function("lru")) test_lru(); + if (should_test_function("expand")) test_expand(); + if (should_test_function("fuzzy_match")) test_fuzzy_match(); + if (should_test_function("abbreviations")) test_abbreviations(); + if (should_test_function("test")) test_test(); + if (should_test_function("path")) test_path(); + if (should_test_function("word_motion")) test_word_motion(); + if (should_test_function("is_potential_path")) test_is_potential_path(); + if (should_test_function("colors")) test_colors(); + if (should_test_function("complete")) test_complete(); + if (should_test_function("completion_insertions")) test_completion_insertions(); + if (should_test_function("autosuggestion_combining")) test_autosuggestion_combining(); + if (should_test_function("autosuggest_suggest_special")) test_autosuggest_suggest_special(); + if (should_test_function("history")) history_tests_t::test_history(); + if (should_test_function("history_merge")) history_tests_t::test_history_merge(); + if (should_test_function("history_races")) history_tests_t::test_history_races(); + if (should_test_function("history_formats")) history_tests_t::test_history_formats(); 
//history_tests_t::test_history_speed(); say(L"Encountered %d errors in low-level tests", err_count); From cbd8a27a6d1ec705032486851203f8c4d1b4f56f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 12:04:31 -0700 Subject: [PATCH 039/108] Beef up completion tests --- fish_tests.cpp | 34 ++++++++++++++++++++++++++++++++-- function.cpp | 8 +++++++- function.h | 2 +- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index bb13bd2ee..b52b612d1 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1122,12 +1122,42 @@ static void test_complete(void) assert(completions.at(1).completion == L"$Bar1"); completions.clear(); - complete(L"echo (/bin/ech", completions, COMPLETION_REQUEST_DEFAULT); + complete(L"echo (/bin/mkdi", completions, COMPLETION_REQUEST_DEFAULT); assert(completions.size() == 1); - assert(completions.at(0).completion == L"o"); + assert(completions.at(0).completion == L"r"); + completions.clear(); + complete(L"echo (ls /bin/mkdi", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 1); + assert(completions.at(0).completion == L"r"); + + completions.clear(); + complete(L"echo (command ls /bin/mkdi", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 1); + assert(completions.at(0).completion == L"r"); + + /* Add a function and test completing it in various ways */ + struct function_data_t func_data; + func_data.name = L"scuttlebutt"; + func_data.definition = L"echo gongoozle"; + function_add(func_data, parser_t::principal_parser()); + /* Complete a function name */ + completions.clear(); + complete(L"echo (scuttlebut", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 1); + assert(completions.at(0).completion == L"t"); + /* But not with the command prefix */ + completions.clear(); + complete(L"echo (command scuttlebut", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 0); + + /* Not with the builtin prefix */ + 
completions.clear(); + complete(L"echo (builtin scuttlebut", completions, COMPLETION_REQUEST_DEFAULT); + assert(completions.size() == 0); + complete_set_variable_names(NULL); } diff --git a/function.cpp b/function.cpp index 08fb85560..fef342720 100644 --- a/function.cpp +++ b/function.cpp @@ -192,7 +192,13 @@ void function_add(const function_data_t &data, const parser_t &parser) /* Create and store a new function */ const wchar_t *filename = reader_current_filename(); - int def_offset = parser.line_number_of_character_at_offset(parser.current_block->tok_pos) - 1; + + int def_offset = -1; + if (parser.current_block != NULL) + { + def_offset = parser.line_number_of_character_at_offset(parser.current_block->tok_pos); + } + const function_map_t::value_type new_pair(data.name, function_info_t(data, filename, def_offset, is_autoload)); loaded_functions.insert(new_pair); diff --git a/function.h b/function.h index 2f8dfc36c..fd9455706 100644 --- a/function.h +++ b/function.h @@ -39,7 +39,7 @@ struct function_data_t /** Function definition */ - wchar_t *definition; + const wchar_t *definition; /** List of all event handlers for this function */ From ddf98661e4f16f75bb3deea26ec0c1e3bc651263 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 12 Oct 2013 18:17:03 -0700 Subject: [PATCH 040/108] Adopt new parser in tab completions --- complete.cpp | 320 +++++++++++++++++++------------------------------ highlight.cpp | 9 +- parse_tree.cpp | 28 +++++ parse_tree.h | 9 ++ reader.cpp | 3 + 5 files changed, 169 insertions(+), 200 deletions(-) diff --git a/complete.cpp b/complete.cpp index 8df02b35a..e25042059 100644 --- a/complete.cpp +++ b/complete.cpp @@ -44,6 +44,7 @@ #include "parser_keywords.h" #include "wutil.h" #include "path.h" +#include "parse_tree.h" /* Completion description strings, mostly for different types of files, such as sockets, block devices, etc. 
@@ -1363,7 +1364,9 @@ struct local_options_t bool completer_t::complete_param(const wcstring &scmd_orig, const wcstring &spopt, const wcstring &sstr, bool use_switches) { - const wchar_t * const cmd_orig = scmd_orig.c_str(), * const popt = spopt.c_str(), * const str = sstr.c_str(); + const wchar_t * const cmd_orig = scmd_orig.c_str(); + const wchar_t * const popt = spopt.c_str(); + const wchar_t * const str = sstr.c_str(); bool use_common=1, use_files=1; @@ -1790,231 +1793,160 @@ bool completer_t::try_complete_user(const wcstring &str) return res; } -void complete(const wcstring &cmd, std::vector &comps, completion_request_flags_t flags, wcstring_list_t *commands_to_load) +void complete(const wcstring &cmd_with_subcmds, std::vector &comps, completion_request_flags_t flags, wcstring_list_t *commands_to_load) { + /* Determine the innermost subcommand */ + const wchar_t *cmdsubst_begin, *cmdsubst_end; + parse_util_cmdsubst_extent(cmd_with_subcmds.c_str(), cmd_with_subcmds.size(), &cmdsubst_begin, &cmdsubst_end); + assert(cmdsubst_begin != NULL && cmdsubst_end != NULL && cmdsubst_end >= cmdsubst_begin); + const wcstring cmd = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin); + /* Make our completer */ completer_t completer(cmd, flags); - const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end; - wcstring current_token, prev_token; wcstring current_command; - int on_command=0; - size_t pos; + const size_t pos = cmd.size(); bool done=false; - int use_command = 1; - int use_function = 1; - int use_builtin = 1; - int had_ddash = 0; + bool use_command = 1; + bool use_function = 1; + bool use_builtin = 1; // debug( 1, L"Complete '%ls'", cmd ); - size_t cursor_pos = cmd.size(); - const wchar_t *cmd_cstr = cmd.c_str(); - parse_util_cmdsubst_extent(cmd_cstr, cursor_pos, &cmdsubst_begin, &cmdsubst_end); - parse_util_token_extent(cmd_cstr, cursor_pos, &tok_begin, &tok_end, &prev_begin, &prev_end); - - if (!cmdsubst_begin) - done=1; - + 
const wchar_t *tok_begin = NULL, *prev_begin = NULL, *prev_end = NULL; + parse_util_token_extent(cmd_cstr, cmd.size(), &tok_begin, NULL, &prev_begin, &prev_end); /** If we are completing a variable name or a tilde expansion user name, we do that and return. No need for any other completions. */ + + const wcstring current_token = tok_begin; if (!done) { - wcstring tmp = tok_begin; - done = completer.try_complete_variable(tmp) || completer.try_complete_user(tmp); + done = completer.try_complete_variable(current_token) || completer.try_complete_user(current_token); } if (!done) { - pos = cursor_pos-(cmdsubst_begin-cmd_cstr); - - const wcstring buff = wcstring(cmdsubst_begin, cmdsubst_end-cmdsubst_begin); - - int had_cmd=0; - int end_loop=0; - - tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - while (tok_has_next(&tok) && !end_loop) + //const size_t prev_token_len = (prev_begin ? prev_end - prev_begin : 0); + //const wcstring prev_token(prev_begin, prev_token_len); + + parse_node_tree_t tree; + parse_t::parse(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL); + + /* Find the plain statement that contains the position */ + const parse_node_t *plain_statement = tree.find_node_matching_source_location(symbol_plain_statement, pos, NULL); + if (plain_statement != NULL) { - switch (tok_last_type(&tok)) + assert(plain_statement->has_source() && plain_statement->type == symbol_plain_statement); + + /* Get the command node */ + const parse_node_t *cmd_node = tree.get_child(*plain_statement, 0, parse_token_type_string); + + /* Get the actual command string */ + if (cmd_node != NULL) + current_command = cmd_node->get_source(cmd); + + /* Check the decoration */ + switch (tree.decoration_for_plain_statement(*plain_statement)) { - - case TOK_STRING: + case parse_statement_decoration_none: + use_command = true; + use_function = false; + use_builtin = false; + break; + + case parse_statement_decoration_command: + 
use_command = true; + use_function = false; + use_builtin = false; + break; + + case parse_statement_decoration_builtin: + use_command = false; + use_function = false; + use_builtin = true; + break; + } + + if (cmd_node && cmd_node->location_in_or_at_end_of_source_range(pos)) + { + /* Complete command filename */ + completer.complete_cmd(current_token, use_function, use_builtin, use_command); + } + else + { + /* Get all the arguments */ + const parse_node_tree_t::parse_node_list_t all_arguments = tree.find_nodes(*plain_statement, symbol_argument); + + /* See whether we are in an argument. We may also be in a redirection, or nothing at all. */ + size_t matching_arg_index = -1; + for (size_t i=0; i < all_arguments.size(); i++) { - - const wcstring ncmd = tok_last(&tok); - int is_ddash = (ncmd == L"--") && ((tok_get_pos(&tok)+2) < (long)pos); - - if (!had_cmd) + const parse_node_t *node = all_arguments.at(i); + if (node->location_in_or_at_end_of_source_range(pos)) { - - if (parser_keywords_is_subcommand(ncmd)) - { - if (ncmd == L"builtin") - { - use_function = 0; - use_command = 0; - use_builtin = 1; - } - else if (ncmd == L"command") - { - use_command = 1; - use_function = 0; - use_builtin = 0; - } - break; - } - - - if (!is_ddash || - ((use_command && use_function && use_builtin))) - { - current_command = ncmd; - - size_t token_end = tok_get_pos(&tok) + ncmd.size(); - - on_command = (pos <= token_end); - had_cmd=1; - } - + matching_arg_index = i; + break; } - else - { - if (is_ddash) - { - had_ddash = 1; - } - } - - break; - } - - case TOK_END: - case TOK_PIPE: - case TOK_BACKGROUND: - { - had_cmd=0; - had_ddash = 0; - use_command = 1; - use_function = 1; - use_builtin = 1; - break; - } - - case TOK_ERROR: - { - end_loop=1; - break; } - default: + bool had_ddash = false; + wcstring current_argument, previous_argument; + if (matching_arg_index != (size_t)(-1)) { - break; + /* Get the current argument and the previous argument, if we have one */ + current_argument = 
all_arguments.at(matching_arg_index)->get_source(cmd); + + if (matching_arg_index > 0) + previous_argument = all_arguments.at(matching_arg_index - 1)->get_source(cmd); + + /* Check to see if we have a preceding double-dash */ + for (size_t i=0; i < matching_arg_index; i++) + { + if (all_arguments.at(i)->get_source(cmd) == L"--") + { + had_ddash = true; + break; + } + } } + + bool do_file = false; + + wcstring current_command_unescape = current_command; + wcstring previous_argument_unescape = previous_argument; + wcstring current_argument_unescape = current_argument; + + if (unescape_string(current_command_unescape, 0) && + unescape_string(previous_argument_unescape, 0) && + unescape_string(current_argument_unescape, UNESCAPE_INCOMPLETE)) + { + do_file = completer.complete_param(current_command_unescape, + previous_argument_unescape, + current_argument_unescape, + !had_ddash); + } + + /* If we have found no command specific completions at all, fall back to using file completions. */ + if (completer.empty()) + do_file = true; + + /* But if we are planning on loading commands, don't do file completions. 
+ See https://github.com/fish-shell/fish-shell/issues/378 */ + if (commands_to_load != NULL && completer.has_commands_to_load()) + do_file = false; + + /* And if we're autosuggesting, and the token is empty, don't do file suggestions */ + if ((flags & COMPLETION_REQUEST_AUTOSUGGESTION) && current_argument_unescape.empty()) + do_file = false; + + /* This function wants the unescaped string */ + completer.complete_param_expand(current_token, do_file); } - - if (tok_get_pos(&tok) >= (long)pos) - { - end_loop=1; - } - - tok_next(&tok); - - } - - /* - Get the string to complete - */ - - current_token.assign(tok_begin, cursor_pos-(tok_begin-cmd_cstr)); - - if (prev_begin) - { - prev_token.assign(prev_begin, prev_end - prev_begin); - } - else - { - prev_token.clear(); - } - -// debug( 0, L"on_command: %d, %ls %ls\n", on_command, current_command, current_token ); - - /* - Check if we are using the 'command' or 'builtin' builtins - _and_ we are writing a switch instead of a command. In that - case, complete using the builtins completions, not using a - subcommand. 
- */ - - if ((on_command || current_token == L"--") && - string_prefixes_string(L"-", current_token) && - !(use_command && use_function && use_builtin)) - { - if (use_command == 0) - current_command = L"builtin"; - else - current_command = L"command"; - - had_cmd = 1; - on_command = 0; - } - - /* - Use command completions if in between commands - */ - if (!had_cmd) - { - on_command=1; - } - - - if (on_command) - { - /* Complete command filename */ - completer.complete_cmd(current_token, use_function, use_builtin, use_command); - } - else - { - bool do_file = false; - - wcstring current_command_unescape = current_command; - wcstring prev_token_unescape = prev_token; - wcstring current_token_unescape = current_token; - - if (unescape_string(current_command_unescape, 0) && - unescape_string(prev_token_unescape, 0) && - unescape_string(current_token_unescape, UNESCAPE_INCOMPLETE)) - { - do_file = completer.complete_param(current_command_unescape, - prev_token_unescape, - current_token_unescape, - !had_ddash); - } - - /* If we have found no command specific completions at - all, fall back to using file completions. - */ - if (completer.empty()) - do_file = true; - - /* But if we are planning on loading commands, don't do file completions. 
- See https://github.com/fish-shell/fish-shell/issues/378 */ - if (commands_to_load != NULL && completer.has_commands_to_load()) - do_file = false; - - /* And if we're autosuggesting, and the token is empty, don't do file suggestions */ - if ((flags & COMPLETION_REQUEST_AUTOSUGGESTION) && current_token_unescape.empty()) - do_file = false; - - /* - This function wants the unescaped string - */ - completer.complete_param_expand(current_token, do_file); } } diff --git a/highlight.cpp b/highlight.cpp index ffd5953c6..c4ad7d92e 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -713,8 +713,7 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand /* Parse the buffer */ parse_node_tree_t parse_tree; - parse_t parser; - parser.parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); /* Find the last statement */ const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL); @@ -1709,8 +1708,7 @@ class highlighter_t { /* Parse the tree */ this->parse_tree.clear(); - parse_t parser; - parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL); + parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL); } /* Perform highlighting, returning an array of colors */ @@ -1920,8 +1918,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight() /* Parse the buffer */ parse_node_tree_t parse_tree; - parse_t parser; - parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); + parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); #if 0 const wcstring dump = parse_dump_tree(parse_tree, buff); diff --git a/parse_tree.cpp b/parse_tree.cpp index 207458ef6..87e2b3dc0 
100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1013,6 +1013,34 @@ const parse_node_t *parse_node_tree_t::find_last_node_of_type(parse_token_type_t return result; } +const parse_node_t *parse_node_tree_t::find_node_matching_source_location(parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const +{ + const parse_node_t *result = NULL; + // Find nodes of the given type in the tree, working backwards + const size_t len = this->size(); + for (size_t idx=0; idx < len; idx++) + { + const parse_node_t &node = this->at(idx); + + /* Types must match */ + if (node.type != type) + continue; + + /* Must contain source location */ + if (! node.location_in_or_at_end_of_source_range(source_loc)) + continue; + + /* If a parent is given, it must be an ancestor */ + if (parent != NULL && node_has_ancestor(*this, node, *parent)) + continue; + + /* Found it */ + result = &node; + break; + } + return result; +} + bool parse_node_tree_t::argument_list_is_root(const parse_node_t &node) const { diff --git a/parse_tree.h b/parse_tree.h index c1bcbab96..62ffb622a 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -224,6 +224,12 @@ public: else return wcstring(str, this->source_start, this->source_length); } + + /* Returns whether the given location is within the source range or at its end */ + bool location_in_or_at_end_of_source_range(size_t loc) const + { + return has_source() && source_start <= loc && loc - source_start <= source_length; + } }; /* Statement decorations. This matches the order of productions in decorated_statement */ @@ -254,6 +260,9 @@ public: /* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. 
*/ const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const; + /* Finds a node containing the given source location */ + const parse_node_t *find_node_matching_source_location(parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const; + /* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */ bool argument_list_is_root(const parse_node_t &node) const; diff --git a/reader.cpp b/reader.cpp index b59f8086c..6c1e4a52d 100644 --- a/reader.cpp +++ b/reader.cpp @@ -3187,6 +3187,9 @@ const wchar_t *reader_readline(void) /* Figure out the extent of the token within the command substitution. Note we pass cmdsub_begin here, not buff */ const wchar_t *token_begin, *token_end; parse_util_token_extent(cmdsub_begin, data->buff_pos - (cmdsub_begin-buff), &token_begin, &token_end, 0, 0); + + /* Hack: the token may extend past the end of the command substitution, e.g. in (echo foo) the last token is 'foo)'. Don't let that happen. */ + if (token_end > cmdsub_end) token_end = cmdsub_end; /* Figure out how many steps to get from the current position to the end of the current token. */ size_t end_of_token_offset = token_end - buff; From 41e562ebc22d3e4dfc444e14b579236b82e08b1f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 13 Oct 2013 13:26:52 -0700 Subject: [PATCH 041/108] Clean up redirection parsing in the tokenizer. --- tokenizer.cpp | 166 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 106 insertions(+), 60 deletions(-) diff --git a/tokenizer.cpp b/tokenizer.cpp index 1ef0bf5dc..8a6fe58a8 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -50,7 +50,7 @@ segments. /** Error string for when trying to pipe from fd 0 */ -#define PIPE_ERROR _( L"Can not use fd 0 as pipe output" ) +#define PIPE_ERROR _( L"Cannot use stdin (fd 0) as pipe output" ) /** Characters that separate tokens. 
They are ordered by frequency of occurrence to increase parsing speed. @@ -435,66 +435,92 @@ static void read_comment(tokenizer_t *tok) tok->last_type = TOK_COMMENT; } -/** - Read a FD redirection. +/* Reads a redirection or an "fd pipe" (like 2>|) from a string. Returns how many characters were consumed. If zero, then this string was not a redirection. + + Also returns by reference the redirection mode, and the fd to redirection. */ -static void read_redirect(tokenizer_t *tok, int fd) +static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type *out_redirection_mode, int *out_fd) { + bool errored = false; + int fd = 0; enum token_type redirection_mode = TOK_NONE; - if ((*tok->buff == L'>') || - (*tok->buff == L'^')) + size_t idx = 0; + + /* Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the first character */ + for (; iswdigit(buff[idx]); idx++) { - tok->buff++; - if (*tok->buff == *(tok->buff-1)) + int digit = buff[idx] - L'0'; + fd = fd * 10 + digit; + } + + if (idx == 0) + { + /* We did not find a leading digit, so there's no explicit fd. 
Infer it from the type */ + switch (buff[idx]) { - tok->buff++; - redirection_mode = TOK_REDIRECT_APPEND; - } - else - { - redirection_mode = TOK_REDIRECT_OUT; - } - - if (*tok->buff == L'|') - { - if (fd == 0) - { - TOK_CALL_ERROR(tok, TOK_OTHER, PIPE_ERROR); - return; - } - tok->buff++; - tok->last_token = to_string(fd); - tok->last_type = TOK_PIPE; - return; + case L'>': fd = STDOUT_FILENO; break; + case L'<': fd = STDIN_FILENO; break; + case L'^': fd = STDERR_FILENO; break; + default: errored = true; break; } } - else if (*tok->buff == L'<') + + /* Either way we should have ended on the redirection character itself like '>' */ + wchar_t redirect_char = buff[idx++]; //note increment of idx + if (redirect_char == L'>' || redirect_char == L'^') + { + redirection_mode = TOK_REDIRECT_OUT; + if (buff[idx] == redirect_char) + { + /* Doubled up like ^^ or >>. That means append */ + redirection_mode = TOK_REDIRECT_APPEND; + idx++; + } + } + else if (redirect_char == L'<') { - tok->buff++; redirection_mode = TOK_REDIRECT_IN; } else { - TOK_CALL_ERROR(tok, TOK_OTHER, REDIRECT_ERROR); + /* Something else */ + errored = true; } - - tok->last_token = to_string(fd); - - if (*tok->buff == L'&') + + /* Optional characters like & or ?, or the pipe char | */ + wchar_t opt_char = buff[idx]; + if (opt_char == L'&') { - tok->buff++; - tok->last_type = TOK_REDIRECT_FD; + redirection_mode = TOK_REDIRECT_FD; + idx++; } - else if (*tok->buff == L'?') + else if (opt_char == L'?') { - tok->buff++; - tok->last_type = TOK_REDIRECT_NOCLOB; + redirection_mode = TOK_REDIRECT_NOCLOB; + idx++; } - else + else if (opt_char == L'|') { - tok->last_type = redirection_mode; + /* So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets handled elsewhere. 
*/ + redirection_mode = TOK_PIPE; + idx++; } + + /* Don't return valid-looking stuff on error */ + if (errored) + { + idx = 0; + redirection_mode = TOK_NONE; + } + + /* Return stuff */ + if (out_redirection_mode != NULL) + *out_redirection_mode = redirection_mode; + if (out_fd != NULL) + *out_fd = fd; + + return idx; } wchar_t tok_last_quote(tokenizer_t *tok) @@ -606,36 +632,56 @@ void tok_next(tokenizer_t *tok) break; case L'>': - read_redirect(tok, 1); - return; case L'<': - read_redirect(tok, 0); - return; case L'^': - read_redirect(tok, 2); - return; + { + /* There's some duplication with the code in the default case below. The key difference here is that we must never parse these as a string; a failed redirection is an error! */ + enum token_type mode = TOK_NONE; + int fd = -1; + size_t consumed = read_redirection_or_fd_pipe(tok->buff, &mode, &fd); + if (consumed == 0) + { + TOK_CALL_ERROR(tok, TOK_OTHER, REDIRECT_ERROR); + } + else + { + tok->buff += consumed; + tok->last_type = mode; + tok->last_token = to_string(fd); + } + } + break; default: { + /* Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string */ + size_t consumed = 0; + enum token_type mode = TOK_NONE; + int fd = -1; if (iswdigit(*tok->buff)) + consumed = read_redirection_or_fd_pipe(tok->buff, &mode, &fd); + + if (consumed > 0) { - const wchar_t *orig = tok->buff; - int fd = 0; - while (iswdigit(*tok->buff)) - fd = (fd*10) + (*(tok->buff++) - L'0'); - - switch (*(tok->buff)) + /* It looks like a redirection or a pipe. But we don't support piping fd 0. 
*/ + if (mode == TOK_PIPE && fd == 0) { - case L'^': - case L'>': - case L'<': - read_redirect(tok, fd); - return; + TOK_CALL_ERROR(tok, TOK_OTHER, PIPE_ERROR); + } + else + { + tok->buff += consumed; + tok->last_type = mode; + tok->last_token = to_string(fd); } - tok->buff = orig; } - read_string(tok); + else + { + /* Not a redirection or pipe, so just a stirng */ + read_string(tok); + } } + break; } From ce7c681462a34a0bb04b23b35dfdf7337a4e4dab Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 13 Oct 2013 16:46:02 -0700 Subject: [PATCH 042/108] Fix for bogus completions for function names --- complete.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/complete.cpp b/complete.cpp index e25042059..a9fe01ad5 100644 --- a/complete.cpp +++ b/complete.cpp @@ -1855,8 +1855,8 @@ void complete(const wcstring &cmd_with_subcmds, std::vector &comps { case parse_statement_decoration_none: use_command = true; - use_function = false; - use_builtin = false; + use_function = true; + use_builtin = true; break; case parse_statement_decoration_command: From ce857b077048ced8326e21161385d2bd823c4e05 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 13 Oct 2013 16:58:40 -0700 Subject: [PATCH 043/108] Syntax highlighting for file redirections --- fish_tests.cpp | 81 +++++++++++++++++++--- highlight.cpp | 152 ++++++++++++++++++++++++++++++++++++++++-- parse_productions.cpp | 4 +- parse_tree.cpp | 32 ++++++++- parse_tree.h | 7 +- tokenizer.cpp | 29 ++++++-- tokenizer.h | 3 + 7 files changed, 277 insertions(+), 31 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index b52b612d1..e013743ec 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -396,6 +396,18 @@ static void test_tok() } } } + + /* Test redirection_type_for_string */ + if (redirection_type_for_string(L"<") != TOK_REDIRECT_IN) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"^") != TOK_REDIRECT_OUT) 
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L">") != TOK_REDIRECT_OUT) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>") != TOK_REDIRECT_OUT) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L">>") != TOK_REDIRECT_APPEND) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>>") != TOK_REDIRECT_APPEND) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>?") != TOK_REDIRECT_NOCLOB) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"9999999999999999>?") != TOK_NONE) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>&3") != TOK_REDIRECT_FD) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); + if (redirection_type_for_string(L"2>|") != TOK_NONE) err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); } static int test_fork_helper(void *unused) @@ -2182,9 +2194,59 @@ static void test_highlighting(void) {L")", HIGHLIGHT_OPERATOR}, {NULL, -1} }; + + // Redirections substitutions + const highlight_component_t components8[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"param1", HIGHLIGHT_PARAM}, + + /* Input redirection */ + {L"<", HIGHLIGHT_REDIRECTION}, + {L"/bin/echo", HIGHLIGHT_REDIRECTION}, + + /* Output redirection to a valid fd */ + {L"1>&2", HIGHLIGHT_REDIRECTION}, + + /* Output redirection to an invalid fd */ + {L"2>&", HIGHLIGHT_REDIRECTION}, + {L"LOL", HIGHLIGHT_ERROR}, + + /* Just a param, not a redirection */ + {L"/tmp/blah", HIGHLIGHT_PARAM}, + + /* Input redirection from directory */ + {L"<", HIGHLIGHT_REDIRECTION}, + {L"/tmp/", HIGHLIGHT_ERROR}, + + /* Output redirection to an invalid path */ + {L"3>", 
HIGHLIGHT_REDIRECTION}, + {L"/not/a/valid/path/nope", HIGHLIGHT_ERROR}, + + /* Output redirection to directory */ + {L"3>", HIGHLIGHT_REDIRECTION}, + {L"/tmp/nope/", HIGHLIGHT_ERROR}, + + + /* Redirections to overflow fd */ + {L"99999999999999999999>&2", HIGHLIGHT_ERROR}, + {L"2>&", HIGHLIGHT_REDIRECTION}, + {L"99999999999999999999", HIGHLIGHT_ERROR}, + + /* Output redirection containing a command substitution */ + {L"4>", HIGHLIGHT_REDIRECTION}, + {L"(", HIGHLIGHT_OPERATOR}, + {L"echo", HIGHLIGHT_COMMAND}, + {L"/tmp/somewhere", HIGHLIGHT_PARAM}, + {L")", HIGHLIGHT_OPERATOR}, + + /* Just another param */ + {L"param2", HIGHLIGHT_PARAM}, + {NULL, -1} + }; - const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7}; + const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8}; for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { const highlight_component_t *components = tests[which]; @@ -2206,14 +2268,7 @@ static void test_highlighting(void) expected_colors.push_back(0); } text.append(components[i].txt); - - // hackish space handling - const size_t text_len = wcslen(components[i].txt); - for (size_t j=0; j < text_len; j++) - { - bool is_space = (components[i].txt[j] == L' '); - expected_colors.push_back(is_space ? 0 : components[i].color); - } + expected_colors.resize(text.size(), components[i].color); } assert(expected_colors.size() == text.size()); @@ -2227,6 +2282,10 @@ static void test_highlighting(void) assert(expected_colors.size() == colors.size()); for (size_t i=0; i < text.size(); i++) { + // Hackish space handling. We don't care about the colors in spaces. 
+ if (text.at(i) == L' ') + continue; + if (expected_colors.at(i) != colors.at(i)) { const wcstring spaces(i, L' '); @@ -2248,7 +2307,7 @@ int main(int argc, char **argv) configure_thread_assertions_for_testing(); program_name=L"(ignore)"; - s_arguments = argv; + s_arguments = argv + 1; say(L"Testing low-level functionality"); set_main_thread(); @@ -2262,7 +2321,7 @@ int main(int argc, char **argv) if (should_test_function("highlighting")) test_highlighting(); if (should_test_function("new_parser_ll2")) test_new_parser_ll2(); - if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); + //if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive if (should_test_function("new_parser_correctness")) test_new_parser_correctness(); if (should_test_function("new_parser")) test_new_parser(); diff --git a/highlight.cpp b/highlight.cpp index c4ad7d92e..3acaf4968 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include "fallback.h" #include "util.h" @@ -1692,9 +1693,15 @@ class highlighter_t /* Color an argument */ void color_argument(const parse_node_t &node); + /* Color a redirection */ + void color_redirection(const parse_node_t &node); + /* Color the arguments of the given node */ void color_arguments(const parse_node_t &list_node); + /* Color the redirections of the given node */ + void color_redirections(const parse_node_t &list_node); + /* Color all the children of the command with the given type */ void color_children(const parse_node_t &parent, parse_token_type_t type, int color); @@ -1729,6 +1736,7 @@ void highlighter_t::color_node(const parse_node_t &node, int color) std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end, color); } +/* node does not necessarily have type symbol_argument here */ void highlighter_t::color_argument(const parse_node_t &node) { if (! 
node.has_source()) @@ -1819,7 +1827,7 @@ void highlighter_t::color_arguments(const parse_node_t &list_node) /* Find all the arguments of this list */ const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_argument); - for (node_offset_t i=0; i < nodes.size(); i++) + for (size_t i=0; i < nodes.size(); i++) { const parse_node_t *child = nodes.at(i); assert(child != NULL && child->type == symbol_argument); @@ -1841,6 +1849,141 @@ void highlighter_t::color_arguments(const parse_node_t &list_node) } } +void highlighter_t::color_redirection(const parse_node_t &redirection_node) +{ + assert(redirection_node.type == symbol_redirection); + if (! redirection_node.has_source()) + return; + + const parse_node_t *redirection_primitive = this->parse_tree.get_child(redirection_node, 0, parse_token_type_redirection); //like 2> + const parse_node_t *redirection_target = this->parse_tree.get_child(redirection_node, 1, parse_token_type_string); //like &1 or file path + + if (redirection_primitive != NULL) + { + wcstring target; + const enum token_type redirect_type = this->parse_tree.type_for_redirection(redirection_node, this->buff, &target); + + /* We may get a TOK_NONE redirection type, e.g. if the redirection is invalid */ + this->color_node(*redirection_primitive, redirect_type == TOK_NONE ? HIGHLIGHT_ERROR : HIGHLIGHT_REDIRECTION); + + /* Check if the argument contains a command substitution. If so, highlight it as a param even though it's a command redirection, and don't try to do any other validation. */ + if (parse_util_locate_cmdsubst(target.c_str(), NULL, NULL, true) != 0) + { + if (redirection_target != NULL) + this->color_argument(*redirection_target); + } + else + { + /* No command substitution, so we can highlight the target file or fd. For example, disallow redirections into a non-existent directory */ + bool target_is_valid = true; + + if (! 
expand_one(target, EXPAND_SKIP_CMDSUBST)) + { + /* Could not be expanded */ + target_is_valid = false; + } + else + { + /* Ok, we successfully expanded our target. Now verify that it works with this redirection. We will probably need it as a path (but not in the case of fd redirections) */ + const wcstring target_path = apply_working_directory(target, this->working_directory); + switch (redirect_type) + { + case TOK_REDIRECT_FD: + { + /* target should be an fd. It must be all digits, and must not overflow. fish_wcstoi returns INT_MAX on overflow; we could instead check errno to disambiguate this from a real INT_MAX fd, but instead we just disallow that. */ + const wchar_t *target_cstr = target.c_str(); + wchar_t *end = NULL; + int fd = fish_wcstoi(target_cstr, &end, 10); + + /* The iswdigit check ensures there's no leading whitespace, the *end check ensures the entire string was consumed, and the numeric checks ensure the fd is at least zero and there was no overflow */ + target_is_valid = (iswdigit(target_cstr[0]) && *end == L'\0' && fd >= 0 && fd < INT_MAX); + } + break; + + case TOK_REDIRECT_IN: + { + /* Input redirections must have a readable non-directory */ + struct stat buf = {}; + target_is_valid = ! waccess(target_path, R_OK) && ! wstat(target_path, &buf) && ! S_ISDIR(buf.st_mode); + } + break; + + case TOK_REDIRECT_OUT: + case TOK_REDIRECT_APPEND: + case TOK_REDIRECT_NOCLOB: + { + /* Test whether the file exists, and whether it's writable (possibly after creating it). access() returns failure if the file does not exist. */ + bool file_exists = false, file_is_writable = false; + int err = 0; + + struct stat buf = {}; + if (wstat(target_path, &buf) < 0) + { + err = errno; + } + + if (string_suffixes_string(L"/", target)) + { + /* Redirections to things that are directories are definitely not allowed */ + file_exists = false; + file_is_writable = false; + } + else if (err == 0) + { + /* No err. 
We can write to it if it's not a directory and we have permission */ + file_exists = true; + file_is_writable = ! S_ISDIR(buf.st_mode) && ! waccess(target_path, W_OK); + } + else if (err == ENOENT) + { + /* File does not exist. Check if its parent directory is writable. */ + wcstring parent = wdirname(target_path); + + /* Ensure that the parent ends with the path separator. This will ensure that we get an error if the parent directory is not really a directory. */ + if (! string_suffixes_string(L"/", parent)) + parent.push_back(L'/'); + + /* Now the file is considered writable if the parent directory is writable */ + file_exists = false; + file_is_writable = (0 == waccess(parent, W_OK)); + } + else + { + /* Other errors we treat as not writable. This includes things like ENOTDIR. */ + file_exists = false; + file_is_writable = false; + } + + /* NOCLOB means that we must not overwrite files that exist */ + target_is_valid = file_is_writable && ! (file_exists && redirect_type == TOK_REDIRECT_NOCLOB); + } + break; + + default: + /* We should not get here, since the node was marked as a redirection, but treat it as an error for paranoia */ + target_is_valid = false; + break; + } + } + + if (redirection_target != NULL) + { + this->color_node(*redirection_target, target_is_valid ? 
HIGHLIGHT_REDIRECTION : HIGHLIGHT_ERROR); + } + } + } +} + +// Color all of the redirections of the given command +void highlighter_t::color_redirections(const parse_node_t &list_node) +{ + const parse_node_tree_t::parse_node_list_t nodes = this->parse_tree.find_nodes(list_node, symbol_redirection); + for (size_t i=0; i < nodes.size(); i++) + { + this->color_redirection(*nodes.at(i)); + } +} + /* Color all the children of the command with the given type */ void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type, int color) { @@ -1950,12 +2093,6 @@ const highlighter_t::color_array_t & highlighter_t::highlight() } break; - case symbol_redirection: - { - this->color_children(node, parse_token_type_string, HIGHLIGHT_REDIRECTION); - } - break; - case parse_token_type_background: case parse_token_type_end: { @@ -1994,6 +2131,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight() if (parse_tree.argument_list_is_root(node)) { this->color_arguments(node); + this->color_redirections(node); } } break; diff --git a/parse_productions.cpp b/parse_productions.cpp index 528ca3cea..227955453 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -398,7 +398,7 @@ RESOLVE(arguments_or_redirections_list) PRODUCTIONS(argument_or_redirection) = { {symbol_argument}, - {parse_token_type_redirection} + {symbol_redirection} }; RESOLVE(argument_or_redirection) { @@ -421,7 +421,7 @@ RESOLVE_ONLY(argument) PRODUCTIONS(redirection) = { - {parse_token_type_redirection} + {parse_token_type_redirection, parse_token_type_string} }; RESOLVE_ONLY(redirection) diff --git a/parse_tree.cpp b/parse_tree.cpp index 87e2b3dc0..97421dab1 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -519,8 +519,11 @@ void parse_ll_t::determine_node_ranges(void) for (node_offset_t i=0; i < parent->child_count; i++) { const parse_node_t &child = nodes.at(parent->child_offset(i)); - min_start = std::min(min_start, child.source_start); - max_end = std::max(max_end, 
child.source_start + child.source_length); + if (child.has_source()) + { + min_start = std::min(min_start, child.source_start); + max_end = std::max(max_end, child.source_start + child.source_length); + } } if (min_start != source_start_invalid) @@ -691,6 +694,10 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) err_node.source_length = token1.source_length; nodes.push_back(err_node); consumed = true; + + /* tokenizer errors are fatal */ + if (token1.type == parse_special_type_tokenizer_error) + this->fatal_errored = true; } while (! consumed && ! this->fatal_errored) @@ -811,7 +818,7 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok) parse_token_t result; - /* Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy, because it ignores quotes. This is the historical behavior. For example, `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard ant it even starts to look like a feature. */ + /* Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy, because it ignores quotes. This is the historical behavior. For example, `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard and it even starts to look like a feature. */ result.type = parse_token_type_from_tokenizer_token(tok_type); result.keyword = keyword_for_token(tok_type, tok_txt); result.has_dash_prefix = (tok_txt[0] == L'-'); @@ -906,6 +913,7 @@ bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keywo bool wants_errors = (errors != NULL); this->parser->set_should_generate_error_messages(wants_errors); + /* Passing invalid_token here is totally wrong. This code is only used in testing however. 
*/ this->parser->accept_tokens(token, invalid_token); return ! this->parser->has_fatal_error(); @@ -1083,3 +1091,21 @@ bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, co } return result; } + +enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, wcstring *out_target) const +{ + assert(redirection_node.type == symbol_redirection); + enum token_type result = TOK_NONE; + const parse_node_t *redirection_primitive = this->get_child(redirection_node, 0, parse_token_type_redirection); //like 2> + const parse_node_t *redirection_target = this->get_child(redirection_node, 1, parse_token_type_string); //like &1 or file path + + if (redirection_primitive != NULL && redirection_primitive->has_source()) + { + result = redirection_type_for_string(redirection_primitive->get_source(src)); + } + if (out_target != NULL) + { + *out_target = redirection_target ? redirection_target->get_source(src) : L""; + } + return result; +} diff --git a/parse_tree.h b/parse_tree.h index 62ffb622a..79cae8ccb 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -273,7 +273,9 @@ public: /* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. 
*/ bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const; - + + /* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */ + enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, wcstring *out_target) const; }; /* Fish grammar: @@ -332,7 +334,8 @@ public: argument_or_redirection arguments_or_redirections_list argument_or_redirection = argument | redirection argument = - redirection = + + redirection = terminator = | diff --git a/tokenizer.cpp b/tokenizer.cpp index 8a6fe58a8..2416ce9d6 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -435,9 +435,11 @@ static void read_comment(tokenizer_t *tok) tok->last_type = TOK_COMMENT; } + + /* Reads a redirection or an "fd pipe" (like 2>|) from a string. Returns how many characters were consumed. If zero, then this string was not a redirection. - Also returns by reference the redirection mode, and the fd to redirection. + Also returns by reference the redirection mode, and the fd to redirection. If there is overflow, *out_fd is set to -1. */ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type *out_redirection_mode, int *out_fd) { @@ -447,13 +449,17 @@ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type * size_t idx = 0; - /* Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the first character */ + /* Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the first character. Watch out for overflow. */ + long long big_fd = 0; for (; iswdigit(buff[idx]); idx++) { - int digit = buff[idx] - L'0'; - fd = fd * 10 + digit; + /* Note that it's important we consume all the digits here, even if it overflows. 
*/ + if (big_fd <= INT_MAX) + big_fd = big_fd * 10 + (buff[idx] - L'0'); } + fd = (big_fd > INT_MAX ? -1 : static_cast(big_fd)); + if (idx == 0) { /* We did not find a leading digit, so there's no explicit fd. Infer it from the type */ @@ -523,6 +529,17 @@ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type * return idx; } +enum token_type redirection_type_for_string(const wcstring &str) +{ + enum token_type mode = TOK_NONE; + int fd = 0; + read_redirection_or_fd_pipe(str.c_str(), &mode, &fd); + /* Redirections only, no pipes */ + if (mode == TOK_PIPE || fd < 0) + mode = TOK_NONE; + return mode; +} + wchar_t tok_last_quote(tokenizer_t *tok) { CHECK(tok, 0); @@ -639,7 +656,7 @@ void tok_next(tokenizer_t *tok) enum token_type mode = TOK_NONE; int fd = -1; size_t consumed = read_redirection_or_fd_pipe(tok->buff, &mode, &fd); - if (consumed == 0) + if (consumed == 0 || fd < 0) { TOK_CALL_ERROR(tok, TOK_OTHER, REDIRECT_ERROR); } @@ -663,7 +680,7 @@ void tok_next(tokenizer_t *tok) if (consumed > 0) { - /* It looks like a redirection or a pipe. But we don't support piping fd 0. */ + /* It looks like a redirection or a pipe. But we don't support piping fd 0. Note that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer error. 
*/ if (mode == TOK_PIPE && fd == 0) { TOK_CALL_ERROR(tok, TOK_OTHER, PIPE_ERROR); diff --git a/tokenizer.h b/tokenizer.h index dec206a58..8e130f0e7 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -187,6 +187,9 @@ const wchar_t *tok_get_desc(int type); */ int tok_get_error(tokenizer_t *tok); +/* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid */ +enum token_type redirection_type_for_string(const wcstring &str); + enum move_word_style_t { move_word_style_punctuation, //stop at punctuation From e8ba3c2f4de8c60808e9c919cc3e947d15136e21 Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Mon, 14 Oct 2013 09:12:45 +0200 Subject: [PATCH 044/108] Fix compilation errors under Clang. --- parse_productions.h | 1 + parse_tree.h | 1 + 2 files changed, 2 insertions(+) diff --git a/parse_productions.h b/parse_productions.h index 18894ca78..e6c003ab2 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -7,6 +7,7 @@ #define FISH_PARSE_TREE_CONSTRUCTION_H #include "parse_tree.h" +#include namespace parse_productions { diff --git a/parse_tree.h b/parse_tree.h index 79cae8ccb..8621cea8f 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -13,6 +13,7 @@ #include "common.h" #include "tokenizer.h" #include +#include #define PARSE_ASSERT(a) assert(a) #define PARSER_DIE() do { fprintf(stderr, "Parser dying!\n"); exit_without_destructors(-1); } while (0) From 22d22f6aa883a6f48e9df0cd55254faa2cfc425e Mon Sep 17 00:00:00 2001 From: Konrad Borowski Date: Mon, 14 Oct 2013 11:45:29 +0200 Subject: [PATCH 045/108] Remove undefined behavior from parse_error(). Having function that takes arbitrary number of arguments without actually reading them is undefined behavior, as it could cause stack to be in the corrupted state. Now arguments after token are parsed, even if they aren't needed. 
See also: http://asciinema.org/a/5904 --- parse_tree.cpp | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index 97421dab1..3e0c52566 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -341,6 +341,7 @@ class parse_ll_t bool top_node_handle_terminal_types(parse_token_t token); void parse_error(const wchar_t *expected, parse_token_t token); + void parse_error(parse_token_t token); void parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); @@ -551,19 +552,25 @@ void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *e this->symbol_stack.clear(); } -void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) +void parse_ll_t::parse_error(parse_token_t token) { this->fatal_errored = true; +} + +void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) +{ + parse_error(token); + + //this->dump_stack(); + parse_error_t err; + + va_list va; + va_start(va, fmt); + err.text = vformat_string(fmt, va); + va_end(va); + if (this->should_generate_error_messages) { - //this->dump_stack(); - parse_error_t err; - - va_list va; - va_start(va, fmt); - err.text = vformat_string(fmt, va); - va_end(va); - err.source_start = token.source_start; err.source_length = token.source_length; this->errors.push_back(err); @@ -730,7 +737,7 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) } else { - this->parse_error(token1, NULL); + this->parse_error(token1); } // parse_error sets fatal_errored, which ends the loop } From b06e7983733578d783bc585e7b0fc95f401b2e8d Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 16 Oct 2013 01:17:27 -0700 Subject: [PATCH 046/108] Revert "Remove undefined behavior from parse_error()." Per my understanding this is not undefined behavior. 
No ABI depends on the called function reading variadic arguments, nor does any standard require it. So if this is crashing something else must be going on. This reverts commit 22d22f6aa883a6f48e9df0cd55254faa2cfc425e. --- parse_tree.cpp | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index 3e0c52566..97421dab1 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -341,7 +341,6 @@ class parse_ll_t bool top_node_handle_terminal_types(parse_token_t token); void parse_error(const wchar_t *expected, parse_token_t token); - void parse_error(parse_token_t token); void parse_error(parse_token_t token, const wchar_t *format, ...); void append_error_callout(wcstring &error_message, parse_token_t token); @@ -552,25 +551,19 @@ void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *e this->symbol_stack.clear(); } -void parse_ll_t::parse_error(parse_token_t token) -{ - this->fatal_errored = true; -} - void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) 
{ - parse_error(token); - - //this->dump_stack(); - parse_error_t err; - - va_list va; - va_start(va, fmt); - err.text = vformat_string(fmt, va); - va_end(va); - + this->fatal_errored = true; if (this->should_generate_error_messages) { + //this->dump_stack(); + parse_error_t err; + + va_list va; + va_start(va, fmt); + err.text = vformat_string(fmt, va); + va_end(va); + err.source_start = token.source_start; err.source_length = token.source_length; this->errors.push_back(err); @@ -737,7 +730,7 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) } else { - this->parse_error(token1); + this->parse_error(token1, NULL); } // parse_error sets fatal_errored, which ends the loop } From 2a1fd421bda502cc183415163eea1a216da79576 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 25 Nov 2013 00:48:01 -0800 Subject: [PATCH 047/108] Correctly detect unbalanced 'end' in syntax highlighting --- fish_tests.cpp | 8 +++++++- parse_tree.cpp | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 153ca70ea..b5bc3a20e 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2263,9 +2263,15 @@ static void test_highlighting(void) {L"param2", HIGHLIGHT_PARAM}, {NULL, -1} }; + + const highlight_component_t components9[] = + { + {L"end", HIGHLIGHT_ERROR}, + {NULL, -1} + }; - const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8}; + const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9}; for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { const highlight_component_t *components = tests[which]; diff --git a/parse_tree.cpp b/parse_tree.cpp index 97421dab1..24cf45986 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -862,7 +862,7 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t 
parse_flags if (parse_flags & parse_flag_continue_after_error) { /* Mark a special error token, and then keep going */ - const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, -1, -1}; + const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, false, queue[0].source_start, queue[0].source_length}; this->parser->accept_tokens(token, kInvalidToken); this->parser->reset_symbols(); } From f2211ff3ad826896a0903bdfa3f2ffea93049caf Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 26 Nov 2013 00:01:23 -0800 Subject: [PATCH 048/108] Update some tests --- fish_tests.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index b5bc3a20e..c22c71e70 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2002,11 +2002,13 @@ static void test_new_parser_fuzzing(void) { say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t)); double start = timef(); + bool log_it = false; // ensure nothing crashes - size_t max = 5; + size_t max = 4; for (size_t len=1; len <= max; len++) { - fprintf(stderr, "%lu / %lu...", len, max); + if (log_it) + fprintf(stderr, "%lu / %lu...", len, max); std::vector tokens(len); size_t count = 0; parse_t parser; @@ -2025,10 +2027,12 @@ static void test_new_parser_fuzzing(void) // keep going until we wrap } while (! increment(tokens)); - fprintf(stderr, "done (%lu)\n", count); + if (log_it) + fprintf(stderr, "done (%lu)\n", count); } double end = timef(); - say(L"All fuzzed in %f seconds!", end - start); + if (log_it) + say(L"All fuzzed in %f seconds!", end - start); } // Parse a statement, returning the command, args (joined by spaces), and the decoration. Returns true if successful. 
@@ -2266,6 +2270,9 @@ static void test_highlighting(void) const highlight_component_t components9[] = { + {L"end", HIGHLIGHT_ERROR}, + {L";", HIGHLIGHT_END}, + {L"if", HIGHLIGHT_COMMAND}, {L"end", HIGHLIGHT_ERROR}, {NULL, -1} }; @@ -2346,7 +2353,7 @@ int main(int argc, char **argv) if (should_test_function("highlighting")) test_highlighting(); if (should_test_function("new_parser_ll2")) test_new_parser_ll2(); - //if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive + if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive if (should_test_function("new_parser_correctness")) test_new_parser_correctness(); if (should_test_function("new_parser")) test_new_parser(); if (should_test_function("escape")) test_unescape_sane(); From dd0cc5ed9fa60f4bae5530d1708a2974eb0c454f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 8 Dec 2013 13:41:12 -0800 Subject: [PATCH 049/108] Rewriting indenting functionality to use new parser --- fish_tests.cpp | 194 +++++++++++++++++++++++++++++++++++++----- parse_productions.cpp | 1 + parse_tree.cpp | 30 ++++--- parse_tree.h | 1 - parse_util.cpp | 115 +++++++++++++++++++++++++ parse_util.h | 2 + parser.cpp | 5 +- parser.h | 9 +- reader.cpp | 9 +- 9 files changed, 328 insertions(+), 38 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index c57af8abe..b4d37f8b8 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -64,23 +64,32 @@ #include "parse_util.h" static const char * const * s_arguments; +static int s_test_run_count = 0; /* Indicate if we should test the given function. Either we test everything (all arguments) or we run only tests that have a prefix in s_arguments */ static bool should_test_function(const char *func_name) { /* No args, test everything */ + bool result = false; if (! s_arguments || ! s_arguments[0]) - return true; - - for (size_t i=0; s_arguments[i] != NULL; i++) { - if (! 
strncmp(func_name, s_arguments[i], strlen(s_arguments[i]))) + result = true; + } + else + { + for (size_t i=0; s_arguments[i] != NULL; i++) { - /* Prefix match */ - return true; + if (! strncmp(func_name, s_arguments[i], strlen(s_arguments[i]))) + { + /* Prefix match */ + result = true; + break; + } } } - return false; + if (result) + s_test_run_count++; + return result; } /** @@ -640,6 +649,147 @@ static void test_parser() } } +static void test_indents() +{ + say(L"Testing indents"); + + // Here are the components of our source and the indents we expect those to be + struct indent_component_t { + const wchar_t *txt; + int indent; + }; + + const indent_component_t components1[] = + { + {L"if foo", 0}, + {L"end", 0}, + {NULL, -1} + }; + + const indent_component_t components2[] = + { + {L"if foo", 0}, + {L"", 1}, //trailing newline! + {NULL, -1} + }; + + const indent_component_t components3[] = + { + {L"if foo", 0}, + {L"foo", 1}, + {L"end", 0}, //trailing newline! + {NULL, -1} + }; + + const indent_component_t components4[] = + { + {L"if foo", 0}, + {L"if bar", 1}, + {L"end", 1}, + {L"end", 0}, + {L"", 0}, + {NULL, -1} + }; + + const indent_component_t components5[] = + { + {L"if foo", 0}, + {L"if bar", 1}, + {L"", 2}, + {NULL, -1} + }; + + const indent_component_t components6[] = + { + {L"begin", 0}, + {L"foo", 1}, + {L"", 1}, + {NULL, -1} + }; + + const indent_component_t components7[] = + { + {L"begin; end", 0}, + {L"foo", 0}, + {L"", 0}, + {NULL, -1} + }; + + const indent_component_t components8[] = + { + {L"if foo", 0}, + {L"if bar", 1}, + {L"baz", 2}, + {L"end", 1}, + {L"", 1}, + {NULL, -1} + }; + + const indent_component_t components9[] = + { + {L"switch foo", 0}, + {L"", 1}, + {NULL, -1} + }; + + const indent_component_t components10[] = + { + {L"switch foo", 0}, + {L"case bar", 1}, + {L"case baz", 1}, + {L"quux", 2}, + {L"", 2}, + {NULL, -1} + }; + + + + const indent_component_t *tests[] = {components1, components2, components3, components4, components5, 
components6, components7, components8, components9, components10}; + for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) + { + const indent_component_t *components = tests[which]; + // Count how many we have + size_t component_count = 0; + while (components[component_count].txt != NULL) + { + component_count++; + } + + // Generate the expected indents + wcstring text; + std::vector expected_indents; + for (size_t i=0; i < component_count; i++) + { + if (i > 0) + { + text.push_back(L'\n'); + expected_indents.push_back(components[i].indent); + } + text.append(components[i].txt); + expected_indents.resize(text.size(), components[i].indent); + } + assert(expected_indents.size() == text.size()); + + // Compute the indents + std::vector indents = parse_util_compute_indents(text); + + if (expected_indents.size() != indents.size()) + { + err(L"Indent vector has wrong size! Expected %lu, actual %lu", expected_indents.size(), indents.size()); + } + assert(expected_indents.size() == indents.size()); + for (size_t i=0; i < text.size(); i++) + { + if (expected_indents.at(i) != indents.at(i)) + { + err(L"Wrong indent at index %lu in test #%lu (expected %d, actual %d):\n%ls\n", i, which + 1, expected_indents.at(i), indents.at(i), text.c_str()); + break; //don't keep showing errors for the rest of the line + } + } + + } +} + static void test_utils() { say(L"Testing utils"); @@ -2176,25 +2326,26 @@ static void test_new_parser_ll2(void) } } -__attribute__((unused)) -static void test_new_parser(void) +static void test_new_parser_ad_hoc(void) { - say(L"Testing new parser"); - const wcstring src = L"echo hello world"; + /* Very ad-hoc tests for issues encountered */ + say(L"Testing new parser ad hoc tests"); + + /* Ensure that 'case' terminates a job list */ + const wcstring src = L"switch foo ; case bar; case baz; end"; parse_node_tree_t parse_tree; bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL); if (! 
success) { - say(L"Parsing failed"); + err(L"Parsing failed"); } - else + + /* Expect three case_item_lists: one for each case, and a terminal one. The bug was that we'd try to run a command 'case' */ + const parse_node_t &root = parse_tree.at(0); + const parse_node_tree_t::parse_node_list_t node_list = parse_tree.find_nodes(root, symbol_case_item_list); + if (node_list.size() != 3) { -#if 0 - parse_execution_context_t ctx(parse_tree, src); - say(L"Simulating execution:"); - wcstring simulation = ctx.simulate(); - say(simulation.c_str()); -#endif + err(L"Expected 3 case item nodes, found %lu", node_list.size()); } } @@ -2415,7 +2566,7 @@ int main(int argc, char **argv) if (should_test_function("new_parser_ll2")) test_new_parser_ll2(); if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive if (should_test_function("new_parser_correctness")) test_new_parser_correctness(); - if (should_test_function("new_parser")) test_new_parser(); + if (should_test_function("new_parser_ad_hoc")) test_new_parser_ad_hoc(); if (should_test_function("escape")) test_unescape_sane(); if (should_test_function("escape")) test_escape_crazy(); if (should_test_function("format")) test_format(); @@ -2425,6 +2576,7 @@ int main(int argc, char **argv) if (should_test_function("fork")) test_fork(); if (should_test_function("iothread")) test_iothread(); if (should_test_function("parser")) test_parser(); + if (should_test_function("indents")) test_indents(); if (should_test_function("utils")) test_utils(); if (should_test_function("escape_sequences")) test_escape_sequences(); if (should_test_function("lru")) test_lru(); @@ -2447,6 +2599,8 @@ int main(int argc, char **argv) //history_tests_t::test_history_speed(); say(L"Encountered %d errors in low-level tests", err_count); + if (s_test_run_count == 0) + say(L"*** No Tests Were Actually Run! ***"); /* Skip performance tests for now, since they seem to hang when running from inside make (?) 
diff --git a/parse_productions.cpp b/parse_productions.cpp index 227955453..c3ab9c3ab 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -51,6 +51,7 @@ RESOLVE(job_list) { case parse_keyword_end: case parse_keyword_else: + case parse_keyword_case: // End this job list return 0; diff --git a/parse_tree.cpp b/parse_tree.cpp index 24cf45986..ad0dd0ea9 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -240,10 +240,10 @@ static inline parse_token_type_t parse_token_type_from_tokenizer_token(enum toke } /* Helper function for dump_tree */ -static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) +static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, node_offset_t node_idx, size_t indent, wcstring *result, size_t *line) { - assert(start < nodes.size()); - const parse_node_t &node = nodes.at(start); + assert(node_idx < nodes.size()); + const parse_node_t &node = nodes.at(node_idx); const size_t spacesPerIndent = 2; @@ -253,26 +253,33 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & if (indent > 0) indent -= 1; } - append_format(*result, L"%2lu - %l2u ", *line, start); + append_format(*result, L"%2lu - %l2u ", *line, node_idx); result->append(indent * spacesPerIndent, L' ');; result->append(node.describe()); if (node.child_count > 0) { append_format(*result, L" <%lu children>", node.child_count); } - if (node.type == parse_token_type_string) + + if (node.has_source() && node.type == parse_token_type_string) { - if (node.source_start == -1) + result->append(L": \""); + result->append(src, node.source_start, node.source_length); + result->append(L"\""); + } + + if (node.type != parse_token_type_string) + { + if (node.has_source()) { - append_format(*result, L" (no source)"); + append_format(*result, L" [%ld, %ld]", (long)node.source_start, (long)node.source_length); } else { - result->append(L": \""); - 
result->append(src, node.source_start, node.source_length); - result->append(L"\""); + append_format(*result, L" [no src]", (long)node.source_start, (long)node.source_length); } } + result->push_back(L'\n'); ++*line; for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++) @@ -658,7 +665,8 @@ bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token) if (matched) { - // Success. Tell the node that it matched this token + // Success. Tell the node that it matched this token, and what its source range is + // In the parse phase, we only set source ranges for terminal types. We propagate ranges to parent nodes afterwards. parse_node_t &node = node_for_top_symbol(); node.source_start = token.source_start; node.source_length = token.source_length; diff --git a/parse_tree.h b/parse_tree.h index 8621cea8f..d5b48331f 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -253,7 +253,6 @@ public: /* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. 
*/ const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; - /* Find all the nodes of a given type underneath a given node */ typedef std::vector parse_node_list_t; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; diff --git a/parse_util.cpp b/parse_util.cpp index abcf019c3..842c6f75b 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -38,6 +38,7 @@ #include "env.h" #include "signal.h" #include "wildcard.h" +#include "parse_tree.h" /** Maximum number of autoloaded items opf a specific type to keep in @@ -804,3 +805,117 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote) } return result; } + +/* We are given a parse tree, the index of a node within the tree, its indent, and a vector of indents the same size as the original source string. Set the indent corresponding to the node's source range, if appropriate. + + trailing_indent is the indent for nodes with unrealized source, i.e. if I type 'if false ' then we have an if node with an empty job list (without source) but we want the last line to be indented anyways. + + switch statements also indent. +*/ +static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx, int node_indent, parse_token_type_t parent_type, std::vector *indents, int *trailing_indent) +{ + /* Guard against incomplete trees */ + if (node_idx > tree.size()) + return; + + /* We could implement this by utilizing the fish grammar. But there's an easy trick instead: almost everything that wraps a job list should be indented by 1. So just find all of the job lists. One exception is switch; the other exception is job_list itself: a job_list is a job and a job_list, and we want that child list to be indented the same as the parent. So just find all job_lists whose parent is not a job_list, and increment their indent by 1. 
*/ + + const parse_node_t &node = tree.at(node_idx); + const parse_token_type_t node_type = node.type; + + /* Increment the indent if we are either a root job_list, or root case_item_list */ + const bool is_root_job_list = (node_type == symbol_job_list && parent_type != symbol_job_list); + const bool is_root_case_item_list = (node_type == symbol_case_item_list && parent_type != symbol_case_item_list); + if (is_root_job_list || is_root_case_item_list) + { + node_indent += 1; + } + + /* If we have source, store the trailing indent unconditionally. If we do not have source, store the trailing indent only if ours is bigger; this prevents the trailing "run" of terminal job lists from affecting the trailing indent. For example, code like this: + + if foo + + will be parsed as this: + + job_list + job + if_statement + job [if] + job_list [empty] + job_list [empty] + + There's two "terminal" job lists, and we want the innermost one. + + Note we are relying on the fact that nodes are in the same order as the source, i.e. an in-order traversal of the node tree also traverses the source from beginning to end. + */ + if (node.has_source() || node_indent > *trailing_indent) + { + *trailing_indent = node_indent; + } + + + /* Store the indent into the indent array */ + if (node.has_source()) + { + assert(node.source_start < indents->size()); + indents->at(node.source_start) = node_indent; + } + + + /* Recursive to all our children */ + for (node_offset_t idx = 0; idx < node.child_count; idx++) + { + /* Note we pass our type to our child, which becomes its parent node type */ + compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents, trailing_indent); + } +} + +std::vector parse_util_compute_indents(const wcstring &src) +{ + /* Make a vector the same size as the input string, which contains the indents. Initialize them to -1. 
*/ + const size_t src_size = src.size(); + std::vector indents(src_size, -1); + + parse_node_tree_t tree; + parse_t::parse(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */); + + /* The indent that we'll get for the last line */ + int trailing_indent = 0; + + /* Invoke the recursive version. As a hack, pass job_list for the 'parent' token, which will prevent the really-root job list from indenting */ + compute_indents_recursive(tree, 0 /* node index */, 0/* current indent */, symbol_job_list, &indents, &trailing_indent); + + int last_indent = 0; + for (size_t i=0; i parse_util_compute_indents(const wcstring &src); #endif diff --git a/parser.cpp b/parser.cpp index 974ba1fe4..f9282a242 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2911,7 +2911,7 @@ struct block_info_t bool has_had_case; //if we are a switch, whether we've encountered a case }; -int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const wchar_t *prefix) +parser_test_error_bits_t parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const wchar_t *prefix) { ASSERT_IS_MAIN_THREAD(); @@ -2926,7 +2926,6 @@ int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const w // These are very nearly stacks, but sometimes we have to inspect non-top elements (e.g. return) std::vector block_infos; int indentation_sum = 0; //sum of indentation in block_infos - int res = 0; /* Set to 1 if the current command is inside a pipeline @@ -3704,6 +3703,8 @@ int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const w if (! 
block_infos.empty()) unfinished = 1; + parser_test_error_bits_t res = 0; + if (err) res |= PARSER_TEST_ERROR; diff --git a/parser.h b/parser.h index 8b43b83fc..076baceff 100644 --- a/parser.h +++ b/parser.h @@ -13,8 +13,11 @@ #include "function.h" #include -#define PARSER_TEST_ERROR 1 -#define PARSER_TEST_INCOMPLETE 2 +enum { + PARSER_TEST_ERROR = 1, + PARSER_TEST_INCOMPLETE = 2 +}; +typedef unsigned int parser_test_error_bits_t; /** event_blockage_t represents a block on events of the specified type @@ -484,7 +487,7 @@ public: \param out if non-null, any errors in the command will be filled out into this buffer \param prefix the prefix string to prepend to each error message written to the \c out buffer */ - int test(const wchar_t * buff, int *block_level = NULL, wcstring *out = NULL, const wchar_t *prefix = NULL); + parser_test_error_bits_t test(const wchar_t * buff, int *block_level = NULL, wcstring *out = NULL, const wchar_t *prefix = NULL); /** Test if the specified string can be parsed as an argument list, diff --git a/reader.cpp b/reader.cpp index a09a0bda6..ac0e52f51 100644 --- a/reader.cpp +++ b/reader.cpp @@ -519,7 +519,14 @@ wcstring combine_command_and_autosuggestion(const wcstring &cmdline, const wcstr static void reader_repaint() { // Update the indentation - parser_t::principal_parser().test(data->command_line.c_str(), &data->indents[0]); + if (0) + { + parser_t::principal_parser().test(data->command_line.c_str(), &data->indents[0]); + } + else + { + data->indents = parse_util_compute_indents(data->command_line); + } // Combine the command and autosuggestion into one string wcstring full_line = combine_command_and_autosuggestion(data->command_line, data->autosuggestion); From 67b1f14a6f6d095af9141640c6d26771a5236693 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 8 Dec 2013 14:13:23 -0800 Subject: [PATCH 050/108] Better support for parse errors in indenting --- builtin.cpp | 12 +++++++----- fish_tests.cpp | 9 ++++++++- parse_util.cpp | 35 
++++++++++++++++++++++++++++------- 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 59f3cf719..3172dbdc5 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3985,12 +3985,14 @@ int builtin_parse(parser_t &parser, wchar_t **argv) stdout_buffer.append(errors.at(i).describe(src)); stdout_buffer.push_back(L'\n'); } + + stdout_buffer.append(L"(Reparsed with continue after error)\n"); + parse_tree.clear(); + errors.clear(); + parse_t::parse(src, parse_flag_continue_after_error, &parse_tree, &errors, true); } - else - { - const wcstring dump = parse_dump_tree(parse_tree, src); - fprintf(stderr, "%ls", dump.c_str()); - } + const wcstring dump = parse_dump_tree(parse_tree, src); + fprintf(stderr, "%ls", dump.c_str()); } return STATUS_BUILTIN_OK; } diff --git a/fish_tests.cpp b/fish_tests.cpp index b4d37f8b8..5e488aae6 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -741,10 +741,17 @@ static void test_indents() {L"", 2}, {NULL, -1} }; + + const indent_component_t components11[] = + { + {L"switch foo", 0}, + {L"cas", 1}, //parse error indentation handling + {NULL, -1} + }; - const indent_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9, components10}; + const indent_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9, components10, components11}; for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { const indent_component_t *components = tests[which]; diff --git a/parse_util.cpp b/parse_util.cpp index 842c6f75b..cb33915e3 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -811,12 +811,18 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote) trailing_indent is the indent for nodes with unrealized source, i.e. 
if I type 'if false ' then we have an if node with an empty job list (without source) but we want the last line to be indented anyways. switch statements also indent. + + max_visited_node_idx is the largest index we visited. */ -static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx, int node_indent, parse_token_type_t parent_type, std::vector *indents, int *trailing_indent) +static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx, int node_indent, parse_token_type_t parent_type, std::vector *indents, int *trailing_indent, node_offset_t *max_visited_node_idx) { /* Guard against incomplete trees */ if (node_idx > tree.size()) return; + + /* Update max_visited_node_idx */ + if (node_idx > *max_visited_node_idx) + *max_visited_node_idx = node_idx; /* We could implement this by utilizing the fish grammar. But there's an easy trick instead: almost everything that wraps a job list should be indented by 1. So just find all of the job lists. One exception is switch; the other exception is job_list itself: a job_list is a job and a job_list, and we want that child list to be indented the same as the parent. So just find all job_lists whose parent is not a job_list, and increment their indent by 1. */ @@ -866,7 +872,7 @@ static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset for (node_offset_t idx = 0; idx < node.child_count; idx++) { /* Note we pass our type to our child, which becomes its parent node type */ - compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents, trailing_indent); + compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents, trailing_indent, max_visited_node_idx); } } @@ -876,14 +882,29 @@ std::vector parse_util_compute_indents(const wcstring &src) const size_t src_size = src.size(); std::vector indents(src_size, -1); + /* Parse the string. 
We pass continue_after_error to produce a forest; the trailing indent of the last node we visited becomes the input indent of the next. I.e. in the case of 'switch foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it were a case item list */ parse_node_tree_t tree; parse_t::parse(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */); - /* The indent that we'll get for the last line */ - int trailing_indent = 0; + /* Start indenting at the first node. If we have a parse error, we'll have to start indenting from the top again */ + node_offset_t start_node_idx = 0; + int last_trailing_indent = 0; - /* Invoke the recursive version. As a hack, pass job_list for the 'parent' token, which will prevent the really-root job list from indenting */ - compute_indents_recursive(tree, 0 /* node index */, 0/* current indent */, symbol_job_list, &indents, &trailing_indent); + while (start_node_idx < tree.size()) + { + /* The indent that we'll get for the last line */ + int trailing_indent = 0; + + /* Biggest offset we visited */ + node_offset_t max_visited_node_idx = 0; + + /* Invoke the recursive version. As a hack, pass job_list for the 'parent' token type, which will prevent the really-root job list from indenting */ + compute_indents_recursive(tree, start_node_idx, last_trailing_indent, symbol_job_list, &indents, &trailing_indent, &max_visited_node_idx); + + /* We may have more to indent. The trailing indent becomes our current indent. Start at the node after the last we visited. 
*/ + last_trailing_indent = trailing_indent; + start_node_idx = max_visited_node_idx + 1; + } int last_indent = 0; for (size_t i=0; i parse_util_compute_indents(const wcstring &src) { if (!wcschr(L" \n\t\r", src.at(suffix_idx))) break; - indents.at(suffix_idx) = trailing_indent; + indents.at(suffix_idx) = last_trailing_indent; } return indents; From 925fe65dd8d51f481217ef1c11647a39ddf56351 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 8 Dec 2013 16:22:06 -0800 Subject: [PATCH 051/108] Remove the indentation part of parser_t::test(). Rename it to detect_errors(). --- builtin_complete.cpp | 4 +- fish_tests.cpp | 24 +++++----- parser.cpp | 103 ++----------------------------------------- parser.h | 2 +- reader.cpp | 15 ++----- 5 files changed, 23 insertions(+), 125 deletions(-) diff --git a/builtin_complete.cpp b/builtin_complete.cpp index 4bfab1b7c..14b3a4b74 100644 --- a/builtin_complete.cpp +++ b/builtin_complete.cpp @@ -497,14 +497,14 @@ static int builtin_complete(parser_t &parser, wchar_t **argv) { if (condition && wcslen(condition)) { - if (parser.test(condition)) + if (parser.detect_errors(condition)) { append_format(stderr_buffer, L"%ls: Condition '%ls' contained a syntax error\n", argv[0], condition); - parser.test(condition, NULL, &stderr_buffer, argv[0]); + parser.detect_errors(condition, &stderr_buffer, argv[0]); res = true; } diff --git a/fish_tests.cpp b/fish_tests.cpp index 5e488aae6..16f657fc3 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -585,52 +585,52 @@ static void test_parser() parser_t parser(PARSER_TYPE_GENERAL, true); say(L"Testing null input to parser"); - if (!parser.test(NULL)) + if (!parser.detect_errors(NULL)) { - err(L"Null input to parser.test undetected"); + err(L"Null input to parser.detect_errors undetected"); } say(L"Testing block nesting"); - if (!parser.test(L"if; end")) + if (!parser.detect_errors(L"if; end")) { err(L"Incomplete if statement undetected"); } - if (!parser.test(L"if test; echo")) + if 
(!parser.detect_errors(L"if test; echo")) { err(L"Missing end undetected"); } - if (!parser.test(L"if test; end; end")) + if (!parser.detect_errors(L"if test; end; end")) { err(L"Unbalanced end undetected"); } say(L"Testing detection of invalid use of builtin commands"); - if (!parser.test(L"case foo")) + if (!parser.detect_errors(L"case foo")) { err(L"'case' command outside of block context undetected"); } - if (!parser.test(L"switch ggg; if true; case foo;end;end")) + if (!parser.detect_errors(L"switch ggg; if true; case foo;end;end")) { err(L"'case' command outside of switch block context undetected"); } - if (!parser.test(L"else")) + if (!parser.detect_errors(L"else")) { err(L"'else' command outside of conditional block context undetected"); } - if (!parser.test(L"else if")) + if (!parser.detect_errors(L"else if")) { err(L"'else if' command outside of conditional block context undetected"); } - if (!parser.test(L"if false; else if; end")) + if (!parser.detect_errors(L"if false; else if; end")) { err(L"'else if' missing command undetected"); } - if (!parser.test(L"break")) + if (!parser.detect_errors(L"break")) { err(L"'break' command outside of loop block context undetected"); } - if (!parser.test(L"exec ls|less") || !parser.test(L"echo|return")) + if (!parser.detect_errors(L"exec ls|less") || !parser.detect_errors(L"echo|return")) { err(L"Invalid pipe command undetected"); } diff --git a/parser.cpp b/parser.cpp index f9282a242..02a4dffc9 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2771,7 +2771,7 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha // debug( 1, L"%ls -> %ls %ls", arg_cpy, subst, tmp.buff ); - err |= parser_t::test(subst, 0, out, prefix); + err |= parser_t::detect_errors(subst, out, prefix); free(subst); free(arg_cpy); @@ -2906,12 +2906,9 @@ struct block_info_t { int position; //tokenizer position block_type_t type; //type of the block - int indentation; //indentation associated with the block - - bool 
has_had_case; //if we are a switch, whether we've encountered a case }; -parser_test_error_bits_t parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const wchar_t *prefix) +parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *out, const wchar_t *prefix) { ASSERT_IS_MAIN_THREAD(); @@ -2923,9 +2920,8 @@ parser_test_error_bits_t parser_t::test(const wchar_t *buff, int *block_level, w int err=0; int unfinished = 0; - // These are very nearly stacks, but sometimes we have to inspect non-top elements (e.g. return) + // This is very nearly a stack, but sometimes we have to inspect non-top elements (e.g. return) std::vector block_infos; - int indentation_sum = 0; //sum of indentation in block_infos /* Set to 1 if the current command is inside a pipeline @@ -2958,16 +2954,6 @@ parser_test_error_bits_t parser_t::test(const wchar_t *buff, int *block_level, w CHECK(buff, 1); - if (block_level) - { - size_t len = wcslen(buff); - for (size_t i=0; i tokenizer_push(¤t_tokenizer, &tok); @@ -3059,53 +3045,18 @@ parser_test_error_bits_t parser_t::test(const wchar_t *buff, int *block_level, w } else { - indentation_sum -= block_infos.back().indentation; block_infos.pop_back(); } } - /* - Store the block level. This needs to be done - _after_ checking for end commands, but _before_ - checking for block opening commands. - */ - if (block_level != NULL) - { - int indentation_adjust = 0; - if (command == L"else") - { - // if or else if goes back - indentation_adjust = -1; - } - else if (command == L"case") - { - if (! block_infos.empty() && block_infos.back().type == SWITCH) - { - // mark that we've encountered a case, and increase the indentation - // by doing this now, we avoid overly indenting the first case as the user types it - if (! 
block_infos.back().has_had_case) - { - block_infos.back().has_had_case = true; - block_infos.back().indentation += 1; - indentation_sum += 1; - } - // unindent this case - indentation_adjust = -1; - } - } - - block_level[tok_get_pos(&tok)] = indentation_sum + indentation_adjust; - } - /* Handle block commands */ if (parser_keywords_is_block(command)) { - struct block_info_t info = {current_tokenizer_pos, parser_get_block_type(command), 1 /* indent */}; + struct block_info_t info = {current_tokenizer_pos, parser_get_block_type(command)}; block_infos.push_back(info); - indentation_sum += info.indentation; tok_next(&tok); tok_set_pos(&tok, mark); } @@ -3651,52 +3602,6 @@ parser_test_error_bits_t parser_t::test(const wchar_t *buff, int *block_level, w } - /* - Fill in the unset block_level entries. Until now, only places - where the block level _changed_ have been filled out. This fills - in the rest. - */ - - if (block_level) - { - int last_level = 0; - size_t i, len = wcslen(buff); - for (i=0; i= 0) - { - last_level = block_level[i]; - /* - Make all whitespace before a token have the new - level. This avoid using the wrong indentation level - if a new line starts with whitespace. - */ - size_t prev_char_idx = i; - while (prev_char_idx--) - { - if (!wcschr(L" \n\t\r", buff[prev_char_idx])) - break; - block_level[prev_char_idx] = last_level; - } - } - block_level[i] = last_level; - } - - /* - Make all trailing whitespace have the block level that the - validator had at exit. This makes sure a new line is - correctly indented even if it is empty. - */ - int last_indent = block_infos.empty() ? 
0 : block_infos.back().indentation; - size_t suffix_idx = len; - while (suffix_idx--) - { - if (!wcschr(L" \n\t\r", buff[suffix_idx])) - break; - block_level[suffix_idx] = last_indent; - } - } - /* Calculate exit status */ diff --git a/parser.h b/parser.h index 076baceff..39973665d 100644 --- a/parser.h +++ b/parser.h @@ -487,7 +487,7 @@ public: \param out if non-null, any errors in the command will be filled out into this buffer \param prefix the prefix string to prepend to each error message written to the \c out buffer */ - parser_test_error_bits_t test(const wchar_t * buff, int *block_level = NULL, wcstring *out = NULL, const wchar_t *prefix = NULL); + parser_test_error_bits_t detect_errors(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); /** Test if the specified string can be parsed as an argument list, diff --git a/reader.cpp b/reader.cpp index ac0e52f51..df7f070e5 100644 --- a/reader.cpp +++ b/reader.cpp @@ -519,14 +519,7 @@ wcstring combine_command_and_autosuggestion(const wcstring &cmdline, const wcstr static void reader_repaint() { // Update the indentation - if (0) - { - parser_t::principal_parser().test(data->command_line.c_str(), &data->indents[0]); - } - else - { - data->indents = parse_util_compute_indents(data->command_line); - } + data->indents = parse_util_compute_indents(data->command_line); // Combine the command and autosuggestion into one string wcstring full_line = combine_command_and_autosuggestion(data->command_line, data->autosuggestion); @@ -2479,7 +2472,7 @@ void reader_run_command(parser_t &parser, const wcstring &cmd) int reader_shell_test(const wchar_t *b) { - int res = parser_t::principal_parser().test(b); + int res = parser_t::principal_parser().detect_errors(b); if (res & PARSER_TEST_ERROR) { @@ -2499,7 +2492,7 @@ int reader_shell_test(const wchar_t *b) 0); - parser_t::principal_parser().test(b, NULL, &sb, L"fish"); + parser_t::principal_parser().detect_errors(b, &sb, L"fish"); fwprintf(stderr, L"%ls", 
sb.c_str()); } return res; @@ -3911,7 +3904,7 @@ static int read_ni(int fd, const io_chain_t &io) } wcstring sb; - if (! parser.test(str.c_str(), 0, &sb, L"fish")) + if (! parser.detect_errors(str.c_str(), &sb, L"fish")) { parser.eval(str, io, TOP); } From 5769fa6aed981e02d9e0f9f7c83f77e677f8c84f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 8 Dec 2013 18:16:55 -0800 Subject: [PATCH 052/108] Fix for off-by-one error in tokenizer error message reporting --- tokenizer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tokenizer.cpp b/tokenizer.cpp index 7d911d300..113f8926e 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -64,7 +64,6 @@ static const wchar_t *tok_desc[] = { N_(L"Tokenizer not yet initialized"), N_(L"Tokenizer error"), - N_(L"Invalid token"), N_(L"String"), N_(L"Pipe"), N_(L"End of command"), @@ -77,6 +76,8 @@ static const wchar_t *tok_desc[] = N_(L"Comment") }; + + /** Set the latest tokens string to be the specified error message */ @@ -559,7 +560,7 @@ static bool my_iswspace(wchar_t c) const wchar_t *tok_get_desc(int type) { - if (type < 0 || (size_t)type >= sizeof(tok_desc)) + if (type < 0 || (size_t)type >= (sizeof tok_desc / sizeof *tok_desc)) { return _(L"Invalid token type"); } From 7a3f5afee7b6a6ab9f801ca3cd65c2c552554987 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 8 Dec 2013 21:54:06 -0800 Subject: [PATCH 053/108] Initial work towars improved error reporting. Tests currently fail. 
--- fish.xcodeproj/project.pbxproj | 2 + fish_tests.cpp | 55 +++++- parse_constants.h | 320 +++++++++++++++++++++++++++++++++ parse_productions.cpp | 2 +- parse_tree.cpp | 126 ++++++++++--- parse_tree.h | 96 +--------- parser.cpp | 16 +- tokenizer.cpp | 8 - wutil.cpp | 2 +- 9 files changed, 483 insertions(+), 144 deletions(-) create mode 100644 parse_constants.h diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index 2417de453..aa7f9c18a 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -513,6 +513,7 @@ D0D02AE415986537008E62BD /* fish_pager */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish_pager; sourceTree = BUILT_PRODUCTS_DIR; }; D0D02AFA159871B2008E62BD /* osx_fish_launcher.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = osx_fish_launcher.m; path = osx/osx_fish_launcher.m; sourceTree = ""; }; D0D2693C159835CA005D9B9C /* fish */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish; sourceTree = BUILT_PRODUCTS_DIR; }; + D0D9B2B318555D92001AE279 /* parse_constants.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parse_constants.h; sourceTree = ""; }; D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_test.cpp; sourceTree = ""; }; D0F5E28415A7A32D00315DFF /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = ""; }; D0FE8EE6179CA8A5008C9F21 /* parse_productions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_productions.h; sourceTree = ""; }; @@ -659,6 +660,7 @@ D0A0853D13B3ACEE0099B651 /* expand.cpp */, D0FE8EE6179CA8A5008C9F21 /* parse_productions.h */, D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */, + 
D0D9B2B318555D92001AE279 /* parse_constants.h */, D0C52F361765284C00BFAB82 /* parse_tree.h */, D0C52F351765284C00BFAB82 /* parse_tree.cpp */, D0A0850D13B3ACEE0099B651 /* fallback.h */, diff --git a/fish_tests.cpp b/fish_tests.cpp index 16f657fc3..c43381b3f 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2333,7 +2333,7 @@ static void test_new_parser_ll2(void) } } -static void test_new_parser_ad_hoc(void) +static void test_new_parser_ad_hoc() { /* Very ad-hoc tests for issues encountered */ say(L"Testing new parser ad hoc tests"); @@ -2356,6 +2356,58 @@ static void test_new_parser_ad_hoc(void) } } +static void test_new_parser_errors(void) +{ + say(L"Testing new parser error reporting"); + const struct + { + const wchar_t *src; + parse_error_code_t code; + } + tests[] = + { + {L"echo (abc", parse_error_tokenizer}, + + {L"end", parse_error_unbalancing_end}, + {L"echo hi ; end", parse_error_unbalancing_end}, + + {L"else", parse_error_unbalancing_else}, + {L"if true ; end ; else", parse_error_unbalancing_else}, + + {L"case", parse_error_unbalancing_case}, + {L"if true ; case ; end", parse_error_unbalancing_case} + }; + + for (size_t i = 0; i < sizeof tests / sizeof *tests; i++) + { + const wcstring src = tests[i].src; + parse_error_code_t expected_code = tests[i].code; + + parse_error_list_t errors; + parse_node_tree_t parse_tree; + bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors); + if (success) + { + err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str()); + } + + if (errors.size() != 1) + { + err(L"Source '%ls' was expected to produce 1 error, but instead produced %lu errors", src.c_str(), errors.size()); + } + else if (errors.at(0).code != expected_code) + { + err(L"Source '%ls' was expected to produce error code %lu, but instead produced error code %lu", src.c_str(), expected_code, (unsigned long)errors.at(0).code); + for (size_t i=0; i < errors.size(); i++) + { + err(L"\t\t%ls", 
errors.at(i).describe(src).c_str()); + } + } + + } + +} + static void test_highlighting(void) { say(L"Testing syntax highlighting"); @@ -2574,6 +2626,7 @@ int main(int argc, char **argv) if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive if (should_test_function("new_parser_correctness")) test_new_parser_correctness(); if (should_test_function("new_parser_ad_hoc")) test_new_parser_ad_hoc(); + if (should_test_function("new_parser_errors")) test_new_parser_errors(); if (should_test_function("escape")) test_unescape_sane(); if (should_test_function("escape")) test_escape_crazy(); if (should_test_function("format")) test_format(); diff --git a/parse_constants.h b/parse_constants.h new file mode 100644 index 000000000..d9f362120 --- /dev/null +++ b/parse_constants.h @@ -0,0 +1,320 @@ +/**\file parse_constants.h + + Constants used in the programmatic representation of fish code. +*/ + +#ifndef fish_parse_constants_h +#define fish_parse_constants_h + +#define PARSE_ASSERT(a) assert(a) +#define PARSER_DIE() do { fprintf(stderr, "Parser dying!\n"); exit_without_destructors(-1); } while (0) + + +enum parse_token_type_t +{ + token_type_invalid, + + // Non-terminal tokens + symbol_job_list, + symbol_job, + symbol_job_continuation, + symbol_statement, + symbol_block_statement, + symbol_block_header, + symbol_for_header, + symbol_while_header, + symbol_begin_header, + symbol_function_header, + + symbol_if_statement, + symbol_if_clause, + symbol_else_clause, + symbol_else_continuation, + + symbol_switch_statement, + symbol_case_item_list, + symbol_case_item, + + symbol_boolean_statement, + symbol_decorated_statement, + symbol_plain_statement, + symbol_arguments_or_redirections_list, + symbol_argument_or_redirection, + + symbol_argument_list, + + symbol_argument, + symbol_redirection, + + symbol_optional_background, + + // Terminal types + parse_token_type_string, + parse_token_type_pipe, + parse_token_type_redirection, + 
parse_token_type_background, + parse_token_type_end, + parse_token_type_terminate, + + // Very special terminal types that don't appear in the production list + parse_special_type_parse_error, + parse_special_type_tokenizer_error, + parse_special_type_comment, + + FIRST_TERMINAL_TYPE = parse_token_type_string, + LAST_TERMINAL_TYPE = parse_token_type_terminate, + + LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, + FIRST_PARSE_TOKEN_TYPE = parse_token_type_string +}; + +enum parse_keyword_t +{ + parse_keyword_none, + parse_keyword_if, + parse_keyword_else, + parse_keyword_for, + parse_keyword_in, + parse_keyword_while, + parse_keyword_begin, + parse_keyword_function, + parse_keyword_switch, + parse_keyword_case, + parse_keyword_end, + parse_keyword_and, + parse_keyword_or, + parse_keyword_not, + parse_keyword_command, + parse_keyword_builtin, + + LAST_KEYWORD = parse_keyword_builtin +}; + +/* Statement decorations. This matches the order of productions in decorated_statement */ +enum parse_statement_decoration_t +{ + parse_statement_decoration_none, + parse_statement_decoration_command, + parse_statement_decoration_builtin +}; + +/* Parse error code list */ +enum parse_error_code_t +{ + parse_error_none, + parse_error_generic, //unknown type + + parse_error_tokenizer, //tokenizer error + + parse_error_unbalancing_end, //end outside of block + parse_error_unbalancing_else, //else outside of if + parse_error_unbalancing_case, //case outside of switch +}; + + +/** + Error message for tokenizer error. The tokenizer message is + appended to this message. +*/ +#define TOK_ERR_MSG _( L"Tokenizer error: '%ls'") + +/** + Error message for short circuit command error. 
+*/ +#define COND_ERR_MSG _( L"An additional command is required" ) + +/** + Error message on a function that calls itself immediately +*/ +#define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.") + +/** + Error message on reaching maximum recursion depth +*/ +#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?") + +/** + Error message used when the end of a block can't be located +*/ +#define BLOCK_END_ERR_MSG _( L"Could not locate end of block. The 'end' command is missing, misspelled or a ';' is missing.") + +/** + Error message when a non-string token is found when expecting a command name +*/ +#define CMD_ERR_MSG _( L"Expected a command name, got token of type '%ls'") + +/** + Error message when a non-string token is found when expecting a command name +*/ +#define CMD_OR_ERR_MSG _( L"Expected a command name, got token of type '%ls'. Did you mean 'COMMAND; or COMMAND'? See the help section for the 'or' builtin command by typing 'help or'.") + +/** + Error message when a non-string token is found when expecting a command name +*/ +#define CMD_AND_ERR_MSG _( L"Expected a command name, got token of type '%ls'. Did you mean 'COMMAND; and COMMAND'? 
See the help section for the 'and' builtin command by typing 'help and'.") + +/** + Error message when encountering an illegal command name +*/ +#define ILLEGAL_CMD_ERR_MSG _( L"Illegal command name '%ls'") + +/** + Error message when encountering an illegal file descriptor +*/ +#define ILLEGAL_FD_ERR_MSG _( L"Illegal file descriptor '%ls'") + +/** + Error message for wildcards with no matches +*/ +#define WILDCARD_ERR_MSG _( L"No matches for wildcard '%ls'.") + +/** + Error when using case builtin outside of switch block +*/ +#define INVALID_CASE_ERR_MSG _( L"'case' builtin not inside of switch block") + +/** + Error when using loop control builtins (break or continue) outside of loop +*/ +#define INVALID_LOOP_ERR_MSG _( L"Loop control command while not inside of loop" ) + +/** + Error when using return builtin outside of function definition +*/ +#define INVALID_RETURN_ERR_MSG _( L"'return' builtin command outside of function definition" ) + +/** + Error when using else builtin outside of if block +*/ +#define INVALID_ELSE_ERR_MSG _( L"'%ls' builtin not inside of if block" ) + +/** + Error when using 'else if' past a naked 'else' +*/ +#define INVALID_ELSEIF_PAST_ELSE_ERR_MSG _( L"'%ls' used past terminating 'else'" ) + +/** + Error when using end builtin outside of block +*/ +#define INVALID_END_ERR_MSG _( L"'end' command outside of block") + +/** + Error message for Posix-style assignment: foo=bar +*/ +#define COMMAND_ASSIGN_ERR_MSG _( L"Unknown command '%ls'. Did you mean 'set %ls %ls'? See the help section on the set command by typing 'help set'.") + +/** + Error for invalid redirection token +*/ +#define REDIRECT_TOKEN_ERR_MSG _( L"Expected redirection specification, got token of type '%ls'") + +/** + Error when encountering redirection without a command +*/ +#define INVALID_REDIRECTION_ERR_MSG _( L"Encountered redirection when expecting a command name. 
Fish does not allow a redirection operation before a command.") + +/** + Error for evaluating null pointer +*/ +#define EVAL_NULL_ERR_MSG _( L"Tried to evaluate null pointer." ) + +/** + Error for evaluating in illegal scope +*/ +#define INVALID_SCOPE_ERR_MSG _( L"Tried to evaluate commands using invalid block type '%ls'" ) + + +/** + Error for wrong token type +*/ +#define UNEXPECTED_TOKEN_ERR_MSG _( L"Unexpected token of type '%ls'") + +/** + While block description +*/ +#define WHILE_BLOCK N_( L"'while' block" ) + +/** + For block description +*/ +#define FOR_BLOCK N_( L"'for' block" ) + +/** + Breakpoint block +*/ +#define BREAKPOINT_BLOCK N_( L"Block created by breakpoint" ) + + + +/** + If block description +*/ +#define IF_BLOCK N_( L"'if' conditional block" ) + + +/** + Function definition block description +*/ +#define FUNCTION_DEF_BLOCK N_( L"function definition block" ) + + +/** + Function invocation block description +*/ +#define FUNCTION_CALL_BLOCK N_( L"function invocation block" ) + +/** + Function invocation block description +*/ +#define FUNCTION_CALL_NO_SHADOW_BLOCK N_( L"function invocation block with no variable shadowing" ) + + +/** + Switch block description +*/ +#define SWITCH_BLOCK N_( L"'switch' block" ) + + +/** + Fake block description +*/ +#define FAKE_BLOCK N_( L"unexecutable block" ) + + +/** + Top block description +*/ +#define TOP_BLOCK N_( L"global root block" ) + + +/** + Command substitution block description +*/ +#define SUBST_BLOCK N_( L"command substitution block" ) + + +/** + Begin block description +*/ +#define BEGIN_BLOCK N_( L"'begin' unconditional block" ) + + +/** + Source block description +*/ +#define SOURCE_BLOCK N_( L"Block created by the . 
builtin" ) + +/** + Event handler block description +*/ +#define EVENT_BLOCK N_( L"event handler block" ) + + +/** + Unknown block description +*/ +#define UNKNOWN_BLOCK N_( L"unknown/invalid block" ) + + + +#endif diff --git a/parse_productions.cpp b/parse_productions.cpp index c3ab9c3ab..9053c5da7 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -46,7 +46,7 @@ RESOLVE(job_list) switch (token1.type) { case parse_token_type_string: - // 'end' is special + // some keywords are special switch (token1.keyword) { case parse_keyword_end: diff --git a/parse_tree.cpp b/parse_tree.cpp index ad0dd0ea9..a249e45ef 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -4,6 +4,11 @@ using namespace parse_productions; +static bool production_is_empty(const production_t *production) +{ + return (*production)[0] == token_type_invalid; +} + /** Returns a string description of this parse error */ wcstring parse_error_t::describe(const wcstring &src) const { @@ -18,7 +23,7 @@ wcstring parse_error_t::describe(const wcstring &src) const //fprintf(stderr, "newline: %lu, source_start %lu, source_length %lu\n", newline, source_start, source_length); if (newline != wcstring::npos) { - line_start = newline;// + 1; + line_start = newline + 1; } size_t line_end = src.find(L'\n', source_start + source_length); @@ -155,6 +160,8 @@ wcstring keyword_description(parse_keyword_t k) return L"function"; case parse_keyword_switch: return L"switch"; + case parse_keyword_case: + return L"case"; case parse_keyword_end: return L"end"; case parse_keyword_and: @@ -167,9 +174,8 @@ wcstring keyword_description(parse_keyword_t k) return L"command"; case parse_keyword_builtin: return L"builtin"; - default: - return format_string(L"Unknown keyword type %ld", static_cast(k)); } + return format_string(L"Unknown keyword type %ld", static_cast(k)); } /** Returns a string description of the given parse node */ @@ -348,7 +354,8 @@ class parse_ll_t bool top_node_handle_terminal_types(parse_token_t token); void 
parse_error(const wchar_t *expected, parse_token_t token); - void parse_error(parse_token_t token, const wchar_t *format, ...); + void parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *format, ...); + void parse_error_unbalancing_token(parse_token_t token); void append_error_callout(wcstring &error_message, parse_token_t token); void dump_stack(void) const; @@ -450,6 +457,9 @@ class parse_ll_t /* Input */ void accept_tokens(parse_token_t token1, parse_token_t token2); + /* Report tokenizer errors */ + void report_tokenizer_error(parse_token_t token, const wchar_t *tok_error); + /* Indicate if we hit a fatal error */ bool has_fatal_error(void) const { @@ -558,7 +568,7 @@ void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *e this->symbol_stack.clear(); } -void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) +void parse_ll_t::parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *fmt, ...) { this->fatal_errored = true; if (this->should_generate_error_messages) @@ -569,6 +579,7 @@ void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) va_list va; va_start(va, fmt); err.text = vformat_string(fmt, va); + err.code = code; va_end(va); err.source_start = token.source_start; @@ -577,6 +588,42 @@ void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) } } +// Unbalancing token. 
This includes 'else' or 'case' or 'end' outside of the appropriate block +// This essentially duplicates some logic from resolving the production for symbol_statement_list - yuck +void parse_ll_t::parse_error_unbalancing_token(parse_token_t token) +{ + this->fatal_errored = true; + if (this->should_generate_error_messages) + { + assert(token.type == parse_token_type_string); + assert(token.keyword == parse_keyword_end || token.keyword == parse_keyword_else || token.keyword == parse_keyword_case); + switch (token.keyword) + { + case parse_keyword_end: + this->parse_error(token, parse_error_unbalancing_end, L"'end' outside of a block"); + break; + + case parse_keyword_else: + this->parse_error(token, parse_error_unbalancing_else, L"'else' builtin not inside of if block"); + break; + + case parse_keyword_case: + this->parse_error(token, parse_error_unbalancing_case, L"'case' builtin not inside of if block"); + break; + + default: + fprintf(stderr, "Unexpected token %ls passed to %s\n", token.describe().c_str(), __FUNCTION__); + PARSER_DIE(); + break; + } + } +} + +void parse_ll_t::report_tokenizer_error(parse_token_t token, const wchar_t *tok_error) +{ + assert(tok_error != NULL); + this->parse_error(token, parse_error_tokenizer, L"%ls", tok_error); +} void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) { @@ -584,11 +631,7 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) if (this->should_generate_error_messages) { wcstring desc = token_type_description(token.type); - parse_error_t error; - error.text = format_string(L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); - error.source_start = token.source_start; - error.source_start = token.source_length; - errors.push_back(error); + this->parse_error(token, parse_error_generic, L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); } } @@ -629,13 +672,6 @@ static bool type_is_terminal_type(parse_token_type_t type) bool 
parse_ll_t::top_node_handle_terminal_types(parse_token_t token) { - if (symbol_stack.empty()) - { - // This can come about with an unbalanced 'end' or 'else', which causes us to terminate the outermost job list. - this->fatal_errored = true; - return false; - } - PARSE_ASSERT(! symbol_stack.empty()); PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); bool handled = false; @@ -674,7 +710,30 @@ bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token) else { // Failure - this->fatal_errored = true; + if (stack_top.type == parse_token_type_string && token.type == parse_token_type_string) + { + // Must be different keywords. We should unify this with the 'matched' computation above. + assert(stack_top.keyword != parse_keyword_none && stack_top.keyword != token.keyword); + const wcstring expected = keyword_description(stack_top.keyword); + wcstring actual; + if (token.keyword == parse_keyword_none) + { + // This is a random other string (not a keyword) + this->parse_error(token, parse_error_generic, L"Expected keyword '%ls'", expected.c_str()); + } + else + { + // Got a real keyword we can report + const wcstring actual = (token.keyword == parse_keyword_none ? 
token.describe() : keyword_description(token.keyword)); + this->parse_error(token, parse_error_generic, L"Expected keyword '%ls', instead got keyword '%ls'", expected.c_str(), actual.c_str()); + } + } + else + { + const wcstring expected = token_type_description(stack_top.type); + const wcstring actual = token_type_description(token.type); + this->parse_error(expected.c_str(), token); + } } // We handled the token, so pop the symbol stack @@ -734,25 +793,29 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) { if (should_generate_error_messages) { - this->parse_error(token1, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token1.describe().c_str()); + this->parse_error(token1, parse_error_generic, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token1.describe().c_str()); } else { - this->parse_error(token1, NULL); + this->parse_error(token1, parse_error_generic, NULL); } // parse_error sets fatal_errored, which ends the loop } else { + // When a job_list encounters something like 'else', it returns an empty production to return control to the outer block. But if it's unbalanced, then we'll end up with an empty stack! So make sure that doesn't happen. This is the primary mechanism by which we detect e.g. unbalanced end. + if (symbol_stack.size() == 1 && production_is_empty(production)) + { + this->parse_error_unbalancing_token(token1); + break; + } + // Manipulate the symbol stack. // Note that stack_elem is invalidated by popping the stack. symbol_stack_pop_push_production(production); - - // If we end up with an empty stack, something bad happened, like an unbalanced end - if (symbol_stack.empty()) - { - this->parse_error(token1, L"All symbols removed from symbol stack. Likely unbalanced else or end?"); - } + + // Expect to not have an empty stack + assert(! 
symbol_stack.empty()); } } } @@ -842,13 +905,16 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags this->parser->set_should_generate_error_messages(errors != NULL); /* Construct the tokenizer */ - tok_flags_t tok_options = TOK_SQUASH_ERRORS; + tok_flags_t tok_options = 0; if (parse_flags & parse_flag_include_comments) tok_options |= TOK_SHOW_COMMENTS; if (parse_flags & parse_flag_accept_incomplete_tokens) tok_options |= TOK_ACCEPT_UNFINISHED; + if (errors == NULL) + tok_options |= TOK_SQUASH_ERRORS; + tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); /* We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our queue with an initial token at index 1. */ @@ -864,6 +930,12 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags /* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */ this->parser->accept_tokens(queue[0], queue[1]); + /* Handle tokenizer errors. 
This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */ + if (queue[1].type == parse_special_type_tokenizer_error) + { + this->parser->report_tokenizer_error(queue[1], tok_last(&tok)); + } + /* Handle errors */ if (this->parser->has_fatal_error()) { diff --git a/parse_tree.h b/parse_tree.h index d5b48331f..6c8f20f73 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -12,12 +12,10 @@ #include "util.h" #include "common.h" #include "tokenizer.h" +#include "parse_constants.h" #include #include -#define PARSE_ASSERT(a) assert(a) -#define PARSER_DIE() do { fprintf(stderr, "Parser dying!\n"); exit_without_destructors(-1); } while (0) - class parse_node_t; class parse_node_tree_t; typedef size_t node_offset_t; @@ -27,6 +25,9 @@ struct parse_error_t { /** Text of the error */ wcstring text; + + /** Code for the error */ + enum parse_error_code_t code; /** Offset and length of the token in the source code that triggered this error */ size_t source_start; @@ -37,87 +38,6 @@ struct parse_error_t }; typedef std::vector parse_error_list_t; -enum parse_token_type_t -{ - token_type_invalid, - - // Non-terminal tokens - symbol_job_list, - symbol_job, - symbol_job_continuation, - symbol_statement, - symbol_block_statement, - symbol_block_header, - symbol_for_header, - symbol_while_header, - symbol_begin_header, - symbol_function_header, - - symbol_if_statement, - symbol_if_clause, - symbol_else_clause, - symbol_else_continuation, - - symbol_switch_statement, - symbol_case_item_list, - symbol_case_item, - - symbol_boolean_statement, - symbol_decorated_statement, - symbol_plain_statement, - symbol_arguments_or_redirections_list, - symbol_argument_or_redirection, - - symbol_argument_list, - - symbol_argument, - symbol_redirection, - - symbol_optional_background, - - // Terminal types - parse_token_type_string, - parse_token_type_pipe, - parse_token_type_redirection, - parse_token_type_background, - parse_token_type_end, 
- parse_token_type_terminate, - - // Very special terminal types that don't appear in the production list - parse_special_type_parse_error, - parse_special_type_tokenizer_error, - parse_special_type_comment, - - FIRST_TERMINAL_TYPE = parse_token_type_string, - LAST_TERMINAL_TYPE = parse_token_type_terminate, - - LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, - FIRST_PARSE_TOKEN_TYPE = parse_token_type_string -}; - -enum parse_keyword_t -{ - parse_keyword_none, - parse_keyword_if, - parse_keyword_else, - parse_keyword_for, - parse_keyword_in, - parse_keyword_while, - parse_keyword_begin, - parse_keyword_function, - parse_keyword_switch, - parse_keyword_case, - parse_keyword_end, - parse_keyword_and, - parse_keyword_or, - parse_keyword_not, - parse_keyword_command, - parse_keyword_builtin, - - LAST_KEYWORD = parse_keyword_builtin -}; - - /** A struct representing the token type that we use internally */ struct parse_token_t { @@ -233,14 +153,6 @@ public: } }; -/* Statement decorations. This matches the order of productions in decorated_statement */ -enum parse_statement_decoration_t -{ - parse_statement_decoration_none, - parse_statement_decoration_command, - parse_statement_decoration_builtin -}; - /* The parse tree itself */ class parse_node_tree_t : public std::vector diff --git a/parser.cpp b/parser.cpp index 02a4dffc9..221b93dac 100644 --- a/parser.cpp +++ b/parser.cpp @@ -86,11 +86,6 @@ The fish parser. Contains functions for parsing and evaluating code. */ #define BLOCK_END_ERR_MSG _( L"Could not locate end of block. 
The 'end' command is missing, misspelled or a ';' is missing.") -/** - Error message on reaching maximum number of block calls -*/ -#define BLOCK_ERR_MSG _( L"Maximum number of nested blocks reached.") - /** Error message when a non-string token is found when expecting a command name */ @@ -2572,9 +2567,9 @@ void parser_t::eval_job(tokenizer_t *tok) } -int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type_t block_type) +int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type) { - const wchar_t * const cmd = cmdStr.c_str(); + const wchar_t * const cmd = cmd_str.c_str(); size_t forbid_count; int code; block_t *start_current_block = current_block; @@ -2597,13 +2592,6 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type debug(4, L"eval: %ls", cmd); - if (!cmd) - { - debug(1, - EVAL_NULL_ERR_MSG); - bugreport(); - return 1; - } if ((block_type != TOP) && (block_type != SUBST)) diff --git a/tokenizer.cpp b/tokenizer.cpp index 113f8926e..0705e620a 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -96,16 +96,8 @@ int tok_get_error(tokenizer_t *tok) tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), last_quote(0), error(0), squash_errors(false), cached_lineno_offset(0), cached_lineno_count(0) { - - /* We can only generate error messages on the main thread due to wgettext() thread safety issues. 
*/ - if (!(flags & TOK_SQUASH_ERRORS)) - { - ASSERT_IS_MAIN_THREAD(); - } - CHECK(b,); - this->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED); this->show_comments = !!(flags & TOK_SHOW_COMMENTS); this->squash_errors = !!(flags & TOK_SQUASH_ERRORS); diff --git a/wutil.cpp b/wutil.cpp index 3f70368e8..ffb4f2b22 100644 --- a/wutil.cpp +++ b/wutil.cpp @@ -476,7 +476,7 @@ const wchar_t *wgettext(const wchar_t *in) { cstring mbs_in = wcs2string(key); char *out = fish_gettext(mbs_in.c_str()); - val = new wcstring(format_string(L"%s", out)); + val = new wcstring(format_string(L"%s", out)); //note that this writes into the map! } errno = err; return val->c_str(); From 383b6aabf5f180305823e485fc25c2712d26bf00 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 8 Dec 2013 22:29:02 -0800 Subject: [PATCH 054/108] Improve error reporting in new parser. Tests now pass. --- parse_tree.cpp | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index a249e45ef..3521dedfe 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -608,7 +608,7 @@ void parse_ll_t::parse_error_unbalancing_token(parse_token_t token) break; case parse_keyword_case: - this->parse_error(token, parse_error_unbalancing_case, L"'case' builtin not inside of if block"); + this->parse_error(token, parse_error_unbalancing_case, L"'case' builtin not inside of switch block"); break; default: @@ -712,20 +712,36 @@ bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token) // Failure if (stack_top.type == parse_token_type_string && token.type == parse_token_type_string) { - // Must be different keywords. We should unify this with the 'matched' computation above. + // Keyword failure. We should unify this with the 'matched' computation above. 
assert(stack_top.keyword != parse_keyword_none && stack_top.keyword != token.keyword); - const wcstring expected = keyword_description(stack_top.keyword); - wcstring actual; - if (token.keyword == parse_keyword_none) + + // Check to see which keyword we got which was considered wrong + switch (token.keyword) { - // This is a random other string (not a keyword) - this->parse_error(token, parse_error_generic, L"Expected keyword '%ls'", expected.c_str()); - } - else - { - // Got a real keyword we can report - const wcstring actual = (token.keyword == parse_keyword_none ? token.describe() : keyword_description(token.keyword)); - this->parse_error(token, parse_error_generic, L"Expected keyword '%ls', instead got keyword '%ls'", expected.c_str(), actual.c_str()); + // Some keywords are only valid in certain contexts. If this cascaded all the way down through the outermost job_list, it was not in a valid context. + case parse_keyword_case: + case parse_keyword_end: + case parse_keyword_else: + this->parse_error_unbalancing_token(token); + break; + + case parse_keyword_none: + { + // This is a random other string (not a keyword) + const wcstring expected = keyword_description(stack_top.keyword); + this->parse_error(token, parse_error_generic, L"Expected keyword '%ls'", expected.c_str()); + break; + } + + + default: + { + // Got a real keyword we can report + const wcstring actual = (token.keyword == parse_keyword_none ? token.describe() : keyword_description(token.keyword)); + const wcstring expected = keyword_description(stack_top.keyword); + this->parse_error(token, parse_error_generic, L"Expected keyword '%ls', instead got keyword '%ls'", expected.c_str(), actual.c_str()); + break; + } } } else From d5d9b9284ad990c69d1fa17294d00a81d4cfc9b6 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 11 Dec 2013 18:34:28 -0800 Subject: [PATCH 055/108] Initial work towards rewriting detect_errors to use new parser. Low-level tests currently pass; high level tests fail. 
--- highlight.cpp | 3 +- parse_constants.h | 2 + parse_productions.cpp | 15 ++- parse_tree.cpp | 84 +++++++++++++++-- parse_tree.h | 26 ++++-- parser.cpp | 209 +++++++++++++++++++++++++++++++++++++++++- parser.h | 5 +- 7 files changed, 316 insertions(+), 28 deletions(-) diff --git a/highlight.cpp b/highlight.cpp index 23fe912b0..3c60150cd 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -2089,8 +2089,9 @@ const highlighter_t::color_array_t & highlighter_t::highlight() case symbol_decorated_statement: case symbol_if_statement: { - // Color the 'end' this->color_children(node, parse_token_type_string, HIGHLIGHT_COMMAND); + // Color the 'end' + this->color_children(node, symbol_end_command, HIGHLIGHT_COMMAND); } break; diff --git a/parse_constants.h b/parse_constants.h index d9f362120..706c31b84 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -47,6 +47,8 @@ enum parse_token_type_t symbol_redirection, symbol_optional_background, + + symbol_end_command, // Terminal types parse_token_type_string, diff --git a/parse_productions.cpp b/parse_productions.cpp index 9053c5da7..f79e945ae 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -187,7 +187,7 @@ RESOLVE(statement) PRODUCTIONS(if_statement) = { - {symbol_if_clause, symbol_else_clause, KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} + {symbol_if_clause, symbol_else_clause, symbol_end_command, symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(if_statement) @@ -231,7 +231,7 @@ RESOLVE(else_continuation) PRODUCTIONS(switch_statement) = { - { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, KEYWORD(parse_keyword_end)} + { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, symbol_end_command} }; RESOLVE_ONLY(switch_statement) @@ -272,7 +272,7 @@ RESOLVE(argument_list) PRODUCTIONS(block_statement) = { - {symbol_block_header, parse_token_type_end, symbol_job_list, 
KEYWORD(parse_keyword_end), symbol_arguments_or_redirections_list} + {symbol_block_header, parse_token_type_end, symbol_job_list, symbol_end_command, symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(block_statement) @@ -287,8 +287,6 @@ RESOLVE(block_header) { switch (token1.keyword) { - case parse_keyword_else: - return NO_PRODUCTION; case parse_keyword_for: return 0; case parse_keyword_while: @@ -443,6 +441,12 @@ RESOLVE(optional_background) } } +PRODUCTIONS(end_command) = +{ + {KEYWORD(parse_keyword_end)} +}; +RESOLVE_ONLY(end_command) + #define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break; const production_t *parse_productions::production_for_token(parse_token_type_t node_type, const parse_token_t &input1, const parse_token_t &input2, production_option_idx_t *out_which_production, wcstring *out_error_text) { @@ -483,6 +487,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n TEST(argument) TEST(redirection) TEST(optional_background) + TEST(end_command) case parse_token_type_string: case parse_token_type_pipe: diff --git a/parse_tree.cpp b/parse_tree.cpp index 3521dedfe..b8cb348c0 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -110,6 +110,10 @@ wcstring token_type_description(parse_token_type_t type) return L"symbol_argument"; case symbol_redirection: return L"symbol_redirection"; + case symbol_optional_background: + return L"optional_background"; + case symbol_end_command: + return L"symbol_end_command"; case parse_token_type_string: @@ -124,8 +128,7 @@ wcstring token_type_description(parse_token_type_t type) return L"token_end"; case parse_token_type_terminate: return L"token_terminate"; - case symbol_optional_background: - return L"optional_background"; + case parse_special_type_parse_error: return L"parse_error"; @@ -1057,21 +1060,37 @@ const parse_node_t *parse_node_tree_t::get_parent(const parse_node_t &node, pars return result; } -static void 
find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result) +const parse_node_t *parse_node_tree_t::get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const { - if (parent.type == type) result->push_back(&parent); - for (size_t i=0; i < parent.child_count; i++) + const parse_node_t *ancestor = &node; + while ((ancestor = this->get_parent(*ancestor))) { - const parse_node_t *child = tree.get_child(parent, i); - assert(child != NULL); - find_nodes_recursive(tree, *child, type, result); + if (ancestor->type == desired_type) + { + break; + } + } + return ancestor; +} + +static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result, size_t max_count) +{ + if (result->size() < max_count) + { + if (parent.type == type) result->push_back(&parent); + for (size_t i=0; i < parent.child_count; i++) + { + const parse_node_t *child = tree.get_child(parent, i); + assert(child != NULL); + find_nodes_recursive(tree, *child, type, result, max_count); + } } } -parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type) const +parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count) const { parse_node_list_t result; - find_nodes_recursive(*this, parent, type, &result); + find_nodes_recursive(*this, parent, type, &result, max_count); return result; } @@ -1188,6 +1207,37 @@ bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, co return result; } +bool parse_node_tree_t::plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const +{ + // Moderately nasty hack! Walk up our ancestor chain and see if we are in a job_continuation. 
This checks if we are in the second or greater element in a pipeline; if we are the first element we treat this as false + bool result = false; + const parse_node_t *ancestor = &node; + + if (ancestor) + ancestor = this->get_parent(*ancestor, symbol_decorated_statement); + if (ancestor) + ancestor = this->get_parent(*ancestor, symbol_statement); + if (ancestor) + ancestor = this->get_parent(*ancestor); + + if (ancestor) + { + if (ancestor->type == symbol_job_continuation) + { + // Second or more in a pipeline + result = true; + } + else if (ancestor->type == symbol_job && include_first) + { + // Check to see if we have a job continuation that's not empty + const parse_node_t *continuation = this->get_child(*ancestor, 1, symbol_job_continuation); + result = (continuation != NULL && continuation->child_count > 0); + } + } + + return result; +} + enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, wcstring *out_target) const { assert(redirection_node.type == symbol_redirection); @@ -1205,3 +1255,17 @@ enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redi } return result; } + +const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node) +{ + const parse_node_t *result = NULL; + if (node.type == symbol_block_statement) + { + const parse_node_t *block_header = this->get_child(node, 0, symbol_block_header); + if (block_header != NULL) + { + result = this->get_child(*block_header, 0); + } + } + return result; +} diff --git a/parse_tree.h b/parse_tree.h index 6c8f20f73..e65d1bafd 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -159,15 +159,19 @@ class parse_node_tree_t : public std::vector { public: - /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. 
*/ - const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. + */ + const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; /* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */ const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; + + /* Returns the first ancestor of the given type, or NULL. */ + const parse_node_t *get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const; - /* Find all the nodes of a given type underneath a given node */ + /* Find all the nodes of a given type underneath a given node, up to max_count of them */ typedef std::vector parse_node_list_t; - parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; + parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count = (size_t)(-1)) const; /* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */ const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const; @@ -186,8 +190,14 @@ public: /* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. 
*/ bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const; + /* Given a plain statement, return true if the statement is part of a pipeline. If include_first is set, the first command in a pipeline is considered part of it; otherwise only the second or additional commands are */ + bool plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const; + /* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */ enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, wcstring *out_target) const; + + /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */ + const parse_node_t *header_node_for_block_statement(const parse_node_t &node); }; /* Fish grammar: @@ -210,19 +220,19 @@ public: # A block is a conditional, loop, or begin/end - if_statement = if_clause else_clause arguments_or_redirections_list + if_statement = if_clause else_clause end_command arguments_or_redirections_list if_clause = job STATEMENT_TERMINATOR job_list else_clause = | else_continuation else_continuation = if_clause else_clause | STATEMENT_TERMINATOR job_list - switch_statement = SWITCH STATEMENT_TERMINATOR case_item_list + switch_statement = SWITCH STATEMENT_TERMINATOR case_item_list end_command case_item_list = | case_item case_item_list case_item = CASE argument_list STATEMENT_TERMINATOR job_list - block_statement = block_header job_list arguments_or_redirections_list + block_statement = block_header job_list end_command arguments_or_redirections_list block_header = for_header | while_header | function_header | begin_header for_header = FOR var_name IN arguments_or_redirections_list while_header = WHILE statement @@ -252,6 +262,8 @@ public: terminator = | optional_background = | + + end_command = END */ diff --git a/parser.cpp b/parser.cpp index 221b93dac..6053b4a34 
100644 --- a/parser.cpp +++ b/parser.cpp @@ -44,6 +44,7 @@ The fish parser. Contains functions for parsing and evaluating code. #include "path.h" #include "signal.h" #include "complete.h" +#include "parse_tree.h" /** Maximum number of function calls, i.e. recursion depth. @@ -550,14 +551,16 @@ void parser_t::allow_function() forbidden_function.pop_back(); } -void parser_t::error(int ec, int p, const wchar_t *str, ...) +void parser_t::error(int ec, size_t p, const wchar_t *str, ...) { va_list va; CHECK(str,); error_code = ec; - err_pos = p; + + assert(p <= INT_MAX); + err_pos = static_cast(p); va_start(va, str); err_buff = vformat_string(str, va); @@ -1148,7 +1151,7 @@ const wchar_t *parser_t::get_buffer() const } -int parser_t::is_help(const wchar_t *s, int min_match) const +int parser_t::is_help(const wchar_t *s, int min_match) { CHECK(s, 0); @@ -2889,6 +2892,21 @@ int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *pre return err; } +// Check if the first argument under the given node is --help +static bool first_argument_is_help(const parse_node_tree_t &node_tree, const parse_node_t &node, const wcstring &src) +{ + bool is_help = false; + const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument, 1); + if (! arg_nodes.empty()) + { + // Check the first argument only + const parse_node_t &arg = *arg_nodes.at(0); + const wcstring first_arg_src = arg.get_source(src); + is_help = parser_t::is_help(first_arg_src.c_str(), 3); + } + return is_help; +} + // helper type used in parser::test below struct block_info_t { @@ -2897,6 +2915,191 @@ struct block_info_t }; parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *out, const wchar_t *prefix) +{ + ASSERT_IS_MAIN_THREAD(); + + if (! 
buff) + return PARSER_TEST_ERROR; + + const wcstring buff_src = buff; + parse_node_tree_t node_tree; + parse_error_list_t parse_errors; + + // Whether we encountered a parse error + bool errored = false; + long error_line = -1; + + // Whether we encountered an unclosed block + // We detect this via an 'end_command' block without source + bool has_unclosed_block = false; + + bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors); + if (! parsed) + { + // report errors + if (out) + { + for (size_t i=0; i < parse_errors.size(); i++) + { + const parse_error_t &error = parse_errors.at(i); + this->error(SYNTAX_ERROR, error.source_start, L"%ls", error.text.c_str()); + } + } + errored = true; + error_line = __LINE__; + } + + // Expand all commands + // Verify 'or' and 'and' not used inside pipelines + // Verify pipes via parser_is_pipe_forbidden + // Verify return only within a function + + if (! errored) + { + const size_t node_tree_size = node_tree.size(); + for (size_t i=0; i < node_tree_size; i++) + { + const parse_node_t &node = node_tree.at(i); + if (node.type == symbol_end_command && ! node.has_source()) + { + // an 'end' without source is an unclosed block + has_unclosed_block = true; + } + else if (node.type == symbol_plain_statement) + { + wcstring command; + if (node_tree.command_for_plain_statement(node, buff_src, &command)) + { + // Check that we can expand the command + if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) + { + error(SYNTAX_ERROR, node.source_start, ILLEGAL_CMD_ERR_MSG, command.c_str()); + errored = true; + error_line = __LINE__; + } + + // Check that pipes are sound + bool is_boolean_command = contains(command, L"or", L"and"); + bool is_pipe_forbidden = parser_is_pipe_forbidden(command); + if (! errored && (is_boolean_command || is_pipe_forbidden)) + { + // 'or' and 'and' can be first in the pipeline. 
forbidden commands cannot be in a pipeline at all + if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden)) + { + error(SYNTAX_ERROR, node.source_start, EXEC_ERR_MSG); + errored = true; + error_line = __LINE__; + } + } + + // Check that we don't return from outside a function + // But we allow it if it's 'return --help' + if (! errored && command == L"return") + { + const parse_node_t *ancestor = &node; + bool found_function = false; + while (ancestor != NULL) + { + const parse_node_t *possible_function_header = node_tree.header_node_for_block_statement(*ancestor); + if (possible_function_header != NULL && possible_function_header->type == symbol_function_header) + { + found_function = true; + break; + } + ancestor = node_tree.get_parent(*ancestor); + + } + if (! found_function && ! first_argument_is_help(node_tree, node, buff_src)) + { + error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG); + errored = true; + error_line = __LINE__; + } + } + + // Check that we don't return from outside a function + if (! errored && (command == L"break" || command == L"continue")) + { + // Walk up until we hit a 'for' or 'while' loop. If we hit a function first, stop the search; we can't break an outer loop from inside a function. + // This is a little funny because we can't tell if it's a 'for' or 'while' loop from the ancestor alone; we need the header. That is, we hit a block_statement, and have to check its header. + bool found_loop = false, end_search = false; + const parse_node_t *ancestor = &node; + while (ancestor != NULL && ! 
end_search) + { + bool end_search = false; + const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor); + if (loop_or_function_header != NULL) + { + switch (loop_or_function_header->type) + { + case symbol_while_header: + case symbol_for_header: + // this is a loop header, so we can break or continue + found_loop = true; + end_search = true; + + case symbol_function_header: + // this is a function header, so we cannot break or continue. We stop our search here. + found_loop = false; + end_search = true; + break; + + default: + // most likely begin / end style block, which makes no difference + break; + } + } + ancestor = node_tree.get_parent(*ancestor); + } + + + + const parse_node_t *function_node = node_tree.get_first_ancestor_of_type(node, symbol_function_header); + if (function_node == NULL) + { + // Ok, this looks bad: return not in a function! + // But we allow it if it's 'return --help' + // Get the arguments + bool is_help = false; + const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument); + if (! arg_nodes.empty()) + { + // Check the first argument only + const parse_node_t &arg = *arg_nodes.at(0); + const wcstring first_arg_src = arg.get_source(buff_src); + is_help = parser_t::is_help(first_arg_src.c_str(), 3); + } + + // If it's not help, then it's an invalid return + if (! 
is_help) + { + error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG); + errored = true; + error_line = __LINE__; + } + } + } + } + } + } + } + + parser_test_error_bits_t res = 0; + + if (errored) + res |= PARSER_TEST_ERROR; + + if (has_unclosed_block) + res |= PARSER_TEST_INCOMPLETE; + + error_code=0; + + + return res; + +} + +parser_test_error_bits_t parser_t::detect_errors2(const wchar_t *buff, wcstring *out, const wchar_t *prefix) { ASSERT_IS_MAIN_THREAD(); diff --git a/parser.h b/parser.h index 39973665d..b2fbfe134 100644 --- a/parser.h +++ b/parser.h @@ -420,7 +420,7 @@ public: \param p The character offset at which the error occured \param str The printf-style error message filter */ - void error(int ec, int p, const wchar_t *str, ...); + void error(int ec, size_t p, const wchar_t *str, ...); /** Returns a string describing the current parser pisition in the format 'FILENAME (line LINE_NUMBER): LINE'. @@ -488,6 +488,7 @@ public: \param prefix the prefix string to prepend to each error message written to the \c out buffer */ parser_test_error_bits_t detect_errors(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); + parser_test_error_bits_t detect_errors2(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); /** Test if the specified string can be parsed as an argument list, @@ -524,7 +525,7 @@ public: \param s the string to test \param min_match is the minimum number of characters that must match in a long style option, i.e. the longest common prefix between --help and any other option. If less than 3, 3 will be assumed. */ - int is_help(const wchar_t *s, int min_match) const; + static int is_help(const wchar_t *s, int min_match); /** Returns the file currently evaluated by the parser. 
This can be From e25d49b80b0668b55a58e1445aa047a68a1043d3 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 12 Dec 2013 18:17:30 -0800 Subject: [PATCH 056/108] Make builtin_parse output to stdout --- builtin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin.cpp b/builtin.cpp index 3172dbdc5..fe7b83777 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3992,7 +3992,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) parse_t::parse(src, parse_flag_continue_after_error, &parse_tree, &errors, true); } const wcstring dump = parse_dump_tree(parse_tree, src); - fprintf(stderr, "%ls", dump.c_str()); + stdout_buffer.append(dump); } return STATUS_BUILTIN_OK; } From 5cf59de6763a0000fdc87f0101ca78bd137dffcc Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 12 Dec 2013 18:18:07 -0800 Subject: [PATCH 057/108] Finish rewriting detect_errors to use new parser. All tests now pass (!) --- builtin_complete.cpp | 14 ++++--- fish_tests.cpp | 24 ++++++++--- parse_constants.h | 8 +++- parse_tree.cpp | 45 ++++++++++++++++++-- parse_tree.h | 3 ++ parser.cpp | 97 +++++++++++++++++--------------------------- parser.h | 3 +- reader.cpp | 23 +++++++---- tests/test7.in | 9 ---- tests/test7.out | 1 - 10 files changed, 133 insertions(+), 94 deletions(-) diff --git a/builtin_complete.cpp b/builtin_complete.cpp index 14b3a4b74..0cc3b7e7d 100644 --- a/builtin_complete.cpp +++ b/builtin_complete.cpp @@ -497,15 +497,19 @@ static int builtin_complete(parser_t &parser, wchar_t **argv) { if (condition && wcslen(condition)) { - if (parser.detect_errors(condition)) + const wcstring condition_string = condition; + parse_error_list_t errors; + if (parser.detect_errors(condition_string, &errors)) { append_format(stderr_buffer, - L"%ls: Condition '%ls' contained a syntax error\n", + L"%ls: Condition '%ls' contained a syntax error", argv[0], condition); - - parser.detect_errors(condition, &stderr_buffer, argv[0]); - + for (size_t i=0; i < errors.size(); i++) + { + 
append_format(stderr_buffer, L"\n%s: ", argv[0]); + stderr_buffer.append(errors.at(i).describe(condition_string)); + } res = true; } } diff --git a/fish_tests.cpp b/fish_tests.cpp index c43381b3f..0c273643a 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -584,12 +584,6 @@ static void test_parser() parser_t parser(PARSER_TYPE_GENERAL, true); - say(L"Testing null input to parser"); - if (!parser.detect_errors(NULL)) - { - err(L"Null input to parser.detect_errors undetected"); - } - say(L"Testing block nesting"); if (!parser.detect_errors(L"if; end")) { @@ -630,10 +624,28 @@ static void test_parser() { err(L"'break' command outside of loop block context undetected"); } + + if (parser.detect_errors(L"break --help")) + { + err(L"'break --help' incorrectly marked as error"); + } + + if (! parser.detect_errors(L"while false ; function foo ; break ; end ; end ")) + { + err(L"'break' command inside function allowed to break from loop outside it"); + } + + if (!parser.detect_errors(L"exec ls|less") || !parser.detect_errors(L"echo|return")) { err(L"Invalid pipe command undetected"); } + + if (parser.detect_errors(L"for i in foo ; switch $i ; case blah ; break; end; end ")) + { + err(L"'break' command inside switch falsely reported as error"); + } + say(L"Testing basic evaluation"); #if 0 diff --git a/parse_constants.h b/parse_constants.h index 706c31b84..7ccc962c2 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -104,7 +104,13 @@ enum parse_statement_decoration_t enum parse_error_code_t { parse_error_none, - parse_error_generic, //unknown type + + /* matching values from enum parser_error */ + parse_error_syntax, + parse_error_eval, + parse_error_cmdsubst, + + parse_error_generic, // unclassified error types parse_error_tokenizer, //tokenizer error diff --git a/parse_tree.cpp b/parse_tree.cpp index b8cb348c0..ad83a0d60 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1,6 +1,8 @@ #include "parse_productions.h" #include "tokenizer.h" +#include "fallback.h" 
#include +#include using namespace parse_productions; @@ -32,21 +34,58 @@ wcstring parse_error_t::describe(const wcstring &src) const line_end = src.size(); } assert(line_end >= line_start); - //fprintf(stderr, "source start: %lu, line start %lu\n", source_start, line_start); + //fprintf(stderr, "source start: %lu, source_length %lu, line start %lu, line end %lu\n", source_start, source_length, line_start, line_end); assert(source_start >= line_start); // Append the line of text result.push_back(L'\n'); result.append(src, line_start, line_end - line_start); - // Append the caret line + // Append the caret line. The input source may include tabs; for that reason we construct a "caret line" that has tabs in corresponding positions + wcstring caret_space_line; + caret_space_line.reserve(source_start - line_start); + for (size_t i=line_start; i < source_start; i++) + { + wchar_t wc = src.at(i); + if (wc == L'\t') + { + caret_space_line.push_back(L'\t'); + } + else + { + int width = fish_wcwidth(wc); + if (width > 0) + { + caret_space_line.append(static_cast(width), L' '); + } + } + } result.push_back(L'\n'); - result.append(source_start - line_start, L' '); + result.append(caret_space_line); result.push_back(L'^'); } return result; } +wcstring parse_errors_description(const parse_error_list_t &errors, const wcstring &src, const wchar_t *prefix) +{ + wcstring target; + for (size_t i=0; i < errors.size(); i++) + { + if (i > 0) + { + target.push_back(L'\n'); + } + if (prefix != NULL) + { + target.append(prefix); + target.append(L": "); + } + target.append(errors.at(i).describe(src)); + } + return target; +} + /** Returns a string description of the given token type */ wcstring token_type_description(parse_token_type_t type) { diff --git a/parse_tree.h b/parse_tree.h index e65d1bafd..8a0b3eedd 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -38,6 +38,9 @@ struct parse_error_t }; typedef std::vector parse_error_list_t; +/* Returns a description of a list of parse errors */ 
+wcstring parse_errors_description(const parse_error_list_t &errors, const wcstring &src, const wchar_t *prefix = NULL); + /** A struct representing the token type that we use internally */ struct parse_token_t { diff --git a/parser.cpp b/parser.cpp index 6053b4a34..136bc74a4 100644 --- a/parser.cpp +++ b/parser.cpp @@ -565,7 +565,6 @@ void parser_t::error(int ec, size_t p, const wchar_t *str, ...) va_start(va, str); err_buff = vformat_string(str, va); va_end(va); - } /** @@ -2753,7 +2752,7 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha case 1: { - wchar_t *subst = wcsndup(paran_begin+1, paran_end-paran_begin-1); + const wcstring subst(paran_begin + 1, paran_end); wcstring tmp; tmp.append(arg_cpy, paran_begin - arg_cpy); @@ -2762,17 +2761,16 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha // debug( 1, L"%ls -> %ls %ls", arg_cpy, subst, tmp.buff ); - err |= parser_t::detect_errors(subst, out, prefix); + parse_error_list_t errors; + err |= parser_t::detect_errors(subst, &errors); + if (out && ! errors.empty()) + { + out->append(parse_errors_description(errors, subst, prefix)); + } - free(subst); free(arg_cpy); arg_cpy = wcsdup(tmp.c_str()); - /* - Do _not_ call sb_destroy on this stringbuffer - it's - buffer is used as the new 'arg_cpy'. It is free'd at - the end of the loop. - */ break; } } @@ -2914,39 +2912,43 @@ struct block_info_t block_type_t type; //type of the block }; -parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring *out, const wchar_t *prefix) +/* Append a syntax error to the given error list */ +static bool append_syntax_error(parse_error_list_t *errors, const parse_node_t &node, const wchar_t *fmt, ...) 
+{ + parse_error_t error; + error.source_start = node.source_start; + error.source_length = node.source_length; + error.code = parse_error_syntax; + + va_list va; + va_start(va, fmt); + error.text = vformat_string(fmt, va); + va_end(va); + + errors->push_back(error); + return true; +} + +parser_test_error_bits_t parser_t::detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors, const wchar_t *prefix) { ASSERT_IS_MAIN_THREAD(); - if (! buff) - return PARSER_TEST_ERROR; - - const wcstring buff_src = buff; parse_node_tree_t node_tree; parse_error_list_t parse_errors; // Whether we encountered a parse error bool errored = false; - long error_line = -1; // Whether we encountered an unclosed block // We detect this via an 'end_command' block without source bool has_unclosed_block = false; + // Parse the input string into a parse tree + // Some errors are detected here bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors); if (! parsed) { - // report errors - if (out) - { - for (size_t i=0; i < parse_errors.size(); i++) - { - const parse_error_t &error = parse_errors.at(i); - this->error(SYNTAX_ERROR, error.source_start, L"%ls", error.text.c_str()); - } - } errored = true; - error_line = __LINE__; } // Expand all commands @@ -2973,9 +2975,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * // Check that we can expand the command if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) { - error(SYNTAX_ERROR, node.source_start, ILLEGAL_CMD_ERR_MSG, command.c_str()); - errored = true; - error_line = __LINE__; + errored = append_syntax_error(&parse_errors, node, ILLEGAL_CMD_ERR_MSG, command.c_str()); } // Check that pipes are sound @@ -2986,9 +2986,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * // 'or' and 'and' can be first in the pipeline. 
forbidden commands cannot be in a pipeline at all if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden)) { - error(SYNTAX_ERROR, node.source_start, EXEC_ERR_MSG); - errored = true; - error_line = __LINE__; + errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG); } } @@ -3011,9 +3009,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * } if (! found_function && ! first_argument_is_help(node_tree, node, buff_src)) { - error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG); - errored = true; - error_line = __LINE__; + errored = append_syntax_error(&parse_errors, node, INVALID_RETURN_ERR_MSG); } } @@ -3026,7 +3022,6 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * const parse_node_t *ancestor = &node; while (ancestor != NULL && ! end_search) { - bool end_search = false; const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor); if (loop_or_function_header != NULL) { @@ -3037,6 +3032,7 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * // this is a loop header, so we can break or continue found_loop = true; end_search = true; + break; case symbol_function_header: // this is a function header, so we cannot break or continue. We stop our search here. @@ -3052,31 +3048,9 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * ancestor = node_tree.get_parent(*ancestor); } - - - const parse_node_t *function_node = node_tree.get_first_ancestor_of_type(node, symbol_function_header); - if (function_node == NULL) + if (! found_loop && ! first_argument_is_help(node_tree, node, buff_src)) { - // Ok, this looks bad: return not in a function! - // But we allow it if it's 'return --help' - // Get the arguments - bool is_help = false; - const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument); - if (! 
arg_nodes.empty()) - { - // Check the first argument only - const parse_node_t &arg = *arg_nodes.at(0); - const wcstring first_arg_src = arg.get_source(buff_src); - is_help = parser_t::is_help(first_arg_src.c_str(), 3); - } - - // If it's not help, then it's an invalid return - if (! is_help) - { - error(SYNTAX_ERROR, node.source_start, INVALID_RETURN_ERR_MSG); - errored = true; - error_line = __LINE__; - } + errored = append_syntax_error(&parse_errors, node, INVALID_LOOP_ERR_MSG); } } } @@ -3092,6 +3066,11 @@ parser_test_error_bits_t parser_t::detect_errors(const wchar_t *buff, wcstring * if (has_unclosed_block) res |= PARSER_TEST_INCOMPLETE; + if (out_errors) + { + out_errors->swap(parse_errors); + } + error_code=0; diff --git a/parser.h b/parser.h index b2fbfe134..fb3efad85 100644 --- a/parser.h +++ b/parser.h @@ -11,6 +11,7 @@ #include "util.h" #include "event.h" #include "function.h" +#include "parse_tree.h" #include enum { @@ -487,7 +488,7 @@ public: \param out if non-null, any errors in the command will be filled out into this buffer \param prefix the prefix string to prepend to each error message written to the \c out buffer */ - parser_test_error_bits_t detect_errors(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); + parser_test_error_bits_t detect_errors(const wcstring &buff, parse_error_list_t *out_errors = NULL, const wchar_t *prefix = NULL); parser_test_error_bits_t detect_errors2(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); /** diff --git a/reader.cpp b/reader.cpp index df7f070e5..3eeae6271 100644 --- a/reader.cpp +++ b/reader.cpp @@ -2472,12 +2472,11 @@ void reader_run_command(parser_t &parser, const wcstring &cmd) int reader_shell_test(const wchar_t *b) { - int res = parser_t::principal_parser().detect_errors(b); + wcstring bstr = b; + int res = parser_t::principal_parser().detect_errors(bstr); if (res & PARSER_TEST_ERROR) { - wcstring sb; - const int tmp[1] = {0}; const int tmp2[1] = {0}; 
const wcstring empty; @@ -2490,10 +2489,15 @@ int reader_shell_test(const wchar_t *b) tmp, tmp2, 0); - - - parser_t::principal_parser().detect_errors(b, &sb, L"fish"); - fwprintf(stderr, L"%ls", sb.c_str()); + + parse_error_list_t errors; + parser_t::principal_parser().detect_errors(bstr, &errors, L"fish"); + + if (! errors.empty()) + { + const wcstring sb = parse_errors_description(errors, b, L"fish"); + fwprintf(stderr, L"%ls", sb.c_str()); + } } return res; } @@ -3903,13 +3907,14 @@ static int read_ni(int fd, const io_chain_t &io) res = 1; } - wcstring sb; - if (! parser.detect_errors(str.c_str(), &sb, L"fish")) + parse_error_list_t errors; + if (! parser.detect_errors(str, &errors, L"fish")) { parser.eval(str, io, TOP); } else { + const wcstring sb = parse_errors_description(errors, str); fwprintf(stderr, L"%ls", sb.c_str()); res = 1; } diff --git a/tests/test7.in b/tests/test7.in index 22f5d92c6..a3ae8360c 100644 --- a/tests/test7.in +++ b/tests/test7.in @@ -20,15 +20,6 @@ case one echo $status end -# Test that non-case tokens inside `switch` don't blow away status -# (why are these even allowed?) 
-false -switch one -true -case one - echo $status -end - #test contains -i echo test contains -i contains -i string a b c string d diff --git a/tests/test7.out b/tests/test7.out index fd3b8a701..bbe2ab1a5 100644 --- a/tests/test7.out +++ b/tests/test7.out @@ -3,7 +3,6 @@ 3 0 -1 1 test contains -i 4 From 5b24aac2660c27d27c9f3192821cd063fd07f9c0 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 15 Dec 2013 16:05:37 -0800 Subject: [PATCH 058/108] Initial work on backtrace support with new parser --- builtin_complete.cpp | 2 +- parse_constants.h | 7 ++ parse_util.cpp | 201 +++++++++++++++++++++++++++++++++++++++++-- parse_util.h | 3 + parser.cpp | 183 +++++---------------------------------- parser.h | 12 +-- reader.cpp | 19 ++-- 7 files changed, 237 insertions(+), 190 deletions(-) diff --git a/builtin_complete.cpp b/builtin_complete.cpp index 0cc3b7e7d..f3773f487 100644 --- a/builtin_complete.cpp +++ b/builtin_complete.cpp @@ -499,7 +499,7 @@ static int builtin_complete(parser_t &parser, wchar_t **argv) { const wcstring condition_string = condition; parse_error_list_t errors; - if (parser.detect_errors(condition_string, &errors)) + if (parse_util_detect_errors(condition_string, &errors)) { append_format(stderr_buffer, L"%ls: Condition '%ls' contained a syntax error", diff --git a/parse_constants.h b/parse_constants.h index 7ccc962c2..e3eebbf5d 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -119,6 +119,13 @@ enum parse_error_code_t parse_error_unbalancing_case, //case outside of switch }; +enum { + PARSER_TEST_ERROR = 1, + PARSER_TEST_INCOMPLETE = 2 +}; +typedef unsigned int parser_test_error_bits_t; + + /** Error message for tokenizer error. 
The tokenizer message is diff --git a/parse_util.cpp b/parse_util.cpp index cb33915e3..f95679591 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -39,18 +39,12 @@ #include "signal.h" #include "wildcard.h" #include "parse_tree.h" +#include "parser.h" /** - Maximum number of autoloaded items opf a specific type to keep in - memory at a time. + Error message for improper use of the exec builtin */ -#define AUTOLOAD_MAX 10 - -/** - Minimum time, in seconds, before an autoloaded item will be - unloaded -*/ -#define AUTOLOAD_MIN_AGE 60 +#define EXEC_ERR_MSG _(L"This command can not be used in a pipeline") int parse_util_lineno(const wchar_t *str, size_t offset) { @@ -940,3 +934,192 @@ std::vector parse_util_compute_indents(const wcstring &src) return indents; } + +/* Append a syntax error to the given error list */ +static bool append_syntax_error(parse_error_list_t *errors, const parse_node_t &node, const wchar_t *fmt, ...) +{ + parse_error_t error; + error.source_start = node.source_start; + error.source_length = node.source_length; + error.code = parse_error_syntax; + + va_list va; + va_start(va, fmt); + error.text = vformat_string(fmt, va); + va_end(va); + + errors->push_back(error); + return true; +} + +/** + Returns 1 if the specified command is a builtin that may not be used in a pipeline +*/ +static int parser_is_pipe_forbidden(const wcstring &word) +{ + return contains(word, + L"exec", + L"case", + L"break", + L"return", + L"continue"); +} + +// Check if the first argument under the given node is --help +static bool first_argument_is_help(const parse_node_tree_t &node_tree, const parse_node_t &node, const wcstring &src) +{ + bool is_help = false; + const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument, 1); + if (! 
arg_nodes.empty()) + { + // Check the first argument only + const parse_node_t &arg = *arg_nodes.at(0); + const wcstring first_arg_src = arg.get_source(src); + is_help = parser_t::is_help(first_arg_src.c_str(), 3); + } + return is_help; +} + +parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors) +{ + parse_node_tree_t node_tree; + parse_error_list_t parse_errors; + + // Whether we encountered a parse error + bool errored = false; + + // Whether we encountered an unclosed block + // We detect this via an 'end_command' block without source + bool has_unclosed_block = false; + + // Parse the input string into a parse tree + // Some errors are detected here + bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors); + if (! parsed) + { + errored = true; + } + + // Expand all commands + // Verify 'or' and 'and' not used inside pipelines + // Verify pipes via parser_is_pipe_forbidden + // Verify return only within a function + + if (! errored) + { + const size_t node_tree_size = node_tree.size(); + for (size_t i=0; i < node_tree_size; i++) + { + const parse_node_t &node = node_tree.at(i); + if (node.type == symbol_end_command && ! node.has_source()) + { + // an 'end' without source is an unclosed block + has_unclosed_block = true; + } + else if (node.type == symbol_plain_statement) + { + wcstring command; + if (node_tree.command_for_plain_statement(node, buff_src, &command)) + { + // Check that we can expand the command + if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) + { + errored = append_syntax_error(&parse_errors, node, ILLEGAL_CMD_ERR_MSG, command.c_str()); + } + + // Check that pipes are sound + bool is_boolean_command = contains(command, L"or", L"and"); + bool is_pipe_forbidden = parser_is_pipe_forbidden(command); + if (! errored && (is_boolean_command || is_pipe_forbidden)) + { + // 'or' and 'and' can be first in the pipeline. 
forbidden commands cannot be in a pipeline at all + if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden)) + { + errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG); + } + } + + // Check that we don't return from outside a function + // But we allow it if it's 'return --help' + if (! errored && command == L"return") + { + const parse_node_t *ancestor = &node; + bool found_function = false; + while (ancestor != NULL) + { + const parse_node_t *possible_function_header = node_tree.header_node_for_block_statement(*ancestor); + if (possible_function_header != NULL && possible_function_header->type == symbol_function_header) + { + found_function = true; + break; + } + ancestor = node_tree.get_parent(*ancestor); + + } + if (! found_function && ! first_argument_is_help(node_tree, node, buff_src)) + { + errored = append_syntax_error(&parse_errors, node, INVALID_RETURN_ERR_MSG); + } + } + + // Check that we don't return from outside a function + if (! errored && (command == L"break" || command == L"continue")) + { + // Walk up until we hit a 'for' or 'while' loop. If we hit a function first, stop the search; we can't break an outer loop from inside a function. + // This is a little funny because we can't tell if it's a 'for' or 'while' loop from the ancestor alone; we need the header. That is, we hit a block_statement, and have to check its header. + bool found_loop = false, end_search = false; + const parse_node_t *ancestor = &node; + while (ancestor != NULL && ! end_search) + { + const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor); + if (loop_or_function_header != NULL) + { + switch (loop_or_function_header->type) + { + case symbol_while_header: + case symbol_for_header: + // this is a loop header, so we can break or continue + found_loop = true; + end_search = true; + break; + + case symbol_function_header: + // this is a function header, so we cannot break or continue. We stop our search here. 
+ found_loop = false; + end_search = true; + break; + + default: + // most likely begin / end style block, which makes no difference + break; + } + } + ancestor = node_tree.get_parent(*ancestor); + } + + if (! found_loop && ! first_argument_is_help(node_tree, node, buff_src)) + { + errored = append_syntax_error(&parse_errors, node, INVALID_LOOP_ERR_MSG); + } + } + } + } + } + } + + parser_test_error_bits_t res = 0; + + if (errored) + res |= PARSER_TEST_ERROR; + + if (has_unclosed_block) + res |= PARSER_TEST_INCOMPLETE; + + if (out_errors) + { + out_errors->swap(parse_errors); + } + + return res; + +} diff --git a/parse_util.h b/parse_util.h index b5b5262a3..28e263ed9 100644 --- a/parse_util.h +++ b/parse_util.h @@ -8,6 +8,7 @@ #define FISH_PARSE_UTIL_H #include "autoload.h" +#include "parse_tree.h" #include #include #include @@ -162,4 +163,6 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote) /** Given a string, parse it as fish code and then return the indents. The return value has the same size as the string */ std::vector parse_util_compute_indents(const wcstring &src); +parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors); + #endif diff --git a/parser.cpp b/parser.cpp index 136bc74a4..1c6162a74 100644 --- a/parser.cpp +++ b/parser.cpp @@ -801,7 +801,7 @@ void parser_t::eval_args(const wchar_t *line, std::vector &args) proc_pop_interactive(); } -void parser_t::stack_trace(block_t *b, wcstring &buff) +void parser_t::stack_trace(block_t *b, wcstring &buff) const { /* Check if we should end the recursion @@ -844,7 +844,7 @@ void parser_t::stack_trace(block_t *b, wcstring &buff) { const source_block_t *sb = static_cast(b); const wchar_t *source_dest = sb->source_file; - append_format(buff, _(L"in . 
(source) call of file '%ls',\n"), source_dest); + append_format(buff, _(L"from sourcing file '%ls',\n"), source_dest); break; } case FUNCTION_CALL: @@ -2762,7 +2762,7 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha // debug( 1, L"%ls -> %ls %ls", arg_cpy, subst, tmp.buff ); parse_error_list_t errors; - err |= parser_t::detect_errors(subst, &errors); + err |= parse_util_detect_errors(subst, &errors); if (out && ! errors.empty()) { out->append(parse_errors_description(errors, subst, prefix)); @@ -2890,21 +2890,6 @@ int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *pre return err; } -// Check if the first argument under the given node is --help -static bool first_argument_is_help(const parse_node_tree_t &node_tree, const parse_node_t &node, const wcstring &src) -{ - bool is_help = false; - const parse_node_tree_t::parse_node_list_t arg_nodes = node_tree.find_nodes(node, symbol_argument, 1); - if (! arg_nodes.empty()) - { - // Check the first argument only - const parse_node_t &arg = *arg_nodes.at(0); - const wcstring first_arg_src = arg.get_source(src); - is_help = parser_t::is_help(first_arg_src.c_str(), 3); - } - return is_help; -} - // helper type used in parser::test below struct block_info_t { @@ -2929,153 +2914,31 @@ static bool append_syntax_error(parse_error_list_t *errors, const parse_node_t & return true; } -parser_test_error_bits_t parser_t::detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors, const wchar_t *prefix) +void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &errors, wcstring *output) const { - ASSERT_IS_MAIN_THREAD(); - - parse_node_tree_t node_tree; - parse_error_list_t parse_errors; - - // Whether we encountered a parse error - bool errored = false; - - // Whether we encountered an unclosed block - // We detect this via an 'end_command' block without source - bool has_unclosed_block = false; - - // Parse the input string into a parse tree - 
// Some errors are detected here - bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors); - if (! parsed) + assert(output != NULL); + if (! errors.empty()) { - errored = true; - } - - // Expand all commands - // Verify 'or' and 'and' not used inside pipelines - // Verify pipes via parser_is_pipe_forbidden - // Verify return only within a function - - if (! errored) - { - const size_t node_tree_size = node_tree.size(); - for (size_t i=0; i < node_tree_size; i++) + const parse_error_t err = errors.at(0); + output->append(err.describe(src)); + output->push_back(L'\n'); + + // Determine which line we're on + assert(err.source_start <= src.size()); + size_t which_line = 1 + std::count(src.begin(), src.begin() + err.source_start, L'\n'); + + const wchar_t *filename = this->current_filename(); + if (filename) { - const parse_node_t &node = node_tree.at(i); - if (node.type == symbol_end_command && ! node.has_source()) - { - // an 'end' without source is an unclosed block - has_unclosed_block = true; - } - else if (node.type == symbol_plain_statement) - { - wcstring command; - if (node_tree.command_for_plain_statement(node, buff_src, &command)) - { - // Check that we can expand the command - if (! expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS)) - { - errored = append_syntax_error(&parse_errors, node, ILLEGAL_CMD_ERR_MSG, command.c_str()); - } - - // Check that pipes are sound - bool is_boolean_command = contains(command, L"or", L"and"); - bool is_pipe_forbidden = parser_is_pipe_forbidden(command); - if (! errored && (is_boolean_command || is_pipe_forbidden)) - { - // 'or' and 'and' can be first in the pipeline. forbidden commands cannot be in a pipeline at all - if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden)) - { - errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG); - } - } - - // Check that we don't return from outside a function - // But we allow it if it's 'return --help' - if (! 
errored && command == L"return") - { - const parse_node_t *ancestor = &node; - bool found_function = false; - while (ancestor != NULL) - { - const parse_node_t *possible_function_header = node_tree.header_node_for_block_statement(*ancestor); - if (possible_function_header != NULL && possible_function_header->type == symbol_function_header) - { - found_function = true; - break; - } - ancestor = node_tree.get_parent(*ancestor); - - } - if (! found_function && ! first_argument_is_help(node_tree, node, buff_src)) - { - errored = append_syntax_error(&parse_errors, node, INVALID_RETURN_ERR_MSG); - } - } - - // Check that we don't return from outside a function - if (! errored && (command == L"break" || command == L"continue")) - { - // Walk up until we hit a 'for' or 'while' loop. If we hit a function first, stop the search; we can't break an outer loop from inside a function. - // This is a little funny because we can't tell if it's a 'for' or 'while' loop from the ancestor alone; we need the header. That is, we hit a block_statement, and have to check its header. - bool found_loop = false, end_search = false; - const parse_node_t *ancestor = &node; - while (ancestor != NULL && ! end_search) - { - const parse_node_t *loop_or_function_header = node_tree.header_node_for_block_statement(*ancestor); - if (loop_or_function_header != NULL) - { - switch (loop_or_function_header->type) - { - case symbol_while_header: - case symbol_for_header: - // this is a loop header, so we can break or continue - found_loop = true; - end_search = true; - break; - - case symbol_function_header: - // this is a function header, so we cannot break or continue. We stop our search here. - found_loop = false; - end_search = true; - break; - - default: - // most likely begin / end style block, which makes no difference - break; - } - } - ancestor = node_tree.get_parent(*ancestor); - } - - if (! found_loop && ! 
first_argument_is_help(node_tree, node, buff_src)) - { - errored = append_syntax_error(&parse_errors, node, INVALID_LOOP_ERR_MSG); - } - } - } - } + append_format(*output, _(L"line %lu of '%ls'\n"), which_line, filename); } + else + { + append_format(*output, L"%ls: ", _(L"Standard input"), which_line); + } + + this->stack_trace(current_block, *output); } - - parser_test_error_bits_t res = 0; - - if (errored) - res |= PARSER_TEST_ERROR; - - if (has_unclosed_block) - res |= PARSER_TEST_INCOMPLETE; - - if (out_errors) - { - out_errors->swap(parse_errors); - } - - error_code=0; - - - return res; - } parser_test_error_bits_t parser_t::detect_errors2(const wchar_t *buff, wcstring *out, const wchar_t *prefix) diff --git a/parser.h b/parser.h index fb3efad85..90b6a1c43 100644 --- a/parser.h +++ b/parser.h @@ -14,12 +14,6 @@ #include "parse_tree.h" #include -enum { - PARSER_TEST_ERROR = 1, - PARSER_TEST_INCOMPLETE = 2 -}; -typedef unsigned int parser_test_error_bits_t; - /** event_blockage_t represents a block on events of the specified type */ @@ -488,8 +482,8 @@ public: \param out if non-null, any errors in the command will be filled out into this buffer \param prefix the prefix string to prepend to each error message written to the \c out buffer */ - parser_test_error_bits_t detect_errors(const wcstring &buff, parse_error_list_t *out_errors = NULL, const wchar_t *prefix = NULL); - parser_test_error_bits_t detect_errors2(const wchar_t * buff, wcstring *out = NULL, const wchar_t *prefix = NULL); + parser_test_error_bits_t detect_errors2(const wchar_t *buff, wcstring *out_error_desc, const wchar_t *prefix); + void get_backtrace(const wcstring &src, const parse_error_list_t &errors, wcstring *output) const; /** Test if the specified string can be parsed as an argument list, @@ -538,7 +532,7 @@ public: /** Write a stack trace starting at the specified block to the specified wcstring */ - void stack_trace(block_t *b, wcstring &buff); + void stack_trace(block_t *b, wcstring 
&buff) const; int get_block_type(const wchar_t *cmd) const; const wchar_t *get_block_command(int type) const; diff --git a/reader.cpp b/reader.cpp index 3eeae6271..87c014642 100644 --- a/reader.cpp +++ b/reader.cpp @@ -2473,7 +2473,8 @@ void reader_run_command(parser_t &parser, const wcstring &cmd) int reader_shell_test(const wchar_t *b) { wcstring bstr = b; - int res = parser_t::principal_parser().detect_errors(bstr); + parse_error_list_t errors; + int res = parse_util_detect_errors(bstr, &errors); if (res & PARSER_TEST_ERROR) { @@ -2490,14 +2491,9 @@ int reader_shell_test(const wchar_t *b) tmp2, 0); - parse_error_list_t errors; - parser_t::principal_parser().detect_errors(bstr, &errors, L"fish"); - - if (! errors.empty()) - { - const wcstring sb = parse_errors_description(errors, b, L"fish"); - fwprintf(stderr, L"%ls", sb.c_str()); - } + wcstring sb; + parser_t::principal_parser().get_backtrace(bstr, errors, &sb); + fwprintf(stderr, L"%ls", sb.c_str()); } return res; } @@ -3908,13 +3904,14 @@ static int read_ni(int fd, const io_chain_t &io) } parse_error_list_t errors; - if (! parser.detect_errors(str, &errors, L"fish")) + if (! 
parse_util_detect_errors(str, &errors)) { parser.eval(str, io, TOP); } else { - const wcstring sb = parse_errors_description(errors, str); + wcstring sb; + parser.get_backtrace(str, errors, &sb); fwprintf(stderr, L"%ls", sb.c_str()); res = 1; } From ddb37a47da240a04ba2193ea3f29c1d12ad807c0 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 15 Dec 2013 16:43:22 -0800 Subject: [PATCH 059/108] Stop using realpath() in builtin_source, so as to preserve relative path reporting in backtraces --- builtin.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index fe7b83777..f0e918381 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3011,10 +3011,7 @@ static int builtin_source(parser_t &parser, wchar_t ** argv) argc = builtin_count_args(argv); - const wchar_t *fn; - const wchar_t *fn_intern; - - + const wchar_t *fn, *fn_intern; if (argc < 2 || (wcscmp(argv[1], L"-") == 0)) { @@ -3047,18 +3044,7 @@ static int builtin_source(parser_t &parser, wchar_t ** argv) return STATUS_BUILTIN_ERROR; } - fn = wrealpath(argv[1], NULL); - - if (!fn) - { - fn_intern = intern(argv[1]); - } - else - { - fn_intern = intern(fn); - free((void *)fn); - } - + fn_intern = intern(argv[1]); } parser.push_block(new source_block_t(fn_intern)); From 471f7f06f790b2fd927d86d3fb3974743ed60607 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 15 Dec 2013 16:44:05 -0800 Subject: [PATCH 060/108] Fix for a busted assertion --- parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser.cpp b/parser.cpp index 1c6162a74..abfb88160 100644 --- a/parser.cpp +++ b/parser.cpp @@ -559,7 +559,7 @@ void parser_t::error(int ec, size_t p, const wchar_t *str, ...) 
error_code = ec; - assert(p <= INT_MAX); + // note : p may be -1 err_pos = static_cast(p); va_start(va, str); From 0e421ea31dd8293c64960912ab9651b2a3b2b07d Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 16 Dec 2013 15:33:20 -0800 Subject: [PATCH 061/108] Various cleanup and tweaking of backtrace messages --- expand.cpp | 25 +++++++++++++++++++++++++ expand.h | 3 +++ parser.cpp | 47 +++++++++++++++++++---------------------------- 3 files changed, 47 insertions(+), 28 deletions(-) diff --git a/expand.cpp b/expand.cpp index 10e3cbf40..f743f8ab6 100644 --- a/expand.cpp +++ b/expand.cpp @@ -1602,6 +1602,31 @@ static void unexpand_tildes(const wcstring &input, std::vector *co } } +// If the given path contains the user's home directory, replace that with a tilde +// We don't try to be smart about case insensitivity, etc. +wcstring replace_home_directory_with_tilde(const wcstring &str) +{ + // only absolute paths get this treatment + wcstring result = str; + if (string_prefixes_string(L"/", result)) + { + wcstring home_directory = L"~"; + expand_tilde(home_directory); + if (! string_suffixes_string(L"/", home_directory)) + { + home_directory.push_back(L'/'); + } + + // Now check if the home_directory prefixes the string + if (string_prefixes_string(home_directory, result)) + { + // Success + result.replace(0, home_directory.size(), L"~/"); + } + } + return result; +} + /** Remove any internal separators. Also optionally convert wildcard characters to regular equivalents. This is done to support EXPAND_SKIP_WILDCARDS. diff --git a/expand.h b/expand.h index 4893d2b92..803513c2a 100644 --- a/expand.h +++ b/expand.h @@ -176,6 +176,9 @@ wcstring expand_escape_variable(const wcstring &in); */ void expand_tilde(wcstring &input); +/** Perform the opposite of tilde expansion on the string, which is modified in place */ +wcstring replace_home_directory_with_tilde(const wcstring &str); + /** Test if the specified argument is clean, i.e. 
it does not contain any tokens which need to be expanded or otherwise altered. Clean diff --git a/parser.cpp b/parser.cpp index abfb88160..ee6948a85 100644 --- a/parser.cpp +++ b/parser.cpp @@ -312,6 +312,13 @@ static const struct block_lookup_entry block_lookup[]= static bool job_should_skip_elseif(const job_t *job, const block_t *current_block); +// Given a file path, return something nicer. Currently we just "unexpand" tildes. +static wcstring user_presentable_path(const wcstring &path) +{ + return replace_home_directory_with_tilde(path); +} + + parser_t::parser_t(enum parser_type_t type, bool errors) : parser_type(type), show_errors(errors), @@ -324,7 +331,6 @@ parser_t::parser_t(enum parser_type_t type, bool errors) : current_block(NULL), block_io(shared_ptr()) { - } /* A pointer to the principal parser (which is a static local) */ @@ -363,7 +369,7 @@ void parser_t::push_block(block_t *newv) const enum block_type_t type = newv->type(); newv->src_lineno = parser_t::get_lineno(); newv->src_filename = parser_t::current_filename()?intern(parser_t::current_filename()):0; - + newv->outer = current_block; if (current_block && current_block->skip) newv->mark_as_fake(); @@ -844,13 +850,13 @@ void parser_t::stack_trace(block_t *b, wcstring &buff) const { const source_block_t *sb = static_cast(b); const wchar_t *source_dest = sb->source_file; - append_format(buff, _(L"from sourcing file '%ls',\n"), source_dest); + append_format(buff, _(L"from sourcing file %ls\n"), user_presentable_path(source_dest).c_str()); break; } case FUNCTION_CALL: { const function_block_t *fb = static_cast(b); - append_format(buff, _(L"in function '%ls',\n"), fb->name.c_str()); + append_format(buff, _(L"in function '%ls'\n"), fb->name.c_str()); break; } case SUBST: @@ -868,14 +874,14 @@ void parser_t::stack_trace(block_t *b, wcstring &buff) const if (file) { append_format(buff, - _(L"\tcalled on line %d of file '%ls',\n"), + _(L"\tcalled on line %d of file %ls\n"), b->src_lineno, - file); + 
user_presentable_path(file).c_str()); } else { append_format(buff, - _(L"\tcalled on standard input,\n")); + _(L"\tcalled on standard input\n")); } if (b->type() == FUNCTION_CALL) @@ -2611,6 +2617,7 @@ int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_typ tokenizer_t local_tokenizer(cmd, 0); scoped_push tokenizer_push(¤t_tokenizer, &local_tokenizer); + scoped_push tokenizer_pos_push(¤t_tokenizer_pos, 0); error_code = 0; @@ -2897,31 +2904,12 @@ struct block_info_t block_type_t type; //type of the block }; -/* Append a syntax error to the given error list */ -static bool append_syntax_error(parse_error_list_t *errors, const parse_node_t &node, const wchar_t *fmt, ...) -{ - parse_error_t error; - error.source_start = node.source_start; - error.source_length = node.source_length; - error.code = parse_error_syntax; - - va_list va; - va_start(va, fmt); - error.text = vformat_string(fmt, va); - va_end(va); - - errors->push_back(error); - return true; -} - void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &errors, wcstring *output) const { assert(output != NULL); if (! 
errors.empty()) { const parse_error_t err = errors.at(0); - output->append(err.describe(src)); - output->push_back(L'\n'); // Determine which line we're on assert(err.source_start <= src.size()); @@ -2930,13 +2918,16 @@ void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &erro const wchar_t *filename = this->current_filename(); if (filename) { - append_format(*output, _(L"line %lu of '%ls'\n"), which_line, filename); + append_format(*output, _(L"fish: line %lu of %ls:\n"), which_line, user_presentable_path(filename).c_str()); } else { - append_format(*output, L"%ls: ", _(L"Standard input"), which_line); + append_format(*output, L"fish: %ls:", _(L"Error:")); } + output->append(err.describe(src)); + output->push_back(L'\n'); + this->stack_trace(current_block, *output); } } From 3e9153d955aa2e908f1dd661c8b91556ca8e0ee8 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 16 Dec 2013 16:52:23 -0800 Subject: [PATCH 062/108] Clean up some error messages. Don't show the line in the error message if it's the first line and we're interactive, since then it's obvious --- parse_constants.h | 13 ++++++------- parse_tree.cpp | 4 ++-- parse_tree.h | 4 ++-- parse_util.cpp | 2 +- parser.cpp | 7 +++++-- reader.cpp | 21 +++++++-------------- 6 files changed, 23 insertions(+), 28 deletions(-) diff --git a/parse_constants.h b/parse_constants.h index e3eebbf5d..a322073e7 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -188,14 +188,13 @@ typedef unsigned int parser_test_error_bits_t; */ #define INVALID_CASE_ERR_MSG _( L"'case' builtin not inside of switch block") -/** - Error when using loop control builtins (break or continue) outside of loop -*/ -#define INVALID_LOOP_ERR_MSG _( L"Loop control command while not inside of loop" ) +/** Error when using break outside of loop */ +#define INVALID_BREAK_ERR_MSG _( L"break command while not inside of loop" ) -/** - Error when using return builtin outside of function definition -*/ +/** Error when using 
continue outside of loop */ +#define INVALID_CONTINUE_ERR_MSG _( L"continue command while not inside of loop" ) + +/** Error when using return builtin outside of function definition */ #define INVALID_RETURN_ERR_MSG _( L"'return' builtin command outside of function definition" ) /** diff --git a/parse_tree.cpp b/parse_tree.cpp index ad83a0d60..81b6c1633 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -12,10 +12,10 @@ static bool production_is_empty(const production_t *production) } /** Returns a string description of this parse error */ -wcstring parse_error_t::describe(const wcstring &src) const +wcstring parse_error_t::describe(const wcstring &src, bool skip_caret) const { wcstring result = text; - if (source_start < src.size() && source_start + source_length <= src.size()) + if (! skip_caret && source_start < src.size() && source_start + source_length <= src.size()) { // Locate the beginning of this line of source size_t line_start = 0; diff --git a/parse_tree.h b/parse_tree.h index 8a0b3eedd..bdc622d93 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -33,8 +33,8 @@ struct parse_error_t size_t source_start; size_t source_length; - /** Return a string describing the error, suitable for presentation to the user */ - wcstring describe(const wcstring &src) const; + /** Return a string describing the error, suitable for presentation to the user. If skip_caret is false, the offending line with a caret is printed as well */ + wcstring describe(const wcstring &src, bool skip_caret = false) const; }; typedef std::vector parse_error_list_t; diff --git a/parse_util.cpp b/parse_util.cpp index f95679591..ff7a022c2 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -1099,7 +1099,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars if (! found_loop && ! 
first_argument_is_help(node_tree, node, buff_src)) { - errored = append_syntax_error(&parse_errors, node, INVALID_LOOP_ERR_MSG); + errored = append_syntax_error(&parse_errors, node, (command == L"break" ? INVALID_BREAK_ERR_MSG : INVALID_CONTINUE_ERR_MSG)); } } } diff --git a/parser.cpp b/parser.cpp index ee6948a85..59d0f51bc 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2922,10 +2922,13 @@ void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &erro } else { - append_format(*output, L"fish: %ls:", _(L"Error:")); + output->append(L"fish: "); } - output->append(err.describe(src)); + // Don't include the caret if we're interactive, this is the first line of text, and our source is at its beginning, because then it's obvious + bool skip_caret = (get_is_interactive() && which_line == 1 && err.source_start == 0); + + output->append(err.describe(src, skip_caret)); output->push_back(L'\n'); this->stack_trace(current_block, *output); diff --git a/reader.cpp b/reader.cpp index 87c014642..1506ae3e9 100644 --- a/reader.cpp +++ b/reader.cpp @@ -2478,22 +2478,15 @@ int reader_shell_test(const wchar_t *b) if (res & PARSER_TEST_ERROR) { - const int tmp[1] = {0}; - const int tmp2[1] = {0}; - const wcstring empty; - - s_write(&data->screen, - empty, - empty, - empty, - 0, - tmp, - tmp2, - 0); - wcstring sb; parser_t::principal_parser().get_backtrace(bstr, errors, &sb); - fwprintf(stderr, L"%ls", sb.c_str()); + + // ensure we end with a newline. Also add an initial newline, because it's likely the user just hit enter and so there's junk on the current line + if (! 
string_suffixes_string(L"\n", sb)) + { + sb.push_back(L'\n'); + } + fwprintf(stderr, L"\n%ls", sb.c_str()); } return res; } From af21dfd2947d08e3ae435d84d8b7dc784788d887 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 16 Dec 2013 17:18:32 -0800 Subject: [PATCH 063/108] Make the tests compile again --- fish_tests.cpp | 26 +++++++++++++------------- parse_util.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 0c273643a..af65b70ab 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -585,63 +585,63 @@ static void test_parser() parser_t parser(PARSER_TYPE_GENERAL, true); say(L"Testing block nesting"); - if (!parser.detect_errors(L"if; end")) + if (!parse_util_detect_errors(L"if; end")) { err(L"Incomplete if statement undetected"); } - if (!parser.detect_errors(L"if test; echo")) + if (!parse_util_detect_errors(L"if test; echo")) { err(L"Missing end undetected"); } - if (!parser.detect_errors(L"if test; end; end")) + if (!parse_util_detect_errors(L"if test; end; end")) { err(L"Unbalanced end undetected"); } say(L"Testing detection of invalid use of builtin commands"); - if (!parser.detect_errors(L"case foo")) + if (!parse_util_detect_errors(L"case foo")) { err(L"'case' command outside of block context undetected"); } - if (!parser.detect_errors(L"switch ggg; if true; case foo;end;end")) + if (!parse_util_detect_errors(L"switch ggg; if true; case foo;end;end")) { err(L"'case' command outside of switch block context undetected"); } - if (!parser.detect_errors(L"else")) + if (!parse_util_detect_errors(L"else")) { err(L"'else' command outside of conditional block context undetected"); } - if (!parser.detect_errors(L"else if")) + if (!parse_util_detect_errors(L"else if")) { err(L"'else if' command outside of conditional block context undetected"); } - if (!parser.detect_errors(L"if false; else if; end")) + if (!parse_util_detect_errors(L"if false; else if; end")) { err(L"'else if' missing command 
undetected"); } - if (!parser.detect_errors(L"break")) + if (!parse_util_detect_errors(L"break")) { err(L"'break' command outside of loop block context undetected"); } - if (parser.detect_errors(L"break --help")) + if (parse_util_detect_errors(L"break --help")) { err(L"'break --help' incorrectly marked as error"); } - if (! parser.detect_errors(L"while false ; function foo ; break ; end ; end ")) + if (! parse_util_detect_errors(L"while false ; function foo ; break ; end ; end ")) { err(L"'break' command inside function allowed to break from loop outside it"); } - if (!parser.detect_errors(L"exec ls|less") || !parser.detect_errors(L"echo|return")) + if (!parse_util_detect_errors(L"exec ls|less") || !parse_util_detect_errors(L"echo|return")) { err(L"Invalid pipe command undetected"); } - if (parser.detect_errors(L"for i in foo ; switch $i ; case blah ; break; end; end ")) + if (parse_util_detect_errors(L"for i in foo ; switch $i ; case blah ; break; end; end ")) { err(L"'break' command inside switch falsely reported as error"); } diff --git a/parse_util.h b/parse_util.h index 28e263ed9..862e5a621 100644 --- a/parse_util.h +++ b/parse_util.h @@ -163,6 +163,6 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote) /** Given a string, parse it as fish code and then return the indents. 
The return value has the same size as the string */ std::vector parse_util_compute_indents(const wcstring &src); -parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors); +parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, parse_error_list_t *out_errors = NULL); #endif From 384987cd5b9bcc2fd194c8343e00c1931c174a73 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 16 Dec 2013 22:32:08 -0800 Subject: [PATCH 064/108] Remove detect_errors2 --- parser.cpp | 719 ----------------------------------------------------- parser.h | 1 - 2 files changed, 720 deletions(-) diff --git a/parser.cpp b/parser.cpp index 59d0f51bc..0d47e5e13 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2935,725 +2935,6 @@ void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &erro } } -parser_test_error_bits_t parser_t::detect_errors2(const wchar_t *buff, wcstring *out, const wchar_t *prefix) -{ - ASSERT_IS_MAIN_THREAD(); - - /* - Set to one if a command name has been given for the currently - parsed process specification - */ - int had_cmd=0; - int err=0; - int unfinished = 0; - - // This is very nearly a stack, but sometimes we have to inspect non-top elements (e.g. return) - std::vector block_infos; - - /* - Set to 1 if the current command is inside a pipeline - */ - int is_pipeline = 0; - - /* - Set to one if the currently specified process can not be used inside a pipeline - */ - int forbid_pipeline = 0; - - /* - Set to one if an additional process specification is needed - */ - bool needs_cmd = false; - - /* - Counter on the number of arguments this function has encountered - so far. Is set to -1 when the count is unknown, i.e. after - encountering an argument that contains substitutions that can - expand to more/less arguemtns then 1. - */ - int arg_count=0; - - /* - The currently validated command. 
- */ - wcstring command; - bool has_command = false; - - CHECK(buff, 1); - - tokenizer_t tok(buff, 0); - - scoped_push tokenizer_push(¤t_tokenizer, &tok); - scoped_push tokenizer_pos_push(¤t_tokenizer_pos); - - for (;; tok_next(&tok)) - { - current_tokenizer_pos = tok_get_pos(&tok); - - int last_type = tok_last_type(&tok); - int end_of_cmd = 0; - - switch (last_type) - { - case TOK_STRING: - { - if (!had_cmd) - { - int mark = tok_get_pos(&tok); - had_cmd = 1; - arg_count=0; - - command = tok_last(&tok); - - // Pass SKIP_HOME_DIRECTORIES for https://github.com/fish-shell/fish-shell/issues/512 - has_command = expand_one(command, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_HOME_DIRECTORIES); - if (! has_command) - { - command = L""; - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - ILLEGAL_CMD_ERR_MSG, - tok_last(&tok)); - - print_errors(*out, prefix); - } - break; - } - - if (needs_cmd) - { - /* - end is not a valid command when a followup - command is needed, such as after 'and' or - 'while' - */ - if (contains(command, - L"end")) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - COND_ERR_MSG); - - print_errors(*out, prefix); - } - } - - needs_cmd = false; - } - - /* - Decrement block count on end command - */ - if (command == L"end") - { - tok_next(&tok); - tok_set_pos(&tok, mark); - - /* Test that end is not used when not inside any block */ - if (block_infos.empty()) - { - err = 1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_END_ERR_MSG); - print_errors(*out, prefix); - const wcstring h = builtin_help_get(*this, L"end"); - if (! 
h.empty()) - append_format(*out, L"%ls", h.c_str()); - } - } - else - { - block_infos.pop_back(); - - } - } - - /* - Handle block commands - */ - if (parser_keywords_is_block(command)) - { - struct block_info_t info = {current_tokenizer_pos, parser_get_block_type(command)}; - block_infos.push_back(info); - tok_next(&tok); - tok_set_pos(&tok, mark); - } - - /* - If parser_keywords_is_subcommand is true, the command - accepts a second command as it's first - argument. If parser_skip_arguments is true, the - second argument is optional. - */ - if (parser_keywords_is_subcommand(command) && !parser_keywords_skip_arguments(command)) - { - needs_cmd = true; - had_cmd = 0; - } - - if (contains(command, - L"or", - L"and")) - { - /* - 'or' and 'and' can not be used inside pipelines - */ - if (is_pipeline) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - EXEC_ERR_MSG); - - print_errors(*out, prefix); - - } - } - } - - /* - There are a lot of situations where pipelines - are forbidden, including when using the exec - builtin. 
- */ - if (parser_is_pipe_forbidden(command)) - { - if (is_pipeline) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - EXEC_ERR_MSG); - - print_errors(*out, prefix); - - } - } - forbid_pipeline = 1; - } - - /* - Test that the case builtin is only used directly in a switch block - */ - if (command == L"case") - { - if (block_infos.empty() || block_infos.back().type != SWITCH) - { - err=1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_CASE_ERR_MSG); - - print_errors(*out, prefix); - const wcstring h = builtin_help_get(*this, L"case"); - if (h.size()) - append_format(*out, L"%ls", h.c_str()); - } - } - } - - /* - Test that the return bultin is only used within function definitions - */ - if (command == L"return") - { - bool found_func = false; - size_t block_idx = block_infos.size(); - while (block_idx--) - { - if (block_infos.at(block_idx).type == FUNCTION_DEF) - { - found_func = true; - break; - } - } - - if (!found_func) - { - /* - Peek to see if the next argument is - --help, in which case we'll allow it to - show the help. 
- */ - - int old_pos = tok_get_pos(&tok); - int is_help = 0; - - tok_next(&tok); - if (tok_last_type(&tok) == TOK_STRING) - { - wcstring first_arg = tok_last(&tok); - if (expand_one(first_arg, EXPAND_SKIP_CMDSUBST) && parser_t::is_help(first_arg.c_str(), 3)) - { - is_help = 1; - } - } - - tok_set_pos(&tok, old_pos); - - if (!is_help) - { - err=1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_RETURN_ERR_MSG); - print_errors(*out, prefix); - } - } - } - } - - - /* - Test that break and continue are only used within loop blocks - */ - if (contains(command, L"break", L"continue")) - { - bool found_loop = false; - size_t block_idx = block_infos.size(); - while (block_idx--) - { - block_type_t type = block_infos.at(block_idx).type; - if (type == WHILE || type == FOR) - { - found_loop = true; - break; - } - } - - if (!found_loop) - { - /* - Peek to see if the next argument is - --help, in which case we'll allow it to - show the help. - */ - - int old_pos = tok_get_pos(&tok); - int is_help = 0; - - tok_next(&tok); - if (tok_last_type(&tok) == TOK_STRING) - { - wcstring first_arg = tok_last(&tok); - if (expand_one(first_arg, EXPAND_SKIP_CMDSUBST) && parser_t::is_help(first_arg.c_str(), 3)) - { - is_help = 1; - } - } - - tok_set_pos(&tok, old_pos); - - if (!is_help) - { - err=1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_LOOP_ERR_MSG); - print_errors(*out, prefix); - } - } - } - } - - /* - Test that else and else-if are only used directly in an if-block - */ - if (command == L"else") - { - if (block_infos.empty() || block_infos.back().type != IF) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_ELSE_ERR_MSG, - command.c_str()); - - print_errors(*out, prefix); - } - } - } - } - else - { - err |= parser_test_argument(tok_last(&tok), out, prefix, tok_get_pos(&tok)); - - /* If possible, keep track of number of supplied arguments */ - if (arg_count >= 0 && expand_is_clean(tok_last(&tok))) - { - 
arg_count++; - } - else - { - arg_count = -1; - } - - if (has_command) - { - - /* - Try to make sure the second argument to 'for' is 'in' - */ - if (command == L"for") - { - if (arg_count == 1) - { - - if (wcsvarname(tok_last(&tok))) - { - - err = 1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - BUILTIN_FOR_ERR_NAME, - L"for", - tok_last(&tok)); - - print_errors(*out, prefix); - } - } - - } - else if (arg_count == 2) - { - if (wcscmp(tok_last(&tok), L"in") != 0) - { - err = 1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - BUILTIN_FOR_ERR_IN, - L"for"); - - print_errors(*out, prefix); - } - } - } - } - else if (command == L"else") - { - if (arg_count == 1) - { - /* Any second argument must be "if" */ - if (wcscmp(tok_last(&tok), L"if") != 0) - { - err = 1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - BUILTIN_ELSEIF_ERR_ARGUMENT, - L"else"); - print_errors(*out, prefix); - } - } - else - { - /* Successfully detected "else if". Now we need a new command. 
*/ - needs_cmd = true; - had_cmd = false; - } - } - } - } - - } - - break; - } - - case TOK_REDIRECT_OUT: - case TOK_REDIRECT_IN: - case TOK_REDIRECT_APPEND: - case TOK_REDIRECT_FD: - case TOK_REDIRECT_NOCLOB: - { - if (!had_cmd) - { - err = 1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - INVALID_REDIRECTION_ERR_MSG); - print_errors(*out, prefix); - } - } - break; - } - - case TOK_END: - { - if (needs_cmd && !had_cmd) - { - err = 1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - CMD_ERR_MSG, - tok_get_desc(tok_last_type(&tok))); - print_errors(*out, prefix); - } - } - needs_cmd = false; - had_cmd = 0; - is_pipeline=0; - forbid_pipeline=0; - end_of_cmd = 1; - - break; - } - - case TOK_PIPE: - { - if (!had_cmd) - { - err=1; - if (out) - { - if (tok_get_pos(&tok)>0 && buff[tok_get_pos(&tok)-1] == L'|') - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - CMD_OR_ERR_MSG, - tok_get_desc(tok_last_type(&tok))); - - } - else - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - CMD_ERR_MSG, - tok_get_desc(tok_last_type(&tok))); - } - - print_errors(*out, prefix); - } - } - else if (forbid_pipeline) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - EXEC_ERR_MSG); - - print_errors(*out, prefix); - } - } - else - { - needs_cmd = true; - is_pipeline=1; - had_cmd=0; - end_of_cmd = 1; - - } - break; - } - - case TOK_BACKGROUND: - { - if (!had_cmd) - { - err = 1; - if (out) - { - if (tok_get_pos(&tok)>0 && buff[tok_get_pos(&tok)-1] == L'&') - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - CMD_AND_ERR_MSG, - tok_get_desc(tok_last_type(&tok))); - - } - else - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - CMD_ERR_MSG, - tok_get_desc(tok_last_type(&tok))); - } - - print_errors(*out, prefix); - } - } - - had_cmd = 0; - end_of_cmd = 1; - - break; - } - - case TOK_ERROR: - default: - if (tok_get_error(&tok) == TOK_UNTERMINATED_QUOTE) - { - unfinished = 1; - } - else - { - // Only print errors once - if (out && ! 
err) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - TOK_ERR_MSG, - tok_last(&tok)); - - - print_errors(*out, prefix); - } - err = 1; - } - - break; - } - - if (end_of_cmd) - { - if (has_command && command == L"for") - { - if (arg_count >= 0 && arg_count < 2) - { - /* - Not enough arguments to the for builtin - */ - err = 1; - - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - BUILTIN_FOR_ERR_COUNT, - L"for", - arg_count); - - print_errors(*out, prefix); - } - } - } - else if (has_command && command == L"else") - { - if (arg_count == 1) - { - /* If we have any arguments, we must have at least two...either "else" or "else if foo..." */ - err = true; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - BUILTIN_ELSEIF_ERR_COUNT, - L"else", - arg_count); - - print_errors(*out, prefix); - - } - } - } - - } - - if (!tok_has_next(&tok)) - break; - - } - - if (needs_cmd) - { - err=1; - if (out) - { - error(SYNTAX_ERROR, - tok_get_pos(&tok), - COND_ERR_MSG); - - print_errors(*out, prefix); - } - } - - - if (out != NULL && ! block_infos.empty()) - { - const wchar_t *cmd; - int bad_pos = block_infos.back().position; - block_type_t bad_type = block_infos.back().type; - - error(SYNTAX_ERROR, bad_pos, BLOCK_END_ERR_MSG); - - print_errors(*out, prefix); - - cmd = parser_get_block_command(bad_type); - if (cmd) - { - const wcstring h = builtin_help_get(*this, cmd); - if (h.size()) - { - append_format(*out, L"%ls", h.c_str()); - } - } - - - } - - /* - Calculate exit status - */ - if (! 
block_infos.empty()) - unfinished = 1; - - parser_test_error_bits_t res = 0; - - if (err) - res |= PARSER_TEST_ERROR; - - if (unfinished) - res |= PARSER_TEST_INCOMPLETE; - - /* - Cleanup - */ - - error_code=0; - - - return res; - -} - block_t::block_t(block_type_t t) : block_type(t), made_fake(false), diff --git a/parser.h b/parser.h index 90b6a1c43..cbd59bf56 100644 --- a/parser.h +++ b/parser.h @@ -482,7 +482,6 @@ public: \param out if non-null, any errors in the command will be filled out into this buffer \param prefix the prefix string to prepend to each error message written to the \c out buffer */ - parser_test_error_bits_t detect_errors2(const wchar_t *buff, wcstring *out_error_desc, const wchar_t *prefix); void get_backtrace(const wcstring &src, const parse_error_list_t &errors, wcstring *output) const; /** From 739e529416c3917d1e75d1a41850762a327e6ea9 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 20 Dec 2013 14:37:40 -0800 Subject: [PATCH 065/108] Initial flailing around trying to adopt new parser for actual execution --- parser.cpp | 442 ++++++++++++++++++++++++++++++++++++++++++++++++++++- parser.h | 11 ++ proc.cpp | 2 +- proc.h | 7 +- 4 files changed, 453 insertions(+), 9 deletions(-) diff --git a/parser.cpp b/parser.cpp index 0d47e5e13..cd43b41a4 100644 --- a/parser.cpp +++ b/parser.cpp @@ -1633,6 +1633,110 @@ void parser_t::parse_job_argument_list(process_t *p, } */ +#if 0 +process_t *parser_t::create_boolean_process(job_t *job, const parse_node_t &bool_statement, const parser_context_t &ctx) +{ + // Handle a boolean statement + bool skip_job = false; + assert(bool_statement.type == symbol_boolean_statement); + switch (specific_statement.production_idx) + { + // These magic numbers correspond to productions for boolean_statement + case 0: + // AND. Skip if the last job failed. + skip_job = (proc_get_last_status() != 0); + break; + + case 1: + // OR. Skip if the last job succeeded. 
+ skip_job = (proc_get_last_status() == 0); + break; + + case 2: + // NOT. Negate it. + job_set_flag(job, JOB_NEGATE, !job_get_flag(job, JOB_NEGATE)); + break; + + default: + { + fprintf(stderr, "Unexpected production in boolean statement\n"); + PARSER_DIE(); + break; + } + } + + process_t *result = NULL; + if (! skip_job) + { + const parse_node_t &subject = *ctx.tree.get_child(bool_statement, 1, symbol_statement); + result = this->create_job_process(job, subject, ctx); + } + return result; +} + +/* Returns a process_t allocated with new. It's the caller's responsibility to delete it (!) */ +process_t *parser_t::create_job_process(job_t *job, const parse_node_t &statement_node, const parser_context_t &ctx) +{ + assert(statement_node.type == symbol_statement); + assert(statement_node.child_count == 1); + + // We may skip this job entirely, e.g. with an 'and' statement + bool skip_job = false; + + // Get the "specific statement" which is boolean / block / if / switch / decorated + const parse_node_t &specific_statement = *ctx.tree.get_child(statement_node, 0); + + process_t *result = NULL; + + switch (specific_statement.type) + { + case symbol_boolean_statement: + { + result = this->create_boolean_process(job, specific_statement, ctx); + break; + } + + case symbol_block_statement: + { + const parse_node_t &header = *ctx.tree.get_child(specific_statement, 0, symbol_block_header); + const parse_node_t &specific_header = *ctx.tree.get_child(header, 0); + switch (specific_header.type) + { + case symbol_for_header: + result = this->create_for_process(job, specific_header, specific_statement, ctx); + break; + + case symbol_while_header: + result = this->create_while_process(job, specific_header, specific_statement, ctx); + break; + + case symbol_function_header: + // No process is associated with creating a function + // TODO: create the darn function! 
+ result = NULL; + break; + + case symbol_begin_header: + + break; + + default: + fprintf(stderr, "Unexpected header type\n"); + PARSER_DIE(); + break; + } + } + } + + // expand_one command + // handle booleans (and, not, or) + // set INTERNAL_EXEC + // implicit CD + + return proc; +} +#endif + /** Fully parse a single job. Does not call exec on it, but any command substitutions in the job will be executed. @@ -1642,9 +1746,7 @@ void parser_t::parse_job_argument_list(process_t *p, f \return 1 on success, 0 on error */ -int parser_t::parse_job(process_t *p, - job_t *j, - tokenizer_t *tok) +int parser_t::parse_job(process_t *p, job_t *j, tokenizer_t *tok) { std::vector args; // The list that will become the argv array for the program int use_function = 1; // May functions be considered when checking what action this command represents @@ -2335,6 +2437,206 @@ static bool job_should_skip_elseif(const job_t *job, const block_t *current_bloc } } +/** + Evaluates a job from a node tree. +*/ + +#if 0 +void parser_t::eval_job(const parse_node_t &job_node, const parser_context_t &ctx) +{ + assert(job_node.type == symbol_job); + this->job_start_pos = (int)job_node.source_start; + + // Get terminal modes + struct termios tmodes = {}; + if (get_is_interactive()) + { + if (tcgetattr(STDIN_FILENO, &tmodes)) + { + // need real error handling here + wperror(L"tcgetattr"); + return; + } + } + + /* Track whether we had an error */ + bool process_errored = false; + + /* Profiling support */ + long long t1 = 0, t2 = 0, t3 = 0; + const bool do_profile = profile; + profile_item_t *profile_item = NULL; + if (do_profile) + { + profile_item = new profile_item_t(); + profile_item->skipped = 1; + profile_items.push_back(profile_item); + t1 = get_time(); + } + + job_t *j = this->job_create(); + job_set_flag(j, JOB_FOREGROUND, 1); + job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL)); + job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL) \ + && (!is_subshell && !is_event)); + 
job_set_flag(j, JOB_SKIP_NOTIFICATION, is_subshell \ + || is_block \ + || is_event \ + || (!get_is_interactive())); + + current_block->job = j; + + /* Tell the job what its command is */ + j->set_command(job_node.get_source(ctx.src)); + + /* Construct process_t structures for every statement in the job */ + const parse_node_t *statement_node = ctx.tree.get_child(job_node, 0, symbol_statement); + assert(statement_node != NULL); + + /* Create the process (may fail!) */ + j->first_process = this->create_job_process(j, *statement_node, ctx); + if (j->first_process == NULL) + process_errored = true; + + /* Construct process_ts for job continuations (pipelines), by walking the list until we hit the terminal (empty) job continuationf */ + const parse_node_t *job_cont = ctx.tree.get_child(job_node, 1, symbol_job_continuation); + process_t *last_process = j->first_process; + while (! process_errored && job_cont != NULL && job_cont->child_count > 0) + { + assert(job_cont->type == symbol_job_continuation); + + /* Get the statement node and make a process from it */ + const parse_node_t *statement_node = ctx.tree.get_child(*job_cont, 1, symbol_statement); + assert(statement_node != NULL); + + /* Store the new process (and maybe with an error) */ + last_process->next = this->create_job_process(j, *statement_node, ctx); + if (last_process->next == NULL) + process_errored = true; + + /* Link the process and get the next continuation */ + last_process = last_process->next; + job_cont = ctx.tree.get_child(*job_cont, 2, symbol_job_continuation); + } + + bool skip = false; + if (this->parse_job(j->first_process, j, job_node, ctx) && j->first_process->get_argv()) + { + if (do_profile) + { + t2 = get_time(); + profile_item->cmd = j->command(); + profile_item->skipped=current_block->skip; + } + + /* If we're an ELSEIF, then we may want to unskip, if we're skipping because of an IF */ + if (job_get_flag(j, JOB_ELSEIF)) + { + bool skip_elseif = job_should_skip_elseif(j, current_block); + 
+ /* Record that we're entering an elseif */ + if (! skip_elseif) + { + /* We must be an IF block here */ + assert(current_block->type() == IF); + static_cast(current_block)->is_elseif_entry = true; + } + + /* Record that in the block too. This is similar to what builtin_else does. */ + current_block->skip = skip_elseif; + } + + skip = skip || current_block->skip; + skip = skip || job_get_flag(j, JOB_WILDCARD_ERROR); + skip = skip || job_get_flag(j, JOB_SKIP); + + if (!skip) + { + int was_builtin = 0; + if (j->first_process->type==INTERNAL_BUILTIN && !j->first_process->next) + was_builtin = 1; + scoped_push tokenizer_pos_push(¤t_tokenizer_pos, job_begin_pos); + exec_job(*this, j); + + /* Only external commands require a new fishd barrier */ + if (!was_builtin) + set_proc_had_barrier(false); + } + else + { + this->skipped_exec(j); + } + + if (do_profile) + { + t3 = get_time(); + profile_item->level=eval_level; + profile_item->parse = (int)(t2-t1); + profile_item->exec=(int)(t3-t2); + } + + if (current_block->type() == WHILE) + { + while_block_t *wb = static_cast(current_block); + switch (wb->status) + { + case WHILE_TEST_FIRST: + { + // PCA I added the 'wb->skip ||' part because we couldn't reliably + // control-C out of loops like this: while test 1 -eq 1; end + wb->skip = wb->skip || proc_get_last_status()!= 0; + wb->status = WHILE_TESTED; + } + break; + } + } + + if (current_block->type() == IF) + { + if_block_t *ib = static_cast(current_block); + + if (ib->skip) + { + /* Nothing */ + } + else if (! ib->if_expr_evaluated) + { + /* Execute the IF */ + bool if_result = (proc_get_last_status() == 0); + ib->any_branch_taken = if_result; + + /* Don't execute if the expression failed */ + current_block->skip = ! if_result; + ib->if_expr_evaluated = true; + } + else if (ib->is_elseif_entry && ! 
ib->any_branch_taken) + { + /* Maybe mark an ELSEIF branch as taken */ + bool elseif_taken = (proc_get_last_status() == 0); + ib->any_branch_taken = elseif_taken; + current_block->skip = ! elseif_taken; + ib->is_elseif_entry = false; + } + } + + } + else + { + /* + This job could not be properly parsed. We free it + instead, and set the status to 1. This should be + rare, since most errors should be detected by the + ahead of time validator. + */ + job_free(j); + + proc_set_last_status(1); + } + current_block->job = 0; + break; +} +#endif + /** Evaluates a job from the specified tokenizer. First calls parse_job to parse the job and then calls exec to execute it. @@ -2575,12 +2877,144 @@ void parser_t::eval_job(tokenizer_t *tok) } +#if 0 +int parser_t::eval2(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type) +{ + parser_context_t mut_ctx; + mut_ctx.src = cmd_str; + + /* Parse the tree */ + if (! parse_t::parse(cmd_str, parse_flag_none, &mut_ctx.tree, NULL)) + { + return 1; + } + + /* Make a const version for safety's sake */ + const parser_context_t &ctx = mut_ctx; + + CHECK_BLOCK(1); + + /* Record the current chain so we can put it back later */ + scoped_push block_io_push(&block_io, io); + scoped_push forbidden_function_push(&forbidden_function); + const size_t forbid_count = forbidden_function.size(); + const block_t *start_current_block = current_block; + + /* Do some stuff I haven't figured out yet */ + job_reap(0); + + /* Only certain blocks are allowed */ + if ((block_type != TOP) && + (block_type != SUBST)) + { + debug(1, + INVALID_SCOPE_ERR_MSG, + parser_t::get_block_desc(block_type)); + bugreport(); + return 1; + } + + eval_level++; + + this->push_block(new scope_block_t(block_type)); + + error_code = 0; + + event_fire(NULL); + + /* Execute the top job list */ + assert(! 
ctx.tree.empty()); + const parse_node_t *job_list = &ctx.tree.at(0); + assert(job_list->type == symbol_job_list); + while (job_list != NULL) + { + // These correspond to the three productions of job_list + // Try pulling out a job + const parse_node_t *job = NULL; + switch (job_list->production_idx) + { + case 0: // empty + job_list = NULL; + break; + + case 1: //job, job_list + job = ctx.tree.get_child(*job_list, 0, symbol_job); + job_list = ctx.tree.get_child(*job_list, 1, symbol_job_list); + break; + + case 2: //blank line, job_list + job = NULL; + job_list = ctx.tree.get_child(*job_list, 1, symbol_job_list); + break; + + default: //if we get here, it means more productions have been added to job_list, which is bad + PARSER_DIE(); + } + + if (job != NULL) + { + this->eval_job(*job, ctx); + } + } + + parser_t::pop_block(); + + while (start_current_block != current_block) + { + if (current_block == 0) + { + debug(0, + _(L"End of block mismatch. Program terminating.")); + bugreport(); + FATAL_EXIT(); + break; + } + + if ((!error_code) && (!exit_status()) && (!proc_get_last_status())) + { + + //debug( 2, L"Status %d\n", proc_get_last_status() ); + + debug(1, + L"%ls", parser_t::get_block_desc(current_block->type())); + debug(1, + BLOCK_END_ERR_MSG); + fwprintf(stderr, L"%ls", parser_t::current_line()); + + const wcstring h = builtin_help_get(*this, L"end"); + if (h.size()) + fwprintf(stderr, L"%ls", h.c_str()); + break; + + } + parser_t::pop_block(); + } + + this->print_errors_stderr(); + + while (forbidden_function.size() > forbid_count) + parser_t::allow_function(); + + /* + Restore previous eval state + */ + eval_level--; + + int code=error_code; + error_code=0; + + job_reap(0); + + return code; +} +#endif + int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type) { const wchar_t * const cmd = cmd_str.c_str(); size_t forbid_count; int code; - block_t *start_current_block = current_block; + const block_t *start_current_block 
= current_block; /* Record the current chain so we can put it back later */ scoped_push block_io_push(&block_io, io); diff --git a/parser.h b/parser.h index cbd59bf56..878b2b574 100644 --- a/parser.h +++ b/parser.h @@ -295,6 +295,12 @@ struct profile_item_t struct tokenizer_t; +struct parser_context_t +{ + parse_node_tree_t tree; + wcstring src; +}; + class parser_t { private: @@ -341,11 +347,15 @@ private: /* No copying allowed */ parser_t(const parser_t&); parser_t& operator=(const parser_t&); + + process_t *create_job_process(job_t *job, const parse_node_t &statement_node, const parser_context_t &ctx); + process_t *create_boolean_process(job_t *job, const parse_node_t &bool_statement, const parser_context_t &ctx); void parse_job_argument_list(process_t *p, job_t *j, tokenizer_t *tok, std::vector&, bool); int parse_job(process_t *p, job_t *j, tokenizer_t *tok); void skipped_exec(job_t * j); void eval_job(tokenizer_t *tok); + void eval_job(const parse_node_t &job_node, const parser_context_t &ctx); int parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset); void print_errors(wcstring &target, const wchar_t *prefix); void print_errors_stderr(); @@ -394,6 +404,7 @@ public: \return 0 on success, 1 otherwise */ int eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type_t block_type); + int eval2(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type); /** Evaluate line as a list of parameters, i.e. tokenize it and perform parameter expansion and cmdsubst execution on the tokens. 
diff --git a/proc.cpp b/proc.cpp index a774d3597..1405343eb 100644 --- a/proc.cpp +++ b/proc.cpp @@ -515,7 +515,7 @@ static void handle_child_status(pid_t pid, int status) process_t::process_t() : argv_array(), argv0_narrow(), - type(0), + type(), actual_cmd(), pid(0), pipe_write_fd(0), diff --git a/proc.h b/proc.h index a2062d7e3..a8f26a5e6 100644 --- a/proc.h +++ b/proc.h @@ -54,7 +54,7 @@ /** Types of processes */ -enum +enum process_type_t { /** A regular external command @@ -81,8 +81,7 @@ enum */ INTERNAL_BUFFER, -} -; +}; enum { @@ -151,7 +150,7 @@ public: INTERNAL_BUILTIN, \c INTERNAL_FUNCTION, \c INTERNAL_BLOCK, INTERNAL_EXEC, or INTERNAL_BUFFER */ - int type; + enum process_type_t type; /** Sets argv */ From ebc8bd6ff51762e7037a1e72feebc9ecdd0b0380 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 20 Dec 2013 17:45:49 -0800 Subject: [PATCH 066/108] Update the backtrace call to pass a block index instead of a block, fixing the build --- parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser.cpp b/parser.cpp index c93ae0713..31aff34f9 100644 --- a/parser.cpp +++ b/parser.cpp @@ -3380,7 +3380,7 @@ void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &erro output->append(err.describe(src, skip_caret)); output->push_back(L'\n'); - this->stack_trace(current_block, *output); + this->stack_trace(0, *output); } } From b6af3e51abd54963948d5523ce01459fadedebbf Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 23 Dec 2013 14:53:56 -0800 Subject: [PATCH 067/108] Very early work in adopting new parser for actual execution of commands. Not turned on yet. 
--- highlight.cpp | 2 +- parse_tree.cpp | 18 +- parse_tree.h | 6 +- parser.cpp | 576 ++++++++++++++++++++++++++++++++++--------------- parser.h | 40 ++-- tokenizer.cpp | 20 +- tokenizer.h | 7 +- 7 files changed, 461 insertions(+), 208 deletions(-) diff --git a/highlight.cpp b/highlight.cpp index 3c60150cd..e9923fb00 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -1862,7 +1862,7 @@ void highlighter_t::color_redirection(const parse_node_t &redirection_node) if (redirection_primitive != NULL) { wcstring target; - const enum token_type redirect_type = this->parse_tree.type_for_redirection(redirection_node, this->buff, &target); + const enum token_type redirect_type = this->parse_tree.type_for_redirection(redirection_node, this->buff, NULL, &target); /* We may get a TOK_NONE redirection type, e.g. if the redirection is invalid */ this->color_node(*redirection_primitive, redirect_type == TOK_NONE ? HIGHLIGHT_ERROR : HIGHLIGHT_REDIRECTION); diff --git a/parse_tree.cpp b/parse_tree.cpp index 81b6c1633..d970200b0 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1083,6 +1083,20 @@ const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, nod return result; } +const parse_node_t &parse_node_tree_t::find_child(const parse_node_t &parent, parse_token_type_t type) const +{ + for (size_t i=0; i < parent.child_count; i++) + { + const parse_node_t *child = this->get_child(parent, i); + if (child->type == type) + { + return *child; + } + } + PARSE_ASSERT(0); + return *(parse_node_t *)(NULL); //unreachable +} + const parse_node_t *parse_node_tree_t::get_parent(const parse_node_t &node, parse_token_type_t expected_type) const { const parse_node_t *result = NULL; @@ -1277,7 +1291,7 @@ bool parse_node_tree_t::plain_statement_is_in_pipeline(const parse_node_t &node, return result; } -enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, wcstring *out_target) const +enum token_type 
parse_node_tree_t::type_for_redirection(const parse_node_t &redirection_node, const wcstring &src, int *out_fd, wcstring *out_target) const { assert(redirection_node.type == symbol_redirection); enum token_type result = TOK_NONE; @@ -1286,7 +1300,7 @@ enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redi if (redirection_primitive != NULL && redirection_primitive->has_source()) { - result = redirection_type_for_string(redirection_primitive->get_source(src)); + result = redirection_type_for_string(redirection_primitive->get_source(src), out_fd); } if (out_target != NULL) { diff --git a/parse_tree.h b/parse_tree.h index bdc622d93..87e3ecec8 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -166,6 +166,10 @@ public: */ const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; + /* Find the first direct child of the given node of the given type. asserts on failure + */ + const parse_node_t &find_child(const parse_node_t &parent, parse_token_type_t type) const; + /* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. 
*/ const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; @@ -197,7 +201,7 @@ public: bool plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const; /* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */ - enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, wcstring *out_target) const; + enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, wcstring *out_target) const; /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */ const parse_node_t *header_node_for_block_statement(const parse_node_t &node); diff --git a/parser.cpp b/parser.cpp index 31aff34f9..2a34f0dc3 100644 --- a/parser.cpp +++ b/parser.cpp @@ -1648,13 +1648,12 @@ void parser_t::parse_job_argument_list(process_t *p, } */ -#if 0 process_t *parser_t::create_boolean_process(job_t *job, const parse_node_t &bool_statement, const parser_context_t &ctx) { // Handle a boolean statement bool skip_job = false; assert(bool_statement.type == symbol_boolean_statement); - switch (specific_statement.production_idx) + switch (bool_statement.production_idx) { // These magic numbers correspond to productions for boolean_statement case 0: @@ -1689,15 +1688,280 @@ process_t *parser_t::create_boolean_process(job_t *job, const parse_node_t &bool return result; } +process_t *parser_t::create_for_process(job_t *job, const parse_node_t &header, const parse_node_t &statement, const parser_context_t &ctx) +{ + return NULL; +} + +process_t *parser_t::create_while_process(job_t *job, const parse_node_t &header, const parse_node_t &statement, const parser_context_t &ctx) +{ + return NULL; +} + +process_t *parser_t::create_begin_process(job_t *job, const parse_node_t &header, const parse_node_t &statement, const parser_context_t 
&ctx) +{ + return NULL; +} + +process_t *parser_t::create_plain_process(job_t *job, const parse_node_t &statement, const parser_context_t &ctx) +{ + /* Get the decoration */ + assert(statement.type == symbol_plain_statement); + + /* Get the command. We expect to always get it here. */ + wcstring cmd; + bool got_cmd = ctx.tree.command_for_plain_statement(statement, ctx.src, &cmd); + assert(got_cmd); + + /* Expand it as a command */ + bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); + if (! expanded) + { + error(SYNTAX_ERROR, + statement.source_start, + ILLEGAL_CMD_ERR_MSG, + cmd.c_str()); + return 0; + } + + /* The list of arguments. The command is the first argument. TODO: count hack */ + const parse_node_t *unmatched_wildcard = NULL; + wcstring_list_t argument_list = this->determine_arguments(statement, &unmatched_wildcard, ctx); + argument_list.insert(argument_list.begin(), cmd); + + /* We were not able to expand any wildcards. Here is the first one that failed */ + if (unmatched_wildcard != NULL) + { + job_set_flag(job, JOB_WILDCARD_ERROR, 1); + proc_set_last_status(STATUS_UNMATCHED_WILDCARD); + error(EVAL_ERROR, unmatched_wildcard->source_start, WILDCARD_ERR_MSG, unmatched_wildcard->get_source(ctx.src).c_str()); + } + + /* The set of IO redirections that we construct for the process */ + const io_chain_t process_io_chain = this->determine_io_chain(statement, ctx); + + /* Determine the process type, which depends on the statement decoration (command, builtin, etc) */ + enum parse_statement_decoration_t decoration = ctx.tree.decoration_for_plain_statement(statement); + enum process_type_t process_type = EXTERNAL; + + /* exec hack */ + if (decoration != parse_statement_decoration_command && cmd == L"exec") + { + /* Either 'builtin exec' or just plain 'exec', and definitely not 'command exec'. Note we don't allow overriding exec with a function. 
*/ + process_type = INTERNAL_EXEC; + } + else if (decoration == parse_statement_decoration_command) + { + /* Always a command */ + process_type = EXTERNAL; + } + else if (decoration == parse_statement_decoration_builtin) + { + /* What happens if this builtin is not valid? */ + process_type = INTERNAL_BUILTIN; + } + else if (function_exists(cmd)) + { + process_type = INTERNAL_FUNCTION; + } + else if (builtin_exists(cmd)) + { + process_type = INTERNAL_BUILTIN; + } + else + { + process_type = EXTERNAL; + } + + wcstring actual_cmd; + if (process_type == EXTERNAL) + { + /* Determine the actual command. Need to support implicit cd here */ + bool has_command = path_get_path(cmd, &actual_cmd); + + if (! has_command) + { + /* TODO: support fish_command_not_found, implicit cd, etc. here */ + } + + } + + /* Return the process */ + process_t *result = new process_t(); + result->type = process_type; + result->set_argv(argument_list); + result->set_io_chain(process_io_chain); + result->actual_cmd = actual_cmd; + return result; +} + +/* Determine the list of arguments, expanding stuff. If we have a wildcard and none could be expanded, return the unexpandable wildcard node by reference. 
*/ +wcstring_list_t parser_t::determine_arguments(const parse_node_t &statement, const parse_node_t **out_unmatched_wildcard_node, const parser_context_t &ctx) +{ + wcstring_list_t argument_list; + + /* Whether we failed to match any wildcards, and succeeded in matching any wildcards */ + bool unmatched_wildcard = false, matched_wildcard = false; + + /* First node that failed to expand as a wildcard (if any) */ + const parse_node_t *unmatched_wildcard_node = NULL; + + /* Get all argument nodes underneath the statement */ + const parse_node_tree_t::parse_node_list_t argument_nodes = ctx.tree.find_nodes(statement, symbol_argument); + argument_list.reserve(argument_nodes.size()); + for (size_t i=0; i < argument_nodes.size(); i++) + { + const parse_node_t &arg_node = *argument_nodes.at(i); + + /* Expect all arguments to have source */ + assert(arg_node.has_source()); + const wcstring arg_str = arg_node.get_source(ctx.src); + + /* Expand this string */ + std::vector arg_expanded; + int expand_ret = expand_string(arg_str, arg_expanded, 0); + switch (expand_ret) + { + case EXPAND_ERROR: + { + error(SYNTAX_ERROR, + arg_node.source_start, + _(L"Could not expand string '%ls'"), + arg_str.c_str()); + break; + } + + case EXPAND_WILDCARD_NO_MATCH: + { + /* Store the node that failed to expand */ + unmatched_wildcard = true; + if (! unmatched_wildcard_node) + { + unmatched_wildcard_node = &arg_node; + } + break; + } + + case EXPAND_WILDCARD_MATCH: + { + matched_wildcard = true; + break; + } + + case EXPAND_OK: + { + break; + } + } + + /* Now copy over any expanded arguments */ + for (size_t i=0; i < arg_expanded.size(); i++) + { + argument_list.push_back(arg_expanded.at(i).completion); + } + } + + /* Return if we had a wildcard problem */ + if (unmatched_wildcard && ! 
matched_wildcard) + { + *out_unmatched_wildcard_node = unmatched_wildcard_node; + } + + return argument_list; +} + +io_chain_t parser_t::determine_io_chain(const parse_node_t &statement,const parser_context_t &ctx) +{ + io_chain_t result; + + /* Get all redirection nodes underneath the statement */ + const parse_node_tree_t::parse_node_list_t redirect_nodes = ctx.tree.find_nodes(statement, symbol_redirection); + for (size_t i=0; i < redirect_nodes.size(); i++) + { + const parse_node_t &redirect_node = *redirect_nodes.at(i); + + int source_fd = -1; /* source fd */ + wcstring target; /* file path or target fd */ + enum token_type redirect_type = ctx.tree.type_for_redirection(redirect_node, ctx.src, &source_fd, &target); + + /* PCA: I can't justify this EXPAND_SKIP_VARIABLES flag. It was like this when I got here. */ + bool target_expanded = expand_one(target, no_exec ? EXPAND_SKIP_VARIABLES : 0); + if (! target_expanded || target.empty()) + { + /* Should improve this error message */ + error(SYNTAX_ERROR, + redirect_node.source_start, + _(L"Invalid redirection target: %ls"), + target.c_str()); + } + + + /* Generate the actual IO redirection */ + shared_ptr new_io; + assert(redirect_type != TOK_NONE); + switch (redirect_type) + { + case TOK_REDIRECT_FD: + { + if (target == L"-") + { + new_io.reset(new io_close_t(source_fd)); + } + else + { + wchar_t *end = NULL; + errno = 0; + int old_fd = fish_wcstoi(target.c_str(), &end, 10); + if (old_fd < 0 || errno || *end) + { + error(SYNTAX_ERROR, + redirect_node.source_start, + _(L"Requested redirection to something that is not a file descriptor %ls"), + target.c_str()); + } + else + { + new_io.reset(new io_fd_t(source_fd, old_fd)); + } + } + break; + } + + case TOK_REDIRECT_OUT: + case TOK_REDIRECT_APPEND: + case TOK_REDIRECT_IN: + case TOK_REDIRECT_NOCLOB: + { + int oflags = oflags_for_redirection_type(redirect_type); + io_file_t *new_io_file = new io_file_t(source_fd, target, oflags); + new_io.reset(new_io_file); + break; + 
} + + default: + { + // Should be unreachable + fprintf(stderr, "Unexpected redirection type %ld. aborting.\n", (long)redirect_type); + PARSER_DIE(); + break; + } + } + + /* Append the new_io if we got one */ + if (new_io.get() != NULL) + { + result.push_back(new_io); + } + } + return result; +} + /* Returns a process_t allocated with new. It's the caller's responsibility to delete it (!) */ process_t *parser_t::create_job_process(job_t *job, const parse_node_t &statement_node, const parser_context_t &ctx) { assert(statement_node.type == symbol_statement); assert(statement_node.child_count == 1); - // We may skip this job entirely, e.g. with an 'and' statement - bool skip_job = false; - // Get the "specific statement" which is boolean / block / if / switch / decorated const parse_node_t &specific_statement = *ctx.tree.get_child(statement_node, 0); @@ -1732,7 +1996,7 @@ process_t *parser_t::create_job_process(job_t *job, const parse_node_t &statemen break; case symbol_begin_header: - + result = this->create_begin_process(job, specific_header, specific_statement, ctx); break; default: @@ -1740,17 +2004,22 @@ process_t *parser_t::create_job_process(job_t *job, const parse_node_t &statemen PARSER_DIE(); break; } + break; } + + case symbol_decorated_statement: + { + const parse_node_t &plain_statement = ctx.tree.find_child(specific_statement, symbol_plain_statement); + result = this->create_plain_process(job, plain_statement, ctx); + break; + } + + default: + fprintf(stderr, "'%ls' not handled by new parser yet\n", specific_statement.describe().c_str()); } - // expand_one command - // handle booleans (and, not, or) - // set INTERNAL_EXEC - // implicit CD - - return proc; + return result; } -#endif /** Fully parse a single job. Does not call exec on it, but any command substitutions in the job will be executed. @@ -2456,7 +2725,6 @@ static bool job_should_skip_elseif(const job_t *job, const block_t *current_bloc Evaluates a job from a node tree. 
*/ -#if 0 void parser_t::eval_job(const parse_node_t &job_node, const parser_context_t &ctx) { assert(job_node.type == symbol_job); @@ -2499,7 +2767,7 @@ void parser_t::eval_job(const parse_node_t &job_node, const parser_context_t &ct || is_event \ || (!get_is_interactive())); - current_block->job = j; + current_block()->job = j; /* Tell the job what its command is */ j->set_command(job_node.get_source(ctx.src)); @@ -2533,124 +2801,7 @@ void parser_t::eval_job(const parse_node_t &job_node, const parser_context_t &ct last_process = last_process->next; job_cont = ctx.tree.get_child(*job_cont, 2, symbol_job_continuation); } - - bool skip = false; - if (this->parse_job(j->first_process, j, job_node, ctx) && j->first_process->get_argv()) - { - if (do_profile) - { - t2 = get_time(); - profile_item->cmd = j->command(); - profile_item->skipped=current_block->skip; - } - - /* If we're an ELSEIF, then we may want to unskip, if we're skipping because of an IF */ - if (job_get_flag(j, JOB_ELSEIF)) - { - bool skip_elseif = job_should_skip_elseif(j, current_block); - - /* Record that we're entering an elseif */ - if (! skip_elseif) - { - /* We must be an IF block here */ - assert(current_block->type() == IF); - static_cast(current_block)->is_elseif_entry = true; - } - - /* Record that in the block too. This is similar to what builtin_else does. 
*/ - current_block->skip = skip_elseif; - } - - skip = skip || current_block->skip; - skip = skip || job_get_flag(j, JOB_WILDCARD_ERROR); - skip = skip || job_get_flag(j, JOB_SKIP); - - if (!skip) - { - int was_builtin = 0; - if (j->first_process->type==INTERNAL_BUILTIN && !j->first_process->next) - was_builtin = 1; - scoped_push tokenizer_pos_push(&current_tokenizer_pos, job_begin_pos); - exec_job(*this, j); - - /* Only external commands require a new fishd barrier */ - if (!was_builtin) - set_proc_had_barrier(false); - } - else - { - this->skipped_exec(j); - } - - if (do_profile) - { - t3 = get_time(); - profile_item->level=eval_level; - profile_item->parse = (int)(t2-t1); - profile_item->exec=(int)(t3-t2); - } - - if (current_block->type() == WHILE) - { - while_block_t *wb = static_cast(current_block); - switch (wb->status) - { - case WHILE_TEST_FIRST: - { - // PCA I added the 'wb->skip ||' part because we couldn't reliably - // control-C out of loops like this: while test 1 -eq 1; end - wb->skip = wb->skip || proc_get_last_status()!= 0; - wb->status = WHILE_TESTED; - } - break; - } - } - - if (current_block->type() == IF) - { - if_block_t *ib = static_cast(current_block); - - if (ib->skip) - { - /* Nothing */ - } - else if (! ib->if_expr_evaluated) - { - /* Execute the IF */ - bool if_result = (proc_get_last_status() == 0); - ib->any_branch_taken = if_result; - - /* Don't execute if the expression failed */ - current_block->skip = ! if_result; - ib->if_expr_evaluated = true; - } - else if (ib->is_elseif_entry && ! ib->any_branch_taken) - { - /* Maybe mark an ELSEIF branch as taken */ - bool elseif_taken = (proc_get_last_status() == 0); - ib->any_branch_taken = elseif_taken; - current_block->skip = ! elseif_taken; - ib->is_elseif_entry = false; - } - } - - } - else - { - /* - This job could not be properly parsed. We free it - instead, and set the status to 1. This should be - rare, since most errors should be detected by the - ahead of time validator. 
- */ - job_free(j); - - proc_set_last_status(1); - } - current_block->job = 0; - break; } -#endif /** Evaluates a job from the specified tokenizer. First calls @@ -2889,57 +3040,85 @@ void parser_t::eval_job(tokenizer_t *tok) } job_reap(0); - } -#if 0 -int parser_t::eval2(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type) +static void push_all_children(std::vector *execution_stack, const parse_node_t &node) { - parser_context_t mut_ctx; - mut_ctx.src = cmd_str; - - /* Parse the tree */ - if (! parse_t::parse(cmd_str, parse_flag_none, &mut_ctx.tree, NULL)) + // push nodes in reverse order, so the first node ends up on top + unsigned child_idx = node.child_count; + while (child_idx--) { - return 1; + execution_stack->push_back(node.child_offset(child_idx)); } - - /* Make a const version for safety's sake */ - const parser_context_t &ctx = mut_ctx; +} - CHECK_BLOCK(1); +void parser_t::execute_next(std::vector *execution_stack, const parser_context_t &ctx) +{ + assert(execution_stack != NULL); + assert(! 
execution_stack->empty()); - /* Record the current chain so we can put it back later */ - scoped_push block_io_push(&block_io, io); - scoped_push forbidden_function_push(&forbidden_function); - const size_t forbid_count = forbidden_function.size(); - const block_t *start_current_block = current_block; + /* Get the offset of the next node and remove it from the stack */ + node_offset_t next_offset = execution_stack->back(); + execution_stack->pop_back(); - /* Do some stuff I haven't figured out yet */ - job_reap(0); + /* Get the node */ + assert(next_offset < ctx.tree.size()); + const parse_node_t &node = ctx.tree.at(next_offset); - /* Only certain blocks are allowed */ - if ((block_type != TOP) && - (block_type != SUBST)) + /* Do something with it */ + switch (node.type) { - debug(1, - INVALID_SCOPE_ERR_MSG, - parser_t::get_block_desc(block_type)); - bugreport(); - return 1; + case symbol_job_list: + // These correspond to the three productions of job_list + switch (node.production_idx) + { + case 0: // empty + break; + + case 1: //job, job_list + push_all_children(execution_stack, node); + break; + + case 2: //blank line, job_list + execution_stack->push_back(node.child_offset(1)); + break; + + default: //if we get here, it means more productions have been added to job_list, which is bad + PARSER_DIE(); + break; + } + break; + + case symbol_job: //statement, job_continuation + push_all_children(execution_stack, node); + break; + + case symbol_job_continuation: + switch (node.production_idx) + { + case 0: //empty + break; + + case 1: //pipe, statement, job_continuation + execution_stack->push_back(node.child_offset(2)); + execution_stack->push_back(node.child_offset(1)); + break; + + default: + PARSER_DIE(); + break; + } + break; + } +} - eval_level++; - - this->push_block(new scope_block_t(block_type)); - - error_code = 0; - - event_fire(NULL); - - /* Execute the top job list */ - assert(! 
ctx.tree.empty()); - const parse_node_t *job_list = &ctx.tree.at(0); +/* Executes the job list at the given node offset */ +void parser_t::execute_job_list(node_offset_t idx, const parser_context_t &ctx) +{ + assert(idx < ctx.tree.size()); + + const parse_node_t *job_list = &ctx.tree.at(idx); assert(job_list->type == symbol_job_list); while (job_list != NULL) { @@ -2971,12 +3150,60 @@ int parser_t::eval2(const wcstring &cmd_str, const io_chain_t &io, enum block_ty this->eval_job(*job, ctx); } } +} + +int parser_t::eval2(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type) +{ + parser_context_t mut_ctx; + mut_ctx.src = cmd_str; + + /* Parse the tree */ + if (! parse_t::parse(cmd_str, parse_flag_none, &mut_ctx.tree, NULL)) + { + return 1; + } + + /* Make a const version for safety's sake */ + const parser_context_t &ctx = mut_ctx; + + CHECK_BLOCK(1); + + /* Record the current chain so we can put it back later */ + scoped_push block_io_push(&block_io, io); + scoped_push forbidden_function_push(&forbidden_function); + const size_t forbid_count = forbidden_function.size(); + const block_t * const start_current_block = this->current_block(); + + /* Do some stuff I haven't figured out yet */ + job_reap(0); + + /* Only certain blocks are allowed */ + if ((block_type != TOP) && + (block_type != SUBST)) + { + debug(1, + INVALID_SCOPE_ERR_MSG, + parser_t::get_block_desc(block_type)); + bugreport(); + return 1; + } + + eval_level++; + + this->push_block(new scope_block_t(block_type)); + + error_code = 0; + + event_fire(NULL); + + /* Execute the top level job list */ + execute_job_list(0, ctx); parser_t::pop_block(); - while (start_current_block != current_block) + while (start_current_block != this->current_block()) { - if (current_block == 0) + if (this->current_block() == NULL) { debug(0, _(L"End of block mismatch. 
Program terminating.")); @@ -2991,7 +3218,7 @@ int parser_t::eval2(const wcstring &cmd_str, const io_chain_t &io, enum block_ty //debug( 2, L"Status %d\n", proc_get_last_status() ); debug(1, - L"%ls", parser_t::get_block_desc(current_block->type())); + L"%ls", parser_t::get_block_desc(current_block()->type())); debug(1, BLOCK_END_ERR_MSG); fwprintf(stderr, L"%ls", parser_t::current_line()); @@ -3022,7 +3249,6 @@ int parser_t::eval2(const wcstring &cmd_str, const io_chain_t &io, enum block_ty return code; } -#endif int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type) { diff --git a/parser.h b/parser.h index 8ee2fb82a..fab73acfd 100644 --- a/parser.h +++ b/parser.h @@ -96,37 +96,16 @@ public: bool had_command; /**< Set to non-zero once a command has been executed in this block */ int tok_pos; /**< The start index of the block */ - /** - Status for the current loop block. Can be any of the values from the loop_status enum. - */ + /** Status for the current loop block. Can be any of the values from the loop_status enum. */ int loop_status; - /** - The job that is currently evaluated in the specified block. - */ + /** The job that is currently evaluated in the specified block. 
*/ job_t *job; -#if 0 - union - { - int while_state; /**< True if the loop condition has not yet been evaluated*/ - wchar_t *for_variable; /**< Name of the variable to loop over */ - int if_state; /**< The state of the if block, can be one of IF_STATE_UNTESTED, IF_STATE_FALSE, IF_STATE_TRUE */ - wchar_t *switch_value; /**< The value to test in a switch block */ - const wchar_t *source_dest; /**< The name of the file to source*/ - event_t *event; /**&, bool); int parse_job(process_t *p, job_t *j, tokenizer_t *tok); @@ -399,6 +386,9 @@ public: */ int eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type_t block_type); int eval2(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type); + + void execute_job_list(node_offset_t idx, const parser_context_t &ctx); + void execute_next(std::vector *execution_stack, const parser_context_t &ctx); /** Evaluate line as a list of parameters, i.e. tokenize it and perform parameter expansion and cmdsubst execution on the tokens. diff --git a/tokenizer.cpp b/tokenizer.cpp index 0705e620a..0b0032836 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -14,7 +14,7 @@ segments. 
#include #include #include - +#include #include "fallback.h" #include "util.h" @@ -522,7 +522,7 @@ static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type * return idx; } -enum token_type redirection_type_for_string(const wcstring &str) +enum token_type redirection_type_for_string(const wcstring &str, int *out_fd) { enum token_type mode = TOK_NONE; int fd = 0; @@ -530,9 +530,25 @@ enum token_type redirection_type_for_string(const wcstring &str) /* Redirections only, no pipes */ if (mode == TOK_PIPE || fd < 0) mode = TOK_NONE; + if (out_fd != NULL) + *out_fd = fd; return mode; } +int oflags_for_redirection_type(enum token_type type) +{ + switch (type) + { + case TOK_REDIRECT_APPEND: return O_CREAT | O_APPEND | O_WRONLY; + case TOK_REDIRECT_OUT: return O_CREAT | O_WRONLY | O_TRUNC; + case TOK_REDIRECT_NOCLOB: return O_CREAT | O_EXCL | O_WRONLY; + case TOK_REDIRECT_IN: return O_RDONLY; + + default: + return -1; + } +} + wchar_t tok_last_quote(tokenizer_t *tok) { CHECK(tok, 0); diff --git a/tokenizer.h b/tokenizer.h index 8e130f0e7..17b1bcb96 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -187,8 +187,11 @@ const wchar_t *tok_get_desc(int type); */ int tok_get_error(tokenizer_t *tok); -/* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid */ -enum token_type redirection_type_for_string(const wcstring &str); +/* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid. Also returns the fd by reference. 
*/ +enum token_type redirection_type_for_string(const wcstring &str, int *out_fd = NULL); + +/* Helper function to return oflags (as in open(2)) for a redirection type */ +int oflags_for_redirection_type(enum token_type type); enum move_word_style_t { From 5b1a53265233f837e272aa0936267e04c8848856 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 24 Dec 2013 13:17:24 -0800 Subject: [PATCH 068/108] Factor execution aspects from parser_t to parse_execution_context_t . Still searching for best way to use new parser for execution. --- fish.xcodeproj/project.pbxproj | 8 +- parse_execution.cpp | 601 +++++++++++++++++++++++++++++++++ parse_execution.h | 76 +++++ parse_productions.cpp | 4 +- parse_tree.h | 4 +- parser.cpp | 3 +- parser.h | 3 + 7 files changed, 693 insertions(+), 6 deletions(-) create mode 100644 parse_execution.cpp create mode 100644 parse_execution.h diff --git a/fish.xcodeproj/project.pbxproj b/fish.xcodeproj/project.pbxproj index aa7f9c18a..ceb694ee7 100644 --- a/fish.xcodeproj/project.pbxproj +++ b/fish.xcodeproj/project.pbxproj @@ -65,6 +65,7 @@ D033781115DC6D4C00A634BA /* completions in CopyFiles */ = {isa = PBXBuildFile; fileRef = D025C02715D1FEA100B9DB63 /* completions */; }; D033781215DC6D5200A634BA /* functions in CopyFiles */ = {isa = PBXBuildFile; fileRef = D025C02815D1FEA100B9DB63 /* functions */; }; D033781315DC6D5400A634BA /* tools in CopyFiles */ = {isa = PBXBuildFile; fileRef = D025C02915D1FEA100B9DB63 /* tools */; }; + D052D80B1868F7FC003ABCBD /* parse_execution.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D052D8091868F7FC003ABCBD /* parse_execution.cpp */; }; D07B247315BCC15700D4ADB4 /* add-shell in Resources */ = {isa = PBXBuildFile; fileRef = D07B247215BCC15700D4ADB4 /* add-shell */; }; D07B247615BCC4BE00D4ADB4 /* install.sh in Resources */ = {isa = PBXBuildFile; fileRef = D07B247515BCC4BE00D4ADB4 /* install.sh */; }; D07D266A15E33B86009E43F6 /* config.fish in CopyFiles */ = {isa = PBXBuildFile; fileRef = 
D0C4FD9415A7D7EE00212EF1 /* config.fish */; }; @@ -386,6 +387,8 @@ D025C02915D1FEA100B9DB63 /* tools */ = {isa = PBXFileReference; lastKnownFileType = folder; name = tools; path = share/tools; sourceTree = ""; }; D031890915E36D9800D9CC39 /* base */ = {isa = PBXFileReference; lastKnownFileType = text; path = base; sourceTree = BUILT_PRODUCTS_DIR; }; D03EE83814DF88B200FC7150 /* lru.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = lru.h; sourceTree = ""; }; + D052D8091868F7FC003ABCBD /* parse_execution.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_execution.cpp; sourceTree = ""; }; + D052D80A1868F7FC003ABCBD /* parse_execution.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_execution.h; sourceTree = ""; }; D07B247215BCC15700D4ADB4 /* add-shell */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = "add-shell"; path = "build_tools/osx_package_scripts/add-shell"; sourceTree = ""; }; D07B247515BCC4BE00D4ADB4 /* install.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = install.sh; path = osx/install.sh; sourceTree = ""; }; D0879AC616BF9A1A00E98E56 /* fish_term_icon.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; name = fish_term_icon.icns; path = osx/fish_term_icon.icns; sourceTree = ""; }; @@ -658,11 +661,13 @@ D0A0853C13B3ACEE0099B651 /* exec.cpp */, D0A0850C13B3ACEE0099B651 /* expand.h */, D0A0853D13B3ACEE0099B651 /* expand.cpp */, + D0D9B2B318555D92001AE279 /* parse_constants.h */, D0FE8EE6179CA8A5008C9F21 /* parse_productions.h */, D0FE8EE7179FB75F008C9F21 /* parse_productions.cpp */, - D0D9B2B318555D92001AE279 /* parse_constants.h */, D0C52F361765284C00BFAB82 /* parse_tree.h */, D0C52F351765284C00BFAB82 /* parse_tree.cpp */, + D052D80A1868F7FC003ABCBD /* parse_execution.h */, + D052D8091868F7FC003ABCBD /* parse_execution.cpp */, 
D0A0850D13B3ACEE0099B651 /* fallback.h */, D0A0853E13B3ACEE0099B651 /* fallback.cpp */, D0A0850E13B3ACEE0099B651 /* function.h */, @@ -1222,6 +1227,7 @@ D0D02A83159839D5008E62BD /* iothread.cpp in Sources */, D0D02A84159839D5008E62BD /* parse_util.cpp in Sources */, D0D02A85159839D5008E62BD /* path.cpp in Sources */, + D052D80B1868F7FC003ABCBD /* parse_execution.cpp in Sources */, D0D02A86159839D5008E62BD /* postfork.cpp in Sources */, D0D02A87159839D5008E62BD /* screen.cpp in Sources */, D0D02A88159839D5008E62BD /* signal.cpp in Sources */, diff --git a/parse_execution.cpp b/parse_execution.cpp new file mode 100644 index 000000000..9bcce5bef --- /dev/null +++ b/parse_execution.cpp @@ -0,0 +1,601 @@ +/**\file parse_execution.cpp + + Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.). + +*/ + +#include "parse_execution.h" +#include "complete.h" +#include "builtin.h" +#include "parser.h" +#include "expand.h" +#include "wutil.h" +#include "path.h" + + +parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &t, const wcstring s, parser_t *p) : tree(t), src(s), parser(p) +{ +} + +/* Utilities */ + +wcstring parse_execution_context_t::get_source(const parse_node_t &node) const +{ + return node.get_source(this->src); +} + +const parse_node_t *parse_execution_context_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) +{ + return this->tree.get_child(parent, which, expected_type); +} + +node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) const +{ + /* Pointer arithmetic, very hackish */ + const parse_node_t *addr = &node; + const parse_node_t *base = &this->tree.at(0); + assert(addr >= base); + node_offset_t offset = addr - base; + assert(offset < this->tree.size()); + return offset; +} + +/* Stack manipulation */ + +void parse_execution_context_t::stack_push(const parse_node_t *job_or_job_list, statement_completion_handler_t 
completion_handler, const parse_node_t *node) +{ + const struct parse_execution_stack_element_t elem = {job_or_job_list, completion_handler, node}; + job_stack.push_back(elem); +} + +process_t *parse_execution_context_t::create_for_process(job_t *job, const parse_node_t &header, const parse_node_t &statement) +{ + assert(header.type == symbol_for_header); + const wcstring for_variable = get_source(*get_child(header, 1, parse_token_type_string)); + const parse_node_t &arg_list = *get_child(header, 3, symbol_argument_list); + + for_block_t *fb = new for_block_t(for_variable); + fb->sequence = this->determine_arguments(arg_list, NULL); + fb->node_offset = this->get_offset(statement); + parser->push_block(fb); + return NULL; +} + +process_t *parse_execution_context_t::create_while_process(job_t *job, const parse_node_t &header, const parse_node_t &statement) +{ + assert(header.type == symbol_while_header); + while_block_t *wb = new while_block_t(); + wb->status = WHILE_TEST_FIRST; + wb->node_offset = this->get_offset(statement); + parser->push_block(wb); + return NULL; +} + +process_t *parse_execution_context_t::create_begin_process(job_t *job, const parse_node_t &header, const parse_node_t &statement) +{ + assert(header.type == symbol_begin_header); + scope_block_t *bb = new scope_block_t(BEGIN); + parser->push_block(bb); + return NULL; +} + +bool parse_execution_context_t::append_error(const parse_node_t &node, const wchar_t *fmt, ...) +{ + parse_error_t error; + error.source_start = node.source_start; + error.source_length = node.source_length; + error.code = parse_error_syntax; //hackish + + va_list va; + va_start(va, fmt); + error.text = vformat_string(fmt, va); + va_end(va); + + this->errors.push_back(error); + return true; +} + +process_t *parse_execution_context_t::create_plain_process(job_t *job, const parse_node_t &statement) +{ + /* Get the decoration */ + assert(statement.type == symbol_plain_statement); + + /* Get the command. 
We expect to always get it here. */ + wcstring cmd; + bool got_cmd = tree.command_for_plain_statement(statement, src, &cmd); + assert(got_cmd); + + /* Expand it as a command */ + bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); + if (! expanded) + { + append_error(statement, ILLEGAL_CMD_ERR_MSG, cmd.c_str()); + return 0; + } + + /* The list of arguments. The command is the first argument. TODO: count hack */ + const parse_node_t *unmatched_wildcard = NULL; + wcstring_list_t argument_list = this->determine_arguments(statement, &unmatched_wildcard); + argument_list.insert(argument_list.begin(), cmd); + + /* We were not able to expand any wildcards. Here is the first one that failed */ + if (unmatched_wildcard != NULL) + { + job_set_flag(job, JOB_WILDCARD_ERROR, 1); + proc_set_last_status(STATUS_UNMATCHED_WILDCARD); + append_error(*unmatched_wildcard, WILDCARD_ERR_MSG, unmatched_wildcard->get_source(src).c_str()); + } + + /* The set of IO redirections that we construct for the process */ + const io_chain_t process_io_chain = this->determine_io_chain(statement); + + /* Determine the process type, which depends on the statement decoration (command, builtin, etc) */ + enum parse_statement_decoration_t decoration = tree.decoration_for_plain_statement(statement); + enum process_type_t process_type = EXTERNAL; + + /* exec hack */ + if (decoration != parse_statement_decoration_command && cmd == L"exec") + { + /* Either 'builtin exec' or just plain 'exec', and definitely not 'command exec'. Note we don't allow overriding exec with a function. */ + process_type = INTERNAL_EXEC; + } + else if (decoration == parse_statement_decoration_command) + { + /* Always a command */ + process_type = EXTERNAL; + } + else if (decoration == parse_statement_decoration_builtin) + { + /* What happens if this builtin is not valid? 
*/ + process_type = INTERNAL_BUILTIN; + } + else if (function_exists(cmd)) + { + process_type = INTERNAL_FUNCTION; + } + else if (builtin_exists(cmd)) + { + process_type = INTERNAL_BUILTIN; + } + else + { + process_type = EXTERNAL; + } + + wcstring actual_cmd; + if (process_type == EXTERNAL) + { + /* Determine the actual command. Need to support implicit cd here */ + bool has_command = path_get_path(cmd, &actual_cmd); + + if (! has_command) + { + /* TODO: support fish_command_not_found, implicit cd, etc. here */ + } + + } + + /* Return the process */ + process_t *result = new process_t(); + result->type = process_type; + result->set_argv(argument_list); + result->set_io_chain(process_io_chain); + result->actual_cmd = actual_cmd; + return result; +} + +/* Determine the list of arguments, expanding stuff. If we have a wildcard and none could be expanded, return the unexpandable wildcard node by reference. */ +wcstring_list_t parse_execution_context_t::determine_arguments(const parse_node_t &parent, const parse_node_t **out_unmatched_wildcard_node) +{ + wcstring_list_t argument_list; + + /* Whether we failed to match any wildcards, and succeeded in matching any wildcards */ + bool unmatched_wildcard = false, matched_wildcard = false; + + /* First node that failed to expand as a wildcard (if any) */ + const parse_node_t *unmatched_wildcard_node = NULL; + + /* Get all argument nodes underneath the statement */ + const parse_node_tree_t::parse_node_list_t argument_nodes = tree.find_nodes(parent, symbol_argument); + argument_list.reserve(argument_nodes.size()); + for (size_t i=0; i < argument_nodes.size(); i++) + { + const parse_node_t &arg_node = *argument_nodes.at(i); + + /* Expect all arguments to have source */ + assert(arg_node.has_source()); + const wcstring arg_str = arg_node.get_source(src); + + /* Expand this string */ + std::vector arg_expanded; + int expand_ret = expand_string(arg_str, arg_expanded, 0); + switch (expand_ret) + { + case EXPAND_ERROR: + { + 
this->append_error(arg_node, + _(L"Could not expand string '%ls'"), + arg_str.c_str()); + break; + } + + case EXPAND_WILDCARD_NO_MATCH: + { + /* Store the node that failed to expand */ + unmatched_wildcard = true; + if (! unmatched_wildcard_node) + { + unmatched_wildcard_node = &arg_node; + } + break; + } + + case EXPAND_WILDCARD_MATCH: + { + matched_wildcard = true; + break; + } + + case EXPAND_OK: + { + break; + } + } + + /* Now copy over any expanded arguments */ + for (size_t i=0; i < arg_expanded.size(); i++) + { + argument_list.push_back(arg_expanded.at(i).completion); + } + } + + /* Return if we had a wildcard problem */ + if (unmatched_wildcard && ! matched_wildcard) + { + *out_unmatched_wildcard_node = unmatched_wildcard_node; + } + + return argument_list; +} + +io_chain_t parse_execution_context_t::determine_io_chain(const parse_node_t &statement) +{ + io_chain_t result; + + /* Get all redirection nodes underneath the statement */ + const parse_node_tree_t::parse_node_list_t redirect_nodes = tree.find_nodes(statement, symbol_redirection); + for (size_t i=0; i < redirect_nodes.size(); i++) + { + const parse_node_t &redirect_node = *redirect_nodes.at(i); + + int source_fd = -1; /* source fd */ + wcstring target; /* file path or target fd */ + enum token_type redirect_type = tree.type_for_redirection(redirect_node, src, &source_fd, &target); + + /* PCA: I can't justify this EXPAND_SKIP_VARIABLES flag. It was like this when I got here. */ + bool target_expanded = expand_one(target, no_exec ? EXPAND_SKIP_VARIABLES : 0); + if (! 
target_expanded || target.empty()) + { + /* Should improve this error message */ + this->append_error(redirect_node, + _(L"Invalid redirection target: %ls"), + target.c_str()); + } + + + /* Generate the actual IO redirection */ + shared_ptr new_io; + assert(redirect_type != TOK_NONE); + switch (redirect_type) + { + case TOK_REDIRECT_FD: + { + if (target == L"-") + { + new_io.reset(new io_close_t(source_fd)); + } + else + { + wchar_t *end = NULL; + errno = 0; + int old_fd = fish_wcstoi(target.c_str(), &end, 10); + if (old_fd < 0 || errno || *end) + { + this->append_error(redirect_node, + _(L"Requested redirection to something that is not a file descriptor %ls"), + target.c_str()); + } + else + { + new_io.reset(new io_fd_t(source_fd, old_fd)); + } + } + break; + } + + case TOK_REDIRECT_OUT: + case TOK_REDIRECT_APPEND: + case TOK_REDIRECT_IN: + case TOK_REDIRECT_NOCLOB: + { + int oflags = oflags_for_redirection_type(redirect_type); + io_file_t *new_io_file = new io_file_t(source_fd, target, oflags); + new_io.reset(new_io_file); + break; + } + + default: + { + // Should be unreachable + fprintf(stderr, "Unexpected redirection type %ld. aborting.\n", (long)redirect_type); + PARSER_DIE(); + break; + } + } + + /* Append the new_io if we got one */ + if (new_io.get() != NULL) + { + result.push_back(new_io); + } + } + return result; +} + +process_t *parse_execution_context_t::create_boolean_process(job_t *job, const parse_node_t &bool_statement) +{ + // Handle a boolean statement + bool skip_job = false; + assert(bool_statement.type == symbol_boolean_statement); + switch (bool_statement.production_idx) + { + // These magic numbers correspond to productions for boolean_statement + case 0: + // AND. Skip if the last job failed. + skip_job = (proc_get_last_status() != 0); + break; + + case 1: + // OR. Skip if the last job succeeded. + skip_job = (proc_get_last_status() == 0); + break; + + case 2: + // NOT. Negate it. 
+ job_set_flag(job, JOB_NEGATE, !job_get_flag(job, JOB_NEGATE)); + break; + + default: + { + fprintf(stderr, "Unexpected production in boolean statement\n"); + PARSER_DIE(); + break; + } + } + + process_t *result = NULL; + if (! skip_job) + { + const parse_node_t &subject = *tree.get_child(bool_statement, 1, symbol_statement); + result = this->create_job_process(job, subject); + } + return result; +} + + +/* Returns a process_t allocated with new. It's the caller's responsibility to delete it (!) */ +process_t *parse_execution_context_t::create_job_process(job_t *job, const parse_node_t &statement_node) +{ + assert(statement_node.type == symbol_statement); + assert(statement_node.child_count == 1); + + // Get the "specific statement" which is boolean / block / if / switch / decorated + const parse_node_t &specific_statement = *get_child(statement_node, 0); + + process_t *result = NULL; + + switch (specific_statement.type) + { + case symbol_boolean_statement: + { + result = this->create_boolean_process(job, specific_statement); + break; + } + + case symbol_block_statement: + { + const parse_node_t &header = *get_child(specific_statement, 0, symbol_block_header); + const parse_node_t &specific_header = *get_child(header, 0); + switch (specific_header.type) + { + case symbol_for_header: + result = this->create_for_process(job, specific_header, specific_statement); + break; + + case symbol_while_header: + result = this->create_while_process(job, specific_header, specific_statement); + break; + + case symbol_function_header: + // No process is associated with creating a function + // TODO: create the darn function! 
+ result = NULL; + break; + + case symbol_begin_header: + result = this->create_begin_process(job, specific_header, specific_statement); + break; + + default: + fprintf(stderr, "Unexpected header type\n"); + PARSER_DIE(); + break; + } + break; + } + + case symbol_decorated_statement: + { + const parse_node_t &plain_statement = tree.find_child(specific_statement, symbol_plain_statement); + result = this->create_plain_process(job, plain_statement); + break; + } + + default: + fprintf(stderr, "'%ls' not handled by new parser yet\n", specific_statement.describe().c_str()); + } + + return result; +} + + +void parse_execution_context_t::eval_job(job_t *j, const parse_node_t &job_node) +{ + assert(job_node.type == symbol_job); + + /* Track whether we had an error */ + bool process_errored = false; + + /* Tell the job what its command is */ + j->set_command(get_source(job_node)); + + /* We are going to construct process_t structures for every statement in the job. Get the first statement. */ + const parse_node_t *statement_node = get_child(job_node, 0, symbol_statement); + assert(statement_node != NULL); + + /* Create the process (may fail!) */ + j->first_process = this->create_job_process(j, *statement_node); + if (j->first_process == NULL) + process_errored = true; + + /* Construct process_ts for job continuations (pipelines), by walking the list until we hit the terminal (empty) job continuation */ + const parse_node_t *job_cont = get_child(job_node, 1, symbol_job_continuation); + process_t *last_process = j->first_process; + while (! 
process_errored && job_cont != NULL && job_cont->child_count > 0) + { + assert(job_cont->type == symbol_job_continuation); + + /* Get the statement node and make a process from it */ + const parse_node_t *statement_node = get_child(*job_cont, 1, symbol_statement); + assert(statement_node != NULL); + + /* Store the new process (and maybe with an error) */ + last_process->next = this->create_job_process(j, *statement_node); + if (last_process->next == NULL) + process_errored = true; + + /* Link the process and get the next continuation */ + last_process = last_process->next; + job_cont = get_child(*job_cont, 2, symbol_job_continuation); + } +} + +void parse_execution_context_t::eval_1_job(const parse_node_t &job_node) +{ + // Get terminal modes + struct termios tmodes = {}; + if (get_is_interactive()) + { + if (tcgetattr(STDIN_FILENO, &tmodes)) + { + // need real error handling here + wperror(L"tcgetattr"); + return; + } + } + + /* Profiling support */ + long long t1 = 0, t2 = 0, t3 = 0; + const bool do_profile = profile; + profile_item_t *profile_item = NULL; + if (do_profile) + { + profile_item = new profile_item_t(); + profile_item->skipped = 1; + profile_items.push_back(profile_item); + t1 = get_time(); + } + + job_t *j = parser->job_create(); + job_set_flag(j, JOB_FOREGROUND, 1); + job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL)); + job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL) \ + && (!is_subshell && !is_event)); + job_set_flag(j, JOB_SKIP_NOTIFICATION, is_subshell \ + || is_block \ + || is_event \ + || (!get_is_interactive())); + + parser->current_block()->job = j; + + this->eval_job(j, job_node); + +} + +void parse_execution_context_t::eval_next_stack_elem() +{ + // Pop the next thing to do + assert(! 
job_stack.empty()); + const parse_execution_stack_element_t elem = job_stack.back(); + job_stack.pop_back(); + + assert(elem.job_or_job_list->type == symbol_job || elem.job_or_job_list->type == symbol_job_list); + + if (elem.job_or_job_list->type == symbol_job) + { + const parse_node_t *job = elem.job_or_job_list; + this->eval_1_job(*job); + } + else + { + const parse_node_t *job_list = elem.job_or_job_list; + while (job_list != NULL) + { + assert(job_list->type == symbol_job_list); + + // These correspond to the three productions of job_list + // Try pulling out a job + const parse_node_t *job = NULL; + switch (job_list->production_idx) + { + case 0: // empty + job_list = NULL; + break; + + case 1: //job, job_list + job = get_child(*job_list, 0, symbol_job); + job_list = get_child(*job_list, 1, symbol_job_list); + break; + + case 2: //blank line, job_list + job = NULL; + job_list = get_child(*job_list, 1, symbol_job_list); + break; + + default: //if we get here, it means more productions have been added to job_list, which is bad + PARSER_DIE(); + } + + if (job != NULL) + { + this->eval_1_job(*job); + } + } + } + + /* Invoke any completion handler */ + if (elem.completion_handler) + { + assert(elem.node != NULL); + (this->*elem.completion_handler)(*elem.node); + } +} + +void parse_execution_context_t::eval_job_list(const parse_node_t &job_node) +{ + this->stack_push(&job_node, NULL, NULL); + while (! job_stack.empty()) + { + this->eval_next_stack_elem(); + } +} diff --git a/parse_execution.h b/parse_execution.h new file mode 100644 index 000000000..6495f9441 --- /dev/null +++ b/parse_execution.h @@ -0,0 +1,76 @@ +/**\file parse_execution.h + + Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.). 
+*/ + +#ifndef FISH_PARSE_EXECUTION_H +#define FISH_PARSE_EXECUTION_H + +#include "config.h" +#include "util.h" +#include "parse_tree.h" +#include "proc.h" + +class job_t; +struct profile_item_t; + +class parse_execution_context_t +{ + private: + const parse_node_tree_t tree; + const wcstring src; + parser_t * const parser; + parse_error_list_t errors; + + std::vector profile_items; + + /* We maintain a stack of job lists to be executed, and something to do after the execution is finished. This is a pointer to member function that takes a node, a status, and the statement that was executed */ + typedef void (parse_execution_context_t::*statement_completion_handler_t)(const parse_node_t &node); + + struct parse_execution_stack_element_t + { + // These point into our tree, which is immutable + const parse_node_t *job_or_job_list; + statement_completion_handler_t completion_handler; + const parse_node_t *node; + }; + std::vector job_stack; + + void stack_push(const parse_node_t *job_or_job_list, statement_completion_handler_t completion_handler, const parse_node_t *node); + + /* No copying allowed */ + parse_execution_context_t(const parse_execution_context_t&); + parse_execution_context_t& operator=(const parse_execution_context_t&); + + /* Report an error. Always returns true. 
*/ + bool append_error(const parse_node_t &node, const wchar_t *fmt, ...); + + wcstring get_source(const parse_node_t &node) const; + const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid); + + node_offset_t get_offset(const parse_node_t &node) const; + + process_t *create_job_process(job_t *job, const parse_node_t &statement_node); + process_t *create_boolean_process(job_t *job, const parse_node_t &bool_statement); + process_t *create_for_process(job_t *job, const parse_node_t &header, const parse_node_t &statement); + process_t *create_while_process(job_t *job, const parse_node_t &header, const parse_node_t &statement); + process_t *create_begin_process(job_t *job, const parse_node_t &header, const parse_node_t &statement); + process_t *create_plain_process(job_t *job, const parse_node_t &statement); + + wcstring_list_t determine_arguments(const parse_node_t &parent, const parse_node_t **out_unmatched_wildcard_node); + io_chain_t determine_io_chain(const parse_node_t &statement); + + void eval_1_job(const parse_node_t &job_node); + void eval_job(job_t *j, const parse_node_t &job_node); + + void eval_next_stack_elem(); + + public: + parse_execution_context_t(const parse_node_tree_t &t, const wcstring s, parser_t *p); + + void eval_job_list(const parse_node_t &job_node); + +}; + + +#endif diff --git a/parse_productions.cpp b/parse_productions.cpp index f79e945ae..3325f50dd 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -302,13 +302,13 @@ RESOLVE(block_header) PRODUCTIONS(for_header) = { - {KEYWORD(parse_keyword_for), parse_token_type_string, KEYWORD(parse_keyword_in), symbol_arguments_or_redirections_list} + {KEYWORD(parse_keyword_for), parse_token_type_string, KEYWORD(parse_keyword_in), symbol_argument_list} }; RESOLVE_ONLY(for_header) PRODUCTIONS(while_header) = { - {KEYWORD(parse_keyword_while), symbol_statement} + {KEYWORD(parse_keyword_while), symbol_job} }; 
RESOLVE_ONLY(while_header) diff --git a/parse_tree.h b/parse_tree.h index 87e3ecec8..9a5d7c238 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -241,8 +241,8 @@ public: block_statement = block_header job_list end_command arguments_or_redirections_list block_header = for_header | while_header | function_header | begin_header - for_header = FOR var_name IN arguments_or_redirections_list - while_header = WHILE statement + for_header = FOR var_name IN argument_list + while_header = WHILE job begin_header = BEGIN function_header = FUNCTION function_name argument_list diff --git a/parser.cpp b/parser.cpp index 2a34f0dc3..821d86c29 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2772,7 +2772,7 @@ void parser_t::eval_job(const parse_node_t &job_node, const parser_context_t &ct /* Tell the job what its command is */ j->set_command(job_node.get_source(ctx.src)); - /* Construct process_t structures for every statement in the job */ + /* We are going to construct process_t structures for every statement in the job. Get the first statement. */ const parse_node_t *statement_node = ctx.tree.get_child(job_node, 0, symbol_statement); assert(statement_node != NULL); @@ -3616,6 +3616,7 @@ block_t::block_t(block_type_t t) : skip(), had_command(), tok_pos(), + node_offset(NODE_OFFSET_INVALID), loop_status(), job(), src_filename(), diff --git a/parser.h b/parser.h index fab73acfd..cdb4b5902 100644 --- a/parser.h +++ b/parser.h @@ -95,6 +95,8 @@ public: bool skip; /**< Whether execution of the commands in this block should be skipped */ bool had_command; /**< Set to non-zero once a command has been executed in this block */ int tok_pos; /**< The start index of the block */ + + node_offset_t node_offset; /* Offset of the node */ /** Status for the current loop block. Can be any of the values from the loop_status enum. 
*/ int loop_status; @@ -277,6 +279,7 @@ struct parser_context_t class parser_t { + friend class parse_execution_context_t; private: enum parser_type_t parser_type; From 924b8cbe24023985d7bafcec7b79303e2e7cc2e4 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 26 Dec 2013 12:24:00 -0800 Subject: [PATCH 069/108] New ideas about how to use new parser for execution. Beginnings of implementation. --- parse_execution.cpp | 234 ++++++++++++++++++++++---------------------- parse_execution.h | 29 ++---- parser.cpp | 8 ++ parser.h | 3 + proc.cpp | 1 + proc.h | 5 +- 6 files changed, 143 insertions(+), 137 deletions(-) diff --git a/parse_execution.cpp b/parse_execution.cpp index 9bcce5bef..6af7f7e85 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -13,7 +13,7 @@ #include "path.h" -parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &t, const wcstring s, parser_t *p) : tree(t), src(s), parser(p) +parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &t, const wcstring s, parser_t *p) : tree(t), src(s), parser(p), eval_level(0) { } @@ -31,53 +31,47 @@ const parse_node_t *parse_execution_context_t::get_child(const parse_node_t &par node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) const { - /* Pointer arithmetic, very hackish */ + /* Get the offset of a node via pointer arithmetic, very hackish */ const parse_node_t *addr = &node; const parse_node_t *base = &this->tree.at(0); assert(addr >= base); node_offset_t offset = addr - base; assert(offset < this->tree.size()); + assert(&tree.at(offset) == &node); return offset; } -/* Stack manipulation */ -void parse_execution_context_t::stack_push(const parse_node_t *job_or_job_list, statement_completion_handler_t completion_handler, const parse_node_t *node) +bool parse_execution_context_t::should_cancel() const { - const struct parse_execution_stack_element_t elem = {job_or_job_list, completion_handler, node}; - job_stack.push_back(elem); + 
return false; } -process_t *parse_execution_context_t::create_for_process(job_t *job, const parse_node_t &header, const parse_node_t &statement) -{ - assert(header.type == symbol_for_header); - const wcstring for_variable = get_source(*get_child(header, 1, parse_token_type_string)); - const parse_node_t &arg_list = *get_child(header, 3, symbol_argument_list); - - for_block_t *fb = new for_block_t(for_variable); - fb->sequence = this->determine_arguments(arg_list, NULL); - fb->node_offset = this->get_offset(statement); - parser->push_block(fb); - return NULL; -} - -process_t *parse_execution_context_t::create_while_process(job_t *job, const parse_node_t &header, const parse_node_t &statement) +void parse_execution_context_t::run_while_process(const parse_node_t &header, const parse_node_t &statement) { assert(header.type == symbol_while_header); + assert(statement.type == symbol_block_statement); + while_block_t *wb = new while_block_t(); wb->status = WHILE_TEST_FIRST; wb->node_offset = this->get_offset(statement); parser->push_block(wb); - return NULL; + + // The condition of the while loop, as a job + const parse_node_t &while_condition = *get_child(header, 1, symbol_job); + + // The contents of the while loop, as a job list + const parse_node_t &block_contents = *get_child(statement, 2, symbol_job_list); + + // A while loop is a while loop! + while (! this->should_cancel() && this->run_1_job(while_condition) == EXIT_SUCCESS) + { + this->run_job_list(block_contents); + } + + parser->pop_block(wb); } -process_t *parse_execution_context_t::create_begin_process(job_t *job, const parse_node_t &header, const parse_node_t &statement) -{ - assert(header.type == symbol_begin_header); - scope_block_t *bb = new scope_block_t(BEGIN); - parser->push_block(bb); - return NULL; -} bool parse_execution_context_t::append_error(const parse_node_t &node, const wchar_t *fmt, ...) 
{ @@ -382,6 +376,16 @@ process_t *parse_execution_context_t::create_boolean_process(job_t *job, const p return result; } +process_t *parse_execution_context_t::create_block_process(job_t *job, const parse_node_t &statement_node) +{ + /* We handle block statements by creating INTERNAL_BLOCKs, that will bounce back to us when it's time to execute them */ + assert(statement_node.type == symbol_block_statement || statement_node.type == symbol_if_statement || statement_node.type == symbol_switch_statement); + process_t *result = new process_t(); + result->type = INTERNAL_BLOCK; + result->internal_block_node = this->get_offset(statement_node); + return result; +} + /* Returns a process_t allocated with new. It's the caller's responsibility to delete it (!) */ process_t *parse_execution_context_t::create_job_process(job_t *job, const parse_node_t &statement_node) @@ -403,34 +407,10 @@ process_t *parse_execution_context_t::create_job_process(job_t *job, const parse } case symbol_block_statement: + case symbol_if_statement: + case symbol_switch_statement: { - const parse_node_t &header = *get_child(specific_statement, 0, symbol_block_header); - const parse_node_t &specific_header = *get_child(header, 0); - switch (specific_header.type) - { - case symbol_for_header: - result = this->create_for_process(job, specific_header, specific_statement); - break; - - case symbol_while_header: - result = this->create_while_process(job, specific_header, specific_statement); - break; - - case symbol_function_header: - // No process is associated with creating a function - // TODO: create the darn function! 
- result = NULL; - break; - - case symbol_begin_header: - result = this->create_begin_process(job, specific_header, specific_statement); - break; - - default: - fprintf(stderr, "Unexpected header type\n"); - PARSER_DIE(); - break; - } + result = this->create_block_process(job, specific_statement); break; } @@ -443,13 +423,15 @@ process_t *parse_execution_context_t::create_job_process(job_t *job, const parse default: fprintf(stderr, "'%ls' not handled by new parser yet\n", specific_statement.describe().c_str()); + PARSER_DIE(); + break; } return result; } -void parse_execution_context_t::eval_job(job_t *j, const parse_node_t &job_node) +void parse_execution_context_t::populate_job_from_job_node(job_t *j, const parse_node_t &job_node) { assert(job_node.type == symbol_job); @@ -459,7 +441,7 @@ void parse_execution_context_t::eval_job(job_t *j, const parse_node_t &job_node) /* Tell the job what its command is */ j->set_command(get_source(job_node)); - /* We are going ot construct process_t structures for every statement in the job. Get the first statement. */ + /* We are going to construct process_t structures for every statement in the job. Get the first statement. */ const parse_node_t *statement_node = get_child(job_node, 0, symbol_statement); assert(statement_node != NULL); @@ -468,7 +450,7 @@ void parse_execution_context_t::eval_job(job_t *j, const parse_node_t &job_node) if (j->first_process == NULL) process_errored = true; - /* Construct process_ts for job continuations (pipelines), by walking the list until we hit the terminal (empty) job continuationf */ + /* Construct process_ts for job continuations (pipelines), by walking the list until we hit the terminal (empty) job continuation */ const parse_node_t *job_cont = get_child(job_node, 1, symbol_job_continuation); process_t *last_process = j->first_process; while (! 
process_errored && job_cont != NULL && job_cont->child_count > 0) @@ -490,7 +472,7 @@ void parse_execution_context_t::eval_job(job_t *j, const parse_node_t &job_node) } } -void parse_execution_context_t::eval_1_job(const parse_node_t &job_node) +int parse_execution_context_t::run_1_job(const parse_node_t &job_node) { // Get terminal modes struct termios tmodes = {}; @@ -500,12 +482,17 @@ void parse_execution_context_t::eval_1_job(const parse_node_t &job_node) { // need real error handling here wperror(L"tcgetattr"); - return; + return EXIT_FAILURE; } } + /* Increment the eval_level for the duration of this command */ + scoped_push saved_eval_level(&eval_level, eval_level + 1); + + /* TODO: blocks-without-redirections optimization */ + /* Profiling support */ - long long t1 = 0, t2 = 0, t3 = 0; + long long start_time = 0, parse_time = 0, exec_time = 0; const bool do_profile = profile; profile_item_t *profile_item = NULL; if (do_profile) @@ -513,7 +500,7 @@ void parse_execution_context_t::eval_1_job(const parse_node_t &job_node) profile_item = new profile_item_t(); profile_item->skipped = 1; profile_items.push_back(profile_item); - t1 = get_time(); + start_time = get_time(); } job_t *j = parser->job_create(); @@ -528,74 +515,87 @@ void parse_execution_context_t::eval_1_job(const parse_node_t &job_node) parser->current_block()->job = j; - this->eval_job(j, job_node); + this->populate_job_from_job_node(j, job_node); -} - -void parse_execution_context_t::eval_next_stack_elem() -{ - // Pop the next thing to do - assert(! 
job_stack.empty()); - const parse_execution_stack_element_t elem = job_stack.back(); - job_stack.pop_back(); - - assert(elem.job_or_job_list->type == symbol_job || elem.job_or_job_list->type == symbol_job_list); - - if (elem.job_or_job_list->type == symbol_job) + if (do_profile) { - const parse_node_t *job = elem.job_or_job_list; - this->eval_1_job(*job); + parse_time = get_time(); + profile_item->cmd = j->command(); + profile_item->skipped=parser->current_block()->skip; } - else + + /* Check to see if this contained any external commands */ + bool job_contained_external_command = false; + for (const process_t *proc = j->first_process; proc != NULL; proc = proc->next) { - const parse_node_t *job_list = elem.job_or_job_list; - while (job_list != NULL) + if (proc->type == EXTERNAL) { - assert(job_list->type == symbol_job_list); - - // These correspond to the three productions of job_list - // Try pulling out a job - const parse_node_t *job = NULL; - switch (job_list->production_idx) - { - case 0: // empty - job_list = NULL; - break; - - case 1: //job, job_list - job = get_child(*job_list, 0, symbol_job); - job_list = get_child(*job_list, 1, symbol_job_list); - break; - - case 2: //blank line, job_list - job = NULL; - job_list = get_child(*job_list, 1, symbol_job_list); - break; - - default: //if we get here, it means more productions have been added to job_list, which is bad - PARSER_DIE(); - } - - if (job != NULL) - { - this->eval_1_job(*job); - } + job_contained_external_command = true; + break; } } - /* Invoke any completion handler */ - if (elem.completion_handler) + /* Only external commands require a new fishd barrier */ + if (!job_contained_external_command) + set_proc_had_barrier(false); + + /* Need support for skipped_exec here */ + + if (do_profile) { - assert(elem.node != NULL); - (this->*elem.completion_handler)(*elem.node); + exec_time = get_time(); + profile_item->level=eval_level; + profile_item->parse = (int)(parse_time-start_time); + 
profile_item->exec=(int)(exec_time-parse_time); } + + job_reap(0); + + return proc_get_last_status(); } -void parse_execution_context_t::eval_job_list(const parse_node_t &job_node) +void parse_execution_context_t::run_job_list(const parse_node_t &job_list_node) { - this->stack_push(&job_node, NULL, NULL); - while (! job_stack.empty()) + assert(job_list_node.type == symbol_job_list); + + const parse_node_t *job_list = &job_list_node; + while (job_list != NULL) { - this->eval_next_stack_elem(); + assert(job_list->type == symbol_job_list); + + // These correspond to the three productions of job_list + // Try pulling out a job + const parse_node_t *job = NULL; + switch (job_list->production_idx) + { + case 0: // empty + job_list = NULL; + break; + + case 1: //job, job_list + job = get_child(*job_list, 0, symbol_job); + job_list = get_child(*job_list, 1, symbol_job_list); + break; + + case 2: //blank line, job_list + job = NULL; + job_list = get_child(*job_list, 1, symbol_job_list); + break; + + default: //if we get here, it means more productions have been added to job_list, which is bad + PARSER_DIE(); + } + + if (job != NULL) + { + this->run_1_job(*job); + } } + +} + + +void parse_execution_context_t::eval_job_list(const parse_node_t &job_list_node) +{ + this->run_job_list(job_list_node); } diff --git a/parse_execution.h b/parse_execution.h index 6495f9441..8977e7e36 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -22,26 +22,16 @@ class parse_execution_context_t parser_t * const parser; parse_error_list_t errors; + int eval_level; std::vector profile_items; - /* We maintain a stack of job lists to be executed, and something to do after the execution is finished. 
This is a pointer to member function that takes a node, a status, and the statement that was executed */ - typedef void (parse_execution_context_t::*statement_completion_handler_t)(const parse_node_t &node); - - struct parse_execution_stack_element_t - { - // These point into our tree, which is immutable - const parse_node_t *job_or_job_list; - statement_completion_handler_t completion_handler; - const parse_node_t *node; - }; - std::vector job_stack; - - void stack_push(const parse_node_t *job_or_job_list, statement_completion_handler_t completion_handler, const parse_node_t *node); - /* No copying allowed */ parse_execution_context_t(const parse_execution_context_t&); parse_execution_context_t& operator=(const parse_execution_context_t&); + /* Should I cancel */ + bool should_cancel() const; + /* Report an error. Always returns true. */ bool append_error(const parse_node_t &node, const wchar_t *fmt, ...); @@ -52,16 +42,17 @@ class parse_execution_context_t process_t *create_job_process(job_t *job, const parse_node_t &statement_node); process_t *create_boolean_process(job_t *job, const parse_node_t &bool_statement); - process_t *create_for_process(job_t *job, const parse_node_t &header, const parse_node_t &statement); - process_t *create_while_process(job_t *job, const parse_node_t &header, const parse_node_t &statement); - process_t *create_begin_process(job_t *job, const parse_node_t &header, const parse_node_t &statement); process_t *create_plain_process(job_t *job, const parse_node_t &statement); + process_t *create_block_process(job_t *job, const parse_node_t &statement_node); + + void run_while_process(const parse_node_t &header, const parse_node_t &statement); wcstring_list_t determine_arguments(const parse_node_t &parent, const parse_node_t **out_unmatched_wildcard_node); io_chain_t determine_io_chain(const parse_node_t &statement); - void eval_1_job(const parse_node_t &job_node); - void eval_job(job_t *j, const parse_node_t &job_node); + int 
run_1_job(const parse_node_t &job_node); + void run_job_list(const parse_node_t &job_list_node); + void populate_job_from_job_node(job_t *j, const parse_node_t &job_node); void eval_next_stack_elem(); diff --git a/parser.cpp b/parser.cpp index 821d86c29..ad31cce07 100644 --- a/parser.cpp +++ b/parser.cpp @@ -427,6 +427,12 @@ void parser_t::pop_block() delete old; } +void parser_t::pop_block(const block_t *expected) +{ + assert(expected == this->current_block()); + this->pop_block(); +} + const wchar_t *parser_t::get_block_desc(int block) const { for (size_t i=0; block_lookup[i].desc; i++) @@ -2908,7 +2914,9 @@ void parser_t::eval_job(tokenizer_t *tok) { int was_builtin = 0; if (j->first_process->type==INTERNAL_BUILTIN && !j->first_process->next) + { was_builtin = 1; + } scoped_push tokenizer_pos_push(¤t_tokenizer_pos, job_begin_pos); exec_job(*this, j); diff --git a/parser.h b/parser.h index cdb4b5902..13adfc71b 100644 --- a/parser.h +++ b/parser.h @@ -466,6 +466,9 @@ public: /** Remove the outermost block namespace */ void pop_block(); + + /** Remove the outermost block, asserting it's the given one */ + void pop_block(const block_t *b); /** Return a description of the given blocktype */ const wchar_t *get_block_desc(int block) const; diff --git a/proc.cpp b/proc.cpp index 1405343eb..f6a20eb2d 100644 --- a/proc.cpp +++ b/proc.cpp @@ -516,6 +516,7 @@ process_t::process_t() : argv_array(), argv0_narrow(), type(), + internal_block_node(NODE_OFFSET_INVALID), actual_cmd(), pid(0), pipe_write_fd(0), diff --git a/proc.h b/proc.h index a8f26a5e6..0a2949ca2 100644 --- a/proc.h +++ b/proc.h @@ -20,6 +20,7 @@ #include "util.h" #include "io.h" #include "common.h" +#include "parse_tree.h" /** The status code use when a command was not found @@ -151,7 +152,9 @@ public: INTERNAL_EXEC, or INTERNAL_BUFFER */ enum process_type_t type; - + + /* For internal block processes only, the node offset of the block */ + node_offset_t internal_block_node; /** Sets argv */ void set_argv(const 
wcstring_list_t &argv) From 562946d055080ad52b8c13b9c9f6a02e7b8c029b Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 26 Dec 2013 12:55:10 -0800 Subject: [PATCH 070/108] Cleanup thrashing around in parser_t. New parser execution lives in parse_execution.cpp --- parse_execution.cpp | 107 ++++--- parse_execution.h | 10 +- parser.cpp | 673 -------------------------------------------- parser.h | 21 -- 4 files changed, 81 insertions(+), 730 deletions(-) diff --git a/parse_execution.cpp b/parse_execution.cpp index 6af7f7e85..60c0d2627 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -10,6 +10,7 @@ #include "parser.h" #include "expand.h" #include "wutil.h" +#include "exec.h" #include "path.h" @@ -24,7 +25,7 @@ wcstring parse_execution_context_t::get_source(const parse_node_t &node) const return node.get_source(this->src); } -const parse_node_t *parse_execution_context_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) +const parse_node_t *parse_execution_context_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const { return this->tree.get_child(parent, which, expected_type); } @@ -91,6 +92,8 @@ bool parse_execution_context_t::append_error(const parse_node_t &node, const wch process_t *parse_execution_context_t::create_plain_process(job_t *job, const parse_node_t &statement) { + bool errored = false; + /* Get the decoration */ assert(statement.type == symbol_plain_statement); @@ -99,29 +102,37 @@ process_t *parse_execution_context_t::create_plain_process(job_t *job, const par bool got_cmd = tree.command_for_plain_statement(statement, src, &cmd); assert(got_cmd); - /* Expand it as a command */ + /* Expand it as a command. Return NULL on failure. */ bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); if (! 
expanded) { - append_error(statement, ILLEGAL_CMD_ERR_MSG, cmd.c_str()); - return 0; + errored = append_error(statement, ILLEGAL_CMD_ERR_MSG, cmd.c_str()); } + if (errored) + return NULL; + /* The list of arguments. The command is the first argument. TODO: count hack */ const parse_node_t *unmatched_wildcard = NULL; wcstring_list_t argument_list = this->determine_arguments(statement, &unmatched_wildcard); argument_list.insert(argument_list.begin(), cmd); - /* We were not able to expand any wildcards. Here is the first one that failed */ + /* If we were not able to expand any wildcards, here is the first one that failed */ if (unmatched_wildcard != NULL) { job_set_flag(job, JOB_WILDCARD_ERROR, 1); proc_set_last_status(STATUS_UNMATCHED_WILDCARD); - append_error(*unmatched_wildcard, WILDCARD_ERR_MSG, unmatched_wildcard->get_source(src).c_str()); + errored = append_error(*unmatched_wildcard, WILDCARD_ERR_MSG, unmatched_wildcard->get_source(src).c_str()); } + if (errored) + return NULL; + /* The set of IO redirections that we construct for the process */ - const io_chain_t process_io_chain = this->determine_io_chain(statement); + io_chain_t process_io_chain; + errored = ! this->determine_io_chain(statement, &process_io_chain); + if (errored) + return NULL; /* Determine the process type, which depends on the statement decoration (command, builtin, etc) */ enum parse_statement_decoration_t decoration = tree.decoration_for_plain_statement(statement); @@ -165,16 +176,21 @@ process_t *parse_execution_context_t::create_plain_process(job_t *job, const par if (! has_command) { /* TODO: support fish_command_not_found, implicit cd, etc. here */ + errored = true; } - + return NULL; } - /* Return the process */ - process_t *result = new process_t(); - result->type = process_type; - result->set_argv(argument_list); - result->set_io_chain(process_io_chain); - result->actual_cmd = actual_cmd; + /* Return the process, or NULL on error */ + process_t *result = NULL; + if (! 
errored) + { + result = new process_t(); + result->type = process_type; + result->set_argv(argument_list); + result->set_io_chain(process_io_chain); + result->actual_cmd = actual_cmd; + } return result; } @@ -252,9 +268,10 @@ wcstring_list_t parse_execution_context_t::determine_arguments(const parse_node_ return argument_list; } -io_chain_t parse_execution_context_t::determine_io_chain(const parse_node_t &statement) +bool parse_execution_context_t::determine_io_chain(const parse_node_t &statement, io_chain_t *out_chain) { io_chain_t result; + bool errored = false; /* Get all redirection nodes underneath the statement */ const parse_node_tree_t::parse_node_list_t redirect_nodes = tree.find_nodes(statement, symbol_redirection); @@ -271,7 +288,7 @@ io_chain_t parse_execution_context_t::determine_io_chain(const parse_node_t &sta if (! target_expanded || target.empty()) { /* Should improve this error message */ - this->append_error(redirect_node, + errored = append_error(redirect_node, _(L"Invalid redirection target: %ls"), target.c_str()); } @@ -295,7 +312,7 @@ io_chain_t parse_execution_context_t::determine_io_chain(const parse_node_t &sta int old_fd = fish_wcstoi(target.c_str(), &end, 10); if (old_fd < 0 || errno || *end) { - this->append_error(redirect_node, + errored = append_error(redirect_node, _(L"Requested redirection to something that is not a file descriptor %ls"), target.c_str()); } @@ -333,7 +350,12 @@ io_chain_t parse_execution_context_t::determine_io_chain(const parse_node_t &sta result.push_back(new_io); } } - return result; + + if (out_chain && ! errored) + { + std::swap(*out_chain, result); + } + return ! errored; } process_t *parse_execution_context_t::create_boolean_process(job_t *job, const parse_node_t &bool_statement) @@ -416,6 +438,7 @@ process_t *parse_execution_context_t::create_job_process(job_t *job, const parse case symbol_decorated_statement: { + /* Get the plain statement. 
It will pull out the decoration itself */ const parse_node_t &plain_statement = tree.find_child(specific_statement, symbol_plain_statement); result = this->create_plain_process(job, plain_statement); break; @@ -431,7 +454,7 @@ process_t *parse_execution_context_t::create_job_process(job_t *job, const parse } -void parse_execution_context_t::populate_job_from_job_node(job_t *j, const parse_node_t &job_node) +bool parse_execution_context_t::populate_job_from_job_node(job_t *j, const parse_node_t &job_node) { assert(job_node.type == symbol_job); @@ -470,6 +493,9 @@ void parse_execution_context_t::populate_job_from_job_node(job_t *j, const parse last_process = last_process->next; job_cont = get_child(*job_cont, 2, symbol_job_continuation); } + + /* Return success */ + return ! process_errored; } int parse_execution_context_t::run_1_job(const parse_node_t &job_node) @@ -515,8 +541,10 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) parser->current_block()->job = j; - this->populate_job_from_job_node(j, job_node); + /* Populate the job. This may fail for reasons like command_not_found */ + bool process_errored = ! this->populate_job_from_job_node(j, job_node); + /* Store time it took to 'parse' the command */ if (do_profile) { parse_time = get_time(); @@ -524,34 +552,49 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) profile_item->skipped=parser->current_block()->skip; } - /* Check to see if this contained any external commands */ - bool job_contained_external_command = false; - for (const process_t *proc = j->first_process; proc != NULL; proc = proc->next) + if (! 
process_errored) { - if (proc->type == EXTERNAL) + /* Check to see if this contained any external commands */ + bool job_contained_external_command = false; + for (const process_t *proc = j->first_process; proc != NULL; proc = proc->next) { - job_contained_external_command = true; - break; + if (proc->type == EXTERNAL) + { + job_contained_external_command = true; + break; + } + } + + /* Actually execute the job */ + exec_job(*this->parser, j); + + /* Only external commands require a new fishd barrier */ + if (!job_contained_external_command) + { + set_proc_had_barrier(false); } } - - /* Only external commands require a new fishd barrier */ - if (!job_contained_external_command) - set_proc_had_barrier(false); /* Need support for skipped_exec here */ - if (do_profile) { exec_time = get_time(); profile_item->level=eval_level; profile_item->parse = (int)(parse_time-start_time); profile_item->exec=(int)(exec_time-parse_time); + profile_item->skipped = process_errored; } + + /* Set the last status to 1 if the job could not be executed */ + if (process_errored) + proc_set_last_status(1); + const int ret = proc_get_last_status(); + /* Clean up jobs. Do this after we've determined the return value, since this may trigger event handlers */ job_reap(0); - return proc_get_last_status(); + /* All done */ + return ret; } void parse_execution_context_t::run_job_list(const parse_node_t &job_list_node) diff --git a/parse_execution.h b/parse_execution.h index 8977e7e36..f6c7c1207 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -35,9 +35,9 @@ class parse_execution_context_t /* Report an error. Always returns true. 
*/ bool append_error(const parse_node_t &node, const wchar_t *fmt, ...); + /* Utilities */ wcstring get_source(const parse_node_t &node) const; - const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid); - + const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; node_offset_t get_offset(const parse_node_t &node) const; process_t *create_job_process(job_t *job, const parse_node_t &statement_node); @@ -48,11 +48,13 @@ class parse_execution_context_t void run_while_process(const parse_node_t &header, const parse_node_t &statement); wcstring_list_t determine_arguments(const parse_node_t &parent, const parse_node_t **out_unmatched_wildcard_node); - io_chain_t determine_io_chain(const parse_node_t &statement); + + /* Determines the IO chain. Returns true on success, false on error */ + bool determine_io_chain(const parse_node_t &statement, io_chain_t *out_chain); int run_1_job(const parse_node_t &job_node); void run_job_list(const parse_node_t &job_list_node); - void populate_job_from_job_node(job_t *j, const parse_node_t &job_node); + bool populate_job_from_job_node(job_t *j, const parse_node_t &job_node); void eval_next_stack_elem(); diff --git a/parser.cpp b/parser.cpp index ad31cce07..adcd22087 100644 --- a/parser.cpp +++ b/parser.cpp @@ -1643,390 +1643,6 @@ void parser_t::parse_job_argument_list(process_t *p, p->set_io_chain(process_io_chain); } -/* - static void print_block_stack( block_t *b ) - { - if( !b ) - return; - print_block_stack( b->outer ); - - debug( 0, L"Block type %ls, skip: %d", parser_get_block_desc( b->type ), b->skip ); - } -*/ - -process_t *parser_t::create_boolean_process(job_t *job, const parse_node_t &bool_statement, const parser_context_t &ctx) -{ - // Handle a boolean statement - bool skip_job = false; - assert(bool_statement.type == symbol_boolean_statement); - switch 
(bool_statement.production_idx) - { - // These magic numbers correspond to productions for boolean_statement - case 0: - // AND. Skip if the last job failed. - skip_job = (proc_get_last_status() != 0); - break; - - case 1: - // OR. Skip if the last job succeeded. - skip_job = (proc_get_last_status() == 0); - break; - - case 2: - // NOT. Negate it. - job_set_flag(job, JOB_NEGATE, !job_get_flag(job, JOB_NEGATE)); - break; - - default: - { - fprintf(stderr, "Unexpected production in boolean statement\n"); - PARSER_DIE(); - break; - } - } - - process_t *result = NULL; - if (! skip_job) - { - const parse_node_t &subject = *ctx.tree.get_child(bool_statement, 1, symbol_statement); - result = this->create_job_process(job, subject, ctx); - } - return result; -} - -process_t *parser_t::create_for_process(job_t *job, const parse_node_t &header, const parse_node_t &statement, const parser_context_t &ctx) -{ - return NULL; -} - -process_t *parser_t::create_while_process(job_t *job, const parse_node_t &header, const parse_node_t &statement, const parser_context_t &ctx) -{ - return NULL; -} - -process_t *parser_t::create_begin_process(job_t *job, const parse_node_t &header, const parse_node_t &statement, const parser_context_t &ctx) -{ - return NULL; -} - -process_t *parser_t::create_plain_process(job_t *job, const parse_node_t &statement, const parser_context_t &ctx) -{ - /* Get the decoration */ - assert(statement.type == symbol_plain_statement); - - /* Get the command. We expect to always get it here. */ - wcstring cmd; - bool got_cmd = ctx.tree.command_for_plain_statement(statement, ctx.src, &cmd); - assert(got_cmd); - - /* Expand it as a command */ - bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); - if (! expanded) - { - error(SYNTAX_ERROR, - statement.source_start, - ILLEGAL_CMD_ERR_MSG, - cmd.c_str()); - return 0; - } - - /* The list of arguments. The command is the first argument. 
TODO: count hack */ - const parse_node_t *unmatched_wildcard = NULL; - wcstring_list_t argument_list = this->determine_arguments(statement, &unmatched_wildcard, ctx); - argument_list.insert(argument_list.begin(), cmd); - - /* We were not able to expand any wildcards. Here is the first one that failed */ - if (unmatched_wildcard != NULL) - { - job_set_flag(job, JOB_WILDCARD_ERROR, 1); - proc_set_last_status(STATUS_UNMATCHED_WILDCARD); - error(EVAL_ERROR, unmatched_wildcard->source_start, WILDCARD_ERR_MSG, unmatched_wildcard->get_source(ctx.src).c_str()); - } - - /* The set of IO redirections that we construct for the process */ - const io_chain_t process_io_chain = this->determine_io_chain(statement, ctx); - - /* Determine the process type, which depends on the statement decoration (command, builtin, etc) */ - enum parse_statement_decoration_t decoration = ctx.tree.decoration_for_plain_statement(statement); - enum process_type_t process_type = EXTERNAL; - - /* exec hack */ - if (decoration != parse_statement_decoration_command && cmd == L"exec") - { - /* Either 'builtin exec' or just plain 'exec', and definitely not 'command exec'. Note we don't allow overriding exec with a function. */ - process_type = INTERNAL_EXEC; - } - else if (decoration == parse_statement_decoration_command) - { - /* Always a command */ - process_type = EXTERNAL; - } - else if (decoration == parse_statement_decoration_builtin) - { - /* What happens if this builtin is not valid? */ - process_type = INTERNAL_BUILTIN; - } - else if (function_exists(cmd)) - { - process_type = INTERNAL_FUNCTION; - } - else if (builtin_exists(cmd)) - { - process_type = INTERNAL_BUILTIN; - } - else - { - process_type = EXTERNAL; - } - - wcstring actual_cmd; - if (process_type == EXTERNAL) - { - /* Determine the actual command. Need to support implicit cd here */ - bool has_command = path_get_path(cmd, &actual_cmd); - - if (! has_command) - { - /* TODO: support fish_command_not_found, implicit cd, etc. 
here */ - } - - } - - /* Return the process */ - process_t *result = new process_t(); - result->type = process_type; - result->set_argv(argument_list); - result->set_io_chain(process_io_chain); - result->actual_cmd = actual_cmd; - return result; -} - -/* Determine the list of arguments, expanding stuff. If we have a wildcard and none could be expanded, return the unexpandable wildcard node by reference. */ -wcstring_list_t parser_t::determine_arguments(const parse_node_t &statement, const parse_node_t **out_unmatched_wildcard_node, const parser_context_t &ctx) -{ - wcstring_list_t argument_list; - - /* Whether we failed to match any wildcards, and succeeded in matching any wildcards */ - bool unmatched_wildcard = false, matched_wildcard = false; - - /* First node that failed to expand as a wildcard (if any) */ - const parse_node_t *unmatched_wildcard_node = NULL; - - /* Get all argument nodes underneath the statement */ - const parse_node_tree_t::parse_node_list_t argument_nodes = ctx.tree.find_nodes(statement, symbol_argument); - argument_list.reserve(argument_nodes.size()); - for (size_t i=0; i < argument_nodes.size(); i++) - { - const parse_node_t &arg_node = *argument_nodes.at(i); - - /* Expect all arguments to have source */ - assert(arg_node.has_source()); - const wcstring arg_str = arg_node.get_source(ctx.src); - - /* Expand this string */ - std::vector arg_expanded; - int expand_ret = expand_string(arg_str, arg_expanded, 0); - switch (expand_ret) - { - case EXPAND_ERROR: - { - error(SYNTAX_ERROR, - arg_node.source_start, - _(L"Could not expand string '%ls'"), - arg_str.c_str()); - break; - } - - case EXPAND_WILDCARD_NO_MATCH: - { - /* Store the node that failed to expand */ - unmatched_wildcard = true; - if (! 
unmatched_wildcard_node) - { - unmatched_wildcard_node = &arg_node; - } - break; - } - - case EXPAND_WILDCARD_MATCH: - { - matched_wildcard = true; - break; - } - - case EXPAND_OK: - { - break; - } - } - - /* Now copy over any expanded arguments */ - for (size_t i=0; i < arg_expanded.size(); i++) - { - argument_list.push_back(arg_expanded.at(i).completion); - } - } - - /* Return if we had a wildcard problem */ - if (unmatched_wildcard && ! matched_wildcard) - { - *out_unmatched_wildcard_node = unmatched_wildcard_node; - } - - return argument_list; -} - -io_chain_t parser_t::determine_io_chain(const parse_node_t &statement,const parser_context_t &ctx) -{ - io_chain_t result; - - /* Get all redirection nodes underneath the statement */ - const parse_node_tree_t::parse_node_list_t redirect_nodes = ctx.tree.find_nodes(statement, symbol_redirection); - for (size_t i=0; i < redirect_nodes.size(); i++) - { - const parse_node_t &redirect_node = *redirect_nodes.at(i); - - int source_fd = -1; /* source fd */ - wcstring target; /* file path or target fd */ - enum token_type redirect_type = ctx.tree.type_for_redirection(redirect_node, ctx.src, &source_fd, &target); - - /* PCA: I can't justify this EXPAND_SKIP_VARIABLES flag. It was like this when I got here. */ - bool target_expanded = expand_one(target, no_exec ? EXPAND_SKIP_VARIABLES : 0); - if (! 
target_expanded || target.empty()) - { - /* Should improve this error message */ - error(SYNTAX_ERROR, - redirect_node.source_start, - _(L"Invalid redirection target: %ls"), - target.c_str()); - } - - - /* Generate the actual IO redirection */ - shared_ptr new_io; - assert(redirect_type != TOK_NONE); - switch (redirect_type) - { - case TOK_REDIRECT_FD: - { - if (target == L"-") - { - new_io.reset(new io_close_t(source_fd)); - } - else - { - wchar_t *end = NULL; - errno = 0; - int old_fd = fish_wcstoi(target.c_str(), &end, 10); - if (old_fd < 0 || errno || *end) - { - error(SYNTAX_ERROR, - redirect_node.source_start, - _(L"Requested redirection to something that is not a file descriptor %ls"), - target.c_str()); - } - else - { - new_io.reset(new io_fd_t(source_fd, old_fd)); - } - } - break; - } - - case TOK_REDIRECT_OUT: - case TOK_REDIRECT_APPEND: - case TOK_REDIRECT_IN: - case TOK_REDIRECT_NOCLOB: - { - int oflags = oflags_for_redirection_type(redirect_type); - io_file_t *new_io_file = new io_file_t(source_fd, target, oflags); - new_io.reset(new_io_file); - break; - } - - default: - { - // Should be unreachable - fprintf(stderr, "Unexpected redirection type %ld. aborting.\n", (long)redirect_type); - PARSER_DIE(); - break; - } - } - - /* Append the new_io if we got one */ - if (new_io.get() != NULL) - { - result.push_back(new_io); - } - } - return result; -} - -/* Returns a process_t allocated with new. It's the caller's responsibility to delete it (!) 
*/ -process_t *parser_t::create_job_process(job_t *job, const parse_node_t &statement_node, const parser_context_t &ctx) -{ - assert(statement_node.type == symbol_statement); - assert(statement_node.child_count == 1); - - // Get the "specific statement" which is boolean / block / if / switch / decorated - const parse_node_t &specific_statement = *ctx.tree.get_child(statement_node, 0); - - process_t *result = NULL; - - switch (specific_statement.type) - { - case symbol_boolean_statement: - { - result = this->create_boolean_process(job, specific_statement, ctx); - break; - } - - case symbol_block_statement: - { - const parse_node_t &header = *ctx.tree.get_child(specific_statement, 0, symbol_block_header); - const parse_node_t &specific_header = *ctx.tree.get_child(header, 0); - switch (specific_header.type) - { - case symbol_for_header: - result = this->create_for_process(job, specific_header, specific_statement, ctx); - break; - - case symbol_while_header: - result = this->create_while_process(job, specific_header, specific_statement, ctx); - break; - - case symbol_function_header: - // No process is associated with creating a function - // TODO: create the darn function! - result = NULL; - break; - - case symbol_begin_header: - result = this->create_begin_process(job, specific_header, specific_statement, ctx); - break; - - default: - fprintf(stderr, "Unexpected header type\n"); - PARSER_DIE(); - break; - } - break; - } - - case symbol_decorated_statement: - { - const parse_node_t &plain_statement = ctx.tree.find_child(specific_statement, symbol_plain_statement); - result = this->create_plain_process(job, plain_statement, ctx); - break; - } - - default: - fprintf(stderr, "'%ls' not handled by new parser yet\n", specific_statement.describe().c_str()); - } - - return result; -} - /** Fully parse a single job. Does not call exec on it, but any command substitutions in the job will be executed. 
@@ -2727,88 +2343,6 @@ static bool job_should_skip_elseif(const job_t *job, const block_t *current_bloc } } -/** - Evaluates a job from a node tree. -*/ - -void parser_t::eval_job(const parse_node_t &job_node, const parser_context_t &ctx) -{ - assert(job_node.type == symbol_job); - this->job_start_pos = (int)job_node.source_start; - - // Get terminal modes - struct termios tmodes = {}; - if (get_is_interactive()) - { - if (tcgetattr(STDIN_FILENO, &tmodes)) - { - // need real error handling here - wperror(L"tcgetattr"); - return; - } - } - - /* Track whether we had an error */ - bool process_errored = false; - - /* Profiling support */ - long long t1 = 0, t2 = 0, t3 = 0; - const bool do_profile = profile; - profile_item_t *profile_item = NULL; - if (do_profile) - { - profile_item = new profile_item_t(); - profile_item->skipped = 1; - profile_items.push_back(profile_item); - t1 = get_time(); - } - - job_t *j = this->job_create(); - job_set_flag(j, JOB_FOREGROUND, 1); - job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL)); - job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL) \ - && (!is_subshell && !is_event)); - job_set_flag(j, JOB_SKIP_NOTIFICATION, is_subshell \ - || is_block \ - || is_event \ - || (!get_is_interactive())); - - current_block()->job = j; - - /* Tell the job what its command is */ - j->set_command(job_node.get_source(ctx.src)); - - /* We are going to construct process_t structures for every statement in the job. Get the first statement. */ - const parse_node_t *statement_node = ctx.tree.get_child(job_node, 0, symbol_statement); - assert(statement_node != NULL); - - /* Create the process (may fail!) 
*/ - j->first_process = this->create_job_process(j, *statement_node, ctx); - if (j->first_process == NULL) - process_errored = true; - - /* Construct process_ts for job continuations (pipelines), by walking the list until we hit the terminal (empty) job continuationf */ - const parse_node_t *job_cont = ctx.tree.get_child(job_node, 1, symbol_job_continuation); - process_t *last_process = j->first_process; - while (! process_errored && job_cont != NULL && job_cont->child_count > 0) - { - assert(job_cont->type == symbol_job_continuation); - - /* Get the statement node and make a process from it */ - const parse_node_t *statement_node = ctx.tree.get_child(*job_cont, 1, symbol_statement); - assert(statement_node != NULL); - - /* Store the new process (and maybe with an error) */ - last_process->next = this->create_job_process(j, *statement_node, ctx); - if (last_process->next == NULL) - process_errored = true; - - /* Link the process and get the next continuation */ - last_process = last_process->next; - job_cont = ctx.tree.get_child(*job_cont, 2, symbol_job_continuation); - } -} - /** Evaluates a job from the specified tokenizer. First calls parse_job to parse the job and then calls exec to execute it. @@ -3050,213 +2584,6 @@ void parser_t::eval_job(tokenizer_t *tok) job_reap(0); } -static void push_all_children(std::vector *execution_stack, const parse_node_t &node) -{ - // push nodes in reverse order, so the first node ends up on top - unsigned child_idx = node.child_count; - while (child_idx--) - { - execution_stack->push_back(node.child_offset(child_idx)); - } -} - -void parser_t::execute_next(std::vector *execution_stack, const parser_context_t &ctx) -{ - assert(execution_stack != NULL); - assert(! 
execution_stack->empty()); - - /* Get the offset of the next node and remove it from the stack */ - node_offset_t next_offset = execution_stack->back(); - execution_stack->pop_back(); - - /* Get the node */ - assert(next_offset < ctx.tree.size()); - const parse_node_t &node = ctx.tree.at(next_offset); - - /* Do something with it */ - switch (node.type) - { - case symbol_job_list: - // These correspond to the three productions of job_list - switch (node.production_idx) - { - case 0: // empty - break; - - case 1: //job, job_list - push_all_children(execution_stack, node); - break; - - case 2: //blank line, job_list - execution_stack->push_back(node.child_offset(1)); - break; - - default: //if we get here, it means more productions have been added to job_list, which is bad - PARSER_DIE(); - break; - } - break; - - case symbol_job: //statement, job_continuation - push_all_children(execution_stack, node); - break; - - case symbol_job_continuation: - switch (node.production_idx) - { - case 0: //empty - break; - - case 1: //pipe, statement, job_continuation - execution_stack->push_back(node.child_offset(2)); - execution_stack->push_back(node.child_offset(1)); - break; - - default: - PARSER_DIE(); - break; - } - break; - - } -} - -/* Executes the job list at the given node offset */ -void parser_t::execute_job_list(node_offset_t idx, const parser_context_t &ctx) -{ - assert(idx < ctx.tree.size()); - - const parse_node_t *job_list = &ctx.tree.at(idx); - assert(job_list->type == symbol_job_list); - while (job_list != NULL) - { - // These correspond to the three productions of job_list - // Try pulling out a job - const parse_node_t *job = NULL; - switch (job_list->production_idx) - { - case 0: // empty - job_list = NULL; - break; - - case 1: //job, job_list - job = ctx.tree.get_child(*job_list, 0, symbol_job); - job_list = ctx.tree.get_child(*job_list, 1, symbol_job_list); - break; - - case 2: //blank line, job_list - job = NULL; - job_list = ctx.tree.get_child(*job_list, 1, 
symbol_job_list); - break; - - default: //if we get here, it means more productions have been added to job_list, which is bad - PARSER_DIE(); - } - - if (job != NULL) - { - this->eval_job(*job, ctx); - } - } -} - -int parser_t::eval2(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type) -{ - parser_context_t mut_ctx; - mut_ctx.src = cmd_str; - - /* Parse the tree */ - if (! parse_t::parse(cmd_str, parse_flag_none, &mut_ctx.tree, NULL)) - { - return 1; - } - - /* Make a const version for safety's sake */ - const parser_context_t &ctx = mut_ctx; - - CHECK_BLOCK(1); - - /* Record the current chain so we can put it back later */ - scoped_push block_io_push(&block_io, io); - scoped_push forbidden_function_push(&forbidden_function); - const size_t forbid_count = forbidden_function.size(); - const block_t * const start_current_block = this->current_block(); - - /* Do some stuff I haven't figured out yet */ - job_reap(0); - - /* Only certain blocks are allowed */ - if ((block_type != TOP) && - (block_type != SUBST)) - { - debug(1, - INVALID_SCOPE_ERR_MSG, - parser_t::get_block_desc(block_type)); - bugreport(); - return 1; - } - - eval_level++; - - this->push_block(new scope_block_t(block_type)); - - error_code = 0; - - event_fire(NULL); - - /* Execute the top level job list */ - execute_job_list(0, ctx); - - parser_t::pop_block(); - - while (start_current_block != this->current_block()) - { - if (this->current_block() == NULL) - { - debug(0, - _(L"End of block mismatch. 
Program terminating.")); - bugreport(); - FATAL_EXIT(); - break; - } - - if ((!error_code) && (!exit_status()) && (!proc_get_last_status())) - { - - //debug( 2, L"Status %d\n", proc_get_last_status() ); - - debug(1, - L"%ls", parser_t::get_block_desc(current_block()->type())); - debug(1, - BLOCK_END_ERR_MSG); - fwprintf(stderr, L"%ls", parser_t::current_line()); - - const wcstring h = builtin_help_get(*this, L"end"); - if (h.size()) - fwprintf(stderr, L"%ls", h.c_str()); - break; - - } - parser_t::pop_block(); - } - - this->print_errors_stderr(); - - while (forbidden_function.size() > forbid_count) - parser_t::allow_function(); - - /* - Restore previous eval state - */ - eval_level--; - - int code=error_code; - error_code=0; - - job_reap(0); - - return code; -} int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type) { diff --git a/parser.h b/parser.h index 13adfc71b..80b2303f1 100644 --- a/parser.h +++ b/parser.h @@ -271,12 +271,6 @@ struct profile_item_t struct tokenizer_t; -struct parser_context_t -{ - parse_node_tree_t tree; - wcstring src; -}; - class parser_t { friend class parse_execution_context_t; @@ -327,22 +321,11 @@ private: parser_t(const parser_t&); parser_t& operator=(const parser_t&); - process_t *create_job_process(job_t *job, const parse_node_t &statement_node, const parser_context_t &ctx); - process_t *create_boolean_process(job_t *job, const parse_node_t &bool_statement, const parser_context_t &ctx); - process_t *create_for_process(job_t *job, const parse_node_t &header, const parse_node_t &statement, const parser_context_t &ctx); - process_t *create_while_process(job_t *job, const parse_node_t &header, const parse_node_t &statement, const parser_context_t &ctx); - process_t *create_begin_process(job_t *job, const parse_node_t &header, const parse_node_t &statement, const parser_context_t &ctx); - process_t *create_plain_process(job_t *job, const parse_node_t &statement, const parser_context_t &ctx); - - 
wcstring_list_t determine_arguments(const parse_node_t &statement, const parse_node_t **out_unmatched_wildcard_node, const parser_context_t &ctx); - io_chain_t determine_io_chain(const parse_node_t &statement,const parser_context_t &ctx); - void parse_job_argument_list(process_t *p, job_t *j, tokenizer_t *tok, std::vector&, bool); int parse_job(process_t *p, job_t *j, tokenizer_t *tok); void skipped_exec(job_t * j); void eval_job(tokenizer_t *tok); - void eval_job(const parse_node_t &job_node, const parser_context_t &ctx); int parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset); void print_errors(wcstring &target, const wchar_t *prefix); void print_errors_stderr(); @@ -388,11 +371,7 @@ public: \return 0 on success, 1 otherwise */ int eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type_t block_type); - int eval2(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type); - void execute_job_list(node_offset_t idx, const parser_context_t &ctx); - void execute_next(std::vector *execution_stack, const parser_context_t &ctx); - /** Evaluate line as a list of parameters, i.e. tokenize it and perform parameter expansion and cmdsubst execution on the tokens. The output is inserted into output. From 6536ffe178922750756e860561c2205513067b3b Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 26 Dec 2013 13:24:10 -0800 Subject: [PATCH 071/108] Facilities for turning on new AST-based parser. 
--- Makefile.in | 2 +- parse_execution.cpp | 31 +++++++++++++-------- parse_execution.h | 8 +++--- parser.cpp | 65 ++++++++++++++++++++++++++++++++++++++++++--- parser.h | 12 +++++++-- reader.cpp | 7 ++--- 6 files changed, 100 insertions(+), 25 deletions(-) diff --git a/Makefile.in b/Makefile.in index 1bca59912..4e01f3d14 100644 --- a/Makefile.in +++ b/Makefile.in @@ -92,7 +92,7 @@ FISH_OBJS := function.o builtin.o complete.o env.o exec.o expand.o \ env_universal.o env_universal_common.o input_common.o event.o \ signal.o io.o parse_util.o common.o screen.o path.o autoload.o \ parser_keywords.o iothread.o color.o postfork.o \ - builtin_test.o parse_tree.o parse_productions.o + builtin_test.o parse_tree.o parse_productions.o parse_execution.cpp FISH_INDENT_OBJS := fish_indent.o print_help.o common.o \ parser_keywords.o wutil.o tokenizer.o diff --git a/parse_execution.cpp b/parse_execution.cpp index 60c0d2627..72ff611cc 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -14,7 +14,7 @@ #include "path.h" -parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &t, const wcstring s, parser_t *p) : tree(t), src(s), parser(p), eval_level(0) +parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, const io_chain_t &io, parser_t *p) : tree(t), src(s), block_io(io), parser(p), eval_level(0) { } @@ -53,27 +53,27 @@ void parse_execution_context_t::run_while_process(const parse_node_t &header, co assert(header.type == symbol_while_header); assert(statement.type == symbol_block_statement); + /* Push a while block */ while_block_t *wb = new while_block_t(); wb->status = WHILE_TEST_FIRST; wb->node_offset = this->get_offset(statement); parser->push_block(wb); - // The condition of the while loop, as a job + /* The condition and contents of the while loop, as a job and job list respectively */ const parse_node_t &while_condition = *get_child(header, 1, symbol_job); - - // The contents of the while loop, as a 
job list const parse_node_t &block_contents = *get_child(statement, 2, symbol_job_list); - // A while loop is a while loop! + /* A while loop is a while loop! */ while (! this->should_cancel() && this->run_1_job(while_condition) == EXIT_SUCCESS) { this->run_job_list(block_contents); } + /* Done */ parser->pop_block(wb); } - +/* Appends an error to the error list. Always returns true, so you can assign the result to an 'errored' variable */ bool parse_execution_context_t::append_error(const parse_node_t &node, const wchar_t *fmt, ...) { parse_error_t error; @@ -90,6 +90,7 @@ bool parse_execution_context_t::append_error(const parse_node_t &node, const wch return true; } +/* Creates a 'normal' (non-block) process */ process_t *parse_execution_context_t::create_plain_process(job_t *job, const parse_node_t &statement) { bool errored = false; @@ -529,7 +530,7 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) start_time = get_time(); } - job_t *j = parser->job_create(); + job_t *j = parser->job_create(this->block_io); job_set_flag(j, JOB_FOREGROUND, 1); job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL)); job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL) \ @@ -597,10 +598,11 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) return ret; } -void parse_execution_context_t::run_job_list(const parse_node_t &job_list_node) +int parse_execution_context_t::run_job_list(const parse_node_t &job_list_node) { assert(job_list_node.type == symbol_job_list); + int result = 1; const parse_node_t *job_list = &job_list_node; while (job_list != NULL) { @@ -631,14 +633,21 @@ void parse_execution_context_t::run_job_list(const parse_node_t &job_list_node) if (job != NULL) { - this->run_1_job(*job); + result = this->run_1_job(*job); } } + /* Returns the last job executed */ + return result; } -void parse_execution_context_t::eval_job_list(const parse_node_t &job_list_node) +int parse_execution_context_t::eval_top_level_job_list() 
{ - this->run_job_list(job_list_node); + if (tree.empty()) + return EXIT_FAILURE; + + const parse_node_t &job_list = tree.at(0); + assert(job_list.type == symbol_job_list); + return this->run_job_list(job_list); } diff --git a/parse_execution.h b/parse_execution.h index f6c7c1207..0d679bb6e 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -19,6 +19,7 @@ class parse_execution_context_t private: const parse_node_tree_t tree; const wcstring src; + const io_chain_t block_io; parser_t * const parser; parse_error_list_t errors; @@ -53,15 +54,16 @@ class parse_execution_context_t bool determine_io_chain(const parse_node_t &statement, io_chain_t *out_chain); int run_1_job(const parse_node_t &job_node); - void run_job_list(const parse_node_t &job_list_node); + int run_job_list(const parse_node_t &job_list_node); bool populate_job_from_job_node(job_t *j, const parse_node_t &job_node); void eval_next_stack_elem(); public: - parse_execution_context_t(const parse_node_tree_t &t, const wcstring s, parser_t *p); + parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, const io_chain_t &io, parser_t *p); - void eval_job_list(const parse_node_t &job_node); + /* Actually execute the job list described by the tree */ + int eval_top_level_job_list(); }; diff --git a/parser.cpp b/parser.cpp index adcd22087..da6d9f588 100644 --- a/parser.cpp +++ b/parser.cpp @@ -45,6 +45,7 @@ The fish parser. Contains functions for parsing and evaluating code. #include "signal.h" #include "complete.h" #include "parse_tree.h" +#include "parse_execution.h" /** Maximum number of function calls, i.e. recursion depth. 
@@ -1189,9 +1190,9 @@ int parser_t::is_help(const wchar_t *s, int min_match) (len >= (size_t)min_match && (wcsncmp(L"--help", s, len) == 0)); } -job_t *parser_t::job_create() +job_t *parser_t::job_create(const io_chain_t &io) { - job_t *res = new job_t(acquire_job_id(), this->block_io); + job_t *res = new job_t(acquire_job_id(), io); this->my_job_list.push_front(res); job_set_flag(res, @@ -2375,7 +2376,7 @@ void parser_t::eval_job(tokenizer_t *tok) { case TOK_STRING: { - job_t *j = this->job_create(); + job_t *j = this->job_create(this->block_io); job_set_flag(j, JOB_FOREGROUND, 1); job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL)); job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL) \ @@ -2584,9 +2585,55 @@ void parser_t::eval_job(tokenizer_t *tok) job_reap(0); } +int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum block_type_t block_type) +{ + CHECK_BLOCK(1); + + /* Only certain blocks are allowed */ + if ((block_type != TOP) && + (block_type != SUBST)) + { + debug(1, + INVALID_SCOPE_ERR_MSG, + parser_t::get_block_desc(block_type)); + bugreport(); + return 1; + } + + /* Parse the source into a tree, if we can */ + parse_node_tree_t tree; + if (! parse_t::parse(cmd, parse_flag_none, &tree, NULL)) + { + return 1; + } + + /* Not sure why we reap jobs here */ + job_reap(0); + + /* Append to the execution context stack */ + parse_execution_context_t *ctx = new parse_execution_context_t(tree, cmd, io, this); + execution_contexts.push_back(ctx); + + /* Start it up */ + int result = ctx->eval_top_level_job_list(); + + /* Clean up the execution context stack */ + assert(! 
execution_contexts.empty() && execution_contexts.back() == ctx); + execution_contexts.pop_back(); + delete ctx; + + /* Reap again */ + job_reap(0); + + return result; +} int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type) { + + if (parser_use_ast()) + return this->eval_new_parser(cmd_str, io, block_type); + const wchar_t * const cmd = cmd_str.c_str(); size_t forbid_count; int code; @@ -3037,3 +3084,15 @@ breakpoint_block_t::breakpoint_block_t() : { } +bool parser_use_ast(void) +{ + env_var_t var = env_get_string(L"fish_new_parser"); + if (var.missing_or_empty()) + { + return false; + } + else + { + return from_string(var); + } +} diff --git a/parser.h b/parser.h index 80b2303f1..2f9291f03 100644 --- a/parser.h +++ b/parser.h @@ -270,6 +270,7 @@ struct profile_item_t }; struct tokenizer_t; +class parse_execution_context_t; class parser_t { @@ -285,6 +286,9 @@ private: /** Position of last error */ int err_pos; + + /** Stack of execution contexts. We own these pointers and must delete them */ + std::vector execution_contexts; /** Description of last error */ wcstring err_buff; @@ -331,7 +335,7 @@ private: void print_errors_stderr(); /** Create a job */ - job_t *job_create(); + job_t *job_create(const io_chain_t &io); public: std::vector profile_items; @@ -370,7 +374,8 @@ public: \return 0 on success, 1 otherwise */ - int eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type_t block_type); + int eval(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type); + int eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum block_type_t block_type); /** Evaluate line as a list of parameters, i.e. tokenize it and perform parameter expansion and cmdsubst execution on the tokens. 
@@ -531,5 +536,8 @@ public: const wchar_t *get_block_command(int type) const; }; +/* Temporary */ +bool parser_use_ast(void); + #endif diff --git a/reader.cpp b/reader.cpp index 5186b787d..5c5b094d9 100644 --- a/reader.cpp +++ b/reader.cpp @@ -3398,12 +3398,9 @@ const wchar_t *reader_readline(void) case 0: { /* Finished command, execute it. Don't add items that start with a leading space. */ - if (! data->command_line.empty() && data->command_line.at(0) != L' ') + if (data->history != NULL && ! data->command_line.empty() && data->command_line.at(0) != L' ') { - if (data->history != NULL) - { - data->history->add_with_file_detection(data->command_line); - } + data->history->add_with_file_detection(data->command_line); } finished=1; data->buff_pos=data->command_length(); From c1a13ae8bcfed6d1ce38453e7fadb2fee887a283 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 26 Dec 2013 13:36:43 -0800 Subject: [PATCH 072/108] Fix a few errors identified by testing new parser --- parse_execution.cpp | 8 ++++++++ parser.cpp | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/parse_execution.cpp b/parse_execution.cpp index 72ff611cc..2aa32f82e 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -545,6 +545,14 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) /* Populate the job. This may fail for reasons like command_not_found */ bool process_errored = ! 
this->populate_job_from_job_node(j, job_node); + /* If we errored, we have to clean up the job */ + if (process_errored) + { + assert(parser->current_block()->job == j); + parser->current_block()->job = NULL; + job_free(j); + } + /* Store time it took to 'parse' the command */ if (do_profile) { diff --git a/parser.cpp b/parser.cpp index da6d9f588..3ba3c89be 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2615,6 +2615,8 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl execution_contexts.push_back(ctx); /* Start it up */ + const block_t * const start_current_block = current_block(); + this->push_block(new scope_block_t(block_type)); int result = ctx->eval_top_level_job_list(); /* Clean up the execution context stack */ @@ -2622,6 +2624,21 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl execution_contexts.pop_back(); delete ctx; + /* Clean up the block stack */ + this->pop_block(); + while (start_current_block != current_block()) + { + if (current_block() == NULL) + { + debug(0, + _(L"End of block mismatch. Program terminating.")); + bugreport(); + FATAL_EXIT(); + break; + } + this->pop_block(); + } + /* Reap again */ job_reap(0); From a6ca809a4e4873f3fd16e4a763001a109afc2185 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 26 Dec 2013 14:52:15 -0800 Subject: [PATCH 073/108] Fix for issue where last job_list in tree would have a -1 production_idx because we never actually sent the terminal token type --- fish_tests.cpp | 6 ++++++ parse_constants.h | 2 ++ parse_tree.cpp | 32 +++++++++++++++++++++++--------- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index af65b70ab..83f8976d0 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2212,6 +2212,12 @@ static bool increment(std::vector &tokens) if (! 
incremented_in_keyword) { token.token_type++; + // Skip the very special parse_token_type_terminate, since that's always the last thing delivered + if (token.token_type == parse_token_type_terminate) + { + token.token_type++; + } + if (token.token_type > LAST_TERMINAL_TYPE) { token.token_type = FIRST_TERMINAL_TYPE; diff --git a/parse_constants.h b/parse_constants.h index a322073e7..b59f52e98 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -56,6 +56,8 @@ enum parse_token_type_t parse_token_type_redirection, parse_token_type_background, parse_token_type_end, + + // Special terminal type that means no more tokens forthcoming parse_token_type_terminate, // Very special terminal types that don't appear in the production list diff --git a/parse_tree.cpp b/parse_tree.cpp index d970200b0..e231eb9d1 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -224,6 +224,7 @@ wcstring keyword_description(parse_keyword_t k) wcstring parse_node_t::describe(void) const { wcstring result = token_type_description(type); + append_format(result, L" (prod %d)", this->production_idx); return result; } @@ -455,7 +456,7 @@ class parse_ll_t } else { - // Generate the parse node. Note that this push_back may invalidate node. + // Generate the parse node. parse_token_type_t child_type = production_element_type(elem); parse_node_t child = parse_node_t(child_type); child.parent = parent_node_idx; @@ -861,8 +862,10 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) } else { - // When a job_list encounters something like 'else', it returns an empty production to return control to the outer block. But if it's unbalanced, then we'll end up with an empty stack! So make sure that doesn't happen. This is the primary mechanism by which we detect e.g. unbalanced end. 
- if (symbol_stack.size() == 1 && production_is_empty(production)) + bool is_terminate = (token1.type == parse_token_type_terminate); + + // When a job_list encounters something like 'else', it returns an empty production to return control to the outer block. But if it's unbalanced, then we'll end up with an empty stack! So make sure that doesn't happen. This is the primary mechanism by which we detect e.g. unbalanced end. However, if we get a true terminate token, then we allow (expect) this to empty the stack + if (symbol_stack.size() == 1 && production_is_empty(production) && ! is_terminate) { this->parse_error_unbalancing_token(token1); break; @@ -872,8 +875,14 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) // Note that stack_elem is invalidated by popping the stack. symbol_stack_pop_push_production(production); - // Expect to not have an empty stack - assert(! symbol_stack.empty()); + // Expect to not have an empty stack, unless this was the terminate type + // Note we may not have an empty stack with the terminate type (i.e. incomplete input) + assert(is_terminate || ! symbol_stack.empty()); + + if (symbol_stack.empty()) + { + break; + } } } } @@ -931,12 +940,15 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) /* Placeholder invalid token */ static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_none, false, -1, -1}; +/* Terminal token */ +static const parse_token_t kTerminalToken = {parse_token_type_terminate, parse_keyword_none, false, -1, -1}; + /* Return a new parse token, advancing the tokenizer */ static inline parse_token_t next_parse_token(tokenizer_t *tok) { if (! tok_has_next(tok)) { - return kInvalidToken; + return kTerminalToken; } token_type tok_type = static_cast(tok_last_type(tok)); @@ -978,8 +990,8 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags /* We are an LL(2) parser. We pass two tokens at a time. 
New tokens come in at index 1. Seed our queue with an initial token at index 1. */ parse_token_t queue[2] = {kInvalidToken, next_parse_token(&tok)}; - /* Go until the most recently added token is invalid. Note this may mean we don't process anything if there were no tokens. */ - while (queue[1].type != token_type_invalid) + /* Loop until we get a terminal token */ + do { /* Push a new token onto the queue */ queue[0] = queue[1]; @@ -1010,7 +1022,9 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags break; } } - } + + /* If this was the last token, then stop the loop */ + } while (queue[0].type != parse_token_type_terminate); // Teach each node where its source range is From 6ce4b344e45baaa06bf593a5c0983da7a22eb64e Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 27 Dec 2013 01:38:43 -0800 Subject: [PATCH 074/108] Hook up for statements, if statements, and function definition in new parser --- builtin.cpp | 307 +++++++++++++++++++++++++++++++++++++++++- builtin.h | 5 +- exec.cpp | 51 +++++-- function.cpp | 1 - parse_execution.cpp | 294 ++++++++++++++++++++++++++++++++++++---- parse_execution.h | 18 ++- parse_productions.cpp | 2 +- parse_tree.cpp | 6 + parse_tree.h | 12 +- parse_util.cpp | 2 +- parser.cpp | 68 +++++++--- parser.h | 6 + proc.h | 4 + 13 files changed, 709 insertions(+), 67 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index a97efdbf6..11036e378 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -165,7 +165,7 @@ static const io_chain_t *real_io; /** Counts the number of non null pointers in the specified array */ -static int builtin_count_args(wchar_t **argv) +static int builtin_count_args(const wchar_t * const * argv) { int argc = 1; while (argv[argc] != NULL) @@ -1752,6 +1752,307 @@ static int builtin_pwd(parser_t &parser, wchar_t **argv) } } +/* This is nearly identical to builtin_function, and is intended to be the successor (with no block manipulation, no function/end split) */ +int define_function(parser_t 
&parser, const wcstring_list_t &args, const wcstring &contents, wcstring *out_err) +{ + assert(out_err != NULL); + + /* Hackish const_cast matches the one in builtin_run */ + const null_terminated_array_t argv_array(args); + wchar_t **argv = const_cast(argv_array.get()); + + int argc = builtin_count_args(argv); + int res=STATUS_BUILTIN_OK; + wchar_t *desc=0; + std::vector events; + std::auto_ptr named_arguments(NULL); + + wchar_t *name = 0; + bool shadows = true; + + woptind=0; + + const struct woption long_options[] = + { + { L"description", required_argument, 0, 'd' }, + { L"on-signal", required_argument, 0, 's' }, + { L"on-job-exit", required_argument, 0, 'j' }, + { L"on-process-exit", required_argument, 0, 'p' }, + { L"on-variable", required_argument, 0, 'v' }, + { L"on-event", required_argument, 0, 'e' }, + { L"help", no_argument, 0, 'h' }, + { L"argument-names", no_argument, 0, 'a' }, + { L"no-scope-shadowing", no_argument, 0, 'S' }, + { 0, 0, 0, 0 } + }; + + while (1 && (!res)) + { + int opt_index = 0; + + int opt = wgetopt_long(argc, + argv, + L"d:s:j:p:v:e:haS", + long_options, + &opt_index); + if (opt == -1) + break; + + switch (opt) + { + case 0: + if (long_options[opt_index].flag != 0) + break; + + + + append_format(*out_err, + BUILTIN_ERR_UNKNOWN, + argv[0], + long_options[opt_index].name); + + res = 1; + break; + + case 'd': + desc=woptarg; + break; + + case 's': + { + int sig = wcs2sig(woptarg); + + if (sig < 0) + { + append_format(*out_err, + _(L"%ls: Unknown signal '%ls'\n"), + argv[0], + woptarg); + res=1; + break; + } + events.push_back(event_t::signal_event(sig)); + break; + } + + case 'v': + { + if (wcsvarname(woptarg)) + { + append_format(*out_err, + _(L"%ls: Invalid variable name '%ls'\n"), + argv[0], + woptarg); + res=STATUS_BUILTIN_ERROR; + break; + } + + events.push_back(event_t::variable_event(woptarg)); + break; + } + + + case 'e': + { + events.push_back(event_t::generic_event(woptarg)); + break; + } + + case 'j': + case 'p': + { + pid_t 
pid; + wchar_t *end; + event_t e(EVENT_ANY); + + if ((opt == 'j') && + (wcscasecmp(woptarg, L"caller") == 0)) + { + int job_id = -1; + + if (is_subshell) + { + size_t block_idx = 0; + + /* Find the outermost substitution block */ + for (block_idx = 0; ; block_idx++) + { + const block_t *b = parser.block_at_index(block_idx); + if (b == NULL || b->type() == SUBST) + break; + } + + /* Go one step beyond that, to get to the caller */ + const block_t *caller_block = parser.block_at_index(block_idx + 1); + if (caller_block != NULL && caller_block->job != NULL) + { + job_id = caller_block->job->job_id; + } + } + + if (job_id == -1) + { + append_format(*out_err, + _(L"%ls: Cannot find calling job for event handler\n"), + argv[0]); + res=1; + } + else + { + e.type = EVENT_JOB_ID; + e.param1.job_id = job_id; + } + + } + else + { + errno = 0; + pid = fish_wcstoi(woptarg, &end, 10); + if (errno || !end || *end) + { + append_format(*out_err, + _(L"%ls: Invalid process id %ls\n"), + argv[0], + woptarg); + res=1; + break; + } + + + e.type = EVENT_EXIT; + e.param1.pid = (opt=='j'?-1:1)*abs(pid); + } + if (res) + { + /* nothing */ + } + else + { + events.push_back(e); + } + break; + } + + case 'a': + if (named_arguments.get() == NULL) + named_arguments.reset(new wcstring_list_t); + break; + + case 'S': + shadows = 0; + break; + + case 'h': + builtin_print_help(parser, argv[0], stdout_buffer); + return STATUS_BUILTIN_OK; + + case '?': + builtin_unknown_option(parser, argv[0], argv[woptind-1]); + res = 1; + break; + + } + + } + + if (!res) + { + + if (argc == woptind) + { + append_format(*out_err, + _(L"%ls: Expected function name\n"), + argv[0]); + res=1; + } + else if (wcsfuncname(argv[woptind])) + { + append_format(*out_err, + _(L"%ls: Illegal function name '%ls'\n"), + argv[0], + argv[woptind]); + + res=1; + } + else if (parser_keywords_is_reserved(argv[woptind])) + { + + append_format(*out_err, + _(L"%ls: The name '%ls' is reserved,\nand can not be used as a function name\n"), + 
argv[0], + argv[woptind]); + + res=1; + } + else if (! wcslen(argv[woptind])) + { + append_format(*out_err, _(L"%ls: No function name given\n"), argv[0]); + } + else + { + + name = argv[woptind++]; + + if (named_arguments.get()) + { + while (woptind < argc) + { + if (wcsvarname(argv[woptind])) + { + append_format(*out_err, + _(L"%ls: Invalid variable name '%ls'\n"), + argv[0], + argv[woptind]); + res = STATUS_BUILTIN_ERROR; + break; + } + + named_arguments->push_back(argv[woptind++]); + } + } + else if (woptind != argc) + { + append_format(*out_err, + _(L"%ls: Expected one argument, got %d\n"), + argv[0], + argc); + res=1; + + } + } + } + + if (res) + { + builtin_print_help(parser, argv[0], *out_err); + } + else + { + function_data_t d; + + d.name = name; + if (desc) + d.description = desc; + d.events.swap(events); + d.shadows = shadows; + if (named_arguments.get()) + d.named_arguments.swap(*named_arguments); + + for (size_t i=0; i &opened_fds) repeatedly reopened for every command in the block, which would reset the cursor position. - \return the transmogrified chain on sucess, or 0 on failiure + \return true on success, false on failure. 
Returns the output chain and opened_fds by reference */ -static bool io_transmogrify(const io_chain_t &in_chain, io_chain_t &out_chain, std::vector &out_opened_fds) +static bool io_transmogrify(const io_chain_t &in_chain, io_chain_t *out_chain, std::vector *out_opened_fds) { ASSERT_IS_MAIN_THREAD(); - assert(out_chain.empty()); + assert(out_chain != NULL && out_opened_fds != NULL); + assert(out_chain->empty()); /* Just to be clear what we do for an empty chain */ if (in_chain.empty()) @@ -479,8 +480,8 @@ static bool io_transmogrify(const io_chain_t &in_chain, io_chain_t &out_chain, s if (success) { /* Yay */ - out_chain.swap(result_chain); - out_opened_fds.swap(opened_fds); + out_chain->swap(result_chain); + out_opened_fds->swap(opened_fds); } else { @@ -496,19 +497,24 @@ static bool io_transmogrify(const io_chain_t &in_chain, io_chain_t &out_chain, s Morph an io redirection chain into redirections suitable for passing to eval, call eval, and clean up morphed redirections. - \param def the code to evaluate + \param def the code to evaluate, or the empty string if none + \param node_offset the offset of the node to evalute, or NODE_OFFSET_INVALID \param block_type the type of block to push on evaluation \param io the io redirections to be performed on this block */ static void internal_exec_helper(parser_t &parser, - const wchar_t *def, + const wcstring &def, + node_offset_t node_offset, enum block_type_t block_type, const io_chain_t &ios) { + // If we have a valid node offset, then we must not have a string to execute + assert(node_offset == NODE_OFFSET_INVALID || def.empty()); + io_chain_t morphed_chain; std::vector opened_fds; - bool transmorgrified = io_transmogrify(ios, morphed_chain, opened_fds); + bool transmorgrified = io_transmogrify(ios, &morphed_chain, &opened_fds); int is_block_old=is_block; is_block=1; @@ -524,7 +530,14 @@ static void internal_exec_helper(parser_t &parser, signal_unblock(); - parser.eval(def, morphed_chain, block_type); + if 
(node_offset == NODE_OFFSET_INVALID) + { + parser.eval(def, morphed_chain, block_type); + } + else + { + parser.eval_block_node(node_offset, morphed_chain, block_type); + } signal_block(); @@ -926,7 +939,7 @@ void exec_job(parser_t &parser, job_t *j) if (! exec_error) { - internal_exec_helper(parser, def.c_str(), TOP, process_net_io_chain); + internal_exec_helper(parser, def, NODE_OFFSET_INVALID, TOP, process_net_io_chain); } parser.allow_function(); @@ -936,12 +949,14 @@ void exec_job(parser_t &parser, job_t *j) } case INTERNAL_BLOCK: + case INTERNAL_BLOCK_NODE: { if (p->next) { block_output_io_buffer.reset(io_buffer_t::create(0)); if (block_output_io_buffer.get() == NULL) { + /* We failed (e.g. no more fds could be created). */ exec_error = true; job_mark_process_as_failed(j, p); } @@ -954,12 +969,21 @@ void exec_job(parser_t &parser, job_t *j) if (! exec_error) { - internal_exec_helper(parser, p->argv0(), TOP, process_net_io_chain); + if (p->type == INTERNAL_BLOCK) + { + /* The block contents (as in, fish code) are stored in argv0 (ugh) */ + assert(p->argv0() != NULL); + internal_exec_helper(parser, p->argv0(), NODE_OFFSET_INVALID, TOP, process_net_io_chain); + } + else + { + assert(p->type == INTERNAL_BLOCK_NODE); + internal_exec_helper(parser, wcstring(), p->internal_block_node, TOP, process_net_io_chain); + } } break; - } - + case INTERNAL_BUILTIN: { int builtin_stdin=0; @@ -1115,6 +1139,7 @@ void exec_job(parser_t &parser, job_t *j) { case INTERNAL_BLOCK: + case INTERNAL_BLOCK_NODE: case INTERNAL_FUNCTION: { int status = proc_get_last_status(); diff --git a/function.cpp b/function.cpp index d10698745..eadcca7c6 100644 --- a/function.cpp +++ b/function.cpp @@ -186,7 +186,6 @@ void function_add(const function_data_t &data, const parser_t &parser) /* Remove the old function */ function_remove(data.name); - /* Create and store a new function */ const wchar_t *filename = reader_current_filename(); diff --git a/parse_execution.cpp b/parse_execution.cpp index 
2aa32f82e..1a2b16b1f 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -48,7 +48,196 @@ bool parse_execution_context_t::should_cancel() const return false; } -void parse_execution_context_t::run_while_process(const parse_node_t &header, const parse_node_t &statement) +int parse_execution_context_t::run_if_statement(const parse_node_t &statement) +{ + assert(statement.type == symbol_if_statement); + + /* Push an if block */ + if_block_t *ib = new if_block_t(); + ib->node_offset = this->get_offset(statement); + parser->push_block(ib); + + /* We have a sequence of if clauses, with a final else, resulting in a single job list that we execute */ + const parse_node_t *job_list_to_execute = NULL; + const parse_node_t *if_clause = get_child(statement, 0, symbol_if_clause); + const parse_node_t *else_clause = get_child(statement, 1, symbol_else_clause); + for (;;) + { + assert(if_clause != NULL && else_clause != NULL); + const parse_node_t &condition = *get_child(*if_clause, 1, symbol_job); + fprintf(stderr, "run %ls\n", get_source(condition).c_str()); + if (run_1_job(condition) == EXIT_SUCCESS) + { + /* condition succeeded */ + job_list_to_execute = get_child(*if_clause, 3, symbol_job_list); + break; + } + else if (else_clause->child_count > 0) + { + /* 'if' condition failed, no else clause, we're done */ + job_list_to_execute = NULL; + break; + } + else + { + /* We have an 'else continuation' (either else-if or else) */ + const parse_node_t &else_cont = *get_child(*else_clause, 1, symbol_else_continuation); + assert(else_cont.production_idx < 2); + if (else_cont.production_idx == 0) + { + /* it's an 'else if', go to the next one */ + if_clause = get_child(else_cont, 0, symbol_if_clause); + else_clause = get_child(else_cont, 1, symbol_else_clause); + } + else + { + /* it's the final 'else', we're done */ + assert(else_cont.production_idx == 1); + job_list_to_execute = get_child(else_cont, 1, symbol_job_list); + break; + } + } + } + + /* Execute any job list we 
got */ + if (job_list_to_execute != NULL) + { + run_job_list(*job_list_to_execute); + } + + /* Done */ + parser->pop_block(ib); + + return proc_get_last_status(); +} + +int parse_execution_context_t::run_begin_statement(const parse_node_t &header, const parse_node_t &contents) +{ + assert(header.type == symbol_begin_header); + assert(contents.type == symbol_job_list); + + /* Basic begin/end block. Push a scope block. */ + scope_block_t *sb = new scope_block_t(BEGIN); + parser->push_block(sb); + parser->current_block()->tok_pos = parser->get_pos(); + + /* Run the job list */ + run_job_list(contents); + + /* Pop the block */ + parser->pop_block(sb); + + return proc_get_last_status(); +} + +/* Define a function */ +int parse_execution_context_t::run_function_statement(const parse_node_t &header, const parse_node_t &contents) +{ + assert(header.type == symbol_function_header); + assert(contents.type == symbol_job_list); + + /* Get arguments */ + const parse_node_t *unmatched_wildcard = NULL; + const wcstring_list_t argument_list = this->determine_arguments(header, &unmatched_wildcard); + + bool errored = false; + if (unmatched_wildcard != NULL) + { + errored = append_unmatched_wildcard_error(*unmatched_wildcard); + } + + if (! errored) + { + const wcstring contents_str = get_source(contents); + wcstring error_str; + int err = define_function(*parser, argument_list, contents_str, &error_str); + proc_set_last_status(err); + } + return proc_get_last_status(); +} + +int parse_execution_context_t::run_block_statement(const parse_node_t &statement) +{ + assert(statement.type == symbol_block_statement); + + const parse_node_t &block_header = *get_child(statement, 0, symbol_block_header); //block header + const parse_node_t &header = *get_child(block_header, 0); //specific header type (e.g. 
for loop) + const parse_node_t &contents = *get_child(statement, 2, symbol_job_list); //block contents + + int ret = 1; + switch (header.type) + { + case symbol_for_header: + ret = run_for_statement(header, contents); + break; + + case symbol_while_header: + ret = run_while_statement(header, contents); + break; + + case symbol_function_header: + ret = run_function_statement(header, contents); + break; + + case symbol_begin_header: + ret = run_begin_statement(header, contents); + break; + + default: + fprintf(stderr, "Unexpected block header: %ls\n", header.describe().c_str()); + PARSER_DIE(); + break; + } + + return proc_get_last_status(); +} + +int parse_execution_context_t::run_for_statement(const parse_node_t &header, const parse_node_t &block_contents) +{ + assert(header.type == symbol_for_header); + assert(block_contents.type == symbol_job_list); + + /* get the variable name: `for var_name in ...` */ + const parse_node_t &var_name_node = *get_child(header, 1, parse_token_type_string); + const wcstring for_var_name = get_source(var_name_node); + + /* get the contents to iterate over */ + const parse_node_t *unmatched_wildcard = NULL; + wcstring_list_t argument_list = this->determine_arguments(header, &unmatched_wildcard); + + /* Here we could do something with unmatched_wildcard. However it seems nicer to not make for loops complain about this, i.e. just iterate over a potentially empty list */ + + for_block_t *fb = new for_block_t(for_var_name); + parser->push_block(fb); + fb->tok_pos = parser->get_pos(); + + /* Note that we store the sequence of values in opposite order */ + std::reverse(argument_list.begin(), argument_list.end()); + fb->sequence = argument_list; + + /* Now drive the for loop. TODO: handle break, etc. */ + while (! 
fb->sequence.empty()) + { + const wcstring &for_variable = fb->variable; + const wcstring &val = fb->sequence.back(); + env_set(for_variable, val.c_str(), ENV_LOCAL); + fb->sequence.pop_back(); + fb->loop_status = LOOP_NORMAL; + fb->skip = 0; + + this->run_job_list(block_contents); + } + + return proc_get_last_status(); +} + + +int parse_execution_context_t::run_switch_statement(const parse_node_t &statement) +{ + return proc_get_last_status(); +} + +int parse_execution_context_t::run_while_statement(const parse_node_t &header, const parse_node_t &statement) { assert(header.type == symbol_while_header); assert(statement.type == symbol_block_statement); @@ -71,6 +260,8 @@ void parse_execution_context_t::run_while_process(const parse_node_t &header, co /* Done */ parser->pop_block(wb); + + return proc_get_last_status(); } /* Appends an error to the error list. Always returns true, so you can assign the result to an 'errored' variable */ @@ -90,6 +281,13 @@ bool parse_execution_context_t::append_error(const parse_node_t &node, const wch return true; } +/* Appends an unmatched wildcard error to the error list, and returns true. 
*/ +bool parse_execution_context_t::append_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard) +{ + proc_set_last_status(STATUS_UNMATCHED_WILDCARD); + return append_error(unmatched_wildcard, WILDCARD_ERR_MSG, get_source(unmatched_wildcard).c_str()); +} + /* Creates a 'normal' (non-block) process */ process_t *parse_execution_context_t::create_plain_process(job_t *job, const parse_node_t &statement) { @@ -122,8 +320,7 @@ process_t *parse_execution_context_t::create_plain_process(job_t *job, const par if (unmatched_wildcard != NULL) { job_set_flag(job, JOB_WILDCARD_ERROR, 1); - proc_set_last_status(STATUS_UNMATCHED_WILDCARD); - errored = append_error(*unmatched_wildcard, WILDCARD_ERR_MSG, unmatched_wildcard->get_source(src).c_str()); + errored = append_unmatched_wildcard_error(*unmatched_wildcard); } if (errored) @@ -179,7 +376,6 @@ process_t *parse_execution_context_t::create_plain_process(job_t *job, const par /* TODO: support fish_command_not_found, implicit cd, etc. here */ errored = true; } - return NULL; } /* Return the process, or NULL on error */ @@ -401,10 +597,10 @@ process_t *parse_execution_context_t::create_boolean_process(job_t *job, const p process_t *parse_execution_context_t::create_block_process(job_t *job, const parse_node_t &statement_node) { - /* We handle block statements by creating INTERNAL_BLOCKs, that will bounce back to us when it's time to execute them */ + /* We handle block statements by creating INTERNAL_BLOCK_NODE, that will bounce back to us when it's time to execute them */ assert(statement_node.type == symbol_block_statement || statement_node.type == symbol_if_statement || statement_node.type == symbol_switch_statement); process_t *result = new process_t(); - result->type = INTERNAL_BLOCK; + result->type = INTERNAL_BLOCK_NODE; result->internal_block_node = this->get_offset(statement_node); return result; } @@ -513,7 +709,7 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) } } - /* Increment the 
eval_level for the duration of this command */ + /* Increment the eval_level for the duration of this command */ scoped_push saved_eval_level(&eval_level, eval_level + 1); /* TODO: blocks-without-redirections optimization */ @@ -530,7 +726,7 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) start_time = get_time(); } - job_t *j = parser->job_create(this->block_io); + job_t *j = new job_t(acquire_job_id(), block_io); job_set_flag(j, JOB_FOREGROUND, 1); job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL)); job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL) \ @@ -539,20 +735,20 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) || is_block \ || is_event \ || (!get_is_interactive())); - - parser->current_block()->job = j; + job_set_flag(j, JOB_CONTROL, + (job_control_mode==JOB_CONTROL_ALL) || + ((job_control_mode == JOB_CONTROL_INTERACTIVE) && (get_is_interactive()))); /* Populate the job. This may fail for reasons like command_not_found */ bool process_errored = ! this->populate_job_from_job_node(j, job_node); - - /* If we errored, we have to clean up the job */ + + /* Clean up the job on failure */ if (process_errored) { - assert(parser->current_block()->job == j); - parser->current_block()->job = NULL; - job_free(j); + delete j; + j = NULL; } - + /* Store time it took to 'parse' the command */ if (do_profile) { @@ -563,6 +759,10 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) if (! process_errored) { + /* Success. Give the job to the parser - it will clean it up. */ + parser->job_add(j); + parser->current_block()->job = j; + /* Check to see if this contained any external commands */ bool job_contained_external_command = false; for (const process_t *proc = j->first_process; proc != NULL; proc = proc->next) @@ -602,6 +802,13 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) /* Clean up jobs. 
Do this after we've determined the return value, since this may trigger event handlers */ job_reap(0); + /* Output any errors (hack) */ + if (! this->errors.empty()) + { + fprintf(stderr, "%ls\n", parse_errors_description(this->errors, this->src).c_str()); + this->errors.clear(); + } + /* All done */ return ret; } @@ -636,6 +843,7 @@ int parse_execution_context_t::run_job_list(const parse_node_t &job_list_node) break; default: //if we get here, it means more productions have been added to job_list, which is bad + fprintf(stderr, "Unexpected production in job_list: %lu\n", (unsigned long)job_list->production_idx); PARSER_DIE(); } @@ -649,13 +857,53 @@ int parse_execution_context_t::run_job_list(const parse_node_t &job_list_node) return result; } - -int parse_execution_context_t::eval_top_level_job_list() +int parse_execution_context_t::eval_node_at_offset(node_offset_t offset) { - if (tree.empty()) - return EXIT_FAILURE; + bool log_it = false; - const parse_node_t &job_list = tree.at(0); - assert(job_list.type == symbol_job_list); - return this->run_job_list(job_list); + /* Don't ever expect to have an empty tree if this is called */ + assert(! tree.empty()); + assert(offset < tree.size()); + + const parse_node_t &node = tree.at(offset); + + if (log_it) + { + fprintf(stderr, "eval node: %ls\n", get_source(node).c_str()); + } + + /* Currently, we only expect to execute the top level job list, or a block node. Assert that. */ + assert(node.type == symbol_job_list || + node.type == symbol_block_statement || + node.type == symbol_if_statement || + node.type == symbol_switch_statement); + + int ret = 1; + switch (node.type) + { + case symbol_job_list: + /* We should only get a job list if it's top level. 
This is because this is the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE execution (which does block statements, but never job lists) */ + assert(offset == 0); + ret = this->run_job_list(node); + break; + + case symbol_block_statement: + ret = this->run_block_statement(node); + break; + + case symbol_if_statement: + ret = this->run_if_statement(node); + break; + + case symbol_switch_statement: + ret = this->run_switch_statement(node); + break; + + default: + /* In principle, we could support other node types. However we never expect to be passed them - see above. */ + fprintf(stderr, "Unexpected node %ls found in %s\n", node.describe().c_str(), __FUNCTION__); + PARSER_DIE(); + break; + } + return ret; } diff --git a/parse_execution.h b/parse_execution.h index 0d679bb6e..f465a5934 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -35,18 +35,28 @@ class parse_execution_context_t /* Report an error. Always returns true. */ bool append_error(const parse_node_t &node, const wchar_t *fmt, ...); + /* Wildcard error helper */ + bool append_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard); /* Utilities */ wcstring get_source(const parse_node_t &node) const; const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; node_offset_t get_offset(const parse_node_t &node) const; + /* These create process_t structures from statements */ process_t *create_job_process(job_t *job, const parse_node_t &statement_node); process_t *create_boolean_process(job_t *job, const parse_node_t &bool_statement); process_t *create_plain_process(job_t *job, const parse_node_t &statement); process_t *create_block_process(job_t *job, const parse_node_t &statement_node); - void run_while_process(const parse_node_t &header, const parse_node_t &statement); + /* These encapsulate the actual logic of various (block) statements. They just do what the statement says. 
*/ + int run_block_statement(const parse_node_t &statement); + int run_for_statement(const parse_node_t &header, const parse_node_t &contents); + int run_if_statement(const parse_node_t &statement); + int run_switch_statement(const parse_node_t &statement); + int run_while_statement(const parse_node_t &header, const parse_node_t &contents); + int run_function_statement(const parse_node_t &header, const parse_node_t &contents); + int run_begin_statement(const parse_node_t &header, const parse_node_t &contents); wcstring_list_t determine_arguments(const parse_node_t &parent, const parse_node_t **out_unmatched_wildcard_node); @@ -57,13 +67,11 @@ class parse_execution_context_t int run_job_list(const parse_node_t &job_list_node); bool populate_job_from_job_node(job_t *j, const parse_node_t &job_node); - void eval_next_stack_elem(); - public: parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, const io_chain_t &io, parser_t *p); - /* Actually execute the job list described by the tree */ - int eval_top_level_job_list(); + /* Start executing at the given node offset, returning the exit status of the last process. 
*/ + int eval_node_at_offset(node_offset_t offset); }; diff --git a/parse_productions.cpp b/parse_productions.cpp index 3325f50dd..a4b1de957 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -320,7 +320,7 @@ RESOLVE_ONLY(begin_header) PRODUCTIONS(function_header) = { - {KEYWORD(parse_keyword_function), parse_token_type_string, symbol_argument_list} + {KEYWORD(parse_keyword_function), symbol_argument, symbol_argument_list} }; RESOLVE_ONLY(function_header) diff --git a/parse_tree.cpp b/parse_tree.cpp index e231eb9d1..ad825a115 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -997,6 +997,12 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags queue[0] = queue[1]; queue[1] = next_parse_token(&tok); + /* If we are leaving things unterminated, then don't pass parse_token_type_terminate */ + if (queue[0].type == parse_token_type_terminate && (parse_flags & parse_flag_leave_unterminated)) + { + break; + } + /* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */ this->parser->accept_tokens(queue[0], queue[1]); diff --git a/parse_tree.h b/parse_tree.h index 9a5d7c238..91cab4262 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -65,7 +65,11 @@ enum parse_flag_include_comments = 1 << 1, /* Indicate that the tokenizer should accept incomplete tokens */ - parse_flag_accept_incomplete_tokens = 1 << 2 + parse_flag_accept_incomplete_tokens = 1 << 2, + + /* Indicate that the parser should not generate the terminate token, allowing an 'unfinished' tree where some nodes may have no productions. 
*/ + parse_flag_leave_unterminated = 1 << 3 + }; typedef unsigned int parse_tree_flags_t; @@ -124,7 +128,7 @@ public: wcstring describe(void) const; /* Constructor */ - explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0) + explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), production_idx(-1) { } @@ -244,7 +248,9 @@ public: for_header = FOR var_name IN argument_list while_header = WHILE job begin_header = BEGIN - function_header = FUNCTION function_name argument_list + +# Functions take arguments, and require at least one (the name) + function_header = FUNCTION argument argument_list # A boolean statement is AND or OR or NOT diff --git a/parse_util.cpp b/parse_util.cpp index ff7a022c2..cf196db1f 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -994,7 +994,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars // Parse the input string into a parse tree // Some errors are detected here - bool parsed = parse_t::parse(buff_src, 0, &node_tree, &parse_errors); + bool parsed = parse_t::parse(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors); if (! parsed) { errored = true; diff --git a/parser.cpp b/parser.cpp index 3ba3c89be..1f331dc53 100644 --- a/parser.cpp +++ b/parser.cpp @@ -598,6 +598,12 @@ void parser_t::error(int ec, size_t p, const wchar_t *str, ...) 
va_start(va, str); err_buff = vformat_string(str, va); va_end(va); + + if (parser_use_ast()) + { + fprintf(stderr, "parser error: %ls\n", err_buff.c_str()); + err_buff.clear(); + } } /** @@ -1190,6 +1196,13 @@ int parser_t::is_help(const wchar_t *s, int min_match) (len >= (size_t)min_match && (wcsncmp(L"--help", s, len) == 0)); } +void parser_t::job_add(job_t *job) +{ + assert(job != NULL); + assert(job->first_process != NULL); + this->my_job_list.push_front(job); +} + job_t *parser_t::job_create(const io_chain_t &io) { job_t *res = new job_t(acquire_job_id(), io); @@ -2588,6 +2601,40 @@ void parser_t::eval_job(tokenizer_t *tok) int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum block_type_t block_type) { CHECK_BLOCK(1); + + /* Parse the source into a tree, if we can */ + parse_node_tree_t tree; + if (! parse_t::parse(cmd, parse_flag_none, &tree, NULL)) + { + return 1; + } + + /* Append to the execution context stack */ + parse_execution_context_t *ctx = new parse_execution_context_t(tree, cmd, io, this); + execution_contexts.push_back(ctx); + + /* Execute the first node */ + int result = 1; + if (! tree.empty()) + { + result = this->eval_block_node(0, io_chain_t(), block_type); + } + + /* Clean up the execution context stack */ + assert(! execution_contexts.empty() && execution_contexts.back() == ctx); + execution_contexts.pop_back(); + delete ctx; + + return result; +} + +int parser_t::eval_block_node(node_offset_t node_idx, const io_chain_t &io, enum block_type_t block_type) +{ + // Paranoia. It's a little frightening that we're given only a node_idx and we interpret this in the topmost execution context's tree. What happens if these were to be interleaved? Fortunately that cannot happen. 
+ parse_execution_context_t *ctx = execution_contexts.back(); + assert(ctx != NULL); + + CHECK_BLOCK(1); /* Only certain blocks are allowed */ if ((block_type != TOP) && @@ -2600,29 +2647,13 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl return 1; } - /* Parse the source into a tree, if we can */ - parse_node_tree_t tree; - if (! parse_t::parse(cmd, parse_flag_none, &tree, NULL)) - { - return 1; - } - /* Not sure why we reap jobs here */ job_reap(0); - /* Append to the execution context stack */ - parse_execution_context_t *ctx = new parse_execution_context_t(tree, cmd, io, this); - execution_contexts.push_back(ctx); - /* Start it up */ const block_t * const start_current_block = current_block(); this->push_block(new scope_block_t(block_type)); - int result = ctx->eval_top_level_job_list(); - - /* Clean up the execution context stack */ - assert(! execution_contexts.empty() && execution_contexts.back() == ctx); - execution_contexts.pop_back(); - delete ctx; + int result = ctx->eval_node_at_offset(node_idx); /* Clean up the block stack */ this->pop_block(); @@ -2643,6 +2674,7 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl job_reap(0); return result; + } int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type) @@ -2983,7 +3015,7 @@ void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &erro assert(output != NULL); if (! errors.empty()) { - const parse_error_t err = errors.at(0); + const parse_error_t &err = errors.at(0); // Determine which line we're on assert(err.source_start <= src.size()); diff --git a/parser.h b/parser.h index 2f9291f03..f013a3b92 100644 --- a/parser.h +++ b/parser.h @@ -336,6 +336,9 @@ private: /** Create a job */ job_t *job_create(const io_chain_t &io); + + /** Adds a job to the beginning of the job list. 
*/ + void job_add(job_t *job); public: std::vector profile_items; @@ -377,6 +380,9 @@ public: int eval(const wcstring &cmd_str, const io_chain_t &io, enum block_type_t block_type); int eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum block_type_t block_type); + /** Evaluates a block node at the given node offset in the topmost execution context */ + int eval_block_node(node_offset_t node_idx, const io_chain_t &io, enum block_type_t block_type); + /** Evaluate line as a list of parameters, i.e. tokenize it and perform parameter expansion and cmdsubst execution on the tokens. The output is inserted into output. diff --git a/proc.h b/proc.h index 0a2949ca2..510f549a2 100644 --- a/proc.h +++ b/proc.h @@ -73,6 +73,10 @@ enum process_type_t A block of commands */ INTERNAL_BLOCK, + + /** A block of commands, represented as a node */ + INTERNAL_BLOCK_NODE, + /** The exec builtin */ From 715823a6665a63df5a7ae6e5a9adcbe287626c1c Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 27 Dec 2013 03:58:42 -0800 Subject: [PATCH 075/108] Bringup of function definitions, switch statements with new parser --- builtin.cpp | 7 ++- builtin.h | 2 +- exec.cpp | 2 + expand.cpp | 2 +- parse_execution.cpp | 124 ++++++++++++++++++++++++++++++++++++++---- parse_productions.cpp | 2 +- parse_productions.h | 2 +- parse_tree.h | 6 +- reader.cpp | 1 + 9 files changed, 129 insertions(+), 19 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 11036e378..215edc596 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -1753,10 +1753,15 @@ static int builtin_pwd(parser_t &parser, wchar_t **argv) } /* This is nearly identical to builtin_function, and is intended to be the successor (with no block manipulation, no function/end split) */ -int define_function(parser_t &parser, const wcstring_list_t &args, const wcstring &contents, wcstring *out_err) +int define_function(parser_t &parser, const wcstring_list_t &c_args, const wcstring &contents, wcstring *out_err) { assert(out_err != NULL); 
+ /* wgetopt expects 'function' as the first argument. Make a new wcstring_list with that property. */ + wcstring_list_t args; + args.push_back(L"function"); + args.insert(args.end(), c_args.begin(), c_args.end()); + /* Hackish const_cast matches the one in builtin_run */ const null_terminated_array_t argv_array(args); wchar_t **argv = const_cast(argv_array.get()); diff --git a/builtin.h b/builtin.h index 9900f856e..7162de235 100644 --- a/builtin.h +++ b/builtin.h @@ -178,7 +178,7 @@ const wchar_t *builtin_complete_get_temporary_buffer(); */ wcstring builtin_help_get(parser_t &parser, const wchar_t *cmd); -/** Defines a function, like builtin_function. Returns 0 on success. */ +/** Defines a function, like builtin_function. Returns 0 on success. args should NOT contain 'function' as the first argument. */ int define_function(parser_t &parser, const wcstring_list_t &args, const wcstring &contents, wcstring *out_err); diff --git a/exec.cpp b/exec.cpp index c1fb54be2..e7277169b 100644 --- a/exec.cpp +++ b/exec.cpp @@ -1556,6 +1556,8 @@ static int exec_subshell_internal(const wcstring &cmd, wcstring_list_t *lst, boo int prev_subshell = is_subshell; const int prev_status = proc_get_last_status(); char sep=0; + + //fprintf(stderr, "subcmd %ls\n", cmd.c_str()); const env_var_t ifs = env_get_string(L"IFS"); diff --git a/expand.cpp b/expand.cpp index f743f8ab6..a73118b89 100644 --- a/expand.cpp +++ b/expand.cpp @@ -1867,7 +1867,7 @@ bool expand_one(wcstring &string, expand_flags_t flags) return true; } - if (expand_string(string, completions, flags)) + if (expand_string(string, completions, flags | EXPAND_NO_DESCRIPTIONS)) { if (completions.size() == 1) { diff --git a/parse_execution.cpp b/parse_execution.cpp index 1a2b16b1f..662506d50 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -5,7 +5,9 @@ */ #include "parse_execution.h" +#include "parse_util.h" #include "complete.h" +#include "wildcard.h" #include "builtin.h" #include "parser.h" #include "expand.h" @@ 
-65,14 +67,13 @@ int parse_execution_context_t::run_if_statement(const parse_node_t &statement) { assert(if_clause != NULL && else_clause != NULL); const parse_node_t &condition = *get_child(*if_clause, 1, symbol_job); - fprintf(stderr, "run %ls\n", get_source(condition).c_str()); if (run_1_job(condition) == EXIT_SUCCESS) { /* condition succeeded */ job_list_to_execute = get_child(*if_clause, 3, symbol_job_list); break; } - else if (else_clause->child_count > 0) + else if (else_clause->child_count == 0) { /* 'if' condition failed, no else clause, we're done */ job_list_to_execute = NULL; @@ -119,7 +120,6 @@ int parse_execution_context_t::run_begin_statement(const parse_node_t &header, c /* Basic begin/end block. Push a scope block. */ scope_block_t *sb = new scope_block_t(BEGIN); parser->push_block(sb); - parser->current_block()->tok_pos = parser->get_pos(); /* Run the job list */ run_job_list(contents); @@ -138,7 +138,7 @@ int parse_execution_context_t::run_function_statement(const parse_node_t &header /* Get arguments */ const parse_node_t *unmatched_wildcard = NULL; - const wcstring_list_t argument_list = this->determine_arguments(header, &unmatched_wildcard); + wcstring_list_t argument_list = this->determine_arguments(header, &unmatched_wildcard); bool errored = false; if (unmatched_wildcard != NULL) @@ -152,6 +152,11 @@ int parse_execution_context_t::run_function_statement(const parse_node_t &header wcstring error_str; int err = define_function(*parser, argument_list, contents_str, &error_str); proc_set_last_status(err); + + if (! 
error_str.empty()) + { + this->append_error(header, L"%ls", error_str.c_str()); + } } return proc_get_last_status(); } @@ -197,19 +202,17 @@ int parse_execution_context_t::run_for_statement(const parse_node_t &header, con assert(header.type == symbol_for_header); assert(block_contents.type == symbol_job_list); - /* get the variable name: `for var_name in ...` */ + /* Get the variable name: `for var_name in ...` */ const parse_node_t &var_name_node = *get_child(header, 1, parse_token_type_string); const wcstring for_var_name = get_source(var_name_node); - /* get the contents to iterate over */ + /* Get the contents to iterate over. Here we could do something with unmatched_wildcard. However it seems nicer to not make for loops complain about this, i.e. just iterate over a potentially empty list + */ const parse_node_t *unmatched_wildcard = NULL; - wcstring_list_t argument_list = this->determine_arguments(header, &unmatched_wildcard); - - /* Here we could do something with unmatched_wildcard. However it seems nicer to not make for loops complain about this, i.e. 
just iterate over a potentially empty list */ + wcstring_list_t argument_list = this->determine_arguments(header, NULL); for_block_t *fb = new for_block_t(for_var_name); parser->push_block(fb); - fb->tok_pos = parser->get_pos(); /* Note that we store the sequence of values in opposite order */ std::reverse(argument_list.begin(), argument_list.end()); @@ -234,6 +237,103 @@ int parse_execution_context_t::run_for_statement(const parse_node_t &header, con int parse_execution_context_t::run_switch_statement(const parse_node_t &statement) { + assert(statement.type == symbol_switch_statement); + bool errored = false; + const parse_node_t *matching_case_item = NULL; + + /* Get the switch variable */ + const parse_node_t &switch_value_node = *get_child(statement, 1, parse_token_type_string); + const wcstring switch_value = get_source(switch_value_node); + + /* Expand it */ + std::vector switch_values_expanded; + int expand_ret = expand_string(switch_value, switch_values_expanded, EXPAND_NO_DESCRIPTIONS); + switch (expand_ret) + { + case EXPAND_ERROR: + { + errored = append_error(switch_value_node, + _(L"Could not expand string '%ls'"), + switch_value.c_str()); + break; + } + + case EXPAND_WILDCARD_NO_MATCH: + { + /* Store the node that failed to expand */ + errored = append_error(switch_value_node, WILDCARD_ERR_MSG, switch_value.c_str()); + break; + } + + case EXPAND_WILDCARD_MATCH: + case EXPAND_OK: + { + break; + } + } + + if (! errored && switch_values_expanded.size() != 1) + { + errored = append_error(switch_value_node, + _(L"switch: Expected exactly one argument, got %lu\n"), + switch_values_expanded.size()); + } + const wcstring &switch_value_expanded = switch_values_expanded.at(0).completion; + + if (! 
errored) + { + /* Expand case statements */ + const parse_node_t *case_item_list = get_child(statement, 3, symbol_case_item_list); + while (matching_case_item == NULL && case_item_list->child_count > 0) + { + if (case_item_list->production_idx == 2) + { + /* Hackish: blank line */ + case_item_list = get_child(*case_item_list, 1, symbol_case_item_list); + continue; + } + + /* Pull out this case item and the rest of the list */ + const parse_node_t &case_item = *get_child(*case_item_list, 0, symbol_case_item); + + /* Pull out the argument list */ + const parse_node_t &arg_list = *get_child(case_item, 1, symbol_argument_list); + + /* Expand arguments. We explicitly ignore unmatched_wildcard. That is, a case item list may have a wildcard that fails to expand to anything. */ + const wcstring_list_t case_args = this->determine_arguments(arg_list, NULL); + + for (size_t i=0; i < case_args.size(); i++) + { + const wcstring &arg = case_args.at(i); + + /* Unescape wildcards so they can be expanded again */ + wchar_t *unescaped_arg = parse_util_unescape_wildcards(arg.c_str()); + bool match = wildcard_match(switch_value_expanded, unescaped_arg); + free(unescaped_arg); + + /* If this matched, we're done */ + if (match) + { + matching_case_item = &case_item; + break; + } + } + + /* Remainder of the list */ + case_item_list = get_child(*case_item_list, 1, symbol_case_item_list); + } + } + + if (! errored && matching_case_item) + { + /* Success, evaluate the job list */ + const parse_node_t *job_list = get_child(*matching_case_item, 3, symbol_job_list); + this->run_job_list(*job_list); + } + + // Oops, this is stomping STATUS_WILDCARD_ERROR. TODO: Don't! 
+ if (errored) + proc_set_last_status(STATUS_BUILTIN_ERROR); return proc_get_last_status(); } @@ -415,7 +515,7 @@ wcstring_list_t parse_execution_context_t::determine_arguments(const parse_node_ /* Expand this string */ std::vector arg_expanded; - int expand_ret = expand_string(arg_str, arg_expanded, 0); + int expand_ret = expand_string(arg_str, arg_expanded, EXPAND_NO_DESCRIPTIONS); switch (expand_ret) { case EXPAND_ERROR: @@ -709,7 +809,7 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) } } - /* Increment the eval_level for the duration of this command */ + /* Increment the eval_level for the duration of this command */ scoped_push saved_eval_level(&eval_level, eval_level + 1); /* TODO: blocks-without-redirections optimization */ diff --git a/parse_productions.cpp b/parse_productions.cpp index a4b1de957..24962e534 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -231,7 +231,7 @@ RESOLVE(else_continuation) PRODUCTIONS(switch_statement) = { - { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, symbol_end_command} + { KEYWORD(parse_keyword_switch), parse_token_type_string, parse_token_type_end, symbol_case_item_list, symbol_end_command, symbol_arguments_or_redirections_list} }; RESOLVE_ONLY(switch_statement) diff --git a/parse_productions.h b/parse_productions.h index e6c003ab2..df3ae9c96 100644 --- a/parse_productions.h +++ b/parse_productions.h @@ -13,7 +13,7 @@ namespace parse_productions { #define MAX_PRODUCTIONS 5 -#define MAX_SYMBOLS_PER_PRODUCTION 5 +#define MAX_SYMBOLS_PER_PRODUCTION 6 typedef uint32_t production_tag_t; diff --git a/parse_tree.h b/parse_tree.h index 91cab4262..266372e08 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -238,9 +238,11 @@ public: else_continuation = if_clause else_clause | STATEMENT_TERMINATOR job_list - switch_statement = SWITCH STATEMENT_TERMINATOR case_item_list end_command + switch_statement = SWITCH STATEMENT_TERMINATOR 
case_item_list end_command arguments_or_redirections_list case_item_list = | - case_item case_item_list + case_item case_item_list | + case_item_list + case_item = CASE argument_list STATEMENT_TERMINATOR job_list block_statement = block_header job_list end_command arguments_or_redirections_list diff --git a/reader.cpp b/reader.cpp index 5c5b094d9..0905b3792 100644 --- a/reader.cpp +++ b/reader.cpp @@ -2990,6 +2990,7 @@ const wchar_t *reader_readline(void) is_interactive_read = 1; c=input_readch(); is_interactive_read = was_interactive_read; + //fprintf(stderr, "C: %lx\n", (long)c); if (((!wchar_private(c))) && (c>31) && (c != 127)) { From 0f9de11a67a5bbf3fe0c3be7f55cb25a7987ed4d Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 28 Dec 2013 16:18:38 -0800 Subject: [PATCH 076/108] Fix issues related to redirections and block level IO with new parser --- exec.cpp | 3 +- parse_execution.cpp | 89 +++++++++++++++++++++++++++++++-------------- parse_execution.h | 15 ++++---- parse_tree.h | 1 - parser.cpp | 11 +++--- tokenizer.cpp | 17 +++++++++ tokenizer.h | 3 ++ 7 files changed, 97 insertions(+), 42 deletions(-) diff --git a/exec.cpp b/exec.cpp index e7277169b..e150723db 100644 --- a/exec.cpp +++ b/exec.cpp @@ -820,7 +820,6 @@ void exec_job(parser_t &parser, job_t *j) { pipe_write.reset(new io_pipe_t(p->pipe_write_fd, false)); process_net_io_chain.push_back(pipe_write); - } /* The explicit IO redirections associated with the process */ @@ -1156,7 +1155,7 @@ void exec_job(parser_t &parser, job_t *j) No buffer, so we exit directly. This means we have to manually set the exit status. 
*/ - if (p->next == 0) + if (p->next == NULL) { proc_set_last_status(job_get_flag(j, JOB_NEGATE)?(!status):status); } diff --git a/parse_execution.cpp b/parse_execution.cpp index 662506d50..72d4452cf 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -16,7 +16,7 @@ #include "path.h" -parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, const io_chain_t &io, parser_t *p) : tree(t), src(s), block_io(io), parser(p), eval_level(0) +parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, parser_t *p) : tree(t), src(s), parser(p), eval_level(0) { } @@ -45,9 +45,9 @@ node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) co } -bool parse_execution_context_t::should_cancel() const +bool parse_execution_context_t::should_cancel_execution(const block_t *block) const { - return false; + return block && block->skip; } int parse_execution_context_t::run_if_statement(const parse_node_t &statement) @@ -63,11 +63,11 @@ int parse_execution_context_t::run_if_statement(const parse_node_t &statement) const parse_node_t *job_list_to_execute = NULL; const parse_node_t *if_clause = get_child(statement, 0, symbol_if_clause); const parse_node_t *else_clause = get_child(statement, 1, symbol_else_clause); - for (;;) + while (! 
should_cancel_execution(ib)) { assert(if_clause != NULL && else_clause != NULL); const parse_node_t &condition = *get_child(*if_clause, 1, symbol_job); - if (run_1_job(condition) == EXIT_SUCCESS) + if (run_1_job(condition, ib) == EXIT_SUCCESS) { /* condition succeeded */ job_list_to_execute = get_child(*if_clause, 3, symbol_job_list); @@ -103,7 +103,7 @@ int parse_execution_context_t::run_if_statement(const parse_node_t &statement) /* Execute any job list we got */ if (job_list_to_execute != NULL) { - run_job_list(*job_list_to_execute); + run_job_list(*job_list_to_execute, ib); } /* Done */ @@ -122,7 +122,7 @@ int parse_execution_context_t::run_begin_statement(const parse_node_t &header, c parser->push_block(sb); /* Run the job list */ - run_job_list(contents); + run_job_list(contents, sb); /* Pop the block */ parser->pop_block(sb); @@ -208,7 +208,6 @@ int parse_execution_context_t::run_for_statement(const parse_node_t &header, con /* Get the contents to iterate over. Here we could do something with unmatched_wildcard. However it seems nicer to not make for loops complain about this, i.e. just iterate over a potentially empty list */ - const parse_node_t *unmatched_wildcard = NULL; wcstring_list_t argument_list = this->determine_arguments(header, NULL); for_block_t *fb = new for_block_t(for_var_name); @@ -219,7 +218,7 @@ int parse_execution_context_t::run_for_statement(const parse_node_t &header, con fb->sequence = argument_list; /* Now drive the for loop. TODO: handle break, etc. */ - while (! fb->sequence.empty()) + while (! fb->sequence.empty() && ! 
should_cancel_execution(fb)) { const wcstring &for_variable = fb->variable; const wcstring &val = fb->sequence.back(); @@ -228,7 +227,7 @@ int parse_execution_context_t::run_for_statement(const parse_node_t &header, con fb->loop_status = LOOP_NORMAL; fb->skip = 0; - this->run_job_list(block_contents); + this->run_job_list(block_contents, fb); } return proc_get_last_status(); @@ -280,11 +279,14 @@ int parse_execution_context_t::run_switch_statement(const parse_node_t &statemen } const wcstring &switch_value_expanded = switch_values_expanded.at(0).completion; + switch_block_t *sb = new switch_block_t(switch_value_expanded); + parser->push_block(sb); + if (! errored) { /* Expand case statements */ const parse_node_t *case_item_list = get_child(statement, 3, symbol_case_item_list); - while (matching_case_item == NULL && case_item_list->child_count > 0) + while (matching_case_item == NULL && case_item_list->child_count > 0 && ! should_cancel_execution(sb)) { if (case_item_list->production_idx == 2) { @@ -328,34 +330,35 @@ int parse_execution_context_t::run_switch_statement(const parse_node_t &statemen { /* Success, evaluate the job list */ const parse_node_t *job_list = get_child(*matching_case_item, 3, symbol_job_list); - this->run_job_list(*job_list); + this->run_job_list(*job_list, sb); } + parser->pop_block(sb); + // Oops, this is stomping STATUS_WILDCARD_ERROR. TODO: Don't! 
if (errored) proc_set_last_status(STATUS_BUILTIN_ERROR); return proc_get_last_status(); } -int parse_execution_context_t::run_while_statement(const parse_node_t &header, const parse_node_t &statement) +int parse_execution_context_t::run_while_statement(const parse_node_t &header, const parse_node_t &block_contents) { assert(header.type == symbol_while_header); - assert(statement.type == symbol_block_statement); + assert(block_contents.type == symbol_job_list); /* Push a while block */ while_block_t *wb = new while_block_t(); wb->status = WHILE_TEST_FIRST; - wb->node_offset = this->get_offset(statement); + wb->node_offset = this->get_offset(header); parser->push_block(wb); /* The condition and contents of the while loop, as a job and job list respectively */ const parse_node_t &while_condition = *get_child(header, 1, symbol_job); - const parse_node_t &block_contents = *get_child(statement, 2, symbol_job_list); /* A while loop is a while loop! */ - while (! this->should_cancel() && this->run_1_job(while_condition) == EXIT_SUCCESS) + while (! 
this->should_cancel_execution(wb) && this->run_1_job(while_condition, wb) == EXIT_SUCCESS) { - this->run_job_list(block_contents); + this->run_job_list(block_contents, wb); } /* Done */ @@ -388,6 +391,8 @@ bool parse_execution_context_t::append_unmatched_wildcard_error(const parse_node return append_error(unmatched_wildcard, WILDCARD_ERR_MSG, get_source(unmatched_wildcard).c_str()); } + + /* Creates a 'normal' (non-block) process */ process_t *parse_execution_context_t::create_plain_process(job_t *job, const parse_node_t &statement) { @@ -565,13 +570,16 @@ wcstring_list_t parse_execution_context_t::determine_arguments(const parse_node_ return argument_list; } -bool parse_execution_context_t::determine_io_chain(const parse_node_t &statement, io_chain_t *out_chain) +bool parse_execution_context_t::determine_io_chain(const parse_node_t &statement_node, io_chain_t *out_chain) { io_chain_t result; bool errored = false; + /* We are called with a statement of varying types. We require that the statement have an arguments_or_redirections_list child. 
*/ + const parse_node_t &args_and_redirections_list = tree.find_child(statement_node, symbol_arguments_or_redirections_list); + /* Get all redirection nodes underneath the statement */ - const parse_node_tree_t::parse_node_list_t redirect_nodes = tree.find_nodes(statement, symbol_redirection); + const parse_node_tree_t::parse_node_list_t redirect_nodes = tree.find_nodes(args_and_redirections_list, symbol_redirection); for (size_t i=0; i < redirect_nodes.size(); i++) { const parse_node_t &redirect_node = *redirect_nodes.at(i); @@ -699,9 +707,17 @@ process_t *parse_execution_context_t::create_block_process(job_t *job, const par { /* We handle block statements by creating INTERNAL_BLOCK_NODE, that will bounce back to us when it's time to execute them */ assert(statement_node.type == symbol_block_statement || statement_node.type == symbol_if_statement || statement_node.type == symbol_switch_statement); + + /* The set of IO redirections that we construct for the process */ + io_chain_t process_io_chain; + bool errored = ! this->determine_io_chain(statement_node, &process_io_chain); + if (errored) + return NULL; + process_t *result = new process_t(); result->type = INTERNAL_BLOCK_NODE; result->internal_block_node = this->get_offset(statement_node); + result->set_io_chain(process_io_chain); return result; } @@ -777,6 +793,10 @@ bool parse_execution_context_t::populate_job_from_job_node(job_t *j, const parse { assert(job_cont->type == symbol_job_continuation); + /* Handle the pipe */ + const parse_node_t &pipe_node = *get_child(*job_cont, 0, parse_token_type_pipe); + last_process->pipe_write_fd = fd_redirected_by_pipe(get_source(pipe_node)); + /* Get the statement node and make a process from it */ const parse_node_t *statement_node = get_child(*job_cont, 1, symbol_statement); assert(statement_node != NULL); @@ -795,8 +815,20 @@ bool parse_execution_context_t::populate_job_from_job_node(job_t *j, const parse return ! 
process_errored; } -int parse_execution_context_t::run_1_job(const parse_node_t &job_node) +int parse_execution_context_t::run_1_job(const parse_node_t &job_node, const block_t *associated_block) { + bool log_it = false; + if (log_it) + { + fprintf(stderr, "%s: %ls\n", __FUNCTION__, get_source(job_node).c_str()); + } + + + if (should_cancel_execution(associated_block)) + { + return 1; + } + // Get terminal modes struct termios tmodes = {}; if (get_is_interactive()) @@ -913,13 +945,13 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node) return ret; } -int parse_execution_context_t::run_job_list(const parse_node_t &job_list_node) +int parse_execution_context_t::run_job_list(const parse_node_t &job_list_node, const block_t *associated_block) { assert(job_list_node.type == symbol_job_list); int result = 1; const parse_node_t *job_list = &job_list_node; - while (job_list != NULL) + while (job_list != NULL && ! should_cancel_execution(associated_block)) { assert(job_list->type == symbol_job_list); @@ -949,7 +981,7 @@ int parse_execution_context_t::run_job_list(const parse_node_t &job_list_node) if (job != NULL) { - result = this->run_1_job(*job); + result = this->run_1_job(*job, associated_block); } } @@ -957,7 +989,7 @@ int parse_execution_context_t::run_job_list(const parse_node_t &job_list_node) return result; } -int parse_execution_context_t::eval_node_at_offset(node_offset_t offset) +int parse_execution_context_t::eval_node_at_offset(node_offset_t offset, const block_t *associated_block, const io_chain_t &io) { bool log_it = false; @@ -965,6 +997,9 @@ int parse_execution_context_t::eval_node_at_offset(node_offset_t offset) assert(! 
tree.empty()); assert(offset < tree.size()); + /* Apply this block IO for the duration of this function */ + scoped_push block_io_push(&block_io, io); + const parse_node_t &node = tree.at(offset); if (log_it) @@ -982,9 +1017,9 @@ int parse_execution_context_t::eval_node_at_offset(node_offset_t offset) switch (node.type) { case symbol_job_list: - /* We should only get a job list if it's top level. This is because this is the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE execution (which does block statements, but never job lists) */ + /* We should only get a job list if it's the very first node. This is because this is the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE execution (which does block statements, but never job lists) */ assert(offset == 0); - ret = this->run_job_list(node); + ret = this->run_job_list(node, associated_block); break; case symbol_block_statement: diff --git a/parse_execution.h b/parse_execution.h index f465a5934..901f79811 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -13,13 +13,14 @@ class job_t; struct profile_item_t; +struct block_t; class parse_execution_context_t { private: const parse_node_tree_t tree; const wcstring src; - const io_chain_t block_io; + io_chain_t block_io; parser_t * const parser; parse_error_list_t errors; @@ -30,8 +31,8 @@ class parse_execution_context_t parse_execution_context_t(const parse_execution_context_t&); parse_execution_context_t& operator=(const parse_execution_context_t&); - /* Should I cancel */ - bool should_cancel() const; + /* Should I cancel? */ + bool should_cancel_execution(const block_t *block) const; /* Report an error. Always returns true. */ bool append_error(const parse_node_t &node, const wchar_t *fmt, ...); @@ -63,15 +64,15 @@ class parse_execution_context_t /* Determines the IO chain. 
Returns true on success, false on error */ bool determine_io_chain(const parse_node_t &statement, io_chain_t *out_chain); - int run_1_job(const parse_node_t &job_node); - int run_job_list(const parse_node_t &job_list_node); + int run_1_job(const parse_node_t &job_node, const block_t *associated_block); + int run_job_list(const parse_node_t &job_list_node, const block_t *associated_block); bool populate_job_from_job_node(job_t *j, const parse_node_t &job_node); public: - parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, const io_chain_t &io, parser_t *p); + parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, parser_t *p); /* Start executing at the given node offset, returning the exit status of the last process. */ - int eval_node_at_offset(node_offset_t offset); + int eval_node_at_offset(node_offset_t offset, const block_t *associated_block, const io_chain_t &io); }; diff --git a/parse_tree.h b/parse_tree.h index 266372e08..aa7a0d984 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -259,7 +259,6 @@ public: boolean_statement = AND statement | OR statement | NOT statement # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" -# TODO: we should be able to construct plain_statements out of e.g. 
'command --help' or even just 'command' decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement plain_statement = arguments_or_redirections_list optional_background diff --git a/parser.cpp b/parser.cpp index 1f331dc53..c3b9578a6 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2610,14 +2610,14 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl } /* Append to the execution context stack */ - parse_execution_context_t *ctx = new parse_execution_context_t(tree, cmd, io, this); + parse_execution_context_t *ctx = new parse_execution_context_t(tree, cmd, this); execution_contexts.push_back(ctx); /* Execute the first node */ int result = 1; if (! tree.empty()) { - result = this->eval_block_node(0, io_chain_t(), block_type); + result = this->eval_block_node(0, io, block_type); } /* Clean up the execution context stack */ @@ -2652,8 +2652,9 @@ int parser_t::eval_block_node(node_offset_t node_idx, const io_chain_t &io, enum /* Start it up */ const block_t * const start_current_block = current_block(); - this->push_block(new scope_block_t(block_type)); - int result = ctx->eval_node_at_offset(node_idx); + block_t *scope_block = new scope_block_t(block_type); + this->push_block(scope_block); + int result = ctx->eval_node_at_offset(node_idx, scope_block, io); /* Clean up the block stack */ this->pop_block(); @@ -3138,7 +3139,7 @@ bool parser_use_ast(void) env_var_t var = env_get_string(L"fish_new_parser"); if (var.missing_or_empty()) { - return false; + return 10; } else { diff --git a/tokenizer.cpp b/tokenizer.cpp index 0b0032836..4e2b402c6 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -535,6 +535,23 @@ enum token_type redirection_type_for_string(const wcstring &str, int *out_fd) return mode; } +int fd_redirected_by_pipe(const wcstring &str) +{ + /* Hack for the common case */ + if (str == L"|") + { + return STDOUT_FILENO; + } + + enum token_type mode = TOK_NONE; + int fd = 0; + 
read_redirection_or_fd_pipe(str.c_str(), &mode, &fd); + /* Pipes only */ + if (mode != TOK_PIPE || fd < 0) + fd = -1; + return fd; +} + int oflags_for_redirection_type(enum token_type type) { switch (type) diff --git a/tokenizer.h b/tokenizer.h index 17b1bcb96..c50aac99f 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -190,6 +190,9 @@ int tok_get_error(tokenizer_t *tok); /* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid. Also returns the fd by reference. */ enum token_type redirection_type_for_string(const wcstring &str, int *out_fd = NULL); +/* Helper function to determine which fd is redirected by a pipe */ +int fd_redirected_by_pipe(const wcstring &str); + /* Helper function to return oflags (as in open(2)) for a redirection type */ int oflags_for_redirection_type(enum token_type type); From c632307eaa4fdd8ac09bb1a9bf031101b1e0b6a2 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 28 Dec 2013 16:33:26 -0800 Subject: [PATCH 077/108] Make eval_node_at_offset return an error indication instead of the exit status of the last command --- parse_execution.cpp | 16 +++++++++++----- parse_execution.h | 2 +- parser.cpp | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/parse_execution.cpp b/parse_execution.cpp index 72d4452cf..4201c288b 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -1013,25 +1013,25 @@ int parse_execution_context_t::eval_node_at_offset(node_offset_t offset, const b node.type == symbol_if_statement || node.type == symbol_switch_statement); - int ret = 1; + int status = 1; switch (node.type) { case symbol_job_list: /* We should only get a job list if it's the very first node. 
This is because this is the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE execution (which does block statements, but never job lists) */ assert(offset == 0); - ret = this->run_job_list(node, associated_block); + status = this->run_job_list(node, associated_block); break; case symbol_block_statement: - ret = this->run_block_statement(node); + status = this->run_block_statement(node); break; case symbol_if_statement: - ret = this->run_if_statement(node); + status = this->run_if_statement(node); break; case symbol_switch_statement: - ret = this->run_switch_statement(node); + status = this->run_switch_statement(node); break; default: @@ -1040,5 +1040,11 @@ int parse_execution_context_t::eval_node_at_offset(node_offset_t offset, const b PARSER_DIE(); break; } + + proc_set_last_status(status); + + /* Argh */ + int ret = errors.empty() ? 0 : 1; + errors.clear(); return ret; } diff --git a/parse_execution.h b/parse_execution.h index 901f79811..8d89158bd 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -71,7 +71,7 @@ class parse_execution_context_t public: parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, parser_t *p); - /* Start executing at the given node offset, returning the exit status of the last process. */ + /* Start executing at the given node offset. 
Returns 0 if there was no error, 1 if there was an error */ int eval_node_at_offset(node_offset_t offset, const block_t *associated_block, const io_chain_t &io); }; diff --git a/parser.cpp b/parser.cpp index c3b9578a6..c60527814 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2625,7 +2625,7 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl execution_contexts.pop_back(); delete ctx; - return result; + return 0; } int parser_t::eval_block_node(node_offset_t node_idx, const io_chain_t &io, enum block_type_t block_type) From a42711e31cdb41e3c504ed161c07e56698d29e7a Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 28 Dec 2013 22:52:06 -0800 Subject: [PATCH 078/108] Support for break/continue with new parser execution --- parse_execution.cpp | 31 +++++++++++++++++++++++++++++-- parser.cpp | 7 +++++++ tests/test9.in | 34 ++++++++++++++++++++++++++++++++++ tests/test9.out | 6 ++++++ 4 files changed, 76 insertions(+), 2 deletions(-) diff --git a/parse_execution.cpp b/parse_execution.cpp index 4201c288b..137b7e001 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -47,7 +47,7 @@ node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) co bool parse_execution_context_t::should_cancel_execution(const block_t *block) const { - return block && block->skip; + return block && (block->skip || block->loop_status != LOOP_NORMAL); } int parse_execution_context_t::run_if_statement(const parse_node_t &statement) @@ -228,6 +228,19 @@ int parse_execution_context_t::run_for_statement(const parse_node_t &header, con fb->skip = 0; this->run_job_list(block_contents, fb); + + /* Handle break or continue */ + if (fb->loop_status == LOOP_CONTINUE) + { + /* Reset the loop state */ + fb->loop_status = LOOP_NORMAL; + fb->skip = false; + continue; + } + else if (fb->loop_status == LOOP_BREAK) + { + break; + } } return proc_get_last_status(); @@ -358,7 +371,21 @@ int parse_execution_context_t::run_while_statement(const 
parse_node_t &header, c /* A while loop is a while loop! */ while (! this->should_cancel_execution(wb) && this->run_1_job(while_condition, wb) == EXIT_SUCCESS) { + /* The block ought to go inside the loop (see #1212) */ this->run_job_list(block_contents, wb); + + /* Handle break or continue */ + if (wb->loop_status == LOOP_CONTINUE) + { + /* Reset the loop state */ + wb->loop_status = LOOP_NORMAL; + wb->skip = false; + continue; + } + else if (wb->loop_status == LOOP_BREAK) + { + break; + } } /* Done */ @@ -562,7 +589,7 @@ wcstring_list_t parse_execution_context_t::determine_arguments(const parse_node_ } /* Return if we had a wildcard problem */ - if (unmatched_wildcard && ! matched_wildcard) + if (out_unmatched_wildcard_node != NULL && unmatched_wildcard && ! matched_wildcard) { *out_unmatched_wildcard_node = unmatched_wildcard_node; } diff --git a/parser.cpp b/parser.cpp index c60527814..bd0471df0 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2602,6 +2602,13 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl { CHECK_BLOCK(1); + if (block_type != TOP && block_type != SUBST) + { + debug(1, INVALID_SCOPE_ERR_MSG, parser_t::get_block_desc(block_type)); + bugreport(); + return 1; + } + /* Parse the source into a tree, if we can */ parse_node_tree_t tree; if (! 
parse_t::parse(cmd, parse_flag_none, &tree, NULL)) diff --git a/tests/test9.in b/tests/test9.in index a38fbc7c1..a16281f10 100644 --- a/tests/test9.in +++ b/tests/test9.in @@ -35,3 +35,37 @@ emit test3 foo bar # test empty argument emit + +echo "Test break and continue" +# This should output Ping once +for i in a b c + if not contains $i c ; continue ; end + echo Ping +end + +# This should output Pong not at all +for i in a b c + if not contains $i c ; break ; end + echo Pong +end + +# This should output Foop three times, and Boop not at all +set i a a a +while contains $i a + set -e i[-1] + echo Foop + continue + echo Boop +end + +# This should output Doop once +set i a a a +while contains $i a + set -e i[-1] + echo Doop + break + echo Darp +end + + +false diff --git a/tests/test9.out b/tests/test9.out index 8e19365cd..cf9054f8c 100644 --- a/tests/test9.out +++ b/tests/test9.out @@ -2,3 +2,9 @@ Testing that builtins can truncate files abc before:test1 received event test3 with args: foo bar +Test break and continue +Ping +Foop +Foop +Foop +Doop From a9787b769fce4327be5db4f361fb47208d4f79d1 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 29 Dec 2013 16:23:26 -0800 Subject: [PATCH 079/108] Support for implicit cd, no-exec, and the exit builtin. All tests now pass (!). Error reporting still unsteady. --- exec.cpp | 6 + parse_execution.cpp | 275 ++++++++++++++++++++++++++++++++++++-------- parse_execution.h | 12 ++ parser.cpp | 4 +- reader.cpp | 2 +- reader.h | 2 +- tests/test9.in | 2 + 7 files changed, 250 insertions(+), 53 deletions(-) diff --git a/exec.cpp b/exec.cpp index e150723db..594a53857 100644 --- a/exec.cpp +++ b/exec.cpp @@ -577,6 +577,12 @@ static bool can_use_posix_spawn_for_job(const job_t *job, const process_t *proce /* What exec does if no_exec is set. This only has to handle block pushing and popping. See #624. 
*/ static void exec_no_exec(parser_t &parser, const job_t *job) { + if (parser_use_ast()) + { + /* With the new parser, commands aren't responsible for pushing / popping blocks, so there's nothing to do */ + return; + } + /* Hack hack hack. If this is an 'end' job, then trigger a pop. If this is a job that would create a block, trigger a push. See #624 */ const process_t *p = job->first_process; if (p && p->type == INTERNAL_BUILTIN) diff --git a/parse_execution.cpp b/parse_execution.cpp index 137b7e001..0733fb8bb 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -11,6 +11,7 @@ #include "builtin.h" #include "parser.h" #include "expand.h" +#include "reader.h" #include "wutil.h" #include "exec.h" #include "path.h" @@ -47,7 +48,28 @@ node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) co bool parse_execution_context_t::should_cancel_execution(const block_t *block) const { - return block && (block->skip || block->loop_status != LOOP_NORMAL); + return cancellation_reason(block) != execution_cancellation_none; +} + +parse_execution_context_t::execution_cancellation_reason_t parse_execution_context_t::cancellation_reason(const block_t *block) const +{ + if (shell_is_exiting()) + { + return execution_cancellation_exit; + } + else if (block && block->loop_status != LOOP_NORMAL) + { + /* Nasty hack - break and continue set the 'skip' flag as well as the loop status flag. 
*/ + return execution_cancellation_loop_control; + } + else if (block && block->skip) + { + return execution_cancellation_skip; + } + else + { + return execution_cancellation_none; + } } int parse_execution_context_t::run_if_statement(const parse_node_t &statement) @@ -229,17 +251,20 @@ int parse_execution_context_t::run_for_statement(const parse_node_t &header, con this->run_job_list(block_contents, fb); - /* Handle break or continue */ - if (fb->loop_status == LOOP_CONTINUE) + if (this->cancellation_reason(fb) == execution_cancellation_loop_control) { - /* Reset the loop state */ - fb->loop_status = LOOP_NORMAL; - fb->skip = false; - continue; - } - else if (fb->loop_status == LOOP_BREAK) - { - break; + /* Handle break or continue */ + if (fb->loop_status == LOOP_CONTINUE) + { + /* Reset the loop state */ + fb->loop_status = LOOP_NORMAL; + fb->skip = false; + continue; + } + else if (fb->loop_status == LOOP_BREAK) + { + break; + } } } @@ -374,17 +399,20 @@ int parse_execution_context_t::run_while_statement(const parse_node_t &header, c /* The block ought to go inside the loop (see #1212) */ this->run_job_list(block_contents, wb); - /* Handle break or continue */ - if (wb->loop_status == LOOP_CONTINUE) + if (this->cancellation_reason(wb) == execution_cancellation_loop_control) { - /* Reset the loop state */ - wb->loop_status = LOOP_NORMAL; - wb->skip = false; - continue; - } - else if (wb->loop_status == LOOP_BREAK) - { - break; + /* Handle break or continue */ + if (wb->loop_status == LOOP_CONTINUE) + { + /* Reset the loop state */ + wb->loop_status = LOOP_NORMAL; + wb->skip = false; + continue; + } + else if (wb->loop_status == LOOP_BREAK) + { + break; + } } } @@ -418,15 +446,129 @@ bool parse_execution_context_t::append_unmatched_wildcard_error(const parse_node return append_error(unmatched_wildcard, WILDCARD_ERR_MSG, get_source(unmatched_wildcard).c_str()); } +/* Handle the case of command not found */ +void 
parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str, const parse_node_t &statement_node, int err_code) +{ + assert(statement_node.type == symbol_plain_statement); + + /* + We couldn't find the specified command. + What we want to happen now is that the + specified job won't get executed, and an + error message is printed on-screen, but + otherwise, the parsing/execution of the + file continues. Because of this, we don't + want to call error(), since that would stop + execution of the file. Instead we let + p->actual_command be 0 (null), which will + cause the job to silently not execute. We + also print an error message and set the + status to 127 (This is the standard number + for this, used by other shells like bash + and zsh). + */ + + const wchar_t * const cmd = cmd_str.c_str(); + const wchar_t * const equals_ptr = wcschr(cmd, L'='); + if (equals_ptr != NULL) + { + /* Try to figure out if this is a pure variable assignment (foo=bar), or if this appears to be running a command (foo=bar ruby...) */ + + const wcstring name_str = wcstring(cmd, equals_ptr - cmd); //variable name, up to the = + const wcstring val_str = wcstring(equals_ptr + 1); //variable value, past the = + + + const parse_node_tree_t::parse_node_list_t args = tree.find_nodes(statement_node, symbol_argument, 1); + + if (! args.empty()) + { + const wcstring argument = get_source(*args.at(0)); + + wcstring ellipsis_str = wcstring(1, ellipsis_char); + if (ellipsis_str == L"$") + ellipsis_str = L"..."; + + /* Looks like a command */ + debug(0, + _(L"Unknown command '%ls'. Did you mean to run %ls with a modified environment? Try 'env %ls=%ls %ls%ls'. 
See the help section on the set command by typing 'help set'."), + cmd, + argument.c_str(), + name_str.c_str(), + val_str.c_str(), + argument.c_str(), + ellipsis_str.c_str()); + } + else + { + debug(0, + COMMAND_ASSIGN_ERR_MSG, + cmd, + name_str.c_str(), + val_str.c_str()); + } + } + else if (cmd[0]==L'$' || cmd[0] == VARIABLE_EXPAND || cmd[0] == VARIABLE_EXPAND_SINGLE) + { + + const env_var_t val_wstr = env_get_string(cmd+1); + const wchar_t *val = val_wstr.missing() ? NULL : val_wstr.c_str(); + if (val) + { + debug(0, + _(L"Variables may not be used as commands. Instead, define a function like 'function %ls; %ls $argv; end' or use the eval builtin instead, like 'eval %ls'. See the help section for the function command by typing 'help function'."), + cmd+1, + val, + cmd, + cmd); + } + else + { + debug(0, + _(L"Variables may not be used as commands. Instead, define a function or use the eval builtin instead, like 'eval %ls'. See the help section for the function command by typing 'help function'."), + cmd, + cmd); + } + } + else if (wcschr(cmd, L'$')) + { + debug(0, + _(L"Commands may not contain variables. Use the eval builtin instead, like 'eval %ls'. 
See the help section for the eval command by typing 'help eval'."), + cmd, + cmd); + } + else if (err_code!=ENOENT) + { + debug(0, + _(L"The file '%ls' is not executable by this user"), + cmd?cmd:L"UNKNOWN"); + } + else + { + /* + Handle unrecognized commands with standard + command not found handler that can make better + error messages + */ + + wcstring_list_t event_args; + event_args.push_back(cmd_str); + event_fire_generic(L"fish_command_not_found", &event_args); + } + + /* Set the last proc status appropriately */ + proc_set_last_status(err_code==ENOENT?STATUS_UNKNOWN_COMMAND:STATUS_NOT_EXECUTABLE); +} /* Creates a 'normal' (non-block) process */ process_t *parse_execution_context_t::create_plain_process(job_t *job, const parse_node_t &statement) { + assert(statement.type == symbol_plain_statement); + bool errored = false; - /* Get the decoration */ - assert(statement.type == symbol_plain_statement); + /* We may decide that a command should be an implicit cd */ + bool use_implicit_cd = false; /* Get the command. We expect to always get it here. */ wcstring cmd; @@ -442,28 +584,7 @@ process_t *parse_execution_context_t::create_plain_process(job_t *job, const par if (errored) return NULL; - - /* The list of arguments. The command is the first argument. TODO: count hack */ - const parse_node_t *unmatched_wildcard = NULL; - wcstring_list_t argument_list = this->determine_arguments(statement, &unmatched_wildcard); - argument_list.insert(argument_list.begin(), cmd); - - /* If we were not able to expand any wildcards, here is the first one that failed */ - if (unmatched_wildcard != NULL) - { - job_set_flag(job, JOB_WILDCARD_ERROR, 1); - errored = append_unmatched_wildcard_error(*unmatched_wildcard); - } - - if (errored) - return NULL; - - /* The set of IO redirections that we construct for the process */ - io_chain_t process_io_chain; - errored = ! 
this->determine_io_chain(statement, &process_io_chain); - if (errored) - return NULL; - + /* Determine the process type, which depends on the statement decoration (command, builtin, etc) */ enum parse_statement_decoration_t decoration = tree.decoration_for_plain_statement(statement); enum process_type_t process_type = EXTERNAL; @@ -500,15 +621,71 @@ process_t *parse_execution_context_t::create_plain_process(job_t *job, const par wcstring actual_cmd; if (process_type == EXTERNAL) { - /* Determine the actual command. Need to support implicit cd here */ + /* Determine the actual command. This may be an implicit cd. */ bool has_command = path_get_path(cmd, &actual_cmd); - if (! has_command) + /* If there was no command, then we care about the value of errno after checking for it, to distinguish between e.g. no file vs permissions problem */ + const int no_cmd_err_code = errno; + + /* If the specified command does not exist, and is undecorated, try using an implicit cd. */ + if (! has_command && decoration == parse_statement_decoration_none) { - /* TODO: support fish_command_not_found, implicit cd, etc. here */ + /* Implicit cd requires an empty argument and redirection list */ + const parse_node_t *args = get_child(statement, 1, symbol_arguments_or_redirections_list); + if (args->child_count == 0) + { + /* Ok, no arguments or redirections; check to see if the first argument is a directory */ + wcstring implicit_cd_path; + use_implicit_cd = path_can_be_implicit_cd(cmd, &implicit_cd_path); + } + } + + if (! has_command && ! 
use_implicit_cd) + { + /* No command */ + this->handle_command_not_found(cmd, statement, no_cmd_err_code); errored = true; } } + if (errored) + return NULL; + + /* The argument list and set of IO redirections that we will construct for the process */ + wcstring_list_t argument_list; + io_chain_t process_io_chain; + if (use_implicit_cd) + { + /* Implicit cd is simple */ + argument_list.push_back(L"cd"); + argument_list.push_back(cmd); + actual_cmd.clear(); + + /* If we have defined a wrapper around cd, use it, otherwise use the cd builtin */ + process_type = function_exists(L"cd") ? INTERNAL_FUNCTION : INTERNAL_BUILTIN; + } + else + { + /* Form the list of arguments. The command is the first argument. TODO: count hack */ + const parse_node_t *unmatched_wildcard = NULL; + argument_list = this->determine_arguments(statement, &unmatched_wildcard); + argument_list.insert(argument_list.begin(), cmd); + + /* If we were not able to expand any wildcards, here is the first one that failed */ + if (unmatched_wildcard != NULL) + { + job_set_flag(job, JOB_WILDCARD_ERROR, 1); + errored = append_unmatched_wildcard_error(*unmatched_wildcard); + } + + if (errored) + return NULL; + + /* The set of IO redirections that we construct for the process */ + errored = ! this->determine_io_chain(statement, &process_io_chain); + if (errored) + return NULL; + } + /* Return the process, or NULL on error */ process_t *result = NULL; @@ -953,7 +1130,7 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node, const blo profile_item->skipped = process_errored; } - /* Set the last status to 1 if the job could not be executed */ + /* Set the last status to 1 if the job could not be executed. 
TODO: Don't stomp STATUS_UNKNOWN_COMMAND / STATUS_NOT_EXECUTABLE */ if (process_errored) proc_set_last_status(1); const int ret = proc_get_last_status(); diff --git a/parse_execution.h b/parse_execution.h index 8d89158bd..f68cad5fd 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -34,11 +34,23 @@ class parse_execution_context_t /* Should I cancel? */ bool should_cancel_execution(const block_t *block) const; + /* Ways that we can stop executing a block. These are in a sort of ascending order of importance, e.g. `exit` should trump `break` */ + enum execution_cancellation_reason_t + { + execution_cancellation_none, + execution_cancellation_loop_control, + execution_cancellation_skip, + execution_cancellation_exit + }; + execution_cancellation_reason_t cancellation_reason(const block_t *block) const; + /* Report an error. Always returns true. */ bool append_error(const parse_node_t &node, const wchar_t *fmt, ...); /* Wildcard error helper */ bool append_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard); + void handle_command_not_found(const wcstring &cmd, const parse_node_t &statement_node, int err_code); + /* Utilities */ wcstring get_source(const parse_node_t &node) const; const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; diff --git a/parser.cpp b/parser.cpp index bd0471df0..6b7d19091 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2740,7 +2740,7 @@ int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_typ while (tok_has_next(current_tokenizer) && !error_code && !sanity_check() && - !exit_status()) + !shell_is_exiting()) { this->eval_job(current_tokenizer); event_fire(NULL); @@ -2759,7 +2759,7 @@ int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_typ break; } - if ((!error_code) && (!exit_status()) && (!proc_get_last_status())) + if ((!error_code) && (!shell_is_exiting()) && (!proc_get_last_status())) { 
//debug( 2, L"Status %d\n", proc_get_last_status() ); diff --git a/reader.cpp b/reader.cpp index 0905b3792..f5ae62f48 100644 --- a/reader.cpp +++ b/reader.cpp @@ -2736,7 +2736,7 @@ static void reader_super_highlight_me_plenty(size_t match_highlight_pos) } -int exit_status() +bool shell_is_exiting() { if (get_is_interactive()) return job_list_is_empty() && data->end_loop; diff --git a/reader.h b/reader.h index b954c1bea..e028e2f03 100644 --- a/reader.h +++ b/reader.h @@ -217,7 +217,7 @@ void reader_set_exit_on_interrupt(bool flag); /** Returns true if the shell is exiting, 0 otherwise. */ -int exit_status(); +bool shell_is_exiting(); /** The readers interrupt signal handler. Cancels all currently running blocks. diff --git a/tests/test9.in b/tests/test9.in index a16281f10..e449a21dd 100644 --- a/tests/test9.in +++ b/tests/test9.in @@ -67,5 +67,7 @@ while contains $i a echo Darp end +# Test implicit cd. This should do nothing. +./ false From 7356a0f6c86239de19195ae13c035544448eeca5 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 31 Dec 2013 14:37:37 -0800 Subject: [PATCH 080/108] Clean up and rationalize error handling in parse_execution.cpp --- parse_constants.h | 2 +- parse_execution.cpp | 542 ++++++++++++++++++++++++-------------------- parse_execution.h | 57 +++-- parser.cpp | 15 -- proc.cpp | 6 + 5 files changed, 342 insertions(+), 280 deletions(-) diff --git a/parse_constants.h b/parse_constants.h index b59f52e98..e923bc10e 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -107,7 +107,7 @@ enum parse_error_code_t { parse_error_none, - /* matching values from enum parser_error */ + /* Matching values from enum parser_error */ parse_error_syntax, parse_error_eval, parse_error_cmdsubst, diff --git a/parse_execution.cpp b/parse_execution.cpp index 0733fb8bb..38b543c46 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -1,7 +1,10 @@ /**\file parse_execution.cpp - Provides the "linkage" between a parse_node_tree_t and actual execution 
structures (job_t, etc.). - + Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.) + + A note on error handling: fish has two kind of errors, fatal parse errors non-fatal runtime errors. A fatal error prevents execution of the entire file, while a non-fatal error skips that job. + + Non-fatal errors are printed as soon as they are encountered; otherwise you would have to wait for the execution to finish to see them. */ #include "parse_execution.h" @@ -45,6 +48,44 @@ node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) co return offset; } +enum process_type_t parse_execution_context_t::process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const +{ + assert(plain_statement.type == symbol_plain_statement); + enum process_type_t process_type = EXTERNAL; + + /* Determine the process type, which depends on the statement decoration (command, builtin, etc) */ + enum parse_statement_decoration_t decoration = tree.decoration_for_plain_statement(plain_statement); + + /* Do the "exec hack" */ + if (decoration != parse_statement_decoration_command && cmd == L"exec") + { + /* Either 'builtin exec' or just plain 'exec', and definitely not 'command exec'. Note we don't allow overriding exec with a function. */ + process_type = INTERNAL_EXEC; + } + else if (decoration == parse_statement_decoration_command) + { + /* Always a command */ + process_type = EXTERNAL; + } + else if (decoration == parse_statement_decoration_builtin) + { + /* What happens if this builtin is not valid? 
*/ + process_type = INTERNAL_BUILTIN; + } + else if (function_exists(cmd)) + { + process_type = INTERNAL_FUNCTION; + } + else if (builtin_exists(cmd)) + { + process_type = INTERNAL_BUILTIN; + } + else + { + process_type = EXTERNAL; + } + return process_type; +} bool parse_execution_context_t::should_cancel_execution(const block_t *block) const { @@ -72,7 +113,7 @@ parse_execution_context_t::execution_cancellation_reason_t parse_execution_conte } } -int parse_execution_context_t::run_if_statement(const parse_node_t &statement) +parse_execution_result_t parse_execution_context_t::run_if_statement(const parse_node_t &statement) { assert(statement.type == symbol_if_statement); @@ -80,16 +121,29 @@ int parse_execution_context_t::run_if_statement(const parse_node_t &statement) if_block_t *ib = new if_block_t(); ib->node_offset = this->get_offset(statement); parser->push_block(ib); + + parse_execution_result_t result = parse_execution_success; /* We have a sequence of if clauses, with a final else, resulting in a single job list that we execute */ const parse_node_t *job_list_to_execute = NULL; const parse_node_t *if_clause = get_child(statement, 0, symbol_if_clause); const parse_node_t *else_clause = get_child(statement, 1, symbol_else_clause); - while (! should_cancel_execution(ib)) + for (;;) { + if (should_cancel_execution(ib)) + { + result = parse_execution_cancelled; + break; + } + assert(if_clause != NULL && else_clause != NULL); const parse_node_t &condition = *get_child(*if_clause, 1, symbol_job); - if (run_1_job(condition, ib) == EXIT_SUCCESS) + + /* Check the condition. 
We treat parse_execution_errored here as failure, in accordance with historic behavior */ + parse_execution_result_t cond_ret = run_1_job(condition, ib); + bool take_branch = (cond_ret == parse_execution_success) && proc_get_last_status() == EXIT_SUCCESS; + + if (take_branch) { /* condition succeeded */ job_list_to_execute = get_child(*if_clause, 3, symbol_job_list); @@ -131,10 +185,10 @@ int parse_execution_context_t::run_if_statement(const parse_node_t &statement) /* Done */ parser->pop_block(ib); - return proc_get_last_status(); + return result; } -int parse_execution_context_t::run_begin_statement(const parse_node_t &header, const parse_node_t &contents) +parse_execution_result_t parse_execution_context_t::run_begin_statement(const parse_node_t &header, const parse_node_t &contents) { assert(header.type == symbol_begin_header); assert(contents.type == symbol_job_list); @@ -144,31 +198,32 @@ int parse_execution_context_t::run_begin_statement(const parse_node_t &header, c parser->push_block(sb); /* Run the job list */ - run_job_list(contents, sb); + parse_execution_result_t ret = run_job_list(contents, sb); /* Pop the block */ parser->pop_block(sb); - return proc_get_last_status(); -} + return ret; + } /* Define a function */ -int parse_execution_context_t::run_function_statement(const parse_node_t &header, const parse_node_t &contents) +parse_execution_result_t parse_execution_context_t::run_function_statement(const parse_node_t &header, const parse_node_t &contents) { assert(header.type == symbol_function_header); assert(contents.type == symbol_job_list); + parse_execution_result_t result = parse_execution_success; /* Get arguments */ const parse_node_t *unmatched_wildcard = NULL; wcstring_list_t argument_list = this->determine_arguments(header, &unmatched_wildcard); - bool errored = false; if (unmatched_wildcard != NULL) { - errored = append_unmatched_wildcard_error(*unmatched_wildcard); + append_unmatched_wildcard_error(*unmatched_wildcard); + result = 
parse_execution_errored; } - if (! errored) + if (result == parse_execution_success) { const wcstring contents_str = get_source(contents); wcstring error_str; @@ -178,12 +233,14 @@ int parse_execution_context_t::run_function_statement(const parse_node_t &header if (! error_str.empty()) { this->append_error(header, L"%ls", error_str.c_str()); + result = parse_execution_errored; } } - return proc_get_last_status(); + return result; + } -int parse_execution_context_t::run_block_statement(const parse_node_t &statement) +parse_execution_result_t parse_execution_context_t::run_block_statement(const parse_node_t &statement) { assert(statement.type == symbol_block_statement); @@ -191,7 +248,7 @@ int parse_execution_context_t::run_block_statement(const parse_node_t &statement const parse_node_t &header = *get_child(block_header, 0); //specific header type (e.g. for loop) const parse_node_t &contents = *get_child(statement, 2, symbol_job_list); //block contents - int ret = 1; + parse_execution_result_t ret = parse_execution_success; switch (header.type) { case symbol_for_header: @@ -216,10 +273,10 @@ int parse_execution_context_t::run_block_statement(const parse_node_t &statement break; } - return proc_get_last_status(); + return ret; } -int parse_execution_context_t::run_for_statement(const parse_node_t &header, const parse_node_t &block_contents) +parse_execution_result_t parse_execution_context_t::run_for_statement(const parse_node_t &header, const parse_node_t &block_contents) { assert(header.type == symbol_for_header); assert(block_contents.type == symbol_job_list); @@ -228,10 +285,11 @@ int parse_execution_context_t::run_for_statement(const parse_node_t &header, con const parse_node_t &var_name_node = *get_child(header, 1, parse_token_type_string); const wcstring for_var_name = get_source(var_name_node); - /* Get the contents to iterate over. Here we could do something with unmatched_wildcard. However it seems nicer to not make for loops complain about this, i.e. 
just iterate over a potentially empty list - */ + /* Get the contents to iterate over. Here we could do something with unmatched_wildcard. However it seems nicer to not make for loops complain about this, i.e. just iterate over a potentially empty list */ wcstring_list_t argument_list = this->determine_arguments(header, NULL); + parse_execution_result_t ret = parse_execution_success; + for_block_t *fb = new for_block_t(for_var_name); parser->push_block(fb); @@ -239,9 +297,14 @@ int parse_execution_context_t::run_for_statement(const parse_node_t &header, con std::reverse(argument_list.begin(), argument_list.end()); fb->sequence = argument_list; - /* Now drive the for loop. TODO: handle break, etc. */ - while (! fb->sequence.empty() && ! should_cancel_execution(fb)) + /* Now drive the for loop. */ + while (! fb->sequence.empty()) { + if (should_cancel_execution(fb)) + { + ret = parse_execution_cancelled; + } + const wcstring &for_variable = fb->variable; const wcstring &val = fb->sequence.back(); env_set(for_variable, val.c_str(), ENV_LOCAL); @@ -267,17 +330,18 @@ int parse_execution_context_t::run_for_statement(const parse_node_t &header, con } } } - - return proc_get_last_status(); + return ret; } -int parse_execution_context_t::run_switch_statement(const parse_node_t &statement) +parse_execution_result_t parse_execution_context_t::run_switch_statement(const parse_node_t &statement) { assert(statement.type == symbol_switch_statement); - bool errored = false; + parse_execution_result_t ret = parse_execution_success; const parse_node_t *matching_case_item = NULL; + parse_execution_result_t result = parse_execution_success; + /* Get the switch variable */ const parse_node_t &switch_value_node = *get_child(statement, 1, parse_token_type_string); const wcstring switch_value = get_source(switch_value_node); @@ -289,7 +353,7 @@ int parse_execution_context_t::run_switch_statement(const parse_node_t &statemen { case EXPAND_ERROR: { - errored = 
append_error(switch_value_node, + result = append_error(switch_value_node, _(L"Could not expand string '%ls'"), switch_value.c_str()); break; @@ -298,7 +362,8 @@ int parse_execution_context_t::run_switch_statement(const parse_node_t &statemen case EXPAND_WILDCARD_NO_MATCH: { /* Store the node that failed to expand */ - errored = append_error(switch_value_node, WILDCARD_ERR_MSG, switch_value.c_str()); + append_error(switch_value_node, WILDCARD_ERR_MSG, switch_value.c_str()); + ret = parse_execution_errored; break; } @@ -309,9 +374,9 @@ int parse_execution_context_t::run_switch_statement(const parse_node_t &statemen } } - if (! errored && switch_values_expanded.size() != 1) + if (result == parse_execution_success && switch_values_expanded.size() != 1) { - errored = append_error(switch_value_node, + result = append_error(switch_value_node, _(L"switch: Expected exactly one argument, got %lu\n"), switch_values_expanded.size()); } @@ -320,12 +385,18 @@ int parse_execution_context_t::run_switch_statement(const parse_node_t &statemen switch_block_t *sb = new switch_block_t(switch_value_expanded); parser->push_block(sb); - if (! errored) + if (result == parse_execution_success) { /* Expand case statements */ const parse_node_t *case_item_list = get_child(statement, 3, symbol_case_item_list); - while (matching_case_item == NULL && case_item_list->child_count > 0 && ! should_cancel_execution(sb)) + while (matching_case_item == NULL && case_item_list->child_count > 0) { + if (should_cancel_execution(sb)) + { + result = parse_execution_cancelled; + break; + } + if (case_item_list->production_idx == 2) { /* Hackish: blank line */ @@ -364,22 +435,19 @@ int parse_execution_context_t::run_switch_statement(const parse_node_t &statemen } } - if (! 
errored && matching_case_item) + if (result == parse_execution_success && matching_case_item) { /* Success, evaluate the job list */ const parse_node_t *job_list = get_child(*matching_case_item, 3, symbol_job_list); - this->run_job_list(*job_list, sb); + result = this->run_job_list(*job_list, sb); } parser->pop_block(sb); - - // Oops, this is stomping STATUS_WILDCARD_ERROR. TODO: Don't! - if (errored) - proc_set_last_status(STATUS_BUILTIN_ERROR); - return proc_get_last_status(); + + return result; } -int parse_execution_context_t::run_while_statement(const parse_node_t &header, const parse_node_t &block_contents) +parse_execution_result_t parse_execution_context_t::run_while_statement(const parse_node_t &header, const parse_node_t &block_contents) { assert(header.type == symbol_while_header); assert(block_contents.type == symbol_job_list); @@ -390,12 +458,31 @@ int parse_execution_context_t::run_while_statement(const parse_node_t &header, c wb->node_offset = this->get_offset(header); parser->push_block(wb); + parse_execution_result_t ret = parse_execution_success; + /* The condition and contents of the while loop, as a job and job list respectively */ const parse_node_t &while_condition = *get_child(header, 1, symbol_job); - /* A while loop is a while loop! */ - while (! 
this->should_cancel_execution(wb) && this->run_1_job(while_condition, wb) == EXIT_SUCCESS) + /* Run while the condition is true */ + for (;;) { + /* Check the condition */ + parse_execution_result_t cond_result = this->run_1_job(while_condition, wb); + + /* We only continue on successful execution and EXIT_SUCCESS */ + if (cond_result != parse_execution_success || proc_get_last_status() != EXIT_SUCCESS) + { + break; + } + + /* Check cancellation */ + if (this->should_cancel_execution(wb)) + { + ret = parse_execution_cancelled; + break; + } + + /* The block ought to go inside the loop (see #1212) */ this->run_job_list(block_contents, wb); @@ -419,11 +506,11 @@ int parse_execution_context_t::run_while_statement(const parse_node_t &header, c /* Done */ parser->pop_block(wb); - return proc_get_last_status(); + return ret; } -/* Appends an error to the error list. Always returns true, so you can assign the result to an 'errored' variable */ -bool parse_execution_context_t::append_error(const parse_node_t &node, const wchar_t *fmt, ...) +/* Appends an error to the error list. Always returns parse_execution_errored, so you can assign the result to an 'errored' variable */ +parse_execution_result_t parse_execution_context_t::append_error(const parse_node_t &node, const wchar_t *fmt, ...) { parse_error_t error; error.source_start = node.source_start; @@ -435,12 +522,16 @@ bool parse_execution_context_t::append_error(const parse_node_t &node, const wch error.text = vformat_string(fmt, va); va_end(va); - this->errors.push_back(error); - return true; + //this->errors.push_back(error); + + /* Output the error */ + fprintf(stderr, "%ls\n", error.describe(this->src).c_str()); + + return parse_execution_errored; } /* Appends an unmatched wildcard error to the error list, and returns true. 
*/ -bool parse_execution_context_t::append_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard) +parse_execution_result_t parse_execution_context_t::append_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard) { proc_set_last_status(STATUS_UNMATCHED_WILDCARD); return append_error(unmatched_wildcard, WILDCARD_ERR_MSG, get_source(unmatched_wildcard).c_str()); @@ -451,30 +542,14 @@ void parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str { assert(statement_node.type == symbol_plain_statement); - /* - We couldn't find the specified command. - - What we want to happen now is that the - specified job won't get executed, and an - error message is printed on-screen, but - otherwise, the parsing/execution of the - file continues. Because of this, we don't - want to call error(), since that would stop - execution of the file. Instead we let - p->actual_command be 0 (null), which will - cause the job to silently not execute. We - also print an error message and set the - status to 127 (This is the standard number - for this, used by other shells like bash - and zsh). - */ - + /* We couldn't find the specified command. This is a non-fatal error. We want to set the exit status to 127, which is the standard number used by other shells like bash and zsh. */ + const wchar_t * const cmd = cmd_str.c_str(); const wchar_t * const equals_ptr = wcschr(cmd, L'='); if (equals_ptr != NULL) { /* Try to figure out if this is a pure variable assignment (foo=bar), or if this appears to be running a command (foo=bar ruby...) 
*/ - + const wcstring name_str = wcstring(cmd, equals_ptr - cmd); //variable name, up to the = const wcstring val_str = wcstring(equals_ptr + 1); //variable value, past the = @@ -488,60 +563,60 @@ void parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str wcstring ellipsis_str = wcstring(1, ellipsis_char); if (ellipsis_str == L"$") ellipsis_str = L"..."; - + /* Looks like a command */ - debug(0, - _(L"Unknown command '%ls'. Did you mean to run %ls with a modified environment? Try 'env %ls=%ls %ls%ls'. See the help section on the set command by typing 'help set'."), - cmd, - argument.c_str(), - name_str.c_str(), - val_str.c_str(), - argument.c_str(), - ellipsis_str.c_str()); + this->append_error(statement_node, + _(L"Unknown command '%ls'. Did you mean to run %ls with a modified environment? Try 'env %ls=%ls %ls%ls'. See the help section on the set command by typing 'help set'."), + cmd, + argument.c_str(), + name_str.c_str(), + val_str.c_str(), + argument.c_str(), + ellipsis_str.c_str()); } else { - debug(0, - COMMAND_ASSIGN_ERR_MSG, - cmd, - name_str.c_str(), - val_str.c_str()); + this->append_error(statement_node, + COMMAND_ASSIGN_ERR_MSG, + cmd, + name_str.c_str(), + val_str.c_str()); } } else if (cmd[0]==L'$' || cmd[0] == VARIABLE_EXPAND || cmd[0] == VARIABLE_EXPAND_SINGLE) { - + const env_var_t val_wstr = env_get_string(cmd+1); const wchar_t *val = val_wstr.missing() ? NULL : val_wstr.c_str(); if (val) { - debug(0, - _(L"Variables may not be used as commands. Instead, define a function like 'function %ls; %ls $argv; end' or use the eval builtin instead, like 'eval %ls'. See the help section for the function command by typing 'help function'."), - cmd+1, - val, - cmd, - cmd); + this->append_error(statement_node, + _(L"Variables may not be used as commands. Instead, define a function like 'function %ls; %ls $argv; end' or use the eval builtin instead, like 'eval %ls'. 
See the help section for the function command by typing 'help function'."), + cmd+1, + val, + cmd, + cmd); } else { - debug(0, - _(L"Variables may not be used as commands. Instead, define a function or use the eval builtin instead, like 'eval %ls'. See the help section for the function command by typing 'help function'."), - cmd, - cmd); + this->append_error(statement_node, + _(L"Variables may not be used as commands. Instead, define a function or use the eval builtin instead, like 'eval %ls'. See the help section for the function command by typing 'help function'."), + cmd, + cmd); } } else if (wcschr(cmd, L'$')) { - debug(0, - _(L"Commands may not contain variables. Use the eval builtin instead, like 'eval %ls'. See the help section for the eval command by typing 'help eval'."), - cmd, - cmd); + this->append_error(statement_node, + _(L"Commands may not contain variables. Use the eval builtin instead, like 'eval %ls'. See the help section for the eval command by typing 'help eval'."), + cmd, + cmd); } else if (err_code!=ENOENT) { - debug(0, - _(L"The file '%ls' is not executable by this user"), - cmd?cmd:L"UNKNOWN"); + this->append_error(statement_node, + _(L"The file '%ls' is not executable by this user"), + cmd?cmd:L"UNKNOWN"); } else { @@ -550,10 +625,13 @@ void parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str command not found handler that can make better error messages */ - + wcstring_list_t event_args; event_args.push_back(cmd_str); event_fire_generic(L"fish_command_not_found", &event_args); + + /* Here we want to report an error (so it shows a backtrace), but with no text */ + this->append_error(statement_node, L""); } /* Set the last proc status appropriately */ @@ -561,11 +639,11 @@ void parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str } /* Creates a 'normal' (non-block) process */ -process_t *parse_execution_context_t::create_plain_process(job_t *job, const parse_node_t &statement) 
+parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t *job, process_t *proc, const parse_node_t &statement) { + assert(job != NULL); + assert(proc != NULL); assert(statement.type == symbol_plain_statement); - - bool errored = false; /* We may decide that a command should be an implicit cd */ bool use_implicit_cd = false; @@ -579,44 +657,12 @@ process_t *parse_execution_context_t::create_plain_process(job_t *job, const par bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); if (! expanded) { - errored = append_error(statement, ILLEGAL_CMD_ERR_MSG, cmd.c_str()); + append_error(statement, ILLEGAL_CMD_ERR_MSG, cmd.c_str()); + return parse_execution_errored; } - if (errored) - return NULL; - - /* Determine the process type, which depends on the statement decoration (command, builtin, etc) */ - enum parse_statement_decoration_t decoration = tree.decoration_for_plain_statement(statement); - enum process_type_t process_type = EXTERNAL; - - /* exec hack */ - if (decoration != parse_statement_decoration_command && cmd == L"exec") - { - /* Either 'builtin exec' or just plain 'exec', and definitely not 'command exec'. Note we don't allow overriding exec with a function. */ - process_type = INTERNAL_EXEC; - } - else if (decoration == parse_statement_decoration_command) - { - /* Always a command */ - process_type = EXTERNAL; - } - else if (decoration == parse_statement_decoration_builtin) - { - /* What happens if this builtin is not valid? 
*/ - process_type = INTERNAL_BUILTIN; - } - else if (function_exists(cmd)) - { - process_type = INTERNAL_FUNCTION; - } - else if (builtin_exists(cmd)) - { - process_type = INTERNAL_BUILTIN; - } - else - { - process_type = EXTERNAL; - } + /* Determine the process type */ + enum process_type_t process_type = process_type_for_command(statement, cmd); wcstring actual_cmd; if (process_type == EXTERNAL) @@ -628,7 +674,7 @@ process_t *parse_execution_context_t::create_plain_process(job_t *job, const par const int no_cmd_err_code = errno; /* If the specified command does not exist, and is undecorated, try using an implicit cd. */ - if (! has_command && decoration == parse_statement_decoration_none) + if (! has_command && tree.decoration_for_plain_statement(statement) == parse_statement_decoration_none) { /* Implicit cd requires an empty argument and redirection list */ const parse_node_t *args = get_child(statement, 1, symbol_arguments_or_redirections_list); @@ -644,11 +690,9 @@ process_t *parse_execution_context_t::create_plain_process(job_t *job, const par { /* No command */ this->handle_command_not_found(cmd, statement, no_cmd_err_code); - errored = true; + return parse_execution_errored; } } - if (errored) - return NULL; /* The argument list and set of IO redirections that we will construct for the process */ wcstring_list_t argument_list; @@ -665,7 +709,7 @@ process_t *parse_execution_context_t::create_plain_process(job_t *job, const par } else { - /* Form the list of arguments. The command is the first argument. TODO: count hack */ + /* Form the list of arguments. The command is the first argument. TODO: count hack, where we treat 'count --help' as different from 'count $foo' that expands to 'count --help'. fish 1.x never successfully did this, but it tried to! 
*/ const parse_node_t *unmatched_wildcard = NULL; argument_list = this->determine_arguments(statement, &unmatched_wildcard); argument_list.insert(argument_list.begin(), cmd); @@ -674,30 +718,27 @@ process_t *parse_execution_context_t::create_plain_process(job_t *job, const par if (unmatched_wildcard != NULL) { job_set_flag(job, JOB_WILDCARD_ERROR, 1); - errored = append_unmatched_wildcard_error(*unmatched_wildcard); + append_unmatched_wildcard_error(*unmatched_wildcard); + return parse_execution_errored; } - if (errored) - return NULL; - /* The set of IO redirections that we construct for the process */ - errored = ! this->determine_io_chain(statement, &process_io_chain); - if (errored) - return NULL; + if (! this->determine_io_chain(statement, &process_io_chain)) + { + return parse_execution_errored; + } + + /* Determine the process type */ + process_type = process_type_for_command(statement, cmd); } - /* Return the process, or NULL on error */ - process_t *result = NULL; - if (! errored) - { - result = new process_t(); - result->type = process_type; - result->set_argv(argument_list); - result->set_io_chain(process_io_chain); - result->actual_cmd = actual_cmd; - } - return result; + /* Populate the process */ + proc->type = process_type; + proc->set_argv(argument_list); + proc->set_io_chain(process_io_chain); + proc->actual_cmd = actual_cmd; + return parse_execution_success; } /* Determine the list of arguments, expanding stuff. If we have a wildcard and none could be expanded, return the unexpandable wildcard node by reference. */ @@ -867,7 +908,7 @@ bool parse_execution_context_t::determine_io_chain(const parse_node_t &statement return ! 
errored; } -process_t *parse_execution_context_t::create_boolean_process(job_t *job, const parse_node_t &bool_statement) +parse_execution_result_t parse_execution_context_t::populate_boolean_process(job_t *job, process_t *proc, const parse_node_t &bool_statement) { // Handle a boolean statement bool skip_job = false; @@ -898,16 +939,18 @@ process_t *parse_execution_context_t::create_boolean_process(job_t *job, const p } } - process_t *result = NULL; - if (! skip_job) + if (skip_job) + { + return parse_execution_skipped; + } + else { const parse_node_t &subject = *tree.get_child(bool_statement, 1, symbol_statement); - result = this->create_job_process(job, subject); + return this->populate_job_process(job, proc, subject); } - return result; } -process_t *parse_execution_context_t::create_block_process(job_t *job, const parse_node_t &statement_node) +parse_execution_result_t parse_execution_context_t::populate_block_process(job_t *job, process_t *proc, const parse_node_t &statement_node) { /* We handle block statements by creating INTERNAL_BLOCK_NODE, that will bounce back to us when it's time to execute them */ assert(statement_node.type == symbol_block_statement || statement_node.type == symbol_if_statement || statement_node.type == symbol_switch_statement); @@ -916,18 +959,17 @@ process_t *parse_execution_context_t::create_block_process(job_t *job, const par io_chain_t process_io_chain; bool errored = ! this->determine_io_chain(statement_node, &process_io_chain); if (errored) - return NULL; + return parse_execution_errored; - process_t *result = new process_t(); - result->type = INTERNAL_BLOCK_NODE; - result->internal_block_node = this->get_offset(statement_node); - result->set_io_chain(process_io_chain); - return result; + proc->type = INTERNAL_BLOCK_NODE; + proc->internal_block_node = this->get_offset(statement_node); + proc->set_io_chain(process_io_chain); + return parse_execution_success; } /* Returns a process_t allocated with new. 
It's the caller's responsibility to delete it (!) */ -process_t *parse_execution_context_t::create_job_process(job_t *job, const parse_node_t &statement_node) +parse_execution_result_t parse_execution_context_t::populate_job_process(job_t *job, process_t *proc, const parse_node_t &statement_node) { assert(statement_node.type == symbol_statement); assert(statement_node.child_count == 1); @@ -935,13 +977,13 @@ process_t *parse_execution_context_t::create_job_process(job_t *job, const parse // Get the "specific statement" which is boolean / block / if / switch / decorated const parse_node_t &specific_statement = *get_child(statement_node, 0); - process_t *result = NULL; + parse_execution_result_t result = parse_execution_success; switch (specific_statement.type) { case symbol_boolean_statement: { - result = this->create_boolean_process(job, specific_statement); + result = this->populate_boolean_process(job, proc, specific_statement); break; } @@ -949,7 +991,7 @@ process_t *parse_execution_context_t::create_job_process(job_t *job, const parse case symbol_if_statement: case symbol_switch_statement: { - result = this->create_block_process(job, specific_statement); + result = this->populate_block_process(job, proc, specific_statement); break; } @@ -957,7 +999,7 @@ process_t *parse_execution_context_t::create_job_process(job_t *job, const parse { /* Get the plain statement. 
It will pull out the decoration itself */ const parse_node_t &plain_statement = tree.find_child(specific_statement, symbol_plain_statement); - result = this->create_plain_process(job, plain_statement); + result = this->populate_plain_process(job, proc, plain_statement); break; } @@ -971,13 +1013,10 @@ process_t *parse_execution_context_t::create_job_process(job_t *job, const parse } -bool parse_execution_context_t::populate_job_from_job_node(job_t *j, const parse_node_t &job_node) +parse_execution_result_t parse_execution_context_t::populate_job_from_job_node(job_t *j, const parse_node_t &job_node, const block_t *associated_block) { assert(job_node.type == symbol_job); - /* Track whether we had an error */ - bool process_errored = false; - /* Tell the job what its command is */ j->set_command(get_source(job_node)); @@ -985,42 +1024,65 @@ bool parse_execution_context_t::populate_job_from_job_node(job_t *j, const parse const parse_node_t *statement_node = get_child(job_node, 0, symbol_statement); assert(statement_node != NULL); - /* Create the process (may fail!) */ - j->first_process = this->create_job_process(j, *statement_node); - if (j->first_process == NULL) - process_errored = true; + parse_execution_result_t result = parse_execution_success; + + /* Create processes. Each one may fail. */ + std::vector processes; + processes.push_back(new process_t()); + result = this->populate_job_process(j, processes.back(), *statement_node); /* Construct process_ts for job continuations (pipelines), by walking the list until we hit the terminal (empty) job continuation */ const parse_node_t *job_cont = get_child(job_node, 1, symbol_job_continuation); - process_t *last_process = j->first_process; - while (! 
process_errored && job_cont != NULL && job_cont->child_count > 0) + assert(job_cont != NULL); + while (result == parse_execution_success && job_cont->child_count > 0) { assert(job_cont->type == symbol_job_continuation); - /* Handle the pipe */ + /* Handle the pipe, whose fd may not be the obvious stdoud */ const parse_node_t &pipe_node = *get_child(*job_cont, 0, parse_token_type_pipe); - last_process->pipe_write_fd = fd_redirected_by_pipe(get_source(pipe_node)); + processes.back()->pipe_write_fd = fd_redirected_by_pipe(get_source(pipe_node)); /* Get the statement node and make a process from it */ const parse_node_t *statement_node = get_child(*job_cont, 1, symbol_statement); assert(statement_node != NULL); /* Store the new process (and maybe with an error) */ - last_process->next = this->create_job_process(j, *statement_node); - if (last_process->next == NULL) - process_errored = true; - - /* Link the process and get the next continuation */ - last_process = last_process->next; + processes.push_back(new process_t()); + result = this->populate_job_process(j, processes.back(), *statement_node); + + /* Get the next continuation */ job_cont = get_child(*job_cont, 2, symbol_job_continuation); + assert(job_cont != NULL); } - /* Return success */ - return ! process_errored; + /* Return what happened */ + if (result == parse_execution_success) + { + /* Link up the processes */ + assert(! 
processes.empty()); + j->first_process = processes.at(0); + for (size_t i=1 ; i < processes.size(); i++) + { + processes.at(i-1)->next = processes.at(i); + } + } + else + { + /* Clean up processes */ + for (size_t i=0; i < processes.size(); i++) + { + const process_t *proc = processes.at(i); + processes.at(i) = NULL; + delete proc; + } + } + return result; } -int parse_execution_context_t::run_1_job(const parse_node_t &job_node, const block_t *associated_block) +parse_execution_result_t parse_execution_context_t::run_1_job(const parse_node_t &job_node, const block_t *associated_block) { + parse_execution_result_t result = parse_execution_success; + bool log_it = false; if (log_it) { @@ -1030,7 +1092,7 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node, const blo if (should_cancel_execution(associated_block)) { - return 1; + return parse_execution_cancelled; } // Get terminal modes @@ -1041,7 +1103,7 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node, const blo { // need real error handling here wperror(L"tcgetattr"); - return EXIT_FAILURE; + return parse_execution_errored; } } @@ -1063,23 +1125,27 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node, const blo } job_t *j = new job_t(acquire_job_id(), block_io); - job_set_flag(j, JOB_FOREGROUND, 1); - job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL)); - job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL) \ - && (!is_subshell && !is_event)); - job_set_flag(j, JOB_SKIP_NOTIFICATION, is_subshell \ - || is_block \ - || is_event \ - || (!get_is_interactive())); + j->tmodes = tmodes; job_set_flag(j, JOB_CONTROL, (job_control_mode==JOB_CONTROL_ALL) || ((job_control_mode == JOB_CONTROL_INTERACTIVE) && (get_is_interactive()))); - /* Populate the job. This may fail for reasons like command_not_found */ - bool process_errored = ! 
this->populate_job_from_job_node(j, job_node); + job_set_flag(j, JOB_FOREGROUND, 1); - /* Clean up the job on failure */ - if (process_errored) + job_set_flag(j, JOB_TERMINAL, job_get_flag(j, JOB_CONTROL) \ + && (!is_subshell && !is_event)); + + job_set_flag(j, JOB_SKIP_NOTIFICATION, is_subshell \ + || is_block \ + || is_event \ + || (!get_is_interactive())); + + /* Populate the job. This may fail for reasons like command_not_found. If this fails, an error will have been printed */ + parse_execution_result_t pop_result = this->populate_job_from_job_node(j, job_node, associated_block); + + /* Clean up the job on failure or cancellation */ + bool populated_job = (pop_result == parse_execution_success); + if (! populated_job) { delete j; j = NULL; @@ -1093,7 +1159,7 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node, const blo profile_item->skipped=parser->current_block()->skip; } - if (! process_errored) + if (populated_job) { /* Success. Give the job to the parser - it will clean it up. */ parser->job_add(j); @@ -1119,41 +1185,34 @@ int parse_execution_context_t::run_1_job(const parse_node_t &job_node, const blo set_proc_had_barrier(false); } } + + /* If the job was skipped, we pretend it ran anyways */ + if (result == parse_execution_skipped) + { + result = parse_execution_success; + } - /* Need support for skipped_exec here */ if (do_profile) { exec_time = get_time(); profile_item->level=eval_level; profile_item->parse = (int)(parse_time-start_time); profile_item->exec=(int)(exec_time-parse_time); - profile_item->skipped = process_errored; + profile_item->skipped = ! populated_job; } - - /* Set the last status to 1 if the job could not be executed. TODO: Don't stomp STATUS_UNKNOWN_COMMAND / STATUS_NOT_EXECUTABLE */ - if (process_errored) - proc_set_last_status(1); - const int ret = proc_get_last_status(); - /* Clean up jobs. Do this after we've determined the return value, since this may trigger event handlers */ + /* Clean up jobs. 
*/ job_reap(0); - - /* Output any errors (hack) */ - if (! this->errors.empty()) - { - fprintf(stderr, "%ls\n", parse_errors_description(this->errors, this->src).c_str()); - this->errors.clear(); - } - + /* All done */ - return ret; + return result; } -int parse_execution_context_t::run_job_list(const parse_node_t &job_list_node, const block_t *associated_block) +parse_execution_result_t parse_execution_context_t::run_job_list(const parse_node_t &job_list_node, const block_t *associated_block) { assert(job_list_node.type == symbol_job_list); - int result = 1; + parse_execution_result_t result = parse_execution_success; const parse_node_t *job_list = &job_list_node; while (job_list != NULL && ! should_cancel_execution(associated_block)) { @@ -1193,7 +1252,7 @@ int parse_execution_context_t::run_job_list(const parse_node_t &job_list_node, c return result; } -int parse_execution_context_t::eval_node_at_offset(node_offset_t offset, const block_t *associated_block, const io_chain_t &io) +parse_execution_result_t parse_execution_context_t::eval_node_at_offset(node_offset_t offset, const block_t *associated_block, const io_chain_t &io) { bool log_it = false; @@ -1217,7 +1276,7 @@ int parse_execution_context_t::eval_node_at_offset(node_offset_t offset, const b node.type == symbol_if_statement || node.type == symbol_switch_statement); - int status = 1; + enum parse_execution_result_t status = parse_execution_success; switch (node.type) { case symbol_job_list: @@ -1244,11 +1303,6 @@ int parse_execution_context_t::eval_node_at_offset(node_offset_t offset, const b PARSER_DIE(); break; } - - proc_set_last_status(status); - - /* Argh */ - int ret = errors.empty() ? 
0 : 1; - errors.clear(); - return ret; + + return status; } diff --git a/parse_execution.h b/parse_execution.h index f68cad5fd..d22da442d 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -15,6 +15,21 @@ class job_t; struct profile_item_t; struct block_t; +enum parse_execution_result_t +{ + /* The job was successfully executed (though it have failed on its own). */ + parse_execution_success, + + /* The job did not execute due to some error (e.g. failed to wildcard expand). An error will have been printed and proc_last_status will have been set. */ + parse_execution_errored, + + /* The job was cancelled (e.g. Ctrl-C) */ + parse_execution_cancelled, + + /* The job was skipped (e.g. due to a not-taken 'and' command). This is a special return allowed only from the populate functions, not the run functions. */ + parse_execution_skipped +}; + class parse_execution_context_t { private: @@ -22,7 +37,7 @@ class parse_execution_context_t const wcstring src; io_chain_t block_io; parser_t * const parser; - parse_error_list_t errors; + //parse_error_list_t errors; int eval_level; std::vector profile_items; @@ -45,9 +60,9 @@ class parse_execution_context_t execution_cancellation_reason_t cancellation_reason(const block_t *block) const; /* Report an error. Always returns true. 
*/ - bool append_error(const parse_node_t &node, const wchar_t *fmt, ...); + parse_execution_result_t append_error(const parse_node_t &node, const wchar_t *fmt, ...); /* Wildcard error helper */ - bool append_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard); + parse_execution_result_t append_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard); void handle_command_not_found(const wcstring &cmd, const parse_node_t &statement_node, int err_code); @@ -56,35 +71,37 @@ class parse_execution_context_t const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; node_offset_t get_offset(const parse_node_t &node) const; - /* These create process_t structures from statements */ - process_t *create_job_process(job_t *job, const parse_node_t &statement_node); - process_t *create_boolean_process(job_t *job, const parse_node_t &bool_statement); - process_t *create_plain_process(job_t *job, const parse_node_t &statement); - process_t *create_block_process(job_t *job, const parse_node_t &statement_node); + enum process_type_t process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const; - /* These encapsulate the actual logic of various (block) statements. They just do what the statement says. 
*/ - int run_block_statement(const parse_node_t &statement); - int run_for_statement(const parse_node_t &header, const parse_node_t &contents); - int run_if_statement(const parse_node_t &statement); - int run_switch_statement(const parse_node_t &statement); - int run_while_statement(const parse_node_t &header, const parse_node_t &contents); - int run_function_statement(const parse_node_t &header, const parse_node_t &contents); - int run_begin_statement(const parse_node_t &header, const parse_node_t &contents); + /* These create process_t structures from statements */ + parse_execution_result_t populate_job_process(job_t *job, process_t *proc, const parse_node_t &statement_node); + parse_execution_result_t populate_boolean_process(job_t *job, process_t *proc, const parse_node_t &bool_statement); + parse_execution_result_t populate_plain_process(job_t *job, process_t *proc, const parse_node_t &statement); + parse_execution_result_t populate_block_process(job_t *job, process_t *proc, const parse_node_t &statement_node); + + /* These encapsulate the actual logic of various (block) statements. */ + parse_execution_result_t run_block_statement(const parse_node_t &statement); + parse_execution_result_t run_for_statement(const parse_node_t &header, const parse_node_t &contents); + parse_execution_result_t run_if_statement(const parse_node_t &statement); + parse_execution_result_t run_switch_statement(const parse_node_t &statement); + parse_execution_result_t run_while_statement(const parse_node_t &header, const parse_node_t &contents); + parse_execution_result_t run_function_statement(const parse_node_t &header, const parse_node_t &contents); + parse_execution_result_t run_begin_statement(const parse_node_t &header, const parse_node_t &contents); wcstring_list_t determine_arguments(const parse_node_t &parent, const parse_node_t **out_unmatched_wildcard_node); /* Determines the IO chain. 
Returns true on success, false on error */ bool determine_io_chain(const parse_node_t &statement, io_chain_t *out_chain); - int run_1_job(const parse_node_t &job_node, const block_t *associated_block); - int run_job_list(const parse_node_t &job_list_node, const block_t *associated_block); - bool populate_job_from_job_node(job_t *j, const parse_node_t &job_node); + parse_execution_result_t run_1_job(const parse_node_t &job_node, const block_t *associated_block); + parse_execution_result_t run_job_list(const parse_node_t &job_list_node, const block_t *associated_block); + parse_execution_result_t populate_job_from_job_node(job_t *j, const parse_node_t &job_node, const block_t *associated_block); public: parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, parser_t *p); /* Start executing at the given node offset. Returns 0 if there was no error, 1 if there was an error */ - int eval_node_at_offset(node_offset_t offset, const block_t *associated_block, const io_chain_t &io); + parse_execution_result_t eval_node_at_offset(node_offset_t offset, const block_t *associated_block, const io_chain_t &io); }; diff --git a/parser.cpp b/parser.cpp index 6b7d19091..3cd1b98c3 100644 --- a/parser.cpp +++ b/parser.cpp @@ -469,20 +469,6 @@ block_t *parser_t::current_block() return block_stack.empty() ? 
NULL : block_stack.back(); } - -/** - Returns 1 if the specified command is a builtin that may not be used in a pipeline -*/ -static int parser_is_pipe_forbidden(const wcstring &word) -{ - return contains(word, - L"exec", - L"case", - L"break", - L"return", - L"continue"); -} - /** Search the text for the end of the current block */ @@ -2690,7 +2676,6 @@ int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_typ if (parser_use_ast()) return this->eval_new_parser(cmd_str, io, block_type); - const wchar_t * const cmd = cmd_str.c_str(); size_t forbid_count; int code; diff --git a/proc.cpp b/proc.cpp index f6a20eb2d..65e906ac8 100644 --- a/proc.cpp +++ b/proc.cpp @@ -638,6 +638,9 @@ int job_reap(bool interactive) static int locked = 0; locked++; + + /* Preserve the exit status */ + const int saved_status = proc_get_last_status(); /* job_read may fire an event handler, we do not want to call @@ -753,6 +756,9 @@ int job_reap(bool interactive) if (found) fflush(stdout); + /* Restore the exit status. */ + proc_set_last_status(saved_status); + locked = 0; return found; From 4402f73bf7a8db577cb6a61273a93b3c53969003 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 31 Dec 2013 16:37:15 -0800 Subject: [PATCH 081/108] Improve error reporting for tokens ending with newlines --- parse_tree.cpp | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index ad825a115..b624cf375 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -20,27 +20,31 @@ wcstring parse_error_t::describe(const wcstring &src, bool skip_caret) const // Locate the beginning of this line of source size_t line_start = 0; - // Look for a newline prior to source_start. 
If we don't find one, start at the beginning of the string; otherwise start one past the newline - size_t newline = src.find_last_of(L'\n', source_start); - //fprintf(stderr, "newline: %lu, source_start %lu, source_length %lu\n", newline, source_start, source_length); - if (newline != wcstring::npos) + // Look for a newline prior to source_start. If we don't find one, start at the beginning of the string; otherwise start one past the newline. Note that source_start may itself point at a newline; we want to find the newline before it. + if (source_start > 0) { - line_start = newline + 1; + size_t newline = src.find_last_of(L'\n', source_start - 1); + if (newline != wcstring::npos) + { + line_start = newline + 1; + } } - size_t line_end = src.find(L'\n', source_start + source_length); + // Look for the newline after the source range. If the source range itself includes a newline, that's the one we want, so start just before the end of the range + size_t last_char_in_range = (source_length == 0 ? source_start : source_start + source_length - 1); + size_t line_end = src.find(L'\n', last_char_in_range); if (line_end == wcstring::npos) { line_end = src.size(); } + assert(line_end >= line_start); - //fprintf(stderr, "source start: %lu, source_length %lu, line start %lu, line end %lu\n", source_start, source_length, line_start, line_end); assert(source_start >= line_start); - // Append the line of text + // Append the line of text. result.push_back(L'\n'); result.append(src, line_start, line_end - line_start); - + // Append the caret line. The input source may include tabs; for that reason we construct a "caret line" that has tabs in corresponding positions wcstring caret_space_line; caret_space_line.reserve(source_start - line_start); @@ -51,6 +55,11 @@ wcstring parse_error_t::describe(const wcstring &src, bool skip_caret) const { caret_space_line.push_back(L'\t'); } + else if (wc == L'\n') + { + /* It's possible that the source_start points at a newline itself. 
In that case, pretend it's a space. We only expect this to be at the end of the string. */ + caret_space_line.push_back(L' '); + } else { int width = fish_wcwidth(wc); From e2da73bf4f540b6dc98c7abda94597e658c9c40a Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 31 Dec 2013 16:38:15 -0800 Subject: [PATCH 082/108] Fix for issue where 'while |' would be incorrectly interpreted as a "naked statement" causing a wonky error message --- parse_productions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse_productions.cpp b/parse_productions.cpp index 24962e534..53a90a56a 100644 --- a/parse_productions.cpp +++ b/parse_productions.cpp @@ -128,7 +128,7 @@ RESOLVE(statement) // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g. a "naked if". bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_begin && token1.keyword != parse_keyword_end); - if (naked_invocation_invokes_help && token2.type != parse_token_type_string) + if (naked_invocation_invokes_help && (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) { return 4; } From 27cba56761de987c778937521987759475e27051 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 1 Jan 2014 00:04:02 -0800 Subject: [PATCH 083/108] Further improvements to error reporting with new parser --- parse_execution.cpp | 70 ++++++++++++++++----------- parse_execution.h | 4 +- parse_tree.cpp | 113 +++++++++++++++++++++++++++++++++++--------- parse_tree.h | 1 + 4 files changed, 136 insertions(+), 52 deletions(-) diff --git a/parse_execution.cpp b/parse_execution.cpp index 38b543c46..338a513da 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -219,7 +219,7 @@ parse_execution_result_t parse_execution_context_t::run_function_statement(const if (unmatched_wildcard != NULL) { - append_unmatched_wildcard_error(*unmatched_wildcard); + report_unmatched_wildcard_error(*unmatched_wildcard); result = parse_execution_errored; } 
@@ -232,7 +232,7 @@ parse_execution_result_t parse_execution_context_t::run_function_statement(const if (! error_str.empty()) { - this->append_error(header, L"%ls", error_str.c_str()); + this->report_error(header, L"%ls", error_str.c_str()); result = parse_execution_errored; } } @@ -285,8 +285,13 @@ parse_execution_result_t parse_execution_context_t::run_for_statement(const pars const parse_node_t &var_name_node = *get_child(header, 1, parse_token_type_string); const wcstring for_var_name = get_source(var_name_node); - /* Get the contents to iterate over. Here we could do something with unmatched_wildcard. However it seems nicer to not make for loops complain about this, i.e. just iterate over a potentially empty list */ - wcstring_list_t argument_list = this->determine_arguments(header, NULL); + /* Get the contents to iterate over. */ + const parse_node_t *unmatched_wildcard = NULL; + wcstring_list_t argument_list = this->determine_arguments(header, &unmatched_wildcard); + if (unmatched_wildcard != NULL) + { + return report_unmatched_wildcard_error(*unmatched_wildcard); + } parse_execution_result_t ret = parse_execution_success; @@ -303,6 +308,7 @@ parse_execution_result_t parse_execution_context_t::run_for_statement(const pars if (should_cancel_execution(fb)) { ret = parse_execution_cancelled; + break; } const wcstring &for_variable = fb->variable; @@ -353,7 +359,7 @@ parse_execution_result_t parse_execution_context_t::run_switch_statement(const p { case EXPAND_ERROR: { - result = append_error(switch_value_node, + result = report_error(switch_value_node, _(L"Could not expand string '%ls'"), switch_value.c_str()); break; @@ -362,7 +368,7 @@ parse_execution_result_t parse_execution_context_t::run_switch_statement(const p case EXPAND_WILDCARD_NO_MATCH: { /* Store the node that failed to expand */ - append_error(switch_value_node, WILDCARD_ERR_MSG, switch_value.c_str()); + report_error(switch_value_node, WILDCARD_ERR_MSG, switch_value.c_str()); ret = 
parse_execution_errored; break; } @@ -376,7 +382,7 @@ parse_execution_result_t parse_execution_context_t::run_switch_statement(const p if (result == parse_execution_success && switch_values_expanded.size() != 1) { - result = append_error(switch_value_node, + result = report_error(switch_value_node, _(L"switch: Expected exactly one argument, got %lu\n"), switch_values_expanded.size()); } @@ -509,8 +515,8 @@ parse_execution_result_t parse_execution_context_t::run_while_statement(const pa return ret; } -/* Appends an error to the error list. Always returns parse_execution_errored, so you can assign the result to an 'errored' variable */ -parse_execution_result_t parse_execution_context_t::append_error(const parse_node_t &node, const wchar_t *fmt, ...) +/* Reports an error. Always returns parse_execution_errored, so you can assign the result to an 'errored' variable */ +parse_execution_result_t parse_execution_context_t::report_error(const parse_node_t &node, const wchar_t *fmt, ...) { parse_error_t error; error.source_start = node.source_start; @@ -522,19 +528,29 @@ parse_execution_result_t parse_execution_context_t::append_error(const parse_nod error.text = vformat_string(fmt, va); va_end(va); - //this->errors.push_back(error); - /* Output the error */ - fprintf(stderr, "%ls\n", error.describe(this->src).c_str()); + const wcstring desc = error.describe(this->src); + if (! desc.empty()) + { + fprintf(stderr, "%ls\n", desc.c_str()); + } return parse_execution_errored; } -/* Appends an unmatched wildcard error to the error list, and returns true. 
*/ -parse_execution_result_t parse_execution_context_t::append_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard) +/* Reoports an unmatched wildcard error and returns parse_execution_errored */ +parse_execution_result_t parse_execution_context_t::report_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard) { proc_set_last_status(STATUS_UNMATCHED_WILDCARD); - return append_error(unmatched_wildcard, WILDCARD_ERR_MSG, get_source(unmatched_wildcard).c_str()); + /* For reasons I cannot explain, unmatched wildcards are only reported in interactive use. */ + if (get_is_interactive()) + { + return report_error(unmatched_wildcard, WILDCARD_ERR_MSG, get_source(unmatched_wildcard).c_str()); + } + else + { + return parse_execution_errored; + } } /* Handle the case of command not found */ @@ -565,7 +581,7 @@ void parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str ellipsis_str = L"..."; /* Looks like a command */ - this->append_error(statement_node, + this->report_error(statement_node, _(L"Unknown command '%ls'. Did you mean to run %ls with a modified environment? Try 'env %ls=%ls %ls%ls'. See the help section on the set command by typing 'help set'."), cmd, argument.c_str(), @@ -576,7 +592,7 @@ void parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str } else { - this->append_error(statement_node, + this->report_error(statement_node, COMMAND_ASSIGN_ERR_MSG, cmd, name_str.c_str(), @@ -590,7 +606,7 @@ void parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str const wchar_t *val = val_wstr.missing() ? NULL : val_wstr.c_str(); if (val) { - this->append_error(statement_node, + this->report_error(statement_node, _(L"Variables may not be used as commands. Instead, define a function like 'function %ls; %ls $argv; end' or use the eval builtin instead, like 'eval %ls'. 
See the help section for the function command by typing 'help function'."), cmd+1, val, @@ -599,7 +615,7 @@ void parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str } else { - this->append_error(statement_node, + this->report_error(statement_node, _(L"Variables may not be used as commands. Instead, define a function or use the eval builtin instead, like 'eval %ls'. See the help section for the function command by typing 'help function'."), cmd, cmd); @@ -607,14 +623,14 @@ void parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str } else if (wcschr(cmd, L'$')) { - this->append_error(statement_node, + this->report_error(statement_node, _(L"Commands may not contain variables. Use the eval builtin instead, like 'eval %ls'. See the help section for the eval command by typing 'help eval'."), cmd, cmd); } else if (err_code!=ENOENT) { - this->append_error(statement_node, + this->report_error(statement_node, _(L"The file '%ls' is not executable by this user"), cmd?cmd:L"UNKNOWN"); } @@ -631,7 +647,7 @@ void parse_execution_context_t::handle_command_not_found(const wcstring &cmd_str event_fire_generic(L"fish_command_not_found", &event_args); /* Here we want to report an error (so it shows a backtrace), but with no text */ - this->append_error(statement_node, L""); + this->report_error(statement_node, L""); } /* Set the last proc status appropriately */ @@ -657,7 +673,7 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); if (! 
expanded) { - append_error(statement, ILLEGAL_CMD_ERR_MSG, cmd.c_str()); + report_error(statement, ILLEGAL_CMD_ERR_MSG, cmd.c_str()); return parse_execution_errored; } @@ -718,7 +734,7 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t if (unmatched_wildcard != NULL) { job_set_flag(job, JOB_WILDCARD_ERROR, 1); - append_unmatched_wildcard_error(*unmatched_wildcard); + report_unmatched_wildcard_error(*unmatched_wildcard); return parse_execution_errored; } @@ -770,7 +786,7 @@ wcstring_list_t parse_execution_context_t::determine_arguments(const parse_node_ { case EXPAND_ERROR: { - this->append_error(arg_node, + this->report_error(arg_node, _(L"Could not expand string '%ls'"), arg_str.c_str()); break; @@ -838,7 +854,7 @@ bool parse_execution_context_t::determine_io_chain(const parse_node_t &statement if (! target_expanded || target.empty()) { /* Should improve this error message */ - errored = append_error(redirect_node, + errored = report_error(redirect_node, _(L"Invalid redirection target: %ls"), target.c_str()); } @@ -862,7 +878,7 @@ bool parse_execution_context_t::determine_io_chain(const parse_node_t &statement int old_fd = fish_wcstoi(target.c_str(), &end, 10); if (old_fd < 0 || errno || *end) { - errored = append_error(redirect_node, + errored = report_error(redirect_node, _(L"Requested redirection to something that is not a file descriptor %ls"), target.c_str()); } diff --git a/parse_execution.h b/parse_execution.h index d22da442d..feeb7404a 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -60,9 +60,9 @@ class parse_execution_context_t execution_cancellation_reason_t cancellation_reason(const block_t *block) const; /* Report an error. Always returns true. 
*/ - parse_execution_result_t append_error(const parse_node_t &node, const wchar_t *fmt, ...); + parse_execution_result_t report_error(const parse_node_t &node, const wchar_t *fmt, ...); /* Wildcard error helper */ - parse_execution_result_t append_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard); + parse_execution_result_t report_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard); void handle_command_not_found(const wcstring &cmd, const parse_node_t &statement_node, int err_code); diff --git a/parse_tree.cpp b/parse_tree.cpp index b624cf375..222af8af9 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1,6 +1,7 @@ #include "parse_productions.h" #include "tokenizer.h" #include "fallback.h" +#include "proc.h" #include #include @@ -41,37 +42,47 @@ wcstring parse_error_t::describe(const wcstring &src, bool skip_caret) const assert(line_end >= line_start); assert(source_start >= line_start); - // Append the line of text. - result.push_back(L'\n'); - result.append(src, line_start, line_end - line_start); + // Don't include the caret and line if we're interactive this is the first line, because then it's obvious + bool skip_caret = (get_is_interactive() && source_start == 0); - // Append the caret line. The input source may include tabs; for that reason we construct a "caret line" that has tabs in corresponding positions - wcstring caret_space_line; - caret_space_line.reserve(source_start - line_start); - for (size_t i=line_start; i < source_start; i++) + if (! skip_caret) { - wchar_t wc = src.at(i); - if (wc == L'\t') + // Append the line of text. + if (! result.empty()) { - caret_space_line.push_back(L'\t'); + result.push_back(L'\n'); } - else if (wc == L'\n') + result.append(src, line_start, line_end - line_start); + + + // Append the caret line. 
The input source may include tabs; for that reason we construct a "caret line" that has tabs in corresponding positions + wcstring caret_space_line; + caret_space_line.reserve(source_start - line_start); + for (size_t i=line_start; i < source_start; i++) { - /* It's possible that the source_start points at a newline itself. In that case, pretend it's a space. We only expect this to be at the end of the string. */ - caret_space_line.push_back(L' '); - } - else - { - int width = fish_wcwidth(wc); - if (width > 0) + wchar_t wc = src.at(i); + if (wc == L'\t') { - caret_space_line.append(static_cast(width), L' '); + caret_space_line.push_back(L'\t'); + } + else if (wc == L'\n') + { + /* It's possible that the source_start points at a newline itself. In that case, pretend it's a space. We only expect this to be at the end of the string. */ + caret_space_line.push_back(L' '); + } + else + { + int width = fish_wcwidth(wc); + if (width > 0) + { + caret_space_line.append(static_cast(width), L' '); + } } } + result.push_back(L'\n'); + result.append(caret_space_line); + result.push_back(L'^'); } - result.push_back(L'\n'); - result.append(caret_space_line); - result.push_back(L'^'); } return result; } @@ -249,6 +260,36 @@ wcstring parse_token_t::describe() const return result; } +/** A string description appropriate for presentation to the user */ +wcstring parse_token_t::user_presentable_description() const +{ + if (keyword != parse_keyword_none) + { + return format_string(L"keyword %ls", keyword_description(keyword).c_str()); + } + + switch (type) + { + /* Hackish. 
We only support the */ + case parse_token_type_string: + return L"a string"; + + case parse_token_type_pipe: + return L"a pipe"; + + case parse_token_type_redirection: + return L"a redirection"; + + case parse_token_type_background: + return L"a '&'"; + + case parse_token_type_end: + return L"statement terminator"; + + default: + return format_string(L"a %ls", this->describe().c_str()); + } +} /* Convert from tokenizer_t's token type to a parse_token_t type */ static inline parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type) @@ -382,6 +423,25 @@ struct parse_stack_element_t } return result; } + + /* Returns a name that we can show to the user, e.g. "a command" */ + wcstring user_presentable_description(void) const + { + if (keyword != parse_keyword_none) + { + return format_string(L"keyword %ls", keyword_description(keyword).c_str()); + } + + switch (type) + { + /* Hackish, the only one we support now */ + case symbol_statement: + return L"a command"; + + default: + return format_string(L"a %ls", this->describe().c_str()); + } + } }; /* The parser itself, private implementation of class parse_t. This is a hand-coded table-driven LL parser. Most hand-coded LL parsers are recursive descent, but recursive descent parsers are difficult to "pause", unlike table-driven parsers. 
*/ @@ -407,6 +467,7 @@ class parse_ll_t void parse_error(const wchar_t *expected, parse_token_t token); void parse_error(parse_token_t token, parse_error_code_t code, const wchar_t *format, ...); + void parse_error_failed_production(struct parse_stack_element_t &elem, parse_token_t token); void parse_error_unbalancing_token(parse_token_t token); void append_error_callout(wcstring &error_message, parse_token_t token); @@ -671,6 +732,12 @@ void parse_ll_t::parse_error_unbalancing_token(parse_token_t token) } } +// This is a 'generic' parse error when we can't match the top of the stack element +void parse_ll_t::parse_error_failed_production(struct parse_stack_element_t &stack_elem, parse_token_t token) +{ + this->parse_error(token, parse_error_generic, L"Expected %ls, but instead found %ls", stack_elem.user_presentable_description().c_str(), token.user_presentable_description().c_str()); +} + void parse_ll_t::report_tokenizer_error(parse_token_t token, const wchar_t *tok_error) { assert(tok_error != NULL); @@ -861,7 +928,7 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) { if (should_generate_error_messages) { - this->parse_error(token1, parse_error_generic, L"Unable to produce a '%ls' from input '%ls'", stack_elem.describe().c_str(), token1.describe().c_str()); + parse_error_failed_production(stack_elem, token1); } else { diff --git a/parse_tree.h b/parse_tree.h index aa7a0d984..d8874b119 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -51,6 +51,7 @@ struct parse_token_t size_t source_length; wcstring describe() const; + wcstring user_presentable_description() const; }; From fb882f0b695d88a3211cdae97f18d955427b0b18 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 1 Jan 2014 15:29:56 -0800 Subject: [PATCH 084/108] Support for stack overflow and infinite recursion detection in new parser --- builtin.cpp | 2 +- complete.cpp | 2 +- fish_tests.cpp | 8 ++++ parse_constants.h | 16 ++++--- parse_execution.cpp | 110 
++++++++++++++++++++++++++++++++++++++++---- parse_execution.h | 1 + parse_tree.cpp | 62 ++++++++++++++++++++++++- parse_tree.h | 10 +++- parser.cpp | 36 +++++---------- proc.cpp | 3 +- 10 files changed, 204 insertions(+), 46 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 215edc596..0bff83d38 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -690,7 +690,7 @@ static int builtin_bind(parser_t &parser, wchar_t **argv) default: { res = STATUS_BUILTIN_ERROR; - append_format(stderr_buffer, _(L"%ls: Expected zero or two parameters, got %d"), argv[0], argc-woptind); + append_format(stderr_buffer, _(L"%ls: Expected zero or two parameters, got %d\n"), argv[0], argc-woptind); break; } } diff --git a/complete.cpp b/complete.cpp index 6541ed502..c9db0f701 100644 --- a/complete.cpp +++ b/complete.cpp @@ -1230,7 +1230,7 @@ void completer_t::complete_from_args(const wcstring &str, std::vector possible_comp; bool is_autosuggest = (this->type() == COMPLETE_AUTOSUGGEST); - parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false); + parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false /* don't show errors */); /* If type is COMPLETE_AUTOSUGGEST, it means we're on a background thread, so don't call proc_push_interactive */ if (! is_autosuggest) diff --git a/fish_tests.cpp b/fish_tests.cpp index 83f8976d0..29f4e830b 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -659,6 +659,14 @@ static void test_parser() { err(L"Invalid block mode when evaluating undetected"); } + + /* These are disabled since they produce a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */ +#if 1 + /* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) 
*/ + say(L"Testing recursion detection"); + parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP); + parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP); +#endif } static void test_indents() diff --git a/parse_constants.h b/parse_constants.h index e923bc10e..12fcb114c 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -128,6 +128,8 @@ enum { typedef unsigned int parser_test_error_bits_t; +/** Maximum number of function calls. */ +#define FISH_MAX_STACK_DEPTH 128 /** Error message for tokenizer error. The tokenizer message is @@ -140,15 +142,15 @@ typedef unsigned int parser_test_error_bits_t; */ #define COND_ERR_MSG _( L"An additional command is required" ) -/** - Error message on a function that calls itself immediately -*/ +/** Error message on a function that calls itself immediately */ #define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.") -/** - Error message on reaching maximum recursion depth -*/ -#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?") +/** Error message on a function that calls itself immediately */ +#define INFINITE_FUNC_RECURSION_ERR_MSG _( L"The function '%ls' calls itself immediately, which would result in an infinite loop.") + + +/** Error message on reaching maximum call stack depth */ +#define CALL_STACK_LIMIT_EXCEEDED_ERR_MSG _( L"The function call stack limit has been exceeded. 
Do you have an accidental infinite loop?") /** Error message used when the end of a block can't be located diff --git a/parse_execution.cpp b/parse_execution.cpp index 338a513da..bc478c05d 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -48,6 +48,81 @@ node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) co return offset; } +const parse_node_t *parse_execution_context_t::infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const +{ + assert(job_list.type == symbol_job_list); + /* + This is a bit fragile. It is a test to see if we are + inside of function call, but not inside a block in that + function call. If, in the future, the rules for what + block scopes are pushed on function invocation changes, + then this check will break. + */ + const block_t *current = parser->block_at_index(0), *parent = parser->block_at_index(1); + bool is_within_function_call = (current && parent && current->type() == TOP && parent->type() == FUNCTION_CALL); + if (! is_within_function_call) + { + return NULL; + } + + /* Check to see which function call is forbidden */ + if (parser->forbidden_function.empty()) + { + return NULL; + } + const wcstring &forbidden_function_name = parser->forbidden_function.back(); + + /* Get the first job in the job list. */ + const parse_node_t *first_job = tree.next_job_in_job_list(job_list, NULL); + if (first_job == NULL) + { + return NULL; + } + + /* Here's the statement node we find that's infinite recursive */ + const parse_node_t *infinite_recursive_statement = NULL; + + /* Get the list of statements */ + const parse_node_tree_t::parse_node_list_t statements = tree.specific_statements_for_job(*first_job); + + /* Find all the decorated statements. We are interested in statements with no decoration (i.e. 
not command, not builtin) whose command expands to the forbidden function */ + for (size_t i=0; i < statements.size(); i++) + { + /* We only care about decorated statements, not while statements, etc. */ + const parse_node_t &statement = *statements.at(i); + if (statement.type != symbol_decorated_statement) + { + continue; + } + + const parse_node_t &plain_statement = tree.find_child(statement, symbol_plain_statement); + if (tree.decoration_for_plain_statement(plain_statement) != parse_statement_decoration_none) + { + /* This statement has a decoration like 'builtin' or 'command', and therefore is not infinite recursion. In particular this is what enables 'wrapper functions' */ + continue; + } + + /* Ok, this is an undecorated plain statement. Get and expand its command */ + wcstring cmd; + tree.command_for_plain_statement(plain_statement, src, &cmd); + expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); + + if (cmd == forbidden_function_name) + { + /* This is it */ + infinite_recursive_statement = &statement; + if (out_func_name != NULL) + { + *out_func_name = forbidden_function_name; + } + break; + } + } + + assert(infinite_recursive_statement == NULL || infinite_recursive_statement->type == symbol_decorated_statement); + return infinite_recursive_statement; +} + enum process_type_t parse_execution_context_t::process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const { assert(plain_statement.type == symbol_plain_statement); @@ -528,12 +603,10 @@ parse_execution_result_t parse_execution_context_t::report_error(const parse_nod error.text = vformat_string(fmt, va); va_end(va); - /* Output the error */ - const wcstring desc = error.describe(this->src); - if (! 
desc.empty()) - { - fprintf(stderr, "%ls\n", desc.c_str()); - } + /* Get a backtrace */ + wcstring backtrace; + const parse_error_list_t error_list = parse_error_list_t(1, error); + parser->get_backtrace(src, error_list, &backtrace); return parse_execution_errored; } @@ -669,7 +742,7 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t bool got_cmd = tree.command_for_plain_statement(statement, src, &cmd); assert(got_cmd); - /* Expand it as a command. Return NULL on failure. */ + /* Expand it as a command. Return an error on failure. */ bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES); if (! expanded) { @@ -680,6 +753,13 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t /* Determine the process type */ enum process_type_t process_type = process_type_for_command(statement, cmd); + /* Check for stack overflow */ + if (process_type == INTERNAL_FUNCTION && parser->forbidden_function.size() > FISH_MAX_STACK_DEPTH) + { + this->report_error(statement, CALL_STACK_LIMIT_EXCEEDED_ERR_MSG); + return parse_execution_errored; + } + wcstring actual_cmd; if (process_type == EXTERNAL) { @@ -1296,10 +1376,24 @@ parse_execution_result_t parse_execution_context_t::eval_node_at_offset(node_off switch (node.type) { case symbol_job_list: + { /* We should only get a job list if it's the very first node. 
This is because this is the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE execution (which does block statements, but never job lists) */ assert(offset == 0); - status = this->run_job_list(node, associated_block); + wcstring func_name; + const parse_node_t *infinite_recursive_node = this->infinite_recursive_statement_in_job_list(node, &func_name); + if (infinite_recursive_node != NULL) + { + /* We have an infinite recursion */ + this->report_error(*infinite_recursive_node, INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str()); + status = parse_execution_errored; + } + else + { + /* No infinite recursion */ + status = this->run_job_list(node, associated_block); + } break; + } case symbol_block_statement: status = this->run_block_statement(node); diff --git a/parse_execution.h b/parse_execution.h index feeb7404a..0d924b8ec 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -70,6 +70,7 @@ class parse_execution_context_t wcstring get_source(const parse_node_t &node) const; const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; node_offset_t get_offset(const parse_node_t &node) const; + const parse_node_t *infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const; enum process_type_t process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const; diff --git a/parse_tree.cpp b/parse_tree.cpp index 222af8af9..81973c61e 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1405,7 +1405,7 @@ enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redi return result; } -const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node) +const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node) const { const parse_node_t *result = NULL; if (node.type == symbol_block_statement) @@ -1418,3 +1418,63 
@@ const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const par } return result; } + +parse_node_tree_t::parse_node_list_t parse_node_tree_t::specific_statements_for_job(const parse_node_t &job) const +{ + assert(job.type == symbol_job); + parse_node_list_t result; + + /* Initial statement (non-specific) */ + result.push_back(get_child(job, 0, symbol_statement)); + + /* Our cursor variable. Walk over the list of continuations. */ + const parse_node_t *continuation = get_child(job, 1, symbol_job_continuation); + while (continuation != NULL && continuation->child_count > 0) + { + result.push_back(get_child(*continuation, 1, symbol_statement)); + continuation = get_child(*continuation, 2, symbol_job_continuation); + } + + /* Result now contains a list of statements. But we want a list of specific statements e.g. symbol_switch_statement. So replace them in-place in the vector. */ + for (size_t i=0; i < result.size(); i++) + { + const parse_node_t *statement = result.at(i); + assert(statement->type == symbol_statement); + result.at(i) = this->get_child(*statement, 0); + } + + return result; +} + +const parse_node_t *parse_node_tree_t::next_job_in_job_list(const parse_node_t &top_job_list, const parse_node_t **out_list_tail) const +{ + assert(top_job_list.type == symbol_job_list); + + /* Our cursor variable */ + const parse_node_t *job_list = &top_job_list; + + /* Skip over a run of empty jobs */ + assert(job_list->type == symbol_job_list); + while (job_list->production_idx == 2) + { + job_list = this->get_child(*job_list, 1, symbol_job_list); + } + + /* Should now be at production 0 or 1 */ + assert(job_list->type == symbol_job_list); + assert(job_list->production_idx == 0 || job_list->production_idx == 1); + + /* Pull out the job */ + const parse_node_t *job = NULL; + const parse_node_t *list_tail = NULL; + if (job_list->production_idx == 1) + { + job = this->get_child(*job_list, 0, symbol_job); + list_tail = this->get_child(*job_list, 1, 
symbol_job_list); + } + + /* Return them */ + if (out_list_tail != NULL) + *out_list_tail = list_tail; + return job; +} diff --git a/parse_tree.h b/parse_tree.h index d8874b119..acfaa7396 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -209,7 +209,13 @@ public: enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, wcstring *out_target) const; /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */ - const parse_node_t *header_node_for_block_statement(const parse_node_t &node); + const parse_node_t *header_node_for_block_statement(const parse_node_t &node) const; + + /* Given a job list, returns the next job (or NULL), and the tail of the job list. */ + const parse_node_t *next_job_in_job_list(const parse_node_t &job_list, const parse_node_t **list_tail) const; + + /* Given a job, return all of its statements. These are 'specific statements' (e.g. symbol_decorated_statement, not symbol_statement) */ + parse_node_list_t specific_statements_for_job(const parse_node_t &job) const; }; /* Fish grammar: @@ -252,7 +258,7 @@ public: while_header = WHILE job begin_header = BEGIN -# Functions take arguments, and require at least one (the name) +# Functions take arguments, and require at least one (the name). No redirections allowed. function_header = FUNCTION argument argument_list # A boolean statement is AND or OR or NOT diff --git a/parser.cpp b/parser.cpp index 3cd1b98c3..6f5008174 100644 --- a/parser.cpp +++ b/parser.cpp @@ -47,11 +47,6 @@ The fish parser. Contains functions for parsing and evaluating code. #include "parse_tree.h" #include "parse_execution.h" -/** - Maximum number of function calls, i.e. recursion depth. -*/ -#define MAX_RECURSION_DEPTH 128 - /** Error message for unknown builtin */ @@ -78,11 +73,6 @@ The fish parser. Contains functions for parsing and evaluating code. 
*/ #define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.") -/** - Error message on reaching maximum recursion depth -*/ -#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?") - /** Error message used when the end of a block can't be located */ @@ -584,12 +574,6 @@ void parser_t::error(int ec, size_t p, const wchar_t *str, ...) va_start(va, str); err_buff = vformat_string(str, va); va_end(va); - - if (parser_use_ast()) - { - fprintf(stderr, "parser error: %ls\n", err_buff.c_str()); - err_buff.clear(); - } } /** @@ -746,7 +730,6 @@ void parser_t::print_errors_stderr() void parser_t::eval_args(const wchar_t *line, std::vector &args) { - expand_flags_t eflags = 0; if (! show_errors) eflags |= EXPAND_NO_DESCRIPTIONS; @@ -757,7 +740,7 @@ void parser_t::eval_args(const wchar_t *line, std::vector &args) if (! line) return; - // PCA we need to suppress calling proc_push_interactive off of the main thread. I'm not sure exactly what it does. + // PCA we need to suppress calling proc_push_interactive off of the main thread. if (this->parser_type == PARSER_TYPE_GENERAL) proc_push_interactive(0); @@ -988,10 +971,7 @@ int parser_t::line_number_of_character_at_offset(size_t idx) const const wchar_t *parser_t::current_filename() const { - /* We query a global array for the current file name, so it only makes sense to ask this on the principal parser. 
*/ ASSERT_IS_MAIN_THREAD(); - assert(this == &principal_parser()); - for (size_t i=0; i < this->block_count(); i++) { @@ -1002,7 +982,13 @@ const wchar_t *parser_t::current_filename() const return function_get_definition_file(fb->name); } } - return reader_current_filename(); + + /* We query a global array for the current file name, but only do that if we are the principal parser */ + if (this == &principal_parser()) + { + return reader_current_filename(); + } + return NULL; } /** @@ -1221,7 +1207,7 @@ void parser_t::job_promote(job_t *job) { signal_block(); - job_list_t::iterator loc = std::find(my_job_list.begin(), my_job_list.end(), job); + job_list_t::iterator loc = std::find(my_job_list.begin(), my_job_list.end(), job); assert(loc != my_job_list.end()); /* Move the job to the beginning */ @@ -1940,9 +1926,9 @@ int parser_t::parse_job(process_t *p, job_t *j, tokenizer_t *tok) /* Check if we have reached the maximum recursion depth */ - if (forbidden_function.size() > MAX_RECURSION_DEPTH) + if (forbidden_function.size() > FISH_MAX_STACK_DEPTH) { - error(SYNTAX_ERROR, tok_get_pos(tok), OVERFLOW_RECURSION_ERR_MSG); + error(SYNTAX_ERROR, tok_get_pos(tok), CALL_STACK_LIMIT_EXCEEDED_ERR_MSG); } else { diff --git a/proc.cpp b/proc.cpp index 65e906ac8..4a25b4517 100644 --- a/proc.cpp +++ b/proc.cpp @@ -136,7 +136,8 @@ static bool proc_had_barrier = false; int get_is_interactive(void) { ASSERT_IS_MAIN_THREAD(); - return is_interactive; + // The tests leave is_interactive as -1, which is interpreted as true. So let's have them default to false. 
+ return is_interactive > 0; } bool get_proc_had_barrier() From 1863be7be43d18e5e24c2684df86d764ddb089cc Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 1 Jan 2014 15:49:41 -0800 Subject: [PATCH 085/108] Fix some warnings --- exec.cpp | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/exec.cpp b/exec.cpp index 594a53857..dba7b534e 100644 --- a/exec.cpp +++ b/exec.cpp @@ -701,7 +701,7 @@ void exec_job(parser_t &parser, job_t *j) j->first_process->completed=1; return; } - + assert(0 && "This should be unreachable"); } signal_block(); @@ -1133,6 +1133,20 @@ void exec_job(parser_t &parser, job_t *j) } break; } + + case EXTERNAL: + /* External commands are handled in the next switch statement below */ + break; + + case INTERNAL_EXEC: + /* We should have handled exec up above */ + assert(0 && "INTERNAL_EXEC process found in pipeline, where it should never be. Aborting."); + break; + + case INTERNAL_BUFFER: + /* Internal buffers are handled in the next switch statement below */ + break; + } if (exec_error) @@ -1493,7 +1507,13 @@ void exec_job(parser_t &parser, job_t *j) break; } - + + case INTERNAL_EXEC: + { + /* We should have handled exec up above */ + assert(0 && "INTERNAL_EXEC process found in pipeline, where it should never be. Aborting."); + break; + } } if (p->type == INTERNAL_BUILTIN) From 993148552efcffb60c7e1be7aa3322f1bffa1c74 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Thu, 2 Jan 2014 12:37:50 -0800 Subject: [PATCH 086/108] Support for Ctrl-C cancellation in new parser. Added tests for it too. 
--- fish_tests.cpp | 66 ++++++++++++++++++++++++++++++++++++++++++++- parse_execution.cpp | 8 +++++- parser.cpp | 19 ++++++++++++- parser.h | 3 +++ proc.cpp | 3 ++- reader.cpp | 8 +++--- 6 files changed, 98 insertions(+), 9 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 29f4e830b..ce61a40af 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -669,6 +669,66 @@ static void test_parser() #endif } +/* Wait a while and then SIGINT the main thread */ +struct test_cancellation_info_t +{ + pthread_t thread; + double delay; +}; + +static int signal_main(test_cancellation_info_t *info) +{ + usleep(info->delay * 1E6); + pthread_kill(info->thread, SIGINT); + return 0; +} + +static void test_1_cancellation(const wchar_t *src) +{ + shared_ptr out_buff(io_buffer_t::create(false, STDOUT_FILENO)); + const io_chain_t io_chain(out_buff); + test_cancellation_info_t ctx = {pthread_self(), 0.25 /* seconds */ }; + iothread_perform(signal_main, (void (*)(test_cancellation_info_t *, int))NULL, &ctx); + parser_t::principal_parser().eval(src, io_chain, TOP); + out_buff->read(); + if (out_buff->out_buffer_size() != 0) + { + err(L"Expected 0 bytes in out_buff, but instead found %lu bytes\n", out_buff->out_buffer_size()); + } + iothread_drain_all(); +} + +static void test_cancellation() +{ + say(L"Testing Ctrl-C cancellation. If this hangs, that's a bug!"); + + /* Enable fish's signal handling here. We need to make this interactive for fish to install its signal handlers */ + proc_push_interactive(1); + signal_set_handlers(); + + /* This tests that we can correctly ctrl-C out of certain loop constructs, and that nothing gets printed if we do */ + + /* Here the command substitution is an infinite loop. 
echo never even gets its argument, so when we cancel we expect no output */ + test_1_cancellation(L"echo (while true ; echo blah ; end)"); + fprintf(stderr, "."); + + /* Nasty infinite loop that doesn't actually execute anything */ + test_1_cancellation(L"echo (while true ; end) (while true ; end) (while true ; end)"); + fprintf(stderr, "."); + + test_1_cancellation(L"while true ; end"); + fprintf(stderr, "."); + + test_1_cancellation(L"for i in (whiel true ; end) ; end"); + fprintf(stderr, "."); + + + fprintf(stderr, "\n"); + /* Restore signal handling */ + proc_pop_interactive(); + signal_reset_handlers(); +} + static void test_indents() { say(L"Testing indents"); @@ -2640,12 +2700,15 @@ int main(int argc, char **argv) say(L"Testing low-level functionality"); set_main_thread(); setup_fork_guards(); - //proc_init(); //disabling this prevents catching SIGINT + proc_init(); event_init(); function_init(); builtin_init(); reader_init(); env_init(); + + /* Set default signal handlers, so we can ctrl-C out of this */ + signal_reset_handlers(); if (should_test_function("highlighting")) test_highlighting(); if (should_test_function("new_parser_ll2")) test_new_parser_ll2(); @@ -2662,6 +2725,7 @@ int main(int argc, char **argv) if (should_test_function("fork")) test_fork(); if (should_test_function("iothread")) test_iothread(); if (should_test_function("parser")) test_parser(); + if (should_test_function("cancellation")) test_cancellation(); if (should_test_function("indents")) test_indents(); if (should_test_function("utils")) test_utils(); if (should_test_function("escape_sequences")) test_escape_sequences(); diff --git a/parse_execution.cpp b/parse_execution.cpp index bc478c05d..8ca50d565 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -173,6 +173,10 @@ parse_execution_context_t::execution_cancellation_reason_t parse_execution_conte { return execution_cancellation_exit; } + else if (parser && parser->cancellation_requested) + { + return 
execution_cancellation_skip; + } else if (block && block->loop_status != LOOP_NORMAL) { /* Nasty hack - break and continue set the 'skip' flag as well as the loop status flag. */ @@ -1241,12 +1245,14 @@ parse_execution_result_t parse_execution_context_t::run_1_job(const parse_node_t /* Clean up the job on failure or cancellation */ bool populated_job = (pop_result == parse_execution_success); - if (! populated_job) + if (! populated_job || this->should_cancel_execution(associated_block)) { delete j; j = NULL; + populated_job = false; } + /* Store time it took to 'parse' the command */ if (do_profile) { diff --git a/parser.cpp b/parser.cpp index 6f5008174..a96f72b1c 100644 --- a/parser.cpp +++ b/parser.cpp @@ -315,6 +315,7 @@ parser_t::parser_t(enum parser_type_t type, bool errors) : show_errors(errors), error_code(0), err_pos(0), + cancellation_requested(false), current_tokenizer(NULL), current_tokenizer_pos(0), job_start_pos(0), @@ -343,6 +344,8 @@ void parser_t::skip_all_blocks(void) /* Tell all blocks to skip */ if (s_principal_parser) { + s_principal_parser->cancellation_requested = true; + //write(2, "Cancelling blocks\n", strlen("Cancelling blocks\n")); for (size_t i=0; i < s_principal_parser->block_count(); i++) { @@ -2614,7 +2617,20 @@ int parser_t::eval_block_node(node_offset_t node_idx, const io_chain_t &io, enum assert(ctx != NULL); CHECK_BLOCK(1); - + + /* Handle cancellation requests. If our block stack is currently empty, then we already did successfully cancel (or there was nothing to cancel); clear the flag. If our block stack is not empty, we are still in the process of cancelling; refuse to evaluate anything */ + if (this->cancellation_requested) + { + if (! 
block_stack.empty()) + { + return 1; + } + else + { + this->cancellation_requested = false; + } + } + /* Only certain blocks are allowed */ if ((block_type != TOP) && (block_type != SUBST)) @@ -2662,6 +2678,7 @@ int parser_t::eval(const wcstring &cmd_str, const io_chain_t &io, enum block_typ if (parser_use_ast()) return this->eval_new_parser(cmd_str, io, block_type); + const wchar_t * const cmd = cmd_str.c_str(); size_t forbid_count; int code; diff --git a/parser.h b/parser.h index f013a3b92..0c8c7334a 100644 --- a/parser.h +++ b/parser.h @@ -287,6 +287,9 @@ private: /** Position of last error */ int err_pos; + /** Indication that we should skip all blocks */ + bool cancellation_requested; + /** Stack of execution contexts. We own these pointers and must delete them */ std::vector execution_contexts; diff --git a/proc.cpp b/proc.cpp index 4a25b4517..4a1bfd5b5 100644 --- a/proc.cpp +++ b/proc.cpp @@ -136,7 +136,8 @@ static bool proc_had_barrier = false; int get_is_interactive(void) { ASSERT_IS_MAIN_THREAD(); - // The tests leave is_interactive as -1, which is interpreted as true. So let's have them default to false. 
+ /* is_interactive is initialized to -1; ensure someone has popped/pushed it before then */ + assert(is_interactive >= 0); return is_interactive > 0; } diff --git a/reader.cpp b/reader.cpp index f5ae62f48..b03b84a80 100644 --- a/reader.cpp +++ b/reader.cpp @@ -2068,11 +2068,9 @@ static void reader_interactive_destroy() void reader_sanity_check() { - if (get_is_interactive()) + /* Note: 'data' is non-null if we are interactive, except in the testing environment */ + if (get_is_interactive() && data != NULL) { - if (!data) - sanity_lose(); - if (!(data->buff_pos <= data->command_length())) sanity_lose(); @@ -2739,7 +2737,7 @@ static void reader_super_highlight_me_plenty(size_t match_highlight_pos) bool shell_is_exiting() { if (get_is_interactive()) - return job_list_is_empty() && data->end_loop; + return job_list_is_empty() && data != NULL && data->end_loop; else return end_loop; } From b8079953d89be2d05f28d95a141806e3bea87515 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 3 Jan 2014 15:27:39 -0800 Subject: [PATCH 087/108] Fix for cross-test interference where cancellation from one test would lead to failure in expansion test --- fish_tests.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index ce61a40af..6a52f1200 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -710,6 +710,7 @@ static void test_cancellation() /* Here the command substitution is an infinite loop. 
echo never even gets its argument, so when we cancel we expect no output */ test_1_cancellation(L"echo (while true ; echo blah ; end)"); + fprintf(stderr, "."); /* Nasty infinite loop that doesn't actually execute anything */ @@ -724,9 +725,13 @@ static void test_cancellation() fprintf(stderr, "\n"); + /* Restore signal handling */ proc_pop_interactive(); signal_reset_handlers(); + + /* Ensure that we don't think we should cancel */ + reader_reset_interrupted(); } static void test_indents() @@ -970,13 +975,13 @@ static int expand_test(const wchar_t *in, int flags, ...) size_t i=0; int res=1; wchar_t *arg; - + if (expand_string(in, output, flags)) { } - #if 0 + printf("input: %ls\n", in); for (size_t idx=0; idx < output.size(); idx++) { printf("%ls\n", output.at(idx).completion.c_str()); From c8eec109b2621c71ea23bbdf8c3334aeec4aed88 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 3 Jan 2014 16:54:34 -0800 Subject: [PATCH 088/108] Support in parse_dump_tree for "forests", which occur when there's a parse error --- parse_tree.cpp | 26 +++++++++++++++++++++----- parse_tree.h | 1 - 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/parse_tree.cpp b/parse_tree.cpp index 81973c61e..75ecc0da4 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -339,9 +339,17 @@ static inline parse_token_type_t parse_token_type_from_tokenizer_token(enum toke } /* Helper function for dump_tree */ -static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, node_offset_t node_idx, size_t indent, wcstring *result, size_t *line) +static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, node_offset_t node_idx, size_t indent, wcstring *result, size_t *line, node_offset_t *inout_first_node_not_dumped) { assert(node_idx < nodes.size()); + + // Update first_node_not_dumped + // This takes a bit of explanation. 
While it's true that a parse tree may be a "forest", its individual trees are "compact," meaning they are not interleaved. Thus we keep track of the largest node index as we descend a tree. One past the largest is the start of the next tree. + if (*inout_first_node_not_dumped <= node_idx) + { + *inout_first_node_not_dumped = node_idx + 1; + } + const parse_node_t &node = nodes.at(node_idx); const size_t spacesPerIndent = 2; @@ -383,19 +391,27 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring & ++*line; for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++) { - dump_tree_recursive(nodes, src, child_idx, indent + 1, result, line); + dump_tree_recursive(nodes, src, child_idx, indent + 1, result, line, inout_first_node_not_dumped); } } -/* Gives a debugging textual description of a parse tree */ +/* Gives a debugging textual description of a parse tree. Note that this supports "parse forests" too. That is, our tree may not really be a tree, but instead a collection of trees. 
*/ wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src) { if (nodes.empty()) return L"(empty!)"; - + + node_offset_t first_node_not_dumped = 0; size_t line = 0; wcstring result; - dump_tree_recursive(nodes, src, 0, 0, &result, &line); + while (first_node_not_dumped < nodes.size()) + { + if (first_node_not_dumped > 0) + { + result.append(L"---New Tree---\n"); + } + dump_tree_recursive(nodes, src, first_node_not_dumped, 0, &result, &line, &first_node_not_dumped); + } return result; } diff --git a/parse_tree.h b/parse_tree.h index acfaa7396..4f26674ea 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -272,7 +272,6 @@ public: argument_list = | argument argument_list - arguments_or_redirections_list = | argument_or_redirection arguments_or_redirections_list argument_or_redirection = argument | redirection From bf75731bbe479ced1a1afd3ec088c51fafbb4c12 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 3 Jan 2014 17:42:25 -0800 Subject: [PATCH 089/108] Fix for wrong syntax highlighting in the face of tokenizer errors, e.g. 
"echo 'hi" --- fish_tests.cpp | 9 ++++++++- parse_tree.cpp | 5 ++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 6a52f1200..0ce1092a2 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2638,8 +2638,15 @@ static void test_highlighting(void) {NULL, -1} }; + const highlight_component_t components10[] = + { + {L"echo", HIGHLIGHT_COMMAND}, + {L"'single_quote", HIGHLIGHT_ERROR}, + {NULL, -1} + }; + - const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9}; + const highlight_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9, components10}; for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) { const highlight_component_t *components = tests[which]; diff --git a/parse_tree.cpp b/parse_tree.cpp index 75ecc0da4..d3953ca26 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1109,8 +1109,11 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags { if (parse_flags & parse_flag_continue_after_error) { + /* Hack hack hack. Typically the parse error is due to the first token. However, if it's a tokenizer error, then has_fatal_error was set due to the check above; in that case the second token is what matters. */ + size_t error_token_idx = (queue[1].type == parse_special_type_tokenizer_error ? 
1 : 0); + /* Mark a special error token, and then keep going */ - const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, false, queue[0].source_start, queue[0].source_length}; + const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, false, queue[error_token_idx].source_start, queue[error_token_idx].source_length}; this->parser->accept_tokens(token, kInvalidToken); this->parser->reset_symbols(); } From 45852f0497b0c73d0d7e3547e77abec65842e0f4 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 5 Jan 2014 15:23:42 -0800 Subject: [PATCH 090/108] Add a convenience method next_node_in_node_list which can be used for iterating over lists in a parse tree --- parse_execution.cpp | 54 ++++++++++++------------------------------ parse_tree.cpp | 57 ++++++++++++++++++++++++++------------------- parse_tree.h | 4 ++-- 3 files changed, 50 insertions(+), 65 deletions(-) diff --git a/parse_execution.cpp b/parse_execution.cpp index 8ca50d565..e5a9b9f3d 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -73,7 +73,7 @@ const parse_node_t *parse_execution_context_t::infinite_recursive_statement_in_j const wcstring &forbidden_function_name = parser->forbidden_function.back(); /* Get the first job in the job list. 
*/ - const parse_node_t *first_job = tree.next_job_in_job_list(job_list, NULL); + const parse_node_t *first_job = tree.next_node_in_node_list(job_list, symbol_job, NULL); if (first_job == NULL) { return NULL; @@ -474,26 +474,26 @@ parse_execution_result_t parse_execution_context_t::run_switch_statement(const p { /* Expand case statements */ const parse_node_t *case_item_list = get_child(statement, 3, symbol_case_item_list); - while (matching_case_item == NULL && case_item_list->child_count > 0) + + /* Loop while we don't have a match but do have more of the list */ + while (matching_case_item == NULL && case_item_list != NULL) { if (should_cancel_execution(sb)) { result = parse_execution_cancelled; break; } - - if (case_item_list->production_idx == 2) + + /* Get the next item and the remainder of the list */ + const parse_node_t *case_item = tree.next_node_in_node_list(*case_item_list, symbol_case_item, &case_item_list); + if (case_item == NULL) { - /* Hackish: blank line */ - case_item_list = get_child(*case_item_list, 1, symbol_case_item_list); - continue; + /* No more items */ + break; } - /* Pull out this case item and the rest of the list */ - const parse_node_t &case_item = *get_child(*case_item_list, 0, symbol_case_item); - /* Pull out the argument list */ - const parse_node_t &arg_list = *get_child(case_item, 1, symbol_argument_list); + const parse_node_t &arg_list = *get_child(*case_item, 1, symbol_argument_list); /* Expand arguments. We explicitly ignore unmatched_wildcard. That is, a case item list may have a wildcard that fails to expand to anything. 
*/ const wcstring_list_t case_args = this->determine_arguments(arg_list, NULL); @@ -510,17 +510,14 @@ parse_execution_result_t parse_execution_context_t::run_switch_statement(const p /* If this matched, we're done */ if (match) { - matching_case_item = &case_item; + matching_case_item = case_item; break; } } - - /* Remainder of the list */ - case_item_list = get_child(*case_item_list, 1, symbol_case_item_list); } } - if (result == parse_execution_success && matching_case_item) + if (result == parse_execution_success && matching_case_item != NULL) { /* Success, evaluate the job list */ const parse_node_t *job_list = get_child(*matching_case_item, 3, symbol_job_list); @@ -1319,30 +1316,9 @@ parse_execution_result_t parse_execution_context_t::run_job_list(const parse_nod while (job_list != NULL && ! should_cancel_execution(associated_block)) { assert(job_list->type == symbol_job_list); - - // These correspond to the three productions of job_list + // Try pulling out a job - const parse_node_t *job = NULL; - switch (job_list->production_idx) - { - case 0: // empty - job_list = NULL; - break; - - case 1: //job, job_list - job = get_child(*job_list, 0, symbol_job); - job_list = get_child(*job_list, 1, symbol_job_list); - break; - - case 2: //blank line, job_list - job = NULL; - job_list = get_child(*job_list, 1, symbol_job_list); - break; - - default: //if we get here, it means more productions have been added to job_list, which is bad - fprintf(stderr, "Unexpected production in job_list: %lu\n", (unsigned long)job_list->production_idx); - PARSER_DIE(); - } + const parse_node_t *job = tree.next_node_in_node_list(*job_list, symbol_job, &job_list); if (job != NULL) { diff --git a/parse_tree.cpp b/parse_tree.cpp index d3953ca26..5134379d5 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1465,35 +1465,44 @@ parse_node_tree_t::parse_node_list_t parse_node_tree_t::specific_statements_for_ return result; } -const parse_node_t *parse_node_tree_t::next_job_in_job_list(const 
parse_node_t &top_job_list, const parse_node_t **out_list_tail) const +const parse_node_t *parse_node_tree_t::next_node_in_node_list(const parse_node_t &node_list, parse_token_type_t entry_type, const parse_node_t **out_list_tail) const { - assert(top_job_list.type == symbol_job_list); + parse_token_type_t list_type = node_list.type; - /* Our cursor variable */ - const parse_node_t *job_list = &top_job_list; + /* Paranoia - it doesn't make sense for a list type to contain itself */ + assert(list_type != entry_type); - /* Skip over a run of empty jobs */ - assert(job_list->type == symbol_job_list); - while (job_list->production_idx == 2) + const parse_node_t *list_cursor = &node_list; + const parse_node_t *list_entry = NULL; + + /* Loop while we don't have an item but do have a list. Note that not every node in the list may contain an in item that we care about - e.g. job_list contains blank lines as a production */ + while (list_entry == NULL && list_cursor != NULL) { - job_list = this->get_child(*job_list, 1, symbol_job_list); + const parse_node_t *next_cursor = NULL; + + /* Walk through the children */ + for (size_t i=0; i < list_cursor->child_count; i++) + { + const parse_node_t *child = this->get_child(*list_cursor, i); + if (child->type == entry_type) + { + /* This is the list entry */ + list_entry = child; + } + else if (child->type == list_type) + { + /* This is the next in the list */ + next_cursor = child; + } + } + /* Go to the next entry, even if it's NULL */ + list_cursor = next_cursor; } - /* Should now be at production 0 or 1 */ - assert(job_list->type == symbol_job_list); - assert(job_list->production_idx == 0 || job_list->production_idx == 1); - - /* Pull out the job */ - const parse_node_t *job = NULL; - const parse_node_t *list_tail = NULL; - if (job_list->production_idx == 1) - { - job = this->get_child(*job_list, 0, symbol_job); - list_tail = this->get_child(*job_list, 1, symbol_job_list); - } - - /* Return them */ + /* Return what we got */ + 
assert(list_cursor == NULL || list_cursor->type == list_type); + assert(list_entry == NULL || list_entry->type == entry_type); if (out_list_tail != NULL) - *out_list_tail = list_tail; - return job; + *out_list_tail = list_cursor; + return list_entry; } diff --git a/parse_tree.h b/parse_tree.h index 4f26674ea..76cd80ec4 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -211,8 +211,8 @@ public: /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */ const parse_node_t *header_node_for_block_statement(const parse_node_t &node) const; - /* Given a job list, returns the next job (or NULL), and the tail of the job list. */ - const parse_node_t *next_job_in_job_list(const parse_node_t &job_list, const parse_node_t **list_tail) const; + /* Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return the next element of the given type in that list, and the tail (by reference). Returns NULL if we've exhausted the list. */ + const parse_node_t *next_node_in_node_list(const parse_node_t &node_list, parse_token_type_t item_type, const parse_node_t **list_tail) const; /* Given a job, return all of its statements. These are 'specific statements' (e.g. 
symbol_decorated_statement, not symbol_statement) */ parse_node_list_t specific_statements_for_job(const parse_node_t &job) const; From cb6be2a50dbf6718fad88c3586e409a53c785324 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 7 Jan 2014 10:45:36 -0800 Subject: [PATCH 091/108] Support for "simple block" optimization, where we can run blocks directly if there are no arguments or redirections to the block itself --- parse_execution.cpp | 73 +++++++++++++++++++++++++++++++++++++++++---- parse_execution.h | 4 +++ 2 files changed, 72 insertions(+), 5 deletions(-) diff --git a/parse_execution.cpp b/parse_execution.cpp index e5a9b9f3d..aeca11a72 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -19,6 +19,12 @@ #include "exec.h" #include "path.h" +/* These are the specific statement types that support redirections */ +static bool specific_statement_type_is_redirectable_block(const parse_node_t &node) +{ + return node.type == symbol_block_statement || node.type == symbol_if_statement || node.type == symbol_switch_statement; + +} parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &t, const wcstring &s, parser_t *p) : tree(t), src(s), parser(p), eval_level(0) { @@ -192,6 +198,41 @@ parse_execution_context_t::execution_cancellation_reason_t parse_execution_conte } } +/* Return whether the job contains a single statement, of block type, with no redirections */ +bool parse_execution_context_t::job_is_simple_block(const parse_node_t &job_node) const +{ + assert(job_node.type == symbol_job); + + /* Must have one statement */ + const parse_node_t &statement = *get_child(job_node, 0, symbol_statement); + const parse_node_t &specific_statement = *get_child(statement, 0); + if (! 
specific_statement_type_is_redirectable_block(specific_statement)) + { + /* Not an appropriate block type */ + return false; + } + + + /* Must be no pipes */ + const parse_node_t &continuation = *get_child(job_node, 1, symbol_job_continuation); + if (continuation.child_count > 0) + { + /* Multiple statements in this job, so there's pipes involved */ + return false; + } + + /* Check for arguments and redirections. All of the above types have an arguments / redirections list. It must be empty. */ + const parse_node_t &args_and_redirections = tree.find_child(specific_statement, symbol_arguments_or_redirections_list); + if (args_and_redirections.child_count > 0) + { + /* Non-empty, we have an argument or redirection */ + return false; + } + + /* Ok, we are a simple block! */ + return true; +} + parse_execution_result_t parse_execution_context_t::run_if_statement(const parse_node_t &statement) { assert(statement.type == symbol_if_statement); @@ -415,6 +456,9 @@ parse_execution_result_t parse_execution_context_t::run_for_statement(const pars } } } + + parser->pop_block(fb); + return ret; } @@ -1207,7 +1251,29 @@ parse_execution_result_t parse_execution_context_t::run_1_job(const parse_node_t /* Increment the eval_level for the duration of this command */ scoped_push saved_eval_level(&eval_level, eval_level + 1); - /* TODO: blocks-without-redirections optimization */ + /* When we encounter a block construct (e.g. while loop) in the general case, we create a "block process" that has a pointer to its source. This allows us to handle block-level redirections. However, if there are no redirections, then we can just jump into the block directly, which is significantly faster. 
*/ + if (job_is_simple_block(job_node)) + { + const parse_node_t &statement = *get_child(job_node, 0, symbol_statement); + const parse_node_t &specific_statement = *get_child(statement, 0); + assert(specific_statement_type_is_redirectable_block(specific_statement)); + switch (specific_statement.type) + { + case symbol_block_statement: + return this->run_block_statement(specific_statement); + + case symbol_if_statement: + return this->run_if_statement(specific_statement); + + case symbol_switch_statement: + return this->run_switch_statement(specific_statement); + + default: + /* Other types should be impossible due to the specific_statement_type_is_redirectable_block check */ + PARSER_DIE(); + break; + } + } /* Profiling support */ long long start_time = 0, parse_time = 0, exec_time = 0; @@ -1349,10 +1415,7 @@ parse_execution_result_t parse_execution_context_t::eval_node_at_offset(node_off } /* Currently, we only expect to execute the top level job list, or a block node. Assert that. */ - assert(node.type == symbol_job_list || - node.type == symbol_block_statement || - node.type == symbol_if_statement || - node.type == symbol_switch_statement); + assert(node.type == symbol_job_list || specific_statement_type_is_redirectable_block(node)); enum parse_execution_result_t status = parse_execution_success; switch (node.type) diff --git a/parse_execution.h b/parse_execution.h index 0d924b8ec..6c022cb2b 100644 --- a/parse_execution.h +++ b/parse_execution.h @@ -64,6 +64,7 @@ class parse_execution_context_t /* Wildcard error helper */ parse_execution_result_t report_unmatched_wildcard_error(const parse_node_t &unmatched_wildcard); + /* Command not found support */ void handle_command_not_found(const wcstring &cmd, const parse_node_t &statement_node, int err_code); /* Utilities */ @@ -72,6 +73,9 @@ class parse_execution_context_t node_offset_t get_offset(const parse_node_t &node) const; const parse_node_t *infinite_recursive_statement_in_job_list(const parse_node_t &job_list, 
wcstring *out_func_name) const; + /* Indicates whether a job is a simple block (one block, no redirections) */ + bool job_is_simple_block(const parse_node_t &node) const; + enum process_type_t process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const; /* These create process_t structures from statements */ From b34721b3f458210caeafae7665c6d32962a24f63 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 7 Jan 2014 14:57:58 -0800 Subject: [PATCH 092/108] Miscellaneous optimizations to reduce string copying --- builtin.cpp | 2 +- complete.cpp | 10 +++++++-- complete.h | 4 ++-- env.cpp | 1 + expand.cpp | 54 ++++++++++++++++++++++++++------------------- parse_execution.cpp | 8 +++---- parser.cpp | 6 ++--- 7 files changed, 50 insertions(+), 35 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index 0bff83d38..61f7ed8b8 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -4460,7 +4460,7 @@ void builtin_get_names(std::vector &list) { for (size_t i=0; i < BUILTIN_COUNT; i++) { - list.push_back(completion_t(builtin_datas[i].name)); + append_completion(list, builtin_datas[i].name); } } diff --git a/complete.cpp b/complete.cpp index 49f3df3ef..5676d5bb3 100644 --- a/complete.cpp +++ b/complete.cpp @@ -466,7 +466,13 @@ void completion_autoload_t::command_removed(const wcstring &cmd) /** Create a new completion entry */ void append_completion(std::vector &completions, const wcstring &comp, const wcstring &desc, complete_flags_t flags, string_fuzzy_match_t match) { - completions.push_back(completion_t(comp, desc, match, flags)); + /* If we just constructed the completion and used push_back, we would get two string copies. Try to avoid that by making a stubby completion in the vector first, and then copying our string in. 
*/ + completions.push_back(completion_t(wcstring())); + completion_t *last = &completions.back(); + last->completion = comp; + last->description = desc; + last->match = match; + last->flags = flags; } /** @@ -1191,7 +1197,7 @@ void completer_t::complete_cmd(const wcstring &str_cmd, bool use_function, bool wcstring_list_t names = function_get_names(str_cmd.at(0) == L'_'); for (size_t i=0; i < names.size(); i++) { - possible_comp.push_back(completion_t(names.at(i))); + append_completion(possible_comp, names.at(i)); } this->complete_strings(str_cmd, 0, &complete_function_desc, possible_comp, 0); diff --git a/complete.h b/complete.h index cd4f22a16..fa00c3e5b 100644 --- a/complete.h +++ b/complete.h @@ -124,7 +124,7 @@ public: int flags; /* Construction. Note: defining these so that they are not inlined reduces the executable size. */ - completion_t(const wcstring &comp, const wcstring &desc = L"", string_fuzzy_match_t match = string_fuzzy_match_t(fuzzy_match_exact), int flags_val = 0); + completion_t(const wcstring &comp, const wcstring &desc = wcstring(), string_fuzzy_match_t match = string_fuzzy_match_t(fuzzy_match_exact), int flags_val = 0); completion_t(const completion_t &); completion_t &operator=(const completion_t &); @@ -268,7 +268,7 @@ void complete_load(const wcstring &cmd, bool reload); \param flags completion flags */ -void append_completion(std::vector &completions, const wcstring &comp, const wcstring &desc = L"", int flags = 0, string_fuzzy_match_t match = string_fuzzy_match_t(fuzzy_match_exact)); +void append_completion(std::vector &completions, const wcstring &comp, const wcstring &desc = wcstring(), int flags = 0, string_fuzzy_match_t match = string_fuzzy_match_t(fuzzy_match_exact)); /* Function used for testing */ void complete_set_variable_names(const wcstring_list_t *names); diff --git a/env.cpp b/env.cpp index 13f87b6cc..acf132c8a 100644 --- a/env.cpp +++ b/env.cpp @@ -892,6 +892,7 @@ int env_set(const wcstring &key, const wchar_t *val, int 
var_mode) if (!is_universal) { event_t ev = event_t::variable_event(key); + ev.arguments.reserve(3); ev.arguments.push_back(L"VARIABLE"); ev.arguments.push_back(L"SET"); ev.arguments.push_back(key); diff --git a/expand.cpp b/expand.cpp index a73118b89..6216da222 100644 --- a/expand.cpp +++ b/expand.cpp @@ -785,7 +785,15 @@ static int expand_pid(const wcstring &instr_with_sep, expand_flags_t flags, std::vector &out) { - + /* Hack. If there's no INTERNAL_SEP and no PROCESS_EXPAND, then there's nothing to do. Check out this "null terminated string." */ + const wchar_t some_chars[] = {INTERNAL_SEPARATOR, PROCESS_EXPAND, L'\0'}; + if (instr_with_sep.find_first_of(some_chars) == wcstring::npos) + { + /* Nothing to do */ + append_completion(out, instr_with_sep); + return 1; + } + /* expand_string calls us with internal separators in instr...sigh */ wcstring instr = instr_with_sep; remove_internal_separator(instr, false); @@ -1372,7 +1380,7 @@ static int expand_brackets(parser_t &parser, const wcstring &instr, int flags, s /** Perform cmdsubst expansion */ -static int expand_cmdsubst(parser_t &parser, const wcstring &input, std::vector &outList) +static int expand_cmdsubst(parser_t &parser, const wcstring &input, std::vector &out_list) { wchar_t *paran_begin=0, *paran_end=0; std::vector sub_res; @@ -1390,7 +1398,7 @@ static int expand_cmdsubst(parser_t &parser, const wcstring &input, std::vector< L"Mismatched parenthesis"); return 0; case 0: - outList.push_back(completion_t(input)); + append_completion(out_list, input); return 1; case 1: @@ -1455,15 +1463,15 @@ static int expand_cmdsubst(parser_t &parser, const wcstring &input, std::vector< */ for (i=0; i &output, expa if ((!(flags & ACCEPT_INCOMPLETE)) && expand_is_clean(input.c_str())) { - output.push_back(completion_t(input)); + append_completion(output, input); return EXPAND_OK; } @@ -1681,7 +1689,7 @@ int expand_string(const wcstring &input, std::vector &output, expa parser.error(CMDSUBST_ERROR, -1, L"Command 
substitutions not allowed"); return EXPAND_ERROR; } - in->push_back(completion_t(input)); + append_completion(*in, input); } else { @@ -1709,7 +1717,7 @@ int expand_string(const wcstring &input, std::vector &output, expa next[i] = L'$'; } } - out->push_back(completion_t(next)); + append_completion(*out, next); } else { @@ -1725,7 +1733,7 @@ int expand_string(const wcstring &input, std::vector &output, expa for (i=0; i < in->size(); i++) { - wcstring next = in->at(i).completion; + const wcstring &next = in->at(i).completion; if (!expand_brackets(parser, next, flags, *out)) { @@ -1745,7 +1753,7 @@ int expand_string(const wcstring &input, std::vector &output, expa if (flags & ACCEPT_INCOMPLETE) { - if (next[0] == PROCESS_EXPAND) + if (! next.empty() && next.at(0) == PROCESS_EXPAND) { /* If process expansion matches, we are not @@ -1758,7 +1766,7 @@ int expand_string(const wcstring &input, std::vector &output, expa } else { - out->push_back(completion_t(next)); + append_completion(*out, next); } } else @@ -1840,7 +1848,7 @@ int expand_string(const wcstring &input, std::vector &output, expa { if (!(flags & ACCEPT_INCOMPLETE)) { - out->push_back(completion_t(next_str)); + append_completion(*out, next_str); } } } @@ -1970,19 +1978,19 @@ bool fish_openSUSE_dbus_hack_hack_hack_hack(std::vector *args) val.resize(last_good + 1); args->clear(); - args->push_back(completion_t(L"set")); + append_completion(*args, L"set"); if (key == L"DBUS_SESSION_BUS_ADDRESS") - args->push_back(completion_t(L"-x")); - args->push_back(completion_t(key)); - args->push_back(completion_t(val)); + append_completion(*args, L"-x"); + append_completion(*args, key); + append_completion(*args, val); result = true; } else if (string_prefixes_string(L"export DBUS_SESSION_BUS_ADDRESS;", cmd)) { /* Nothing, we already exported it */ args->clear(); - args->push_back(completion_t(L"echo")); - args->push_back(completion_t(L"-n")); + append_completion(*args, L"echo"); + append_completion(*args, L"-n"); result = 
true; } } diff --git a/parse_execution.cpp b/parse_execution.cpp index aeca11a72..c628b1d63 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -805,11 +805,11 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t return parse_execution_errored; } - wcstring actual_cmd; + wcstring path_to_external_command; if (process_type == EXTERNAL) { /* Determine the actual command. This may be an implicit cd. */ - bool has_command = path_get_path(cmd, &actual_cmd); + bool has_command = path_get_path(cmd, &path_to_external_command); /* If there was no command, then we care about the value of errno after checking for it, to distinguish between e.g. no file vs permissions problem */ const int no_cmd_err_code = errno; @@ -843,7 +843,7 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t /* Implicit cd is simple */ argument_list.push_back(L"cd"); argument_list.push_back(cmd); - actual_cmd.clear(); + path_to_external_command.clear(); /* If we have defined a wrapper around cd, use it, otherwise use the cd builtin */ process_type = function_exists(L"cd") ? 
INTERNAL_FUNCTION : INTERNAL_BUILTIN; @@ -878,7 +878,7 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t proc->type = process_type; proc->set_argv(argument_list); proc->set_io_chain(process_io_chain); - proc->actual_cmd = actual_cmd; + proc->actual_cmd = path_to_external_command; return parse_execution_success; } diff --git a/parser.cpp b/parser.cpp index a96f72b1c..c2ef76814 100644 --- a/parser.cpp +++ b/parser.cpp @@ -1939,7 +1939,7 @@ int parser_t::parse_job(process_t *p, job_t *j, tokenizer_t *tok) } } } - args.push_back(completion_t(nxt)); + append_completion(args, nxt); } if (error_code == 0) @@ -1982,8 +1982,8 @@ int parser_t::parse_job(process_t *p, job_t *j, tokenizer_t *tok) if (use_implicit_cd) { args.clear(); - args.push_back(completion_t(L"cd")); - args.push_back(completion_t(implicit_cd_path)); + append_completion(args, L"cd"); + append_completion(args, implicit_cd_path); /* If we have defined a wrapper around cd, use it, otherwise use the cd builtin */ if (use_function && function_exists(L"cd")) From d69f408b14a267ff2968f21f24241cb3e87102de Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 8 Jan 2014 15:13:08 -0800 Subject: [PATCH 093/108] Rename builtin parse to __fish_parse --- builtin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin.cpp b/builtin.cpp index 61f7ed8b8..e475e7774 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -4315,6 +4315,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) static const builtin_data_t builtin_datas[]= { { L"[", &builtin_test, N_(L"Test a condition") }, + { L"__fish_parse", &builtin_parse, N_(L"Try out the new parser") }, { L"and", &builtin_generic, N_(L"Execute command if previous command suceeded") }, { L"begin", &builtin_begin, N_(L"Create a block of code") }, { L"bg", &builtin_bg, N_(L"Send job to background") }, @@ -4346,7 +4347,6 @@ static const builtin_data_t builtin_datas[]= { L"jobs", &builtin_jobs, N_(L"Print currently running jobs") 
}, { L"not", &builtin_generic, N_(L"Negate exit status of job") }, { L"or", &builtin_generic, N_(L"Execute command if previous command failed") }, - { L"parse", &builtin_parse, N_(L"Try out the new parser") }, { L"printf", &builtin_printf, N_(L"Prints formatted text") }, { L"pwd", &builtin_pwd, N_(L"Print the working directory") }, { L"random", &builtin_random, N_(L"Generate random number") }, From 0e9d159bc2e95d71ae2051957397bc689e020e42 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 8 Jan 2014 18:20:55 -0800 Subject: [PATCH 094/108] Improvements to error reporting. In particular, we now append a newline in reader_shell_test, so that there's always a statement terminator. Otherwise commands like 'echo |' would not be considered an error (just incomplete). --- parse_execution.cpp | 34 ++++++++++++++++++++-------------- reader.cpp | 15 ++++++++++----- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/parse_execution.cpp b/parse_execution.cpp index c628b1d63..518ff479a 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -638,20 +638,26 @@ parse_execution_result_t parse_execution_context_t::run_while_statement(const pa /* Reports an error. Always returns parse_execution_errored, so you can assign the result to an 'errored' variable */ parse_execution_result_t parse_execution_context_t::report_error(const parse_node_t &node, const wchar_t *fmt, ...) 
{ - parse_error_t error; - error.source_start = node.source_start; - error.source_length = node.source_length; - error.code = parse_error_syntax; //hackish - - va_list va; - va_start(va, fmt); - error.text = vformat_string(fmt, va); - va_end(va); - - /* Get a backtrace */ - wcstring backtrace; - const parse_error_list_t error_list = parse_error_list_t(1, error); - parser->get_backtrace(src, error_list, &backtrace); + if (parser->show_errors) + { + /* Create an error */ + parse_error_t error; + error.source_start = node.source_start; + error.source_length = node.source_length; + error.code = parse_error_syntax; //hackish + + va_list va; + va_start(va, fmt); + error.text = vformat_string(fmt, va); + va_end(va); + + /* Get a backtrace */ + wcstring backtrace_and_desc; + const parse_error_list_t error_list = parse_error_list_t(1, error); + parser->get_backtrace(src, error_list, &backtrace_and_desc); + + fprintf(stderr, "%ls", backtrace_and_desc.c_str()); + } return parse_execution_errored; } diff --git a/reader.cpp b/reader.cpp index b03b84a80..c5857f900 100644 --- a/reader.cpp +++ b/reader.cpp @@ -2470,21 +2470,26 @@ void reader_run_command(parser_t &parser, const wcstring &cmd) int reader_shell_test(const wchar_t *b) { + assert(b != NULL); wcstring bstr = b; + + /* Append a newline, to act as a statement terminator */ + bstr.push_back(L'\n'); + parse_error_list_t errors; int res = parse_util_detect_errors(bstr, &errors); if (res & PARSER_TEST_ERROR) { - wcstring sb; - parser_t::principal_parser().get_backtrace(bstr, errors, &sb); + wcstring error_desc; + parser_t::principal_parser().get_backtrace(bstr, errors, &error_desc); // ensure we end with a newline. Also add an initial newline, because it's likely the user just hit enter and so there's junk on the current line - if (! string_suffixes_string(L"\n", sb)) + if (! 
string_suffixes_string(L"\n", error_desc)) { - sb.push_back(L'\n'); + error_desc.push_back(L'\n'); } - fwprintf(stderr, L"\n%ls", sb.c_str()); + fwprintf(stderr, L"\n%ls", error_desc.c_str()); } return res; } From 1130e4782de6378a7ad2cbb39cfb1626d7233fe8 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 12 Jan 2014 00:11:29 -0800 Subject: [PATCH 095/108] Remove some unused defines --- parse_constants.h | 23 +---------------------- parser.cpp | 5 ----- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/parse_constants.h b/parse_constants.h index 12fcb114c..266d27e1c 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -131,20 +131,6 @@ typedef unsigned int parser_test_error_bits_t; /** Maximum number of function calls. */ #define FISH_MAX_STACK_DEPTH 128 -/** - Error message for tokenizer error. The tokenizer message is - appended to this message. -*/ -#define TOK_ERR_MSG _( L"Tokenizer error: '%ls'") - -/** - Error message for short circuit command error. -*/ -#define COND_ERR_MSG _( L"An additional command is required" ) - -/** Error message on a function that calls itself immediately */ -#define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.") - /** Error message on a function that calls itself immediately */ #define INFINITE_FUNC_RECURSION_ERR_MSG _( L"The function '%ls' calls itself immediately, which would result in an infinite loop.") @@ -152,14 +138,7 @@ typedef unsigned int parser_test_error_bits_t; /** Error message on reaching maximum call stack depth */ #define CALL_STACK_LIMIT_EXCEEDED_ERR_MSG _( L"The function call stack limit has been exceeded. Do you have an accidental infinite loop?") -/** - Error message used when the end of a block can't be located -*/ -#define BLOCK_END_ERR_MSG _( L"Could not locate end of block. 
The 'end' command is missing, misspelled or a ';' is missing.") - -/** - Error message when a non-string token is found when expecting a command name -*/ +/** Error message when a non-string token is found when expecting a command name */ #define CMD_ERR_MSG _( L"Expected a command name, got token of type '%ls'") /** diff --git a/parser.cpp b/parser.cpp index c2ef76814..5f3f5dc1a 100644 --- a/parser.cpp +++ b/parser.cpp @@ -78,11 +78,6 @@ The fish parser. Contains functions for parsing and evaluating code. */ #define BLOCK_END_ERR_MSG _( L"Could not locate end of block. The 'end' command is missing, misspelled or a ';' is missing.") -/** - Error message when a non-string token is found when expecting a command name -*/ -#define CMD_ERR_MSG _( L"Expected a command name, got token of type '%ls'") - /** Error message when a non-string token is found when expecting a command name */ From 89069fdaa4fff2f8b703fef0be260eb4d4c2ed9b Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 12 Jan 2014 13:33:35 -0800 Subject: [PATCH 096/108] Miscellaneous minor fixes based on cppcheck static analyzer --- autoload.cpp | 3 +-- builtin.cpp | 10 +++------- builtin_commandline.cpp | 10 +++------- builtin_complete.cpp | 3 ++- builtin_jobs.cpp | 3 +-- builtin_set.cpp | 7 +++---- common.cpp | 11 +++++------ complete.cpp | 4 ++-- parse_tree.cpp | 5 ++--- 9 files changed, 22 insertions(+), 34 deletions(-) diff --git a/autoload.cpp b/autoload.cpp index 3470ecae6..e5d2334df 100644 --- a/autoload.cpp +++ b/autoload.cpp @@ -195,7 +195,6 @@ autoload_function_t *autoload_t::get_autoloaded_function_with_creation(const wcs bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_load, bool reload, const wcstring_list_t &path_list) { /* Note that we are NOT locked in this function! */ - size_t i; bool reloaded = 0; /* Try using a cached function. If we really want the function to be loaded, require that it be really loaded. If we're not reloading, allow stale functions. 
*/ @@ -276,7 +275,7 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_ if (! has_script_source) { /* Iterate over path searching for suitable completion files */ - for (i=0; i indexes; wcstring_list_t result; @@ -713,9 +712,9 @@ static int builtin_set(parser_t &parser, wchar_t **argv) retcode = 1; break; } - - val_count = argc-woptind-1; - idx_count = indexes.size(); + + size_t idx_count = indexes.size(); + size_t val_count = argc-woptind-1; if (!erase) { diff --git a/common.cpp b/common.cpp index a36f4169b..a600432b0 100644 --- a/common.cpp +++ b/common.cpp @@ -105,7 +105,7 @@ void show_stackframe() return; void *trace[32]; - int i, trace_size = 0; + int trace_size = 0; trace_size = backtrace(trace, 32); char **messages = backtrace_symbols(trace, trace_size); @@ -113,7 +113,7 @@ void show_stackframe() if (messages) { debug(0, L"Backtrace:"); - for (i=0; iresult_mode & NO_COMMON) use_common = false; if (o->result_mode & NO_FILES) use_files = false; - complete_from_args(str, o->comp.c_str(), o->localized_desc(), o->flags); + complete_from_args(str, o->comp, o->localized_desc(), o->flags); } } @@ -1697,7 +1697,7 @@ bool completer_t::complete_variable(const wcstring &str, size_t start_offset) desc = format_string(COMPLETE_VAR_DESC_VAL, value.c_str()); } - append_completion(this->completions, comp.c_str(), desc.c_str(), flags, match); + append_completion(this->completions, comp, desc, flags, match); res = true; } diff --git a/parse_tree.cpp b/parse_tree.cpp index 5134379d5..ee401a451 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -765,7 +765,7 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; if (this->should_generate_error_messages) { - wcstring desc = token_type_description(token.type); + wcstring desc = token.user_presentable_description(); this->parse_error(token, parse_error_generic, L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); } } @@ -881,8 
+881,7 @@ bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token) } else { - const wcstring expected = token_type_description(stack_top.type); - const wcstring actual = token_type_description(token.type); + const wcstring expected = stack_top.user_presentable_description(); this->parse_error(expected.c_str(), token); } } From b2c78dbd57a7c891aa176f1a571f36bd765e9ffb Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 12 Jan 2014 13:42:26 -0800 Subject: [PATCH 097/108] Fix for issue where we compute a default USER variable, but never set its value. --- env.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/env.cpp b/env.cpp index acf132c8a..ab7f63e5d 100644 --- a/env.cpp +++ b/env.cpp @@ -299,7 +299,6 @@ static bool var_is_locale(const wcstring &key) static void handle_locale() { const env_var_t lc_all = env_get_string(L"LC_ALL"); - int i; const wcstring old_locale = wsetlocale(LC_MESSAGES, NULL); /* @@ -330,7 +329,7 @@ static void handle_locale() wsetlocale(LC_ALL, lang.c_str()); } - for (i=2; locale_variable[i]; i++) + for (int i=2; locale_variable[i]; i++) { const env_var_t val = env_get_string(locale_variable[i]); @@ -479,7 +478,7 @@ static void env_set_defaults() if (pw->pw_name != NULL) { const wcstring wide_name = str2wcstring(pw->pw_name); - env_set(L"USER", NULL, ENV_GLOBAL); + env_set(L"USER", wide_name.c_str(), ENV_GLOBAL); } } From 40e223c12d7b0199098408201f1b15e70a9918b7 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 12 Jan 2014 13:53:59 -0800 Subject: [PATCH 098/108] Additional miscellaneous fixes based on cppcheck static analysis --- common.cpp | 4 ++-- common.h | 4 ++-- env_universal.cpp | 7 ++----- event.cpp | 5 +---- reader.cpp | 5 ++--- 5 files changed, 9 insertions(+), 16 deletions(-) diff --git a/common.cpp b/common.cpp index a600432b0..518625600 100644 --- a/common.cpp +++ b/common.cpp @@ -761,7 +761,7 @@ void debug_safe(int level, const char *msg, const char *param1, const char *para errno = 
errno_old; } -void format_long_safe(char buff[128], long val) +void format_long_safe(char buff[64], long val) { if (val == 0) { @@ -795,7 +795,7 @@ void format_long_safe(char buff[128], long val) } } -void format_long_safe(wchar_t buff[128], long val) +void format_long_safe(wchar_t buff[64], long val) { if (val == 0) { diff --git a/common.h b/common.h index 6cf75759f..86c5bc6c9 100644 --- a/common.h +++ b/common.h @@ -348,8 +348,8 @@ void format_size_safe(char buff[128], unsigned long long sz); void debug_safe(int level, const char *msg, const char *param1 = NULL, const char *param2 = NULL, const char *param3 = NULL, const char *param4 = NULL, const char *param5 = NULL, const char *param6 = NULL, const char *param7 = NULL, const char *param8 = NULL, const char *param9 = NULL, const char *param10 = NULL, const char *param11 = NULL, const char *param12 = NULL); /** Writes out a long safely */ -void format_long_safe(char buff[128], long val); -void format_long_safe(wchar_t buff[128], long val); +void format_long_safe(char buff[64], long val); +void format_long_safe(wchar_t buff[64], long val); template diff --git a/env_universal.cpp b/env_universal.cpp index a9e7462a6..ed7396f6a 100644 --- a/env_universal.cpp +++ b/env_universal.cpp @@ -426,8 +426,6 @@ void env_universal_barrier() void env_universal_set(const wcstring &name, const wcstring &value, bool exportv) { - message_t *msg; - if (!s_env_univeral_inited) return; @@ -439,7 +437,7 @@ void env_universal_set(const wcstring &name, const wcstring &value, bool exportv } else { - msg = create_message(exportv?SET_EXPORT:SET, + message_t *msg = create_message(exportv?SET_EXPORT:SET, name.c_str(), value.c_str()); @@ -459,7 +457,6 @@ int env_universal_remove(const wchar_t *name) { int res; - message_t *msg; if (!s_env_univeral_inited) return 1; @@ -476,7 +473,7 @@ int env_universal_remove(const wchar_t *name) } else { - msg= create_message(ERASE, name, 0); + message_t *msg = create_message(ERASE, name, 0); msg->count=1; 
env_universal_server.unsent.push(msg); env_universal_barrier(); diff --git a/event.cpp b/event.cpp index d2b219e7d..a6fab6205 100644 --- a/event.cpp +++ b/event.cpp @@ -568,9 +568,6 @@ static void event_fire_internal(const event_t &event) */ static void event_fire_delayed() { - - size_t i; - /* If is_event is one, we are running the event-handler non-recursively. @@ -582,7 +579,7 @@ static void event_fire_delayed() { event_list_t new_blocked; - for (i=0; icursor_pos; - this->autosuggestion = completion_apply_to_command_line(comp.completion.c_str(), comp.flags, this->search_string, &cursor, true /* append only */); + this->autosuggestion = completion_apply_to_command_line(comp.completion, comp.flags, this->search_string, &cursor, true /* append only */); return 1; } @@ -2200,7 +2200,6 @@ static void handle_token_history(int forward, int reset) */ if (data->history_search.go_backwards()) { - wcstring item = data->history_search.current_string(); data->token_history_buff = data->history_search.current_string(); } current_pos = data->token_history_buff.size(); @@ -3325,7 +3324,7 @@ const wchar_t *reader_readline(void) { //history_reset(); data->history_search.go_to_end(); - reader_set_buffer(data->search_buff.c_str(), data->search_buff.size()); + reader_set_buffer(data->search_buff, data->search_buff.size()); } else { From ec469782c8f146476de28453e971642095e4f381 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 12 Jan 2014 15:10:59 -0800 Subject: [PATCH 099/108] Improvements to error messages with new parser. In particular, "naked builtins" now print their help (e.g. 
just 'while') --- builtin.cpp | 40 +++++++++++++++++++------ parse_tree.cpp | 81 +++++++++++++++++++++++--------------------------- 2 files changed, 69 insertions(+), 52 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index b52c8e9eb..e4a3bf6ff 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -1038,20 +1038,22 @@ static int builtin_emit(parser_t &parser, wchar_t **argv) static int builtin_generic(parser_t &parser, wchar_t **argv) { int argc=builtin_count_args(argv); + + /* Hackish - if we have no arguments other than the command, we are a "naked invocation" and we just print help */ + if (argc == 1) + { + builtin_print_help(parser, argv[0], stdout_buffer); + return STATUS_BUILTIN_ERROR; + } + woptind=0; static const struct woption long_options[] = { - { - L"help", no_argument, 0, 'h' - } - , - { - 0, 0, 0, 0 - } - } - ; + { L"help", no_argument, 0, 'h' }, + { 0, 0, 0, 0 } + }; while (1) { @@ -2060,6 +2062,13 @@ int define_function(parser_t &parser, const wcstring_list_t &c_args, const wcstr */ static int builtin_function(parser_t &parser, wchar_t **argv) { + /* Hack hack hack - with the new parser, this is only invoked for help */ + if (parser_use_ast()) + { + builtin_print_help(parser, argv[0], stdout_buffer); + return STATUS_BUILTIN_OK; + } + int argc = builtin_count_args(argv); int res=STATUS_BUILTIN_OK; wchar_t *desc=0; @@ -3662,6 +3671,12 @@ static int builtin_for(parser_t &parser, wchar_t **argv) int argc = builtin_count_args(argv); int res=STATUS_BUILTIN_ERROR; + /* Hackish - if we have no arguments other than the command, we are a "naked invocation" and we just print help */ + if (argc == 1) + { + builtin_print_help(parser, argv[0], stdout_buffer); + return STATUS_BUILTIN_ERROR; + } if (argc < 3) { @@ -4048,6 +4063,13 @@ static int builtin_switch(parser_t &parser, wchar_t **argv) { int res=STATUS_BUILTIN_OK; int argc = builtin_count_args(argv); + + /* Hackish - if we have no arguments other than the command, we are a "naked invocation" and we just print 
help */ + if (argc == 1) + { + builtin_print_help(parser, argv[0], stdout_buffer); + return STATUS_BUILTIN_ERROR; + } if (argc != 2) { diff --git a/parse_tree.cpp b/parse_tree.cpp index ee401a451..536520fc8 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -240,6 +240,39 @@ wcstring keyword_description(parse_keyword_t k) return format_string(L"Unknown keyword type %ld", static_cast(k)); } +static wcstring token_type_user_presentable_description(parse_token_type_t type, parse_keyword_t keyword) +{ + if (keyword != parse_keyword_none) + { + return format_string(L"keyword '%ls'", keyword_description(keyword).c_str()); + } + + switch (type) + { + /* Hackish. We only support the following types. */ + case symbol_statement: + return L"a command"; + + case parse_token_type_string: + return L"a string"; + + case parse_token_type_pipe: + return L"a pipe"; + + case parse_token_type_redirection: + return L"a redirection"; + + case parse_token_type_background: + return L"a '&'"; + + case parse_token_type_end: + return L"end of the statement"; + + default: + return format_string(L"a %ls", token_type_description(type).c_str()); + } +} + /** Returns a string description of the given parse node */ wcstring parse_node_t::describe(void) const { @@ -263,32 +296,7 @@ wcstring parse_token_t::describe() const /** A string description appropriate for presentation to the user */ wcstring parse_token_t::user_presentable_description() const { - if (keyword != parse_keyword_none) - { - return format_string(L"keyword %ls", keyword_description(keyword).c_str()); - } - - switch (type) - { - /* Hackish. 
We only support the */ - case parse_token_type_string: - return L"a string"; - - case parse_token_type_pipe: - return L"a pipe"; - - case parse_token_type_redirection: - return L"a redirection"; - - case parse_token_type_background: - return L"a '&'"; - - case parse_token_type_end: - return L"statement terminator"; - - default: - return format_string(L"a %ls", this->describe().c_str()); - } + return token_type_user_presentable_description(type, keyword); } /* Convert from tokenizer_t's token type to a parse_token_t type */ @@ -443,20 +451,7 @@ struct parse_stack_element_t /* Returns a name that we can show to the user, e.g. "a command" */ wcstring user_presentable_description(void) const { - if (keyword != parse_keyword_none) - { - return format_string(L"keyword %ls", keyword_description(keyword).c_str()); - } - - switch (type) - { - /* Hackish, the only one we support now */ - case symbol_statement: - return L"a command"; - - default: - return format_string(L"a %ls", this->describe().c_str()); - } + return token_type_user_presentable_description(type, keyword); } }; @@ -751,7 +746,8 @@ void parse_ll_t::parse_error_unbalancing_token(parse_token_t token) // This is a 'generic' parse error when we can't match the top of the stack element void parse_ll_t::parse_error_failed_production(struct parse_stack_element_t &stack_elem, parse_token_t token) { - this->parse_error(token, parse_error_generic, L"Expected %ls, but instead found %ls", stack_elem.user_presentable_description().c_str(), token.user_presentable_description().c_str()); + const wcstring expected = stack_elem.user_presentable_description(); + this->parse_error(expected.c_str(), token); } void parse_ll_t::report_tokenizer_error(parse_token_t token, const wchar_t *tok_error) @@ -765,8 +761,7 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token) fatal_errored = true; if (this->should_generate_error_messages) { - wcstring desc = token.user_presentable_description(); - 
this->parse_error(token, parse_error_generic, L"Expected a %ls, instead got a token of type %ls", expected, desc.c_str()); + this->parse_error(token, parse_error_generic, L"Expected %ls, but instead found %ls", expected, token.user_presentable_description().c_str()); } } From 096f8504335577d05392436ddcffd5bceabef6d4 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 12 Jan 2014 22:39:12 -0800 Subject: [PATCH 100/108] Eliminate class parse_t --- builtin.cpp | 4 +- complete.cpp | 2 +- fish_tests.cpp | 145 +++++++++++++++++++++---------------------------- highlight.cpp | 6 +- parse_tree.cpp | 63 ++++----------------- parse_tree.h | 25 +-------- parse_util.cpp | 4 +- parser.cpp | 2 +- reader.cpp | 2 +- 9 files changed, 86 insertions(+), 167 deletions(-) diff --git a/builtin.cpp b/builtin.cpp index e4a3bf6ff..4aab71d43 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -4298,7 +4298,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) const wcstring src = str2wcstring(&txt.at(0), txt.size()); parse_node_tree_t parse_tree; parse_error_list_t errors; - bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors, true); + bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors, true); if (! 
success) { stdout_buffer.append(L"Parsing failed:\n"); @@ -4311,7 +4311,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv) stdout_buffer.append(L"(Reparsed with continue after error)\n"); parse_tree.clear(); errors.clear(); - parse_t::parse(src, parse_flag_continue_after_error, &parse_tree, &errors, true); + parse_tree_from_string(src, parse_flag_continue_after_error, &parse_tree, &errors, true); } const wcstring dump = parse_dump_tree(parse_tree, src); stdout_buffer.append(dump); diff --git a/complete.cpp b/complete.cpp index fdc62e1e9..33e0536bd 100644 --- a/complete.cpp +++ b/complete.cpp @@ -1839,7 +1839,7 @@ void complete(const wcstring &cmd_with_subcmds, std::vector &comps //const wcstring prev_token(prev_begin, prev_token_len); parse_node_tree_t tree; - parse_t::parse(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL); + parse_tree_from_string(cmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL); /* Find the plain statement that contains the position */ const parse_node_t *plain_statement = tree.find_node_matching_source_location(symbol_plain_statement, pos, NULL); diff --git a/fish_tests.cpp b/fish_tests.cpp index 0ce1092a2..070004b1b 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -660,11 +660,11 @@ static void test_parser() err(L"Invalid block mode when evaluating undetected"); } - /* These are disabled since they produce a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */ -#if 1 /* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) */ say(L"Testing recursion detection"); parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP); +#if 0 + /* This is disabled since it produces a long backtrace. 
We should find a way to either visually compress the backtrace, or disable error spewing */ parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP); #endif } @@ -720,10 +720,9 @@ static void test_cancellation() test_1_cancellation(L"while true ; end"); fprintf(stderr, "."); - test_1_cancellation(L"for i in (whiel true ; end) ; end"); + test_1_cancellation(L"for i in (while true ; end) ; end"); fprintf(stderr, "."); - fprintf(stderr, "\n"); /* Restore signal handling */ @@ -2234,7 +2233,7 @@ static void test_new_parser_correctness(void) const parser_test_t *test = &parser_tests[i]; parse_node_tree_t parse_tree; - bool success = parse_t::parse(test->src, parse_flag_none, &parse_tree, NULL); + bool success = parse_tree_from_string(test->src, parse_flag_none, &parse_tree, NULL); say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no"); if (success && ! test->ok) { @@ -2248,95 +2247,75 @@ static void test_new_parser_correctness(void) say(L"Parse tests complete"); } -struct parser_fuzz_token_t +/* Given that we have an array of 'fuzz_count' strings, we wish to enumerate all permutations of 'len' values. We do this by incrementing an integer, interpreting it as "base fuzz_count". 
*/ +static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_count, size_t len, size_t permutation, wcstring *out_str) { - parse_token_type_t token_type; - parse_keyword_t keyword; - - parser_fuzz_token_t() : token_type(FIRST_TERMINAL_TYPE), keyword(parse_keyword_none) + out_str->clear(); + + size_t remaining_permutation = permutation; + for (size_t i=0; i < len; i++) { + size_t idx = remaining_permutation % fuzz_count; + remaining_permutation /= fuzz_count; + + out_str->append(fuzzes[idx]); + out_str->push_back(L' '); } -}; - -static bool increment(std::vector &tokens) -{ - size_t i, end = tokens.size(); - for (i=0; i < end; i++) - { - bool wrapped = false; - - struct parser_fuzz_token_t &token = tokens[i]; - bool incremented_in_keyword = false; - if (token.token_type == parse_token_type_string) - { - // try incrementing the keyword - token.keyword++; - if (token.keyword <= LAST_KEYWORD) - { - incremented_in_keyword = true; - } - else - { - token.keyword = parse_keyword_none; - incremented_in_keyword = false; - } - } - - if (! incremented_in_keyword) - { - token.token_type++; - // Skip the very special parse_token_type_terminate, since that's always the last thing delivered - if (token.token_type == parse_token_type_terminate) - { - token.token_type++; - } - - if (token.token_type > LAST_TERMINAL_TYPE) - { - token.token_type = FIRST_TERMINAL_TYPE; - wrapped = true; - } - } - - if (! wrapped) - { - break; - } - } - return i == end; + // Return false if we wrapped + return remaining_permutation == 0; } static void test_new_parser_fuzzing(void) { say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t)); + const wcstring fuzzes[] = + { + L"if", + L"else", + L"for", + L"in", + L"while", + L"begin", + L"function", + L"switch", + L"case", + L"end", + L"and", + L"or", + L"not", + L"command", + L"builtin", + L"foo", + L"|", + L"^", + L"&", + L";", + }; + + /* Generate a list of strings of all keyword / token combinations. 
*/ + wcstring src; + src.reserve(128); + + parse_node_tree_t node_tree; + parse_error_list_t errors; + double start = timef(); - bool log_it = false; - // ensure nothing crashes - size_t max = 4; - for (size_t len=1; len <= max; len++) + bool log_it = true; + size_t max_len = 5; + for (size_t len = 0; len < max_len; len++) { if (log_it) - fprintf(stderr, "%lu / %lu...", len, max); - std::vector tokens(len); - size_t count = 0; - parse_t parser; - parse_node_tree_t parse_tree; - do - { - parser.clear(); - parse_tree.clear(); - count++; - for (size_t i=0; i < len; i++) - { - const parser_fuzz_token_t &token = tokens[i]; - parser.parse_1_token(token.token_type, token.keyword, &parse_tree, NULL); - } + fprintf(stderr, "%lu / %lu...", len, max_len); - // keep going until we wrap + /* We wish to look at all permutations of 4 elements of 'fuzzes' (with replacement). Construct an int and keep incrementing it. */ + size_t permutation = 0; + while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++, &src)) + { + parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors); } - while (! increment(tokens)); if (log_it) - fprintf(stderr, "done (%lu)\n", count); + fprintf(stderr, "done (%lu)\n", permutation); + } double end = timef(); if (log_it) @@ -2352,7 +2331,7 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o bool result = false; parse_node_tree_t tree; - if (parse_t::parse(src, parse_flag_none, &tree, NULL)) + if (parse_tree_from_string(src, parse_flag_none, &tree, NULL)) { /* Get the statement. 
Should only have one */ const parse_node_tree_t::parse_node_list_t stmt_nodes = tree.find_nodes(tree.at(0), symbol_plain_statement); @@ -2432,7 +2411,7 @@ static void test_new_parser_ad_hoc() /* Ensure that 'case' terminates a job list */ const wcstring src = L"switch foo ; case bar; case baz; end"; parse_node_tree_t parse_tree; - bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL); + bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL); if (! success) { err(L"Parsing failed"); @@ -2476,7 +2455,7 @@ static void test_new_parser_errors(void) parse_error_list_t errors; parse_node_tree_t parse_tree; - bool success = parse_t::parse(src, parse_flag_none, &parse_tree, &errors); + bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors); if (success) { err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str()); diff --git a/highlight.cpp b/highlight.cpp index e9923fb00..f24bd6f19 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -714,7 +714,7 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand /* Parse the buffer */ parse_node_tree_t parse_tree; - parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); /* Find the last statement */ const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL); @@ -1716,7 +1716,7 @@ class highlighter_t { /* Parse the tree */ this->parse_tree.clear(); - parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL); + parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &this->parse_tree, NULL); } /* Perform highlighting, returning an array of colors */ @@ -2062,7 +2062,7 @@ const highlighter_t::color_array_t & 
highlighter_t::highlight() /* Parse the buffer */ parse_node_tree_t parse_tree; - parse_t::parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); + parse_tree_from_string(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL); #if 0 const wcstring dump = parse_dump_tree(parse_tree, buff); diff --git a/parse_tree.cpp b/parse_tree.cpp index 536520fc8..b55f43bb5 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -973,15 +973,6 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) } } -parse_t::parse_t() : parser(new parse_ll_t()) -{ -} - -parse_t::~parse_t() -{ - delete parser; -} - static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) { parse_keyword_t result = parse_keyword_none; @@ -1056,9 +1047,10 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok) return result; } -bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) +bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) { - this->parser->set_should_generate_error_messages(errors != NULL); + parse_ll_t parser; + parser.set_should_generate_error_messages(errors != NULL); /* Construct the tokenizer */ tok_flags_t tok_options = 0; @@ -1090,16 +1082,16 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags } /* Pass these two tokens. We know that queue[0] is valid; queue[1] may be invalid. */ - this->parser->accept_tokens(queue[0], queue[1]); + parser.accept_tokens(queue[0], queue[1]); /* Handle tokenizer errors. 
This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */ if (queue[1].type == parse_special_type_tokenizer_error) { - this->parser->report_tokenizer_error(queue[1], tok_last(&tok)); + parser.report_tokenizer_error(queue[1], tok_last(&tok)); } /* Handle errors */ - if (this->parser->has_fatal_error()) + if (parser.has_fatal_error()) { if (parse_flags & parse_flag_continue_after_error) { @@ -1108,8 +1100,8 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags /* Mark a special error token, and then keep going */ const parse_token_t token = {parse_special_type_parse_error, parse_keyword_none, false, queue[error_token_idx].source_start, queue[error_token_idx].source_length}; - this->parser->accept_tokens(token, kInvalidToken); - this->parser->reset_symbols(); + parser.accept_tokens(token, kInvalidToken); + parser.reset_symbols(); } else { @@ -1123,10 +1115,10 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags // Teach each node where its source range is - this->parser->determine_node_ranges(); + parser.determine_node_ranges(); // Acquire the output from the parser - this->parser->acquire_output(output, errors); + parser.acquire_output(output, errors); #if 0 //wcstring result = dump_tree(this->parser->nodes, str); @@ -1135,40 +1127,7 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags #endif // Indicate if we had a fatal error - return ! 
this->parser->has_fatal_error(); -} - -bool parse_t::parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it) -{ - parse_t parse; - return parse.parse_internal(str, flags, output, errors, log_it); -} - -bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors) -{ - const parse_token_t invalid_token = {token_type_invalid, parse_keyword_none, -1, -1}; - - // Only strings can have keywords. So if we have a keyword, the type must be a string - assert(keyword == parse_keyword_none || token_type == parse_token_type_string); - - parse_token_t token; - token.type = token_type; - token.keyword = keyword; - token.source_start = -1; - token.source_length = 0; - - bool wants_errors = (errors != NULL); - this->parser->set_should_generate_error_messages(wants_errors); - - /* Passing invalid_token here is totally wrong. This code is only used in testing however. */ - this->parser->accept_tokens(token, invalid_token); - - return ! this->parser->has_fatal_error(); -} - -void parse_t::clear() -{ - this->parser->reset_symbols_and_nodes(); + return ! 
parser.has_fatal_error(); } const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const diff --git a/parse_tree.h b/parse_tree.h index 76cd80ec4..6ce82299d 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -74,28 +74,6 @@ enum }; typedef unsigned int parse_tree_flags_t; -class parse_ll_t; -class parse_t -{ - parse_ll_t * const parser; - - bool parse_internal(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); - -public: - parse_t(); - ~parse_t(); - - /* Parse a string all at once */ - static bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); - - /* Parse a single token */ - bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors); - - /* Reset, ready to parse something else */ - void clear(); - -}; - wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src); wcstring token_type_description(parse_token_type_t type); @@ -218,6 +196,9 @@ public: parse_node_list_t specific_statements_for_job(const parse_node_t &job) const; }; +/* The big entry point. Parse a string! */ +bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false); + /* Fish grammar: # A job_list is a list of jobs, separated by semicolons or newlines diff --git a/parse_util.cpp b/parse_util.cpp index cf196db1f..027a0e9bf 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -878,7 +878,7 @@ std::vector parse_util_compute_indents(const wcstring &src) /* Parse the string. We pass continue_after_error to produce a forest; the trailing indent of the last node we visited becomes the input indent of the next. I.e. 
in the case of 'switch foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it were a case item list */ parse_node_tree_t tree; - parse_t::parse(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */); + parse_tree_from_string(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */); /* Start indenting at the first node. If we have a parse error, we'll have to start indenting from the top again */ node_offset_t start_node_idx = 0; @@ -994,7 +994,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars // Parse the input string into a parse tree // Some errors are detected here - bool parsed = parse_t::parse(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors); + bool parsed = parse_tree_from_string(buff_src, parse_flag_leave_unterminated, &node_tree, &parse_errors); if (! parsed) { errored = true; diff --git a/parser.cpp b/parser.cpp index 5f3f5dc1a..e22596898 100644 --- a/parser.cpp +++ b/parser.cpp @@ -2581,7 +2581,7 @@ int parser_t::eval_new_parser(const wcstring &cmd, const io_chain_t &io, enum bl /* Parse the source into a tree, if we can */ parse_node_tree_t tree; - if (! parse_t::parse(cmd, parse_flag_none, &tree, NULL)) + if (! 
parse_tree_from_string(cmd, parse_flag_none, &tree, NULL)) { return 1; } diff --git a/reader.cpp b/reader.cpp index a2bda51e4..6cf77ae41 100644 --- a/reader.cpp +++ b/reader.cpp @@ -664,7 +664,7 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso /* Parse this subcmd */ parse_node_tree_t parse_tree; - parse_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); + parse_tree_from_string(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL); /* Look for plain statements where the cursor is at the end of the command */ const parse_node_t *matching_cmd_node = NULL; From 25b25e3e49b0489f35458370b8422b10e24db422 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 12 Jan 2014 23:52:33 -0800 Subject: [PATCH 101/108] Include missing algorithm header in parse_execution.cpp --- parse_execution.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/parse_execution.cpp b/parse_execution.cpp index 518ff479a..70454d15e 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -18,6 +18,7 @@ #include "wutil.h" #include "exec.h" #include "path.h" +#include /* These are the specific statement types that support redirections */ static bool specific_statement_type_is_redirectable_block(const parse_node_t &node) From 51c18e3f9a490cb6ae990442e783f7a0f106d5f1 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 13 Jan 2014 00:05:43 -0800 Subject: [PATCH 102/108] Turn off new parser for execution by default in preparation for merge with trunk --- parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser.cpp b/parser.cpp index e22596898..6831290d1 100644 --- a/parser.cpp +++ b/parser.cpp @@ -3129,7 +3129,7 @@ bool parser_use_ast(void) env_var_t var = env_get_string(L"fish_new_parser"); if (var.missing_or_empty()) { - return 10; + return 0; } else { From 746cc4c10b40cff7d0c88eac2448864506d8098b Mon Sep 17 00:00:00 2001 From: 
ridiculousfish Date: Mon, 13 Jan 2014 00:31:25 -0800 Subject: [PATCH 103/108] Rename highlight_shell_magic to highlight_shell_new_parser --- fish_tests.cpp | 1 - highlight.cpp | 4 ++-- highlight.h | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 070004b1b..e3da9a7f3 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -59,7 +59,6 @@ #include "iothread.h" #include "postfork.h" #include "signal.h" -#include "highlight.h" #include "parse_tree.h" #include "parse_util.h" diff --git a/highlight.cpp b/highlight.cpp index f24bd6f19..32a8a27d7 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -1245,7 +1245,7 @@ void highlight_shell(const wcstring &buff, std::vector &color, size_t pos, { if (1) { - highlight_shell_magic(buff, color, pos, error, vars); + highlight_shell_new_parser(buff, color, pos, error, vars); } else { @@ -2186,7 +2186,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight() return color_array; } -void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) +void highlight_shell_new_parser(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) { /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */ const wcstring working_directory = env_get_pwd_slash(); diff --git a/highlight.h b/highlight.h index eb123258c..40a535518 100644 --- a/highlight.h +++ b/highlight.h @@ -84,7 +84,7 @@ struct file_detection_context_t; \param error a list in which a description of each error will be inserted. May be 0, in whcich case no error descriptions will be generated. 
*/ void highlight_shell(const wcstring &buffstr, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); -void highlight_shell_magic(const wcstring &buffstr, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); +void highlight_shell_new_parser(const wcstring &buffstr, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); /** Perform syntax highlighting for the text in buff. Matching quotes and paranthesis are highlighted. The result is @@ -136,7 +136,7 @@ bool is_potential_path(const wcstring &const_path, const wcstring_list_t &direct /* For testing */ void highlight_shell_classic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); -void highlight_shell_magic(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); +void highlight_shell_new_parser(const wcstring &buff, std::vector &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars); #endif From 95f87cdd56602fe7c2746519f875741c325d2df8 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 13 Jan 2014 02:24:11 -0800 Subject: [PATCH 104/108] Support for special && and || error messages in new parser --- fish_tests.cpp | 5 ++++- parse_constants.h | 16 +++++++-------- parse_tree.cpp | 51 +++++++++++++++++++++++++++++++++++++---------- parse_tree.h | 2 +- parser.cpp | 17 ++++------------ 5 files changed, 57 insertions(+), 34 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index e3da9a7f3..319a3872a 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2444,7 +2444,10 @@ static void test_new_parser_errors(void) {L"if true ; end ; else", parse_error_unbalancing_else}, {L"case", parse_error_unbalancing_case}, - {L"if true ; case ; end", parse_error_unbalancing_case} + {L"if true ; case ; end", parse_error_unbalancing_case}, + + {L"foo || bar", parse_error_double_pipe}, 
+ {L"foo && bar", parse_error_double_background}, }; for (size_t i = 0; i < sizeof tests / sizeof *tests; i++) diff --git a/parse_constants.h b/parse_constants.h index 266d27e1c..e910507a6 100644 --- a/parse_constants.h +++ b/parse_constants.h @@ -119,6 +119,9 @@ enum parse_error_code_t parse_error_unbalancing_end, //end outside of block parse_error_unbalancing_else, //else outside of if parse_error_unbalancing_case, //case outside of switch + + parse_error_double_pipe, // foo || bar, has special error message + parse_error_double_background // foo && bar, has special error message }; enum { @@ -138,18 +141,15 @@ typedef unsigned int parser_test_error_bits_t; /** Error message on reaching maximum call stack depth */ #define CALL_STACK_LIMIT_EXCEEDED_ERR_MSG _( L"The function call stack limit has been exceeded. Do you have an accidental infinite loop?") -/** Error message when a non-string token is found when expecting a command name */ -#define CMD_ERR_MSG _( L"Expected a command name, got token of type '%ls'") +/** + Error message when a non-string token is found when expecting a command name +*/ +#define CMD_OR_ERR_MSG _( L"Expected a command, but instead found a pipe. Did you mean 'COMMAND; or COMMAND'? See the help section for the 'or' builtin command by typing 'help or'.") /** Error message when a non-string token is found when expecting a command name */ -#define CMD_OR_ERR_MSG _( L"Expected a command name, got token of type '%ls'. Did you mean 'COMMAND; or COMMAND'? See the help section for the 'or' builtin command by typing 'help or'.") - -/** - Error message when a non-string token is found when expecting a command name -*/ -#define CMD_AND_ERR_MSG _( L"Expected a command name, got token of type '%ls'. Did you mean 'COMMAND; and COMMAND'? See the help section for the 'and' builtin command by typing 'help and'.") +#define CMD_AND_ERR_MSG _( L"Expected a command, but instead found a '&'. Did you mean 'COMMAND; and COMMAND'? 
See the help section for the 'and' builtin command by typing 'help and'.") /** Error message when encountering an illegal command name diff --git a/parse_tree.cpp b/parse_tree.cpp index b55f43bb5..41873bbe9 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1,6 +1,7 @@ #include "parse_productions.h" #include "tokenizer.h" #include "fallback.h" +#include "wutil.h" #include "proc.h" #include #include @@ -746,8 +747,43 @@ void parse_ll_t::parse_error_unbalancing_token(parse_token_t token) // This is a 'generic' parse error when we can't match the top of the stack element void parse_ll_t::parse_error_failed_production(struct parse_stack_element_t &stack_elem, parse_token_t token) { - const wcstring expected = stack_elem.user_presentable_description(); - this->parse_error(expected.c_str(), token); + fatal_errored = true; + if (this->should_generate_error_messages) + { + bool done = false; + + /* Check for || */ + if (token.type == parse_token_type_pipe && token.source_start > 0) + { + /* Here we wanted a statement and instead got a pipe. See if this is a double pipe: foo || bar. If so, we have a special error message. */ + const parse_node_t *prev_pipe = nodes.find_node_matching_source_location(parse_token_type_pipe, token.source_start - 1, NULL); + if (prev_pipe != NULL) + { + /* The pipe of the previous job abuts our current token. So we have ||. */ + this->parse_error(token, parse_error_double_pipe, CMD_OR_ERR_MSG); + done = true; + } + } + + /* Check for && */ + if (! done && token.type == parse_token_type_background && token.source_start > 0) + { + /* Check to see if there was a previous token_background */ + const parse_node_t *prev_background = nodes.find_node_matching_source_location(parse_token_type_background, token.source_start - 1, NULL); + if (prev_background != NULL) + { + /* We have &&. */ + this->parse_error(token, parse_error_double_background, CMD_AND_ERR_MSG); + done = true; + } + } + + if (! 
done) + { + const wcstring expected = stack_elem.user_presentable_description(); + this->parse_error(expected.c_str(), token); + } + } } void parse_ll_t::report_tokenizer_error(parse_token_t token, const wchar_t *tok_error) @@ -936,15 +972,8 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2) const production_t *production = production_for_token(stack_elem.type, token1, token2, &node.production_idx, NULL /* error text */); if (production == NULL) { - if (should_generate_error_messages) - { - parse_error_failed_production(stack_elem, token1); - } - else - { - this->parse_error(token1, parse_error_generic, NULL); - } - // parse_error sets fatal_errored, which ends the loop + parse_error_failed_production(stack_elem, token1); + // the above sets fatal_errored, which ends the loop } else { diff --git a/parse_tree.h b/parse_tree.h index 6ce82299d..5cc0a4ccc 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -166,7 +166,7 @@ public: /* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */ const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const; - /* Finds a node containing the given source location */ + /* Finds a node containing the given source location. If 'parent' is not NULL, it must be an ancestor. */ const parse_node_t *find_node_matching_source_location(parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const; /* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */ diff --git a/parser.cpp b/parser.cpp index 6831290d1..3995d0393 100644 --- a/parser.cpp +++ b/parser.cpp @@ -78,15 +78,8 @@ The fish parser. Contains functions for parsing and evaluating code. */ #define BLOCK_END_ERR_MSG _( L"Could not locate end of block. 
The 'end' command is missing, misspelled or a ';' is missing.") -/** - Error message when a non-string token is found when expecting a command name -*/ -#define CMD_OR_ERR_MSG _( L"Expected a command name, got token of type '%ls'. Did you mean 'COMMAND; or COMMAND'? See the help section for the 'or' builtin command by typing 'help or'.") - -/** - Error message when a non-string token is found when expecting a command name -*/ -#define CMD_AND_ERR_MSG _( L"Expected a command name, got token of type '%ls'. Did you mean 'COMMAND; and COMMAND'? See the help section for the 'and' builtin command by typing 'help and'.") +/** Error message when a non-string token is found when expecting a command name */ +#define CMD_ERR_MSG _( L"Expected a command name, got token of type '%ls'") /** Error message when encountering an illegal command name @@ -1692,8 +1685,7 @@ int parser_t::parse_job(process_t *p, job_t *j, tokenizer_t *tok) { error(SYNTAX_ERROR, tok_get_pos(tok), - CMD_OR_ERR_MSG, - tok_get_desc(tok_last_type(tok))); + CMD_OR_ERR_MSG); } else { @@ -2530,8 +2522,7 @@ void parser_t::eval_job(tokenizer_t *tok) { error(SYNTAX_ERROR, tok_get_pos(tok), - CMD_AND_ERR_MSG, - tok_get_desc(tok_last_type(tok))); + CMD_AND_ERR_MSG); } else { From d9056081e78cc847574589d8ca31f2e25a82b6f5 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 13 Jan 2014 02:49:41 -0800 Subject: [PATCH 105/108] Added tests for piping an fd other than stdout --- parse_execution.cpp | 4 ++-- tests/test1.in | 8 +++++++- tests/test1.out | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/parse_execution.cpp b/parse_execution.cpp index 70454d15e..cc0a4ddd2 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -1011,7 +1011,7 @@ bool parse_execution_context_t::determine_io_chain(const parse_node_t &statement if (old_fd < 0 || errno || *end) { errored = report_error(redirect_node, - _(L"Requested redirection to something that is not a file descriptor %ls"), + _(L"Requested 
redirection to '%ls', which is not a valid file descriptor"), target.c_str()); } else @@ -1186,7 +1186,7 @@ parse_execution_result_t parse_execution_context_t::populate_job_from_job_node(j { assert(job_cont->type == symbol_job_continuation); - /* Handle the pipe, whose fd may not be the obvious stdoud */ + /* Handle the pipe, whose fd may not be the obvious stdout */ const parse_node_t &pipe_node = *get_child(*job_cont, 0, parse_token_type_pipe); processes.back()->pipe_write_fd = fd_redirected_by_pipe(get_source(pipe_node)); diff --git a/tests/test1.in b/tests/test1.in index c180159c8..7f60a4dad 100644 --- a/tests/test1.in +++ b/tests/test1.in @@ -15,7 +15,7 @@ echo x-{1} echo x-{1,2} echo foo-{1,2{3,4}} -# Escpaed newlines +# Escaped newlines echo foo\ bar echo foo\ bar @@ -99,6 +99,12 @@ echo Test 5 $sta echo Test redirections begin ; echo output ; echo errput 1>&2 ; end 2>&1 | tee /tmp/tee_test.txt ; cat /tmp/tee_test.txt +# Verify that we can pipe something other than stdout +# The first line should be printed, since we output to stdout but pipe stderr to /dev/null +# The second line should not be printed, since we output to stderr and pipe it to /dev/null +begin ; echo is_stdout ; end 2>| cat > /dev/null +begin ; echo is_stderr 1>&2 ; end 2>| cat > /dev/null + # echo tests echo 'abc\ndef' diff --git a/tests/test1.out b/tests/test1.out index c6ecbb308..b3460cdde 100644 --- a/tests/test1.out +++ b/tests/test1.out @@ -23,6 +23,7 @@ errput output errput output +is_stdout abc\ndef abc def From 6fc1d7dc7705c9e6376ae220f1f7afa85434dc26 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 13 Jan 2014 03:57:59 -0800 Subject: [PATCH 106/108] Further cleanup and improvements to error messages --- parse_constants.h | 72 +++++---------------------------------------- parse_execution.cpp | 8 ++++- parser.cpp | 7 +---- 3 files changed, 15 insertions(+), 72 deletions(-) diff --git a/parse_constants.h b/parse_constants.h index e910507a6..104af27f4 100644 --- 
a/parse_constants.h +++ b/parse_constants.h @@ -141,36 +141,21 @@ typedef unsigned int parser_test_error_bits_t; /** Error message on reaching maximum call stack depth */ #define CALL_STACK_LIMIT_EXCEEDED_ERR_MSG _( L"The function call stack limit has been exceeded. Do you have an accidental infinite loop?") -/** - Error message when a non-string token is found when expecting a command name -*/ +/** Error message when a non-string token is found when expecting a command name */ #define CMD_OR_ERR_MSG _( L"Expected a command, but instead found a pipe. Did you mean 'COMMAND; or COMMAND'? See the help section for the 'or' builtin command by typing 'help or'.") -/** - Error message when a non-string token is found when expecting a command name -*/ +/** Error message when a non-string token is found when expecting a command name */ #define CMD_AND_ERR_MSG _( L"Expected a command, but instead found a '&'. Did you mean 'COMMAND; and COMMAND'? See the help section for the 'and' builtin command by typing 'help and'.") -/** - Error message when encountering an illegal command name -*/ +/** Error message when encountering an illegal command name */ #define ILLEGAL_CMD_ERR_MSG _( L"Illegal command name '%ls'") -/** - Error message when encountering an illegal file descriptor -*/ -#define ILLEGAL_FD_ERR_MSG _( L"Illegal file descriptor '%ls'") +/** Error message when encountering an illegal file descriptor */ +#define ILLEGAL_FD_ERR_MSG _( L"Illegal file descriptor in redirection '%ls'") -/** - Error message for wildcards with no matches -*/ +/** Error message for wildcards with no matches */ #define WILDCARD_ERR_MSG _( L"No matches for wildcard '%ls'.") -/** - Error when using case builtin outside of switch block -*/ -#define INVALID_CASE_ERR_MSG _( L"'case' builtin not inside of switch block") - /** Error when using break outside of loop */ #define INVALID_BREAK_ERR_MSG _( L"break command while not inside of loop" ) @@ -180,52 +165,9 @@ typedef unsigned int 
parser_test_error_bits_t; /** Error when using return builtin outside of function definition */ #define INVALID_RETURN_ERR_MSG _( L"'return' builtin command outside of function definition" ) -/** - Error when using else builtin outside of if block -*/ -#define INVALID_ELSE_ERR_MSG _( L"'%ls' builtin not inside of if block" ) - -/** - Error when using 'else if' past a naked 'else' -*/ -#define INVALID_ELSEIF_PAST_ELSE_ERR_MSG _( L"'%ls' used past terminating 'else'" ) - -/** - Error when using end builtin outside of block -*/ -#define INVALID_END_ERR_MSG _( L"'end' command outside of block") - -/** - Error message for Posix-style assignment: foo=bar -*/ +/** Error message for Posix-style assignment: foo=bar */ #define COMMAND_ASSIGN_ERR_MSG _( L"Unknown command '%ls'. Did you mean 'set %ls %ls'? See the help section on the set command by typing 'help set'.") -/** - Error for invalid redirection token -*/ -#define REDIRECT_TOKEN_ERR_MSG _( L"Expected redirection specification, got token of type '%ls'") - -/** - Error when encountering redirection without a command -*/ -#define INVALID_REDIRECTION_ERR_MSG _( L"Encountered redirection when expecting a command name. Fish does not allow a redirection operation before a command.") - -/** - Error for evaluating null pointer -*/ -#define EVAL_NULL_ERR_MSG _( L"Tried to evaluate null pointer." 
) - -/** - Error for evaluating in illegal scope -*/ -#define INVALID_SCOPE_ERR_MSG _( L"Tried to evaluate commands using invalid block type '%ls'" ) - - -/** - Error for wrong token type -*/ -#define UNEXPECTED_TOKEN_ERR_MSG _( L"Unexpected token of type '%ls'") - /** While block description */ diff --git a/parse_execution.cpp b/parse_execution.cpp index cc0a4ddd2..d30f3b994 100644 --- a/parse_execution.cpp +++ b/parse_execution.cpp @@ -1188,7 +1188,13 @@ parse_execution_result_t parse_execution_context_t::populate_job_from_job_node(j /* Handle the pipe, whose fd may not be the obvious stdout */ const parse_node_t &pipe_node = *get_child(*job_cont, 0, parse_token_type_pipe); - processes.back()->pipe_write_fd = fd_redirected_by_pipe(get_source(pipe_node)); + int pipe_write_fd = fd_redirected_by_pipe(get_source(pipe_node)); + if (pipe_write_fd == -1) + { + result = report_error(pipe_node, ILLEGAL_FD_ERR_MSG, get_source(pipe_node).c_str()); + break; + } + processes.back()->pipe_write_fd = pipe_write_fd; /* Get the statement node and make a process from it */ const parse_node_t *statement_node = get_child(*job_cont, 1, symbol_statement); diff --git a/parser.cpp b/parser.cpp index 3995d0393..ffcec02ad 100644 --- a/parser.cpp +++ b/parser.cpp @@ -89,7 +89,7 @@ The fish parser. Contains functions for parsing and evaluating code. /** Error message when encountering an illegal file descriptor */ -#define ILLEGAL_FD_ERR_MSG _( L"Illegal file descriptor '%ls'") +#define ILLEGAL_FD_ERR_MSG _( L"Illegal file descriptor in redirection '%ls'") /** Error message for wildcards with no matches @@ -141,11 +141,6 @@ The fish parser. Contains functions for parsing and evaluating code. */ #define INVALID_REDIRECTION_ERR_MSG _( L"Encountered redirection when expecting a command name. Fish does not allow a redirection operation before a command.") -/** - Error for evaluating null pointer -*/ -#define EVAL_NULL_ERR_MSG _( L"Tried to evaluate null pointer." 
) - /** Error for evaluating in illegal scope */ From eb28c710baa385cbd9b979e99736ef65b16de0db Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 13 Jan 2014 12:51:09 -0800 Subject: [PATCH 107/108] Improve the 'This command can not be used in a pipeline' message to actually name the command --- parse_util.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parse_util.cpp b/parse_util.cpp index 027a0e9bf..eded7b93f 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -44,7 +44,7 @@ /** Error message for improper use of the exec builtin */ -#define EXEC_ERR_MSG _(L"This command can not be used in a pipeline") +#define EXEC_ERR_MSG _(L"The '%ls' command can not be used in a pipeline") int parse_util_lineno(const wchar_t *str, size_t offset) { @@ -1035,7 +1035,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars // 'or' and 'and' can be first in the pipeline. forbidden commands cannot be in a pipeline at all if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden)) { - errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG); + errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG, command.c_str()); } } From 212eeaa77c7408893df92aa9b312855bfc9dcd8e Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Mon, 13 Jan 2014 13:14:18 -0800 Subject: [PATCH 108/108] Correctly report errors for 'and' and 'or' in pipelines with new parser --- fish_tests.cpp | 17 +++++++++++++++++ parse_tree.cpp | 6 ++++-- parse_tree.h | 2 +- parse_util.cpp | 20 ++++++++++++++------ 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/fish_tests.cpp b/fish_tests.cpp index 319a3872a..fab103351 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -645,6 +645,23 @@ static void test_parser() err(L"'break' command inside switch falsely reported as error"); } + if (parse_util_detect_errors(L"or cat | cat") || parse_util_detect_errors(L"and cat | cat")) + { + err(L"boolean command at beginning of pipeline 
falsely reported as error"); + } + + if (! parse_util_detect_errors(L"cat | and cat")) + { + err(L"'and' command in pipeline not reported as error"); + } + + if (! parse_util_detect_errors(L"cat | exec") || ! parse_util_detect_errors(L"exec | cat")) + { + err(L"'exec' command in pipeline not reported as error"); + } + + + say(L"Testing basic evaluation"); #if 0 diff --git a/parse_tree.cpp b/parse_tree.cpp index 41873bbe9..6bb7a3cd3 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -1357,13 +1357,15 @@ bool parse_node_tree_t::command_for_plain_statement(const parse_node_t &node, co return result; } -bool parse_node_tree_t::plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const +bool parse_node_tree_t::statement_is_in_pipeline(const parse_node_t &node, bool include_first) const { // Moderately nasty hack! Walk up our ancestor chain and see if we are in a job_continuation. This checks if we are in the second or greater element in a pipeline; if we are the first element we treat this as false + // This accepts a few statement types bool result = false; const parse_node_t *ancestor = &node; - if (ancestor) + // If we're given a plain statement, try to get its decorated statement parent + if (ancestor && ancestor->type == symbol_plain_statement) ancestor = this->get_parent(*ancestor, symbol_decorated_statement); if (ancestor) ancestor = this->get_parent(*ancestor, symbol_statement); diff --git a/parse_tree.h b/parse_tree.h index 5cc0a4ccc..f77b87811 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -181,7 +181,7 @@ public: bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const; /* Given a plain statement, return true if the statement is part of a pipeline. 
If include_first is set, the first command in a pipeline is considered part of it; otherwise only the second or additional commands are */ - bool plain_statement_is_in_pipeline(const parse_node_t &node, bool include_first) const; + bool statement_is_in_pipeline(const parse_node_t &node, bool include_first) const; /* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */ enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, wcstring *out_target) const; diff --git a/parse_util.cpp b/parse_util.cpp index eded7b93f..491e47328 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -1016,6 +1016,16 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars // an 'end' without source is an unclosed block has_unclosed_block = true; } + else if (node.type == symbol_boolean_statement) + { + // 'or' and 'and' can be in a pipeline, as long as they're first + // These numbers 0 and 1 correspond to productions for boolean_statement. This should be cleaned up. + bool is_and = (node.production_idx == 0), is_or = (node.production_idx == 1); + if ((is_and || is_or) && node_tree.statement_is_in_pipeline(node, false /* don't count first */)) + { + errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG, is_and ? L"and" : L"or"); + } + } else if (node.type == symbol_plain_statement) { wcstring command; @@ -1028,12 +1038,10 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars } // Check that pipes are sound - bool is_boolean_command = contains(command, L"or", L"and"); - bool is_pipe_forbidden = parser_is_pipe_forbidden(command); - if (! errored && (is_boolean_command || is_pipe_forbidden)) + if (! errored && parser_is_pipe_forbidden(command)) { - // 'or' and 'and' can be first in the pipeline. 
forbidden commands cannot be in a pipeline at all - if (node_tree.plain_statement_is_in_pipeline(node, is_pipe_forbidden)) + // forbidden commands cannot be in a pipeline at all + if (node_tree.statement_is_in_pipeline(node, true /* count first */)) { errored = append_syntax_error(&parse_errors, node, EXEC_ERR_MSG, command.c_str()); } @@ -1062,7 +1070,7 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, pars } } - // Check that we don't return from outside a function + // Check that we don't break or continue from outside a loop if (! errored && (command == L"break" || command == L"continue")) { // Walk up until we hit a 'for' or 'while' loop. If we hit a function first, stop the search; we can't break an outer loop from inside a function.