Support for stack overflow and infinite recursion detection in new

parser
2025-03-25 12:45:14 +08:00 · 2014-01-01 15:29:56 -08:00 · 2014-01-01 15:29:56 -08:00 · fb882f0b69
commit fb882f0b69
parent 27cba56761
10 changed files with 204 additions and 46 deletions
--- a/builtin.cpp
+++ b/builtin.cpp
@ -690,7 +690,7 @@ static int builtin_bind(parser_t &parser, wchar_t **argv)
                default:
                {
                    res = STATUS_BUILTIN_ERROR;
-                    append_format(stderr_buffer, _(L"%ls: Expected zero or two parameters, got %d"), argv[0], argc-woptind);
+                    append_format(stderr_buffer, _(L"%ls: Expected zero or two parameters, got %d\n"), argv[0], argc-woptind);
                    break;
                }
            }
--- a/complete.cpp
+++ b/complete.cpp
@ -1230,7 +1230,7 @@ void completer_t::complete_from_args(const wcstring &str,
    std::vector<completion_t> possible_comp;

    bool is_autosuggest = (this->type() == COMPLETE_AUTOSUGGEST);
-    parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false);
+    parser_t parser(is_autosuggest ? PARSER_TYPE_COMPLETIONS_ONLY : PARSER_TYPE_GENERAL, false /* don't show errors */);

    /* If type is COMPLETE_AUTOSUGGEST, it means we're on a background thread, so don't call proc_push_interactive */
    if (! is_autosuggest)
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@ -659,6 +659,14 @@ static void test_parser()
    {
        err(L"Invalid block mode when evaluating undetected");
    }
+    
+    /* These are disabled since they produce a long backtrace. We should find a way to either visually compress the backtrace, or disable error spewing */
+#if 1
+    /* Ensure that we don't crash on infinite self recursion and mutual recursion. These must use the principal parser because we cannot yet execute jobs on other parsers (!) */
+    say(L"Testing recursion detection");
+    parser_t::principal_parser().eval(L"function recursive ; recursive ; end ; recursive; ", io_chain_t(), TOP);
+    parser_t::principal_parser().eval(L"function recursive1 ; recursive2 ; end ; function recursive2 ; recursive1 ; end ; recursive1; ", io_chain_t(), TOP);
+#endif
 }

 static void test_indents()
--- a/parse_constants.h
+++ b/parse_constants.h
@ -128,6 +128,8 @@ enum {
 typedef unsigned int parser_test_error_bits_t;


+/** Maximum number of function calls. */
+#define FISH_MAX_STACK_DEPTH 128

 /**
   Error message for tokenizer error. The tokenizer message is
@ -140,15 +142,15 @@ typedef unsigned int parser_test_error_bits_t;
 */
 #define COND_ERR_MSG _( L"An additional command is required" )

-/**
-   Error message on a function that calls itself immediately
-*/
+/** Error message on a function that calls itself immediately */
 #define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.")

-/**
-   Error message on reaching maximum recursion depth
-*/
-#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?")
+/** Error message on a function that calls itself immediately */
+#define INFINITE_FUNC_RECURSION_ERR_MSG _( L"The function '%ls' calls itself immediately, which would result in an infinite loop.")
+
+
+/** Error message on reaching maximum call stack depth */
+#define CALL_STACK_LIMIT_EXCEEDED_ERR_MSG _( L"The function call stack limit has been exceeded. Do you have an accidental infinite loop?")

 /**
   Error message used when the end of a block can't be located
--- a/parse_execution.cpp
+++ b/parse_execution.cpp
@ -48,6 +48,81 @@ node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) co
    return offset;
 }

+const parse_node_t *parse_execution_context_t::infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const
+{
+    assert(job_list.type == symbol_job_list);
+    /*
+      This is a bit fragile. It is a test to see if we are
+      inside of function call, but not inside a block in that
+      function call. If, in the future, the rules for what
+      block scopes are pushed on function invocation changes,
+      then this check will break.
+    */
+    const block_t *current = parser->block_at_index(0), *parent = parser->block_at_index(1);
+    bool is_within_function_call = (current && parent && current->type() == TOP && parent->type() == FUNCTION_CALL);
+    if (! is_within_function_call)
+    {
+        return NULL;
+    }
+    
+    /* Check to see which function call is forbidden */
+    if (parser->forbidden_function.empty())
+    {
+        return NULL;
+    }
+    const wcstring &forbidden_function_name = parser->forbidden_function.back();
+    
+    /* Get the first job in the job list. */
+    const parse_node_t *first_job = tree.next_job_in_job_list(job_list, NULL);
+    if (first_job == NULL)
+    {
+        return NULL;
+    }
+
+    /* Here's the statement node we find that's infinite recursive */
+    const parse_node_t *infinite_recursive_statement = NULL;
+    
+    /* Get the list of statements */
+    const parse_node_tree_t::parse_node_list_t statements = tree.specific_statements_for_job(*first_job);
+    
+    /* Find all the decorated statements. We are interested in statements with no decoration (i.e. not command, not builtin) whose command expands to the forbidden function */
+    for (size_t i=0; i < statements.size(); i++)
+    {
+        /* We only care about decorated statements, not while statements, etc. */
+        const parse_node_t &statement = *statements.at(i);
+        if (statement.type != symbol_decorated_statement)
+        {
+            continue;
+        }
+        
+        const parse_node_t &plain_statement = tree.find_child(statement, symbol_plain_statement);
+        if (tree.decoration_for_plain_statement(plain_statement) != parse_statement_decoration_none)
+        {
+            /* This statement has a decoration like 'builtin' or 'command', and therefore is not infinite recursion. In particular this is what enables 'wrapper functions' */
+            continue;
+        }
+        
+        /* Ok, this is an undecorated plain statement. Get and expand its command */
+        wcstring cmd;
+        tree.command_for_plain_statement(plain_statement, src, &cmd);
+        expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES);
+        
+        if (cmd == forbidden_function_name)
+        {
+            /* This is it */
+            infinite_recursive_statement = &statement;
+            if (out_func_name != NULL)
+            {
+                *out_func_name = forbidden_function_name;
+            }
+            break;
+        }
+    }
+    
+    assert(infinite_recursive_statement == NULL || infinite_recursive_statement->type == symbol_decorated_statement);
+    return infinite_recursive_statement;
+}
+
 enum process_type_t parse_execution_context_t::process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const
 {
    assert(plain_statement.type == symbol_plain_statement);
@ -528,12 +603,10 @@ parse_execution_result_t parse_execution_context_t::report_error(const parse_nod
    error.text = vformat_string(fmt, va);
    va_end(va);
    
-    /* Output the error */
-    const wcstring desc = error.describe(this->src);
-    if (! desc.empty())
-    {
-        fprintf(stderr, "%ls\n", desc.c_str());
-    }
+    /* Get a backtrace */
+    wcstring backtrace;
+    const parse_error_list_t error_list = parse_error_list_t(1, error);
+    parser->get_backtrace(src, error_list, &backtrace);
    
    return parse_execution_errored;
 }
@ -669,7 +742,7 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t
    bool got_cmd = tree.command_for_plain_statement(statement, src, &cmd);
    assert(got_cmd);
    
-    /* Expand it as a command. Return NULL on failure. */
+    /* Expand it as a command. Return an error on failure. */
    bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES);
    if (! expanded)
    {
@ -680,6 +753,13 @@ parse_execution_result_t parse_execution_context_t::populate_plain_process(job_t
    /* Determine the process type */
    enum process_type_t process_type = process_type_for_command(statement, cmd);
    
+    /* Check for stack overflow */
+    if (process_type == INTERNAL_FUNCTION && parser->forbidden_function.size() > FISH_MAX_STACK_DEPTH)
+    {
+        this->report_error(statement, CALL_STACK_LIMIT_EXCEEDED_ERR_MSG);
+        return parse_execution_errored;
+    }
+    
    wcstring actual_cmd;
    if (process_type == EXTERNAL)
    {
@ -1296,10 +1376,24 @@ parse_execution_result_t parse_execution_context_t::eval_node_at_offset(node_off
    switch (node.type)
    {
        case symbol_job_list:
+        {
            /* We should only get a job list if it's the very first node. This is because this is the entry point for both top-level execution (the first node) and INTERNAL_BLOCK_NODE execution (which does block statements, but never job lists) */
            assert(offset == 0);
-            status = this->run_job_list(node, associated_block);
+            wcstring func_name;
+            const parse_node_t *infinite_recursive_node = this->infinite_recursive_statement_in_job_list(node, &func_name);
+            if (infinite_recursive_node != NULL)
+            {
+                /* We have an infinite recursion */
+                this->report_error(*infinite_recursive_node, INFINITE_FUNC_RECURSION_ERR_MSG, func_name.c_str());
+                status = parse_execution_errored;
+            }
+            else
+            {
+                /* No infinite recursion */
+                status = this->run_job_list(node, associated_block);
+            }
            break;
+        }
        
        case symbol_block_statement:
            status = this->run_block_statement(node);
--- a/parse_execution.h
+++ b/parse_execution.h
@ -70,6 +70,7 @@ class parse_execution_context_t
    wcstring get_source(const parse_node_t &node) const;
    const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
    node_offset_t get_offset(const parse_node_t &node) const;
+    const parse_node_t *infinite_recursive_statement_in_job_list(const parse_node_t &job_list, wcstring *out_func_name) const;
    
    enum process_type_t process_type_for_command(const parse_node_t &plain_statement, const wcstring &cmd) const;
    
--- a/parse_tree.cpp
+++ b/parse_tree.cpp
@ -1405,7 +1405,7 @@ enum token_type parse_node_tree_t::type_for_redirection(const parse_node_t &redi
    return result;
 }

-const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node)
+const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const parse_node_t &node) const
 {
    const parse_node_t *result = NULL;
    if (node.type == symbol_block_statement)
@ -1418,3 +1418,63 @@ const parse_node_t *parse_node_tree_t::header_node_for_block_statement(const par
    }
    return result;
 }
+
+parse_node_tree_t::parse_node_list_t parse_node_tree_t::specific_statements_for_job(const parse_node_t &job) const
+{
+    assert(job.type == symbol_job);
+    parse_node_list_t result;
+    
+    /* Initial statement (non-specific) */
+    result.push_back(get_child(job, 0, symbol_statement));
+    
+    /* Our cursor variable. Walk over the list of continuations. */
+    const parse_node_t *continuation = get_child(job, 1, symbol_job_continuation);
+    while (continuation != NULL && continuation->child_count > 0)
+    {
+        result.push_back(get_child(*continuation, 1, symbol_statement));
+        continuation = get_child(*continuation, 2, symbol_job_continuation);
+    }
+    
+    /* Result now contains a list of statements. But we want a list of specific statements e.g. symbol_switch_statement. So replace them in-place in the vector. */
+    for (size_t i=0; i < result.size(); i++)
+    {
+        const parse_node_t *statement = result.at(i);
+        assert(statement->type == symbol_statement);
+        result.at(i) = this->get_child(*statement, 0);
+    }
+    
+    return result;
+}
+
+const parse_node_t *parse_node_tree_t::next_job_in_job_list(const parse_node_t &top_job_list, const parse_node_t **out_list_tail) const
+{
+    assert(top_job_list.type == symbol_job_list);
+    
+    /* Our cursor variable */
+    const parse_node_t *job_list = &top_job_list;
+    
+    /* Skip over a run of empty jobs */
+    assert(job_list->type == symbol_job_list);
+    while (job_list->production_idx == 2)
+    {
+        job_list = this->get_child(*job_list, 1, symbol_job_list);
+    }
+    
+    /* Should now be at production 0 or 1 */
+    assert(job_list->type == symbol_job_list);
+    assert(job_list->production_idx == 0 || job_list->production_idx == 1);
+    
+    /* Pull out the job */
+    const parse_node_t *job = NULL;
+    const parse_node_t *list_tail = NULL;
+    if (job_list->production_idx == 1)
+    {
+        job = this->get_child(*job_list, 0, symbol_job);
+        list_tail = this->get_child(*job_list, 1, symbol_job_list);
+    }
+    
+    /* Return them */
+    if (out_list_tail != NULL)
+        *out_list_tail = list_tail;
+    return job;
+}
--- a/parse_tree.h
+++ b/parse_tree.h
@ -209,7 +209,13 @@ public:
    enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, wcstring *out_target) const;
    
    /* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */
-    const parse_node_t *header_node_for_block_statement(const parse_node_t &node);
+    const parse_node_t *header_node_for_block_statement(const parse_node_t &node) const;
+    
+    /* Given a job list, returns the next job (or NULL), and the tail of the job list. */
+    const parse_node_t *next_job_in_job_list(const parse_node_t &job_list, const parse_node_t **list_tail) const;
+    
+    /* Given a job, return all of its statements. These are 'specific statements' (e.g. symbol_decorated_statement, not symbol_statement) */
+    parse_node_list_t specific_statements_for_job(const parse_node_t &job) const;
 };

 /* Fish grammar:
@ -252,7 +258,7 @@ public:
    while_header = WHILE job
    begin_header = BEGIN
    
-# Functions take arguments, and require at least one (the name)    
+# Functions take arguments, and require at least one (the name). No redirections allowed.
    function_header = FUNCTION argument argument_list

 # A boolean statement is AND or OR or NOT
--- a/parser.cpp
+++ b/parser.cpp
@ -47,11 +47,6 @@ The fish parser. Contains functions for parsing and evaluating code.
 #include "parse_tree.h"
 #include "parse_execution.h"

-/**
-   Maximum number of function calls, i.e. recursion depth.
-*/
-#define MAX_RECURSION_DEPTH 128
-
 /**
   Error message for unknown builtin
 */
@ -78,11 +73,6 @@ The fish parser. Contains functions for parsing and evaluating code.
 */
 #define INFINITE_RECURSION_ERR_MSG _( L"The function calls itself immediately, which would result in an infinite loop.")

-/**
-   Error message on reaching maximum recursion depth
-*/
-#define OVERFLOW_RECURSION_ERR_MSG _( L"Maximum recursion depth reached. Accidental infinite loop?")
-
 /**
   Error message used when the end of a block can't be located
 */
@ -584,12 +574,6 @@ void parser_t::error(int ec, size_t p, const wchar_t *str, ...)
    va_start(va, str);
    err_buff = vformat_string(str, va);
    va_end(va);
-    
-    if (parser_use_ast())
-    {
-        fprintf(stderr, "parser error: %ls\n", err_buff.c_str());
-        err_buff.clear();
-    }
 }

 /**
@ -746,7 +730,6 @@ void parser_t::print_errors_stderr()

 void parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
 {
-
    expand_flags_t eflags = 0;
    if (! show_errors)
        eflags |= EXPAND_NO_DESCRIPTIONS;
@ -757,7 +740,7 @@ void parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)

    if (! line) return;

-    // PCA we need to suppress calling proc_push_interactive off of the main thread. I'm not sure exactly what it does.
+    // PCA we need to suppress calling proc_push_interactive off of the main thread.
    if (this->parser_type == PARSER_TYPE_GENERAL)
        proc_push_interactive(0);

@ -988,10 +971,7 @@ int parser_t::line_number_of_character_at_offset(size_t idx) const

 const wchar_t *parser_t::current_filename() const
 {
-    /* We query a global array for the current file name, so it only makes sense to ask this on the principal parser. */
    ASSERT_IS_MAIN_THREAD();
-    assert(this == &principal_parser());
-

    for (size_t i=0; i < this->block_count(); i++)
    {
@ -1002,7 +982,13 @@ const wchar_t *parser_t::current_filename() const
            return function_get_definition_file(fb->name);
        }
    }
-    return reader_current_filename();
+    
+    /* We query a global array for the current file name, but only do that if we are the principal parser */
+    if (this == &principal_parser())
+    {
+        return reader_current_filename();
+    }
+    return NULL;
 }

 /**
@ -1221,7 +1207,7 @@ void parser_t::job_promote(job_t *job)
 {
    signal_block();

-    job_list_t::iterator loc = std::find(my_job_list.begin(), my_job_list.end(), job);
+    job_list_t::iterator loc = std::find(my_job_list.begin(), my_job_list.end(), job);    
    assert(loc != my_job_list.end());

    /* Move the job to the beginning */
@ -1940,9 +1926,9 @@ int parser_t::parse_job(process_t *p, job_t *j, tokenizer_t *tok)
                /*
                  Check if we have reached the maximum recursion depth
                */
-                if (forbidden_function.size() > MAX_RECURSION_DEPTH)
+                if (forbidden_function.size() > FISH_MAX_STACK_DEPTH)
                {
-                    error(SYNTAX_ERROR, tok_get_pos(tok), OVERFLOW_RECURSION_ERR_MSG);
+                    error(SYNTAX_ERROR, tok_get_pos(tok), CALL_STACK_LIMIT_EXCEEDED_ERR_MSG);
                }
                else
                {
--- a/proc.cpp
+++ b/proc.cpp
@ -136,7 +136,8 @@ static bool proc_had_barrier = false;
 int get_is_interactive(void)
 {
    ASSERT_IS_MAIN_THREAD();
-    return is_interactive;
+    // The tests leave is_interactive as -1, which is interpreted as true. So let's have them default to false.
+    return is_interactive > 0;
 }

 bool get_proc_had_barrier()