fish-shell/src/parse_tree.h
ridiculousfish 151e75d141 Autosuggestions to validate the first command, not the last command
When considering an autosuggestion from history, we attempt to validate the
command to ensure that we don't suggest invalid (e.g. path-dependent)
commands. Prior to this fix, we would validate the last command in the
command line (e.g. in `cd /bin && ./stuff` we would validate "./stuff".
This doesn't really make sense; we should be validating the first command
because it has the potential to change the PWD. Switch to validating the
first command.

Also remove some helper functions that became dead through this change.
2019-11-02 13:40:31 -07:00

228 lines
8.9 KiB
C++

// Programmatic representation of fish code.
#ifndef FISH_PARSE_PRODUCTIONS_H
#define FISH_PARSE_PRODUCTIONS_H
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>
#include <memory>
#include <vector>
#include "common.h"
#include "maybe.h"
#include "parse_constants.h"
#include "parse_grammar.h"
#include "tokenizer.h"
class parse_node_tree_t;
typedef uint32_t node_offset_t;
#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))
typedef uint32_t source_offset_t;
constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
/// A struct representing the token type that we use internally.
struct parse_token_t {
enum parse_token_type_t type; // The type of the token as represented by the parser
enum parse_keyword_t keyword { parse_keyword_none }; // Any keyword represented by this token
bool has_dash_prefix{false}; // Hackish: whether the source contains a dash prefix
bool is_help_argument{false}; // Hackish: whether the source looks like '-h' or '--help'
bool is_newline{false}; // Hackish: if TOK_END, whether the source is a newline.
bool preceding_escaped_nl{false}; // Whether there was an escaped newline preceding this token.
source_offset_t source_start{SOURCE_OFFSET_INVALID};
source_offset_t source_length{0};
wcstring describe() const;
wcstring user_presentable_description() const;
constexpr parse_token_t(parse_token_type_t type) : type(type) {}
};
enum {
parse_flag_none = 0,
/// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
/// disconnected trees. This is intended to be used by syntax highlighting.
parse_flag_continue_after_error = 1 << 0,
/// Include comment tokens.
parse_flag_include_comments = 1 << 1,
/// Indicate that the tokenizer should accept incomplete tokens */
parse_flag_accept_incomplete_tokens = 1 << 2,
/// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
/// tree where some nodes may have no productions.
parse_flag_leave_unterminated = 1 << 3,
/// Indicate that the parser should generate job_list entries for blank lines.
parse_flag_show_blank_lines = 1 << 4
};
typedef unsigned int parse_tree_flags_t;
wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
const wchar_t *token_type_description(parse_token_type_t type);
const wchar_t *keyword_description(parse_keyword_t type);
// Node flags.
enum {
/// Flag indicating that the node has associated comment nodes.
parse_node_flag_has_comments = 1 << 0,
/// Flag indicating that the token was preceded by an escaped newline, e.g.
/// echo abc | \
/// cat
parse_node_flag_preceding_escaped_nl = 1 << 1,
};
typedef uint8_t parse_node_flags_t;
/// Node-type specific tag value.
typedef uint8_t parse_node_tag_t;
/// Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields
/// is important.
class parse_node_t {
public:
// Start in the source code.
source_offset_t source_start;
// Length of our range in the source code.
source_offset_t source_length;
// Parent
node_offset_t parent;
// Children
node_offset_t child_start;
// Number of children.
uint8_t child_count;
// Type of the node.
enum parse_token_type_t type;
// Keyword associated with node.
enum parse_keyword_t keyword;
// Node flags.
parse_node_flags_t flags : 4;
// This is used to store e.g. the statement decoration.
parse_node_tag_t tag : 4;
// Description
wcstring describe() const;
// Constructor
explicit parse_node_t(parse_token_type_t ty)
: source_start(SOURCE_OFFSET_INVALID),
source_length(0),
parent(NODE_OFFSET_INVALID),
child_start(0),
child_count(0),
type(ty),
keyword(parse_keyword_none),
flags(0),
tag(0) {}
node_offset_t child_offset(node_offset_t which) const {
PARSE_ASSERT(which < child_count);
return child_start + which;
}
/// Indicate if this node has a range of source code associated with it.
bool has_source() const {
// Should never have a nonempty range with an invalid offset.
assert(this->source_start != SOURCE_OFFSET_INVALID || this->source_length == 0);
return this->source_length > 0;
}
/// Indicate if the node has comment nodes.
bool has_comments() const { return this->flags & parse_node_flag_has_comments; }
/// Indicates if we have a preceding escaped newline.
bool has_preceding_escaped_newline() const {
return this->flags & parse_node_flag_preceding_escaped_nl;
}
/// Gets source for the node, or the empty string if it has no source.
wcstring get_source(const wcstring &str) const {
if (!has_source())
return wcstring();
else
return wcstring(str, this->source_start, this->source_length);
}
/// Returns whether the given location is within the source range or at its end.
bool location_in_or_at_end_of_source_range(size_t loc) const {
return has_source() && source_start <= loc && loc - source_start <= source_length;
}
};
template <typename Type>
class tnode_t;
/// The parse tree itself.
class parse_node_tree_t : public std::vector<parse_node_t> {
public:
parse_node_tree_t() {}
parse_node_tree_t(parse_node_tree_t &&) = default;
parse_node_tree_t &operator=(parse_node_tree_t &&) = default;
parse_node_tree_t(const parse_node_tree_t &) = delete; // no copying
parse_node_tree_t &operator=(const parse_node_tree_t &) = delete; // no copying
// Get the node corresponding to a child of the given node, or NULL if there is no such child.
// If expected_type is provided, assert that the node has that type.
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which,
parse_token_type_t expected_type = token_type_invalid) const;
// Find the first direct child of the given node of the given type. asserts on failure.
const parse_node_t &find_child(const parse_node_t &parent, parse_token_type_t type) const;
template <typename Type>
tnode_t<Type> find_child(const parse_node_t &parent) const;
// Get the node corresponding to the parent of the given node, or NULL if there is no such
// child. If expected_type is provided, only returns the parent if it is of that type. Note the
// asymmetry: get_child asserts since the children are known, but get_parent does not, since the
// parent may not be known.
const parse_node_t *get_parent(const parse_node_t &node,
parse_token_type_t expected_type = token_type_invalid) const;
// Finds a node containing the given source location. If 'parent' is not NULL, it must be an
// ancestor.
const parse_node_t *find_node_matching_source_location(parse_token_type_t type,
size_t source_loc,
const parse_node_t *parent) const;
// Utilities
/// Given a node, return all of its comment nodes.
std::vector<tnode_t<grammar::comment>> comment_nodes_for_node(const parse_node_t &node) const;
private:
template <typename Type>
friend class tnode_t;
/// Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return
/// the next element of the given type in that list, and the tail (by reference). Returns NULL
/// if we've exhausted the list.
const parse_node_t *next_node_in_node_list(const parse_node_t &node_list,
parse_token_type_t item_type,
const parse_node_t **list_tail) const;
};
/// The big entry point. Parse a string, attempting to produce a tree for the given goal type.
bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags,
parse_node_tree_t *output, parse_error_list_t *errors,
parse_token_type_t goal = symbol_job_list);
/// A type wrapping up a parse tree and the original source behind it.
struct parsed_source_t {
wcstring src;
parse_node_tree_t tree;
parsed_source_t(wcstring s, parse_node_tree_t t) : src(std::move(s)), tree(std::move(t)) {}
parsed_source_t(const parsed_source_t &) = delete;
void operator=(const parsed_source_t &) = delete;
parsed_source_t(parsed_source_t &&) = default;
parsed_source_t &operator=(parsed_source_t &&) = default;
};
/// Return a shared pointer to parsed_source_t, or null on failure.
using parsed_source_ref_t = std::shared_ptr<const parsed_source_t>;
parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, parse_error_list_t *errors,
parse_token_type_t goal = symbol_job_list);
#endif