Merge branch 'parser_cleanup_3'

This merges a sequence of changes which eliminates the "parse tree"
construct and replaces it with a new abstract syntax tree implementation.
This is simpler and easier to understand/use.
This commit is contained in:
ridiculousfish 2020-07-04 15:06:41 -07:00
commit 44944146e2
33 changed files with 4242 additions and 4072 deletions

View File

@@ -114,14 +114,14 @@ set(FISH_SRCS
src/fallback.cpp src/fish_version.cpp src/function.cpp src/highlight.cpp
src/history.cpp src/history_file.cpp src/input.cpp src/input_common.cpp src/intern.cpp
src/io.cpp src/iothread.cpp src/kill.cpp src/output.cpp src/pager.cpp
src/parse_execution.cpp src/parse_productions.cpp src/parse_tree.cpp
src/parse_execution.cpp src/parse_tree.cpp
src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp
src/postfork.cpp src/proc.cpp src/reader.cpp src/sanity.cpp src/screen.cpp
src/signal.cpp src/tinyexpr.cpp src/tnode.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp
src/signal.cpp src/tinyexpr.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp
src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp src/wutil.cpp
src/future_feature_flags.cpp src/redirection.cpp src/topic_monitor.cpp
src/flog.cpp src/trace.cpp src/timer.cpp src/null_terminated_array.cpp
src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp
src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp src/ast.cpp
)
# Header files are just globbed.

1206
src/ast.cpp Normal file

File diff suppressed because it is too large Load Diff

1018
src/ast.h Normal file

File diff suppressed because it is too large Load Diff

60
src/ast_node_types.inc Normal file
View File

@@ -0,0 +1,60 @@
// X-macro inventory of every AST node type.
// Define ELEM and optionally ELEMLIST before including this file.
// ELEM is for ordinary nodes.
// ELEMLIST(x, y) marks list nodes and the type they contain.
#ifndef ELEMLIST
// If the includer did not distinguish lists, treat a list node like an
// ordinary node (drop the contained-type argument).
#define ELEMLIST(x, y) ELEM(x)
#endif
// Lexical / leaf base types.
ELEM(keyword_base)
ELEM(token_base)
ELEM(maybe_newlines)
// Arguments, redirections, and variable assignments (and their lists).
ELEM(argument)
ELEMLIST(argument_list, argument)
ELEM(redirection)
ELEM(argument_or_redirection)
ELEMLIST(argument_or_redirection_list, argument_or_redirection)
ELEM(variable_assignment)
ELEMLIST(variable_assignment_list, variable_assignment)
// Jobs and the connective structure between them.
ELEM(job)
ELEM(job_conjunction)
// For historical reasons, a job list is a list of job *conjunctions*. This should be fixed.
ELEMLIST(job_list, job_conjunction)
ELEM(job_conjunction_continuation)
ELEMLIST(job_conjunction_continuation_list, job_conjunction_continuation)
ELEM(job_continuation)
ELEMLIST(job_continuation_list, job_continuation)
ELEM(andor_job)
ELEMLIST(andor_job_list, andor_job)
// Statements, including block constructs and their headers/clauses.
ELEM(statement)
ELEM(not_statement)
ELEM(block_statement)
ELEM(for_header)
ELEM(while_header)
ELEM(function_header)
ELEM(begin_header)
ELEM(if_statement)
ELEM(if_clause)
ELEM(elseif_clause)
ELEMLIST(elseif_clause_list, elseif_clause)
ELEM(else_clause)
ELEM(switch_statement)
ELEM(case_item)
ELEMLIST(case_item_list, case_item)
ELEM(decorated_statement)
ELEM(freestanding_argument_list)
// Clean up both macros so this file can be re-included with fresh definitions.
#undef ELEM
#undef ELEMLIST

View File

@@ -200,8 +200,7 @@ static int validate_function_name(int argc, const wchar_t *const *argv, wcstring
/// Define a function. Calls into `function.cpp` to perform the heavy lifting of defining a
/// function.
int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args,
const parsed_source_ref_t &source,
tnode_t<grammar::block_statement> func_node) {
const parsed_source_ref_t &source, const ast::block_statement_t &func_node) {
assert(source && "Missing source in builtin_function");
// The wgetopt function expects 'function' as the first argument. Make a new wcstring_list with
// that property. This is needed because this builtin has a different signature than the other
@@ -252,7 +251,7 @@ int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_lis
props->shadow_scope = opts.shadow_scope;
props->named_arguments = std::move(opts.named_arguments);
props->parsed_source = source;
props->func_node = func_node;
props->func_node = &func_node;
// Populate inherit_vars.
for (const wcstring &name : opts.inherit_vars) {

View File

@@ -8,7 +8,10 @@
class parser_t;
struct io_streams_t;
namespace ast {
struct block_statement_t;
}
int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args,
const parsed_source_ref_t &source,
tnode_t<grammar::block_statement> func_node);
const parsed_source_ref_t &source, const ast::block_statement_t &func_node);
#endif

View File

@@ -45,7 +45,6 @@
#include "path.h"
#include "proc.h"
#include "reader.h"
#include "tnode.h"
#include "util.h"
#include "wcstringutil.h"
#include "wildcard.h"

View File

@@ -623,10 +623,10 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job,
if (p->type == process_type_t::block_node) {
const parsed_source_ref_t &source = p->block_node_source;
tnode_t<grammar::statement> node = p->internal_block_node;
const ast::statement_t *node = p->internal_block_node;
assert(source && node && "Process is missing node info");
return [=](parser_t &parser) {
return parser.eval_node(source, node, io_chain, job_group).status;
return parser.eval_node(source, *node, io_chain, job_group).status;
};
} else {
assert(p->type == process_type_t::function);
@@ -638,7 +638,7 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job,
auto argv = move_to_sharedptr(p->get_argv_array().to_list());
return [=](parser_t &parser) {
// Pull out the job list from the function.
tnode_t<grammar::job_list> body = props->func_node.child<1>();
const ast::job_list_t &body = props->func_node->jobs;
const block_t *fb = function_prepare_environment(parser, *argv, *props);
auto res = parser.eval_node(props->parsed_source, body, io_chain, job_group);
function_restore_environment(parser, fb);

View File

@@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include <tuple>
#include <vector>
#include "ast.h"
#include "color.h"
#include "common.h"
#include "env.h"
@@ -43,16 +44,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include "operation_context.h"
#include "output.h"
#include "parse_constants.h"
#include "parse_util.h"
#include "print_help.h"
#include "tnode.h"
#include "wutil.h" // IWYU pragma: keep
// The number of spaces per indent isn't supposed to be configurable.
// See discussion at https://github.com/fish-shell/fish-shell/pull/6790
#define SPACES_PER_INDENT 4
// An indent_t represents an abstract indent depth. 2 means we are in a doubly-nested block, etc.
using indent_t = unsigned int;
static bool dump_parse_tree = false;
static int ret = 0;
@@ -83,232 +82,539 @@ static wcstring read_file(FILE *f) {
return result;
}
struct prettifier_t {
namespace {
/// From C++14.
template <bool B, typename T = void>
using enable_if_t = typename std::enable_if<B, T>::type;
/// \return the number of escaping backslashes before a character.
/// \p idx may be "one past the end."
size_t count_preceding_backslashes(const wcstring &text, size_t idx) {
assert(idx <= text.size() && "Out of bounds");
size_t backslashes = 0;
while (backslashes < idx && text.at(idx - backslashes - 1) == L'\\') {
backslashes++;
}
return backslashes;
}
/// \return whether a character at a given index is escaped.
/// A character is escaped if it has an odd number of backslashes.
bool char_is_escaped(const wcstring &text, size_t idx) {
return count_preceding_backslashes(text, idx) % 2 == 1;
}
using namespace ast;
struct pretty_printer_t {
// Note: this got somewhat more complicated after introducing the new AST, because that AST no
// longer encodes detailed lexical information (e.g. every newline). This feels more complex
// than necessary and would probably benefit from a more layered approach where we identify
// certain runs, weight line breaks, have a cost model, etc.
pretty_printer_t(const wcstring &src, bool do_indent)
: source(src),
indents(do_indent ? parse_util_compute_indents(source) : std::vector<int>(src.size(), 0)),
ast(ast_t::parse(src, parse_flags())),
do_indent(do_indent),
gaps(compute_gaps()),
preferred_semi_locations(compute_preferred_semi_locations()) {
assert(indents.size() == source.size() && "indents and source should be same length");
}
// Original source.
const wcstring &source;
// The indents of our string.
// This has the same length as 'source' and describes the indentation level.
const std::vector<int> indents;
// The parsed ast.
const ast_t ast;
// The prettifier output.
wcstring output;
// The indent of the source range which we are currently emitting.
int current_indent{0};
// Whether to indent, or just insert spaces.
const bool do_indent;
// Whether we are at the beginning of a new line.
bool has_new_line = true;
// Whether the next gap text should hide the first newline.
bool gap_text_mask_newline{false};
// Whether the last token was a semicolon.
bool last_was_semicolon = false;
// The "gaps": a sorted set of ranges between tokens.
// These contain whitespace, comments, semicolons, and other lexical elements which are not
// present in the ast.
const std::vector<source_range_t> gaps;
// Whether we need to append a continuation new line before continuing.
bool needs_continuation_newline = false;
// The sorted set of source offsets of nl_semi_t which should be set as semis, not newlines.
// This is computed ahead of time for convenience.
const std::vector<uint32_t> preferred_semi_locations;
// Additional indentation due to line continuation (escaped newline)
uint32_t line_continuation_indent = 0;
// Flags we support.
using gap_flags_t = uint32_t;
enum {
default_flags = 0,
prettifier_t(const wcstring &source, bool do_indent) : source(source), do_indent(do_indent) {}
// Whether to allow line splitting via escaped newlines.
// For example, in argument lists:
//
// echo a \
// b
//
// If this is not set, then split-lines will be joined.
allow_escaped_newlines = 1 << 0,
void prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx, indent_t node_indent,
parse_token_type_t parent_type);
// Whether to require a space before this token.
// This is used when emitting semis:
// echo a; echo b;
// No space required between 'a' and ';', or 'b' and ';'.
skip_space = 1 << 1,
};
void maybe_prepend_escaped_newline(const parse_node_t &node) {
if (node.has_preceding_escaped_newline()) {
output.append(L" \\");
append_newline(true);
// \return gap text flags for the gap text that comes *before* a given node type.
static gap_flags_t gap_text_flags_before_node(const node_t &node) {
gap_flags_t result = default_flags;
switch (node.type) {
// Allow escaped newlines in argument and redirection lists.
case type_t::argument:
case type_t::redirection:
result |= allow_escaped_newlines;
break;
case type_t::token_base:
// Allow escaped newlines before && and ||, and also pipes.
switch (node.as<token_base_t>()->type) {
case parse_token_type_andand:
case parse_token_type_oror:
case parse_token_type_pipe:
result |= allow_escaped_newlines;
break;
default:
break;
}
break;
default:
break;
}
return result;
}
// \return whether we are at the start of a new line.
bool at_line_start() const { return output.empty() || output.back() == L'\n'; }
// \return whether we have a space before the output.
// This ignores escaped spaces and escaped newlines.
bool has_preceding_space() const {
long idx = static_cast<long>(output.size()) - 1;
// Skip escaped newlines.
// This is historical. Example:
//
// cmd1 \
// | cmd2
//
// we want the pipe to "see" the space after cmd1.
// TODO: this is too tricky, we should factor this better.
while (idx >= 0 && output.at(idx) == L'\n') {
size_t backslashes = count_preceding_backslashes(source, idx);
if (backslashes % 2 == 0) {
// Not escaped.
return false;
}
idx -= (1 + backslashes);
}
return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx);
}
// Entry point. Prettify our source code and return it.
wcstring prettify() {
output = wcstring{};
node_visitor(*this).accept(ast.top());
// Trailing gap text.
emit_gap_text_before(source_range_t{(uint32_t)source.size(), 0}, default_flags);
// Replace all trailing newlines with just a single one.
while (!output.empty() && at_line_start()) {
output.pop_back();
}
emit_newline();
wcstring result = std::move(output);
return result;
}
// \return a substring of source.
wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); }
// Return the gap ranges from our ast.
std::vector<source_range_t> compute_gaps() const {
auto range_compare = [](source_range_t r1, source_range_t r2) {
if (r1.start != r2.start) return r1.start < r2.start;
return r1.length < r2.length;
};
// Collect the token ranges into a list.
std::vector<source_range_t> tok_ranges;
for (const node_t &node : ast) {
if (node.category == category_t::leaf) {
auto r = node.source_range();
if (r.length > 0) tok_ranges.push_back(r);
}
}
// Place a zero length range at end to aid in our inverting.
tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0});
// Our tokens should be sorted.
assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare));
// For each range, add a gap range between the previous range and this range.
std::vector<source_range_t> gaps;
uint32_t prev_end = 0;
for (source_range_t tok_range : tok_ranges) {
assert(tok_range.start >= prev_end &&
"Token range should not overlap or be out of order");
if (tok_range.start >= prev_end) {
gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end});
}
prev_end = tok_range.start + tok_range.length;
}
return gaps;
}
// Return sorted list of semi-preferring semi_nl nodes.
std::vector<uint32_t> compute_preferred_semi_locations() const {
std::vector<uint32_t> result;
auto mark_as_semi = [&result](const optional_t<semi_nl_t> &n) {
if (n && n->has_source()) result.push_back(n->range.start);
};
// andor_job_lists get semis if they are short enough.
for (const auto &node : ast) {
// See if we have a condition and an andor_job_list.
const optional_t<semi_nl_t> *condition = nullptr;
const andor_job_list_t *andors = nullptr;
if (const auto *ifc = node.try_as<if_clause_t>()) {
condition = &ifc->condition.semi_nl;
andors = &ifc->andor_tail;
} else if (const auto *wc = node.try_as<while_header_t>()) {
condition = &wc->condition.semi_nl;
andors = &wc->andor_tail;
}
// This describes the heuristic of when to place and_or job lists on separate lines.
// That is, do we want:
// if true; and false
// or do we want:
// if true
// and false
// Lists with two or fewer get semis.
// Note the effective count is then three, because this list does not include the main
// condition.
if (andors && andors->count() > 0 && andors->count() <= 2) {
if (condition) mark_as_semi(*condition);
// Mark all but last of the andor list.
for (uint32_t i = 0; i + 1 < andors->count(); i++) {
mark_as_semi(andors->at(i)->job.semi_nl);
}
}
}
// `x ; and y` gets semis if it has them already, and they are on the same line.
for (const auto &node : ast) {
if (const auto *job_list = node.try_as<job_list_t>()) {
const semi_nl_t *prev_job_semi_nl = nullptr;
for (const job_conjunction_t &job : *job_list) {
// Set up prev_job_semi_nl for the next iteration to make control flow easier.
const semi_nl_t *prev = prev_job_semi_nl;
prev_job_semi_nl = job.semi_nl.contents.get();
// Is this an 'and' or 'or' job?
if (!job.decorator) continue;
// Now see if we want to mark 'prev' as allowing a semi.
// Did we have a previous semi_nl which was a newline?
if (!prev || substr(prev->range) != L";") continue;
// Is there a newline between them?
assert(prev->range.start <= job.decorator->range.start &&
"Ranges out of order");
auto start = source.begin() + prev->range.start;
auto end = source.begin() + job.decorator->range.end();
if (std::find(start, end, L'\n') == end) {
// We're going to allow the previous semi_nl to be a semi.
result.push_back(prev->range.start);
}
}
}
}
std::sort(result.begin(), result.end());
return result;
}
// Emit a space or indent as necessary, depending on the previous output.
void emit_space_or_indent(gap_flags_t flags = default_flags) {
if (at_line_start()) {
output.append(SPACES_PER_INDENT * current_indent, L' ');
} else if (!(flags & skip_space) && !has_preceding_space()) {
output.append(1, L' ');
}
}
void append_newline(bool is_continuation = false) {
output.push_back('\n');
has_new_line = true;
needs_continuation_newline = false;
line_continuation_indent = is_continuation ? 1 : 0;
// Emit "gap text:" newlines and comments from the original source.
// Gap text may be a few things:
//
// 1. Just a space is common. We will trim the spaces to be empty.
//
// Here the gap text is the comment, followed by the newline:
//
// echo abc # arg
// echo def
//
// 2. It may also be an escaped newline:
// Here the gap text is a space, backslash, newline, space.
//
// echo \
// hi
//
// 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe:
//
// begin | stuff
//
// We do not handle errors here - instead our caller does.
void emit_gap_text(const wcstring &gap_text, gap_flags_t flags) {
// Common case: if we are only spaces, do nothing.
if (gap_text.find_first_not_of(L' ') == wcstring::npos) return;
// Look to see if there is an escaped newline.
// Emit it if either we allow it, or it comes before the first comment.
// Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap
// text - we already know it has no semantic significance.
size_t escaped_nl = gap_text.find(L"\\\n");
if (escaped_nl != wcstring::npos) {
size_t comment_idx = gap_text.find(L'#');
if ((flags & allow_escaped_newlines) ||
(comment_idx != wcstring::npos && escaped_nl < comment_idx)) {
// Emit a space before the escaped newline.
if (!at_line_start() && !has_preceding_space()) {
output.append(L" ");
}
output.append(L"\\\n");
}
}
// It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we
// always emit one.
bool needs_nl = false;
tokenizer_t tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES);
while (maybe_t<tok_t> tok = tokenizer.next()) {
wcstring tok_text = tokenizer.text_of(*tok);
if (needs_nl) {
emit_newline();
needs_nl = false;
if (tok_text == L"\n") continue;
} else if (gap_text_mask_newline) {
// We only respect mask_newline the first time through the loop.
gap_text_mask_newline = false;
if (tok_text == L"\n") continue;
}
if (tok->type == token_type_t::comment) {
emit_space_or_indent();
output.append(tok_text);
needs_nl = true;
} else if (tok->type == token_type_t::end) {
// This may be either a newline or semicolon.
// Semicolons found here are not part of the ast and can simply be removed.
// Newlines are preserved unless mask_newline is set.
if (tok_text == L"\n") {
emit_newline();
}
} else {
fprintf(stderr,
"Gap text should only have comments and newlines - instead found token "
"type %d with text: %ls\n",
(int)tok->type, tok_text.c_str());
DIE("Gap text should only have comments and newlines");
}
}
if (needs_nl) emit_newline();
}
// Append whitespace as necessary. If we have a newline, append the appropriate indent.
// Otherwise, append a space.
void append_whitespace(indent_t node_indent) {
if (needs_continuation_newline) {
append_newline(true);
/// \return the gap text ending at a given index into the string, or empty if none.
source_range_t gap_text_to(uint32_t end) const {
auto where = std::lower_bound(
gaps.begin(), gaps.end(), end,
[](source_range_t r, uint32_t end) { return r.start + r.length < end; });
if (where == gaps.end() || where->start + where->length != end) {
// Not found.
return source_range_t{0, 0};
} else {
return *where;
}
if (!has_new_line) {
output.push_back(L' ');
} else if (do_indent) {
output.append((node_indent + line_continuation_indent) * SPACES_PER_INDENT, L' ');
}
/// \return whether a range \p r overlaps an error range from our ast.
bool range_contained_error(source_range_t r) const {
const auto &errs = ast.extras().errors;
auto range_is_before = [](source_range_t x, source_range_t y) {
return x.start + x.length <= y.start;
};
assert(std::is_sorted(errs.begin(), errs.end(), range_is_before) &&
"Error ranges should be sorted");
return std::binary_search(errs.begin(), errs.end(), r, range_is_before);
}
// Emit the gap text before a source range.
void emit_gap_text_before(source_range_t r, gap_flags_t flags) {
assert(r.start <= source.size() && "source out of bounds");
uint32_t start = r.start;
if (start < indents.size()) current_indent = indents.at(start);
// Find the gap text which ends at start.
source_range_t range = gap_text_to(start);
if (range.length > 0) {
// If this range contained an error, append the gap text without modification.
// For example in: echo foo "
// We don't want to mess with the quote.
if (range_contained_error(range)) {
output.append(substr(range));
} else {
emit_gap_text(substr(range), flags);
}
}
// Always clear gap_text_mask_newline after emitting even empty gap text.
gap_text_mask_newline = false;
}
/// Given a string \p input, remove unnecessary quotes, etc.
wcstring clean_text(const wcstring &input) {
// Unescape the string - this leaves special markers around if there are any
// expansions or anything. We specifically tell it to not compute backslash-escapes
// like \U or \x, because we want to leave them intact.
wcstring unescaped = input;
unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);
// Remove INTERNAL_SEPARATOR because that's a quote.
auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), unescaped.end());
// If no non-"good" char is left, use the unescaped version.
// This can be extended to other characters, but giving the precise list is tough,
// can change over time (see "^", "%" and "?", in some cases "{}") and it just makes
// people feel more at ease.
auto goodchars = [](wchar_t ch) {
return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
};
if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == unescaped.end() &&
!unescaped.empty()) {
return unescaped;
} else {
return input;
}
}
// Emit a range of original text. This indents as needed, and also inserts preceding gap text.
// If \p tolerate_line_splitting is set, then permit escaped newlines; otherwise collapse such
// lines.
void emit_text(source_range_t r, gap_flags_t flags) {
emit_gap_text_before(r, flags);
current_indent = indents.at(r.start);
if (r.length > 0) {
emit_space_or_indent(flags);
output.append(clean_text(substr(r)));
}
}
template <type_t Type>
void emit_node_text(const leaf_t<Type> &node) {
emit_text(node.range, gap_text_flags_before_node(node));
}
// Emit one newline.
void emit_newline() { output.push_back(L'\n'); }
// Emit a semicolon.
void emit_semi() { output.push_back(L';'); }
// For branch and list nodes, default is to visit their children.
template <typename Node>
enable_if_t<Node::Category == category_t::branch> visit(const Node &node) {
node_visitor(*this).accept_children_of(node);
}
template <typename Node>
enable_if_t<Node::Category == ast::category_t::list> visit(const Node &node) {
node_visitor(*this).accept_children_of(node);
}
// Leaf nodes we just visit their text.
void visit(const keyword_base_t &node) { emit_node_text(node); }
void visit(const token_base_t &node) { emit_node_text(node); }
void visit(const argument_t &node) { emit_node_text(node); }
void visit(const variable_assignment_t &node) { emit_node_text(node); }
void visit(const semi_nl_t &node) {
// These are semicolons or newlines which are part of the ast. That means it includes e.g.
// ones terminating a job or 'if' header, but not random semis in job lists. We respect
// preferred_semi_locations to decide whether or not these should stay as newlines or
// become semicolons.
// Check if we should prefer a semicolon.
bool prefer_semi = node.range.length > 0 &&
std::binary_search(preferred_semi_locations.begin(),
preferred_semi_locations.end(), node.range.start);
emit_gap_text_before(node.range, gap_text_flags_before_node(node));
// Don't emit anything if the gap text put us on a newline (because it had a comment).
if (!at_line_start()) {
prefer_semi ? emit_semi() : emit_newline();
// If it was a semi but we emitted a newline, swallow a subsequent newline.
if (!prefer_semi && substr(node.range) == L";") {
gap_text_mask_newline = true;
}
}
}
void visit(const redirection_t &node) {
// No space between a redirection operator and its target (#2899).
emit_text(node.oper.range, default_flags);
emit_text(node.target.range, skip_space);
}
void visit(const maybe_newlines_t &node) {
// Our newlines may have comments embedded in them, example:
// cmd |
// # something
// cmd2
// Treat it as gap text.
if (node.range.length > 0) {
auto flags = gap_text_flags_before_node(node);
current_indent = indents.at(node.range.start);
emit_gap_text_before(node.range, flags);
wcstring text = source.substr(node.range.start, node.range.length);
emit_gap_text(text, flags);
}
}
void visit(const begin_header_t &node) {
// 'begin' does not require a newline after it, but we insert one.
node_visitor(*this).accept_children_of(node);
if (!at_line_start()) {
emit_newline();
}
}
// The flags we use to parse.
static parse_tree_flags_t parse_flags() {
return parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_leave_unterminated | parse_flag_show_blank_lines;
}
};
// Dump a parse tree node in a form helpful to someone debugging the behavior of this program.
static void dump_node(indent_t node_indent, const parse_node_t &node, const wcstring &source) {
wchar_t nextc = L' ';
wchar_t prevc = L' ';
wcstring source_txt;
if (node.source_start != SOURCE_OFFSET_INVALID && node.source_length != SOURCE_OFFSET_INVALID) {
int nextc_idx = node.source_start + node.source_length;
if (static_cast<size_t>(nextc_idx) < source.size()) {
nextc = source[node.source_start + node.source_length];
}
if (node.source_start > 0) prevc = source[node.source_start - 1];
source_txt = source.substr(node.source_start, node.source_length);
}
wchar_t prevc_str[4] = {prevc, 0, 0, 0};
wchar_t nextc_str[4] = {nextc, 0, 0, 0};
if (prevc < L' ') {
prevc_str[0] = L'\\';
prevc_str[1] = L'c';
prevc_str[2] = prevc + '@';
}
if (nextc < L' ') {
nextc_str[0] = L'\\';
nextc_str[1] = L'c';
nextc_str[2] = nextc + '@';
}
std::fwprintf(stderr, L"{off %4u, len %4u, indent %2u, kw %ls, %ls} [%ls|%ls|%ls]\n",
node.source_start, node.source_length, node_indent,
keyword_description(node.keyword), token_type_description(node.type), prevc_str,
source_txt.c_str(), nextc_str);
}
void prettifier_t::prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx,
indent_t node_indent, parse_token_type_t parent_type) {
// Use an explicit stack to avoid stack overflow.
struct pending_node_t {
node_offset_t index;
indent_t indent;
parse_token_type_t parent_type;
};
std::stack<pending_node_t> pending_node_stack;
pending_node_stack.push({node_idx, node_indent, parent_type});
while (!pending_node_stack.empty()) {
pending_node_t args = pending_node_stack.top();
pending_node_stack.pop();
auto node_idx = args.index;
auto node_indent = args.indent;
auto parent_type = args.parent_type;
const parse_node_t &node = tree.at(node_idx);
const parse_token_type_t node_type = node.type;
const parse_token_type_t prev_node_type =
node_idx > 0 ? tree.at(node_idx - 1).type : token_type_invalid;
// Increment the indent if we are either a root job_list, or root case_item_list, or in an
// if or while header (#1665).
const bool is_root_job_list =
node_type == symbol_job_list && parent_type != symbol_job_list;
const bool is_root_case_list =
node_type == symbol_case_item_list && parent_type != symbol_case_item_list;
const bool is_if_while_header =
(node_type == symbol_job_conjunction || node_type == symbol_andor_job_list) &&
(parent_type == symbol_if_clause || parent_type == symbol_while_header);
if (is_root_job_list || is_root_case_list || is_if_while_header) {
node_indent += 1;
}
if (dump_parse_tree) dump_node(node_indent, node, source);
// Prepend any escaped newline, but only for certain cases.
// We allow it to split arguments (including at the end - this is like trailing commas in
// lists, makes for better diffs), to separate pipelines (but it has to be *before* the
// pipe, so the pipe symbol is the first thing on the new line after the indent) and to
// separate &&/|| job lists (`and` and `or` are handled separately below, as they *allow*
// semicolons)
// TODO: Handle
// foo | \
// bar
// so it just removes the escape - pipes don't need it. This was changed in some fish
// version, figure out which it was and if it is worth supporting.
if (prev_node_type == symbol_arguments_or_redirections_list ||
prev_node_type == symbol_argument_list || node_type == parse_token_type_andand ||
node_type == parse_token_type_pipe || node_type == parse_token_type_end) {
maybe_prepend_escaped_newline(node);
}
// handle comments, which come before the text
if (node.has_comments()) {
auto comment_nodes = tree.comment_nodes_for_node(node);
for (const auto &comment : comment_nodes) {
maybe_prepend_escaped_newline(*comment.node());
append_whitespace(node_indent);
auto source_range = comment.source_range();
output.append(source, source_range->start, source_range->length);
needs_continuation_newline = true;
}
}
if (node_type == parse_token_type_end) {
// For historical reasons, semicolon also get "TOK_END".
// We need to distinguish between them, because otherwise `a;;;;` gets extra lines
// instead of the semicolons. Semicolons are just ignored, unless they are followed by a
// command. So `echo;` removes the semicolon, but `echo; echo` removes it and adds a
// newline.
last_was_semicolon = false;
if (node.get_source(source) == L"\n") {
append_newline();
} else if (!has_new_line) {
// The semicolon is only useful if we haven't just had a newline.
last_was_semicolon = true;
}
} else if ((node_type >= FIRST_PARSE_TOKEN_TYPE && node_type <= LAST_PARSE_TOKEN_TYPE) ||
node_type == parse_special_type_parse_error) {
if (last_was_semicolon) {
// We keep the semicolon for `; and` and `; or`,
// others we turn into newlines.
if (node.keyword != parse_keyword_t::kw_and &&
node.keyword != parse_keyword_t::kw_or) {
append_newline();
} else {
output.push_back(L';');
}
last_was_semicolon = false;
}
if (node.has_source()) {
// Some type representing a particular token.
if (prev_node_type != parse_token_type_redirection) {
append_whitespace(node_indent);
}
wcstring unescaped{source, node.source_start, node.source_length};
// Unescape the string - this leaves special markers around if there are any
// expansions or anything. We specifically tell it to not compute backslash-escapes
// like \U or \x, because we want to leave them intact.
unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);
// Remove INTERNAL_SEPARATOR because that's a quote.
auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote),
unescaped.end());
// If no non-"good" char is left, use the unescaped version.
// This can be extended to other characters, but giving the precise list is tough,
// can change over time (see "^", "%" and "?", in some cases "{}") and it just makes
// people feel more at ease.
auto goodchars = [](wchar_t ch) {
return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
};
if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) ==
unescaped.end() &&
!unescaped.empty()) {
output.append(unescaped);
} else {
output.append(source, node.source_start, node.source_length);
}
has_new_line = false;
}
}
// Put all children in stack in reversed order
// This way they will be processed in correct order.
for (node_offset_t idx = node.child_count; idx > 0; idx--) {
// Note: We pass our type to our child, which becomes its parent node type.
// Note: While node.child_start could be -1 (NODE_OFFSET_INVALID) the addition is safe
// because we won't execute this call in that case since node.child_count should be
// zero.
pending_node_stack.push({node.child_start + (idx - 1), node_indent, node_type});
}
}
}
} // namespace
static const char *highlight_role_to_string(highlight_role_t role) {
#define TEST_ROLE(x) \
@ -394,29 +700,17 @@ static std::string make_pygments_csv(const wcstring &src) {
// Entry point for prettification.
static wcstring prettify(const wcstring &src, bool do_indent) {
parse_node_tree_t parse_tree;
int parse_flags = (parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_leave_unterminated | parse_flag_show_blank_lines);
if (!parse_tree_from_string(src, parse_flags, &parse_tree, nullptr)) {
return src; // we return the original string on failure
}
if (dump_parse_tree) {
const wcstring dump = parse_dump_tree(parse_tree, src);
std::fwprintf(stderr, L"%ls\n", dump.c_str());
auto ast =
ast::ast_t::parse(src, parse_flag_leave_unterminated | parse_flag_include_comments |
parse_flag_show_extra_semis);
wcstring ast_dump = ast.dump(src);
std::fwprintf(stderr, L"%ls\n", ast_dump.c_str());
}
// We may have a forest of disconnected trees on a parse failure. We have to handle all nodes
// that have no parent, and all parse errors.
prettifier_t prettifier{src, do_indent};
for (node_offset_t i = 0; i < parse_tree.size(); i++) {
const parse_node_t &node = parse_tree.at(i);
if (node.parent == NODE_OFFSET_INVALID || node.type == parse_special_type_parse_error) {
// A root node.
prettifier.prettify_node(parse_tree, i, 0, symbol_job_list);
}
}
return std::move(prettifier.output);
pretty_printer_t printer{src, do_indent};
wcstring output = printer.prettify();
return output;
}
/// Given a string and list of colors of the same size, return the string with HTML span elements

View File

@ -40,6 +40,7 @@
#include <utility>
#include <vector>
#include "ast.h"
#include "autoload.h"
#include "builtin.h"
#include "color.h"
@ -75,7 +76,6 @@
#include "signal.h"
#include "termsize.h"
#include "timer.h"
#include "tnode.h"
#include "tokenizer.h"
#include "topic_monitor.h"
#include "utf8.h"
@ -978,15 +978,18 @@ static void test_debounce_timeout() {
}
static parser_test_error_bits_t detect_argument_errors(const wcstring &src) {
parse_node_tree_t tree;
if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL, symbol_argument_list)) {
using namespace ast;
auto ast = ast_t::parse_argument_list(src, parse_flag_none);
if (ast.errored()) {
return PARSER_TEST_ERROR;
}
assert(!tree.empty()); //!OCLINT(multiple unary operator)
tnode_t<grammar::argument_list> arg_list{&tree, &tree.at(0)};
auto first_arg = arg_list.next_in_list<grammar::argument>();
return parse_util_detect_errors_in_argument(first_arg, first_arg.get_source(src));
const ast::argument_t *first_arg =
ast.top()->as<freestanding_argument_list_t>()->arguments.at(0);
if (!first_arg) {
err(L"Failed to parse an argument");
return 0;
}
return parse_util_detect_errors_in_argument(*first_arg, first_arg->source(src));
}
/// Test the parser.
@ -1084,7 +1087,7 @@ static void test_parser() {
}
if (parse_util_detect_errors(L"echo (\nfoo\n bar") != PARSER_TEST_INCOMPLETE) {
err(L"unterminated multiline subhsell not reported properly");
err(L"unterminated multiline subshell not reported properly");
}
if (parse_util_detect_errors(L"begin ; true ; end | ") != PARSER_TEST_INCOMPLETE) {
@ -1268,75 +1271,121 @@ static void test_cancellation() {
parser.clear_cancel();
}
namespace indent_tests {
// A struct which is either text or a new indent.
struct segment_t {
// The indent to set
int indent{0};
const char *text{nullptr};
/* implicit */ segment_t(int indent) : indent(indent) {}
/* implicit */ segment_t(const char *text) : text(text) {}
};
using test_t = std::vector<segment_t>;
using test_list_t = std::vector<test_t>;
// Add a new test to a test list based on a series of ints and texts.
template <typename... Types>
void add_test(test_list_t *v, const Types &... types) {
segment_t segments[] = {types...};
v->emplace_back(std::begin(segments), std::end(segments));
}
} // namespace indent_tests
static void test_indents() {
say(L"Testing indents");
using namespace indent_tests;
// Here are the components of our source and the indents we expect those to be.
struct indent_component_t {
const wchar_t *txt;
int indent;
};
test_list_t tests;
add_test(&tests, //
0, "if", 1, " foo", //
0, "\nend");
const indent_component_t components1[] = {{L"if foo", 0}, {L"end", 0}, {NULL, -1}};
add_test(&tests, //
0, "if", 1, " foo", //
1, "\nfoo", //
0, "\nend");
const indent_component_t components2[] = {{L"if foo", 0},
{L"", 1}, // trailing newline!
{NULL, -1}};
add_test(&tests, //
0, "if", 1, " foo", //
1, "\nif", 2, " bar", //
1, "\nend", //
0, "\nend");
const indent_component_t components3[] = {{L"if foo", 0},
{L"foo", 1},
{L"end", 0}, // trailing newline!
{NULL, -1}};
add_test(&tests, //
0, "if", 1, " foo", //
1, "\nif", 2, " bar", //
1, "\n", // FIXME: this should be 2 but parse_util_compute_indents has a bug
1, "\nend\n");
const indent_component_t components4[] = {{L"if foo", 0}, {L"if bar", 1}, {L"end", 1},
{L"end", 0}, {L"", 0}, {NULL, -1}};
add_test(&tests, //
0, "if", 1, " foo", //
1, "\nif", 2, " bar", //
2, "\n");
const indent_component_t components5[] = {{L"if foo", 0}, {L"if bar", 1}, {L"", 2}, {NULL, -1}};
add_test(&tests, //
0, "begin", //
1, "\nfoo", //
1, "\n");
const indent_component_t components6[] = {{L"begin", 0}, {L"foo", 1}, {L"", 1}, {NULL, -1}};
add_test(&tests, //
0, "begin", //
1, "\n;", //
0, "end", //
0, "\nfoo", 0, "\n");
const indent_component_t components7[] = {{L"begin", 0}, {L";", 1}, {L"end", 0},
{L"foo", 0}, {L"", 0}, {NULL, -1}};
add_test(&tests, //
0, "begin", //
1, "\n;", //
0, "end", //
0, "\nfoo", 0, "\n");
const indent_component_t components8[] = {{L"if foo", 0}, {L"if bar", 1}, {L"baz", 2},
{L"end", 1}, {L"", 1}, {NULL, -1}};
add_test(&tests, //
0, "if", 1, " foo", //
1, "\nif", 2, " bar", //
2, "\nbaz", //
1, "\nend", 1, "\n");
const indent_component_t components9[] = {{L"switch foo", 0}, {L"", 1}, {NULL, -1}};
add_test(&tests, //
0, "switch foo", //
1, "\n" //
);
const indent_component_t components10[] = {
{L"switch foo", 0}, {L"case bar", 1}, {L"case baz", 1}, {L"quux", 2}, {L"", 2}, {NULL, -1}};
add_test(&tests, //
0, "switch foo", //
1, "\ncase bar", //
1, "\ncase baz", //
2, "\nquux", //
2, "\nquux" //
);
const indent_component_t components11[] = {{L"switch foo", 0},
{L"cas", 1}, // parse error indentation handling
{NULL, -1}};
add_test(&tests, //
0, "switch foo", //
1, "\ncas" // parse error indentation handling
);
const indent_component_t components12[] = {{L"while false", 0},
{L"# comment", 1}, // comment indentation handling
{L"command", 1}, // comment indentation handling
{L"# comment2", 1}, // comment indentation handling
{NULL, -1}};
add_test(&tests, //
0, "while", 1, " false", //
1, "\n# comment", // comment indentation handling
1, "\ncommand", //
1, "\n# comment 2" //
);
const indent_component_t *tests[] = {components1, components2, components3, components4,
components5, components6, components7, components8,
components9, components10, components11, components12};
for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) {
const indent_component_t *components = tests[which];
// Count how many we have.
size_t component_count = 0;
while (components[component_count].txt != NULL) {
component_count++;
}
// Generate the expected indents.
int test_idx = 0;
for (const test_t &test : tests) {
// Construct the input text and expected indents.
wcstring text;
std::vector<int> expected_indents;
for (size_t i = 0; i < component_count; i++) {
if (i > 0) {
text.push_back(L'\n');
expected_indents.push_back(components[i].indent);
int current_indent = 0;
for (const segment_t &segment : test) {
if (!segment.text) {
current_indent = segment.indent;
} else {
wcstring tmp = str2wcstring(segment.text);
text.append(tmp);
expected_indents.insert(expected_indents.end(), tmp.size(), current_indent);
}
text.append(components[i].txt);
expected_indents.resize(text.size(), components[i].indent);
}
do_test(expected_indents.size() == text.size());
@ -1350,11 +1399,13 @@ static void test_indents() {
do_test(expected_indents.size() == indents.size());
for (size_t i = 0; i < text.size(); i++) {
if (expected_indents.at(i) != indents.at(i)) {
err(L"Wrong indent at index %lu in test #%lu (expected %d, actual %d):\n%ls\n", i,
which + 1, expected_indents.at(i), indents.at(i), text.c_str());
break; // don't keep showing errors for the rest of the line
err(L"Wrong indent at index %lu (char 0x%02x) in test #%lu (expected %d, actual "
L"%d):\n%ls\n",
i, text.at(i), test_idx, expected_indents.at(i), indents.at(i), text.c_str());
break; // don't keep showing errors for the rest of the test
}
}
test_idx++;
}
}
@ -4298,12 +4349,12 @@ static void test_new_parser_correctness() {
{L"true || false; and true", true},
{L"true || ||", false},
{L"|| true", false},
{L"true || \n\n false", true},
{L"true || \n\n false", false},
};
for (const auto &test : parser_tests) {
parse_node_tree_t parse_tree;
bool success = parse_tree_from_string(test.src, parse_flag_none, &parse_tree, NULL);
auto ast = ast::ast_t::parse(test.src);
bool success = !ast.errored();
if (success && !test.ok) {
err(L"\"%ls\" should NOT have parsed, but did", test.src);
} else if (!success && test.ok) {
@ -4332,7 +4383,7 @@ static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_co
}
static void test_new_parser_fuzzing() {
say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t));
say(L"Fuzzing parser");
const wcstring fuzzes[] = {
L"if", L"else", L"for", L"in", L"while", L"begin", L"function",
L"switch", L"case", L"end", L"and", L"or", L"not", L"command",
@ -4343,7 +4394,6 @@ static void test_new_parser_fuzzing() {
wcstring src;
src.reserve(128);
parse_node_tree_t node_tree;
parse_error_list_t errors;
double start = timef();
@ -4357,7 +4407,7 @@ static void test_new_parser_fuzzing() {
unsigned long permutation = 0;
while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++,
&src)) {
parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors);
ast::ast_t::parse(src);
}
if (log_it) std::fwprintf(stderr, L"done (%lu)\n", permutation);
}
@ -4369,33 +4419,36 @@ static void test_new_parser_fuzzing() {
// true if successful.
static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *out_joined_args,
enum parse_statement_decoration_t *out_deco) {
using namespace ast;
out_cmd->clear();
out_joined_args->clear();
*out_deco = parse_statement_decoration_none;
parse_node_tree_t tree;
if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL)) {
return false;
}
auto ast = ast_t::parse(src);
if (ast.errored()) return false;
// Get the statement. Should only have one.
tnode_t<grammar::job_list> job_list{&tree, &tree.at(0)};
auto stmts = job_list.descendants<grammar::plain_statement>();
if (stmts.size() != 1) {
say(L"Unexpected number of statements (%lu) found in '%ls'", stmts.size(), src.c_str());
return false;
const decorated_statement_t *statement = nullptr;
for (const auto &n : ast) {
if (const auto *tmp = n.try_as<decorated_statement_t>()) {
if (statement) {
say(L"More than one decorated statement found in '%ls'", src.c_str());
return false;
}
statement = tmp;
}
}
tnode_t<grammar::plain_statement> stmt = stmts.at(0);
// Return its decoration and command.
*out_deco = get_decoration(stmt);
*out_cmd = *command_for_plain_statement(stmt, src);
*out_deco = statement->decoration();
*out_cmd = statement->command.source(src);
// Return arguments separated by spaces.
bool first = true;
for (auto arg_node : stmt.descendants<grammar::argument>()) {
for (const ast::argument_or_redirection_t &arg : statement->args_or_redirs) {
if (!arg.is_argument()) continue;
if (!first) out_joined_args->push_back(L' ');
out_joined_args->append(arg_node.get_source(src));
out_joined_args->append(arg.source(src));
first = false;
}
@ -4404,19 +4457,22 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o
// Verify that 'function -h' and 'function --help' are plain statements but 'function --foo' is
// not (issue #1240).
template <typename Type>
template <ast::type_t Type>
static void check_function_help(const wchar_t *src) {
parse_node_tree_t tree;
if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL)) {
using namespace ast;
auto ast = ast_t::parse(src);
if (ast.errored()) {
err(L"Failed to parse '%ls'", src);
}
tnode_t<grammar::job_list> node{&tree, &tree.at(0)};
auto node_list = node.descendants<Type>();
if (node_list.size() == 0) {
err(L"Failed to find node of type '%ls'", token_type_description(Type::token));
} else if (node_list.size() > 1) {
err(L"Found too many nodes of type '%ls'", token_type_description(Type::token));
int count = 0;
for (const node_t &node : ast) {
count += (node.type == Type);
}
if (count == 0) {
err(L"Failed to find node of type '%ls'", ast_type_to_string(Type));
} else if (count > 1) {
err(L"Found too many nodes of type '%ls'", ast_type_to_string(Type));
}
}
@ -4463,30 +4519,32 @@ static void test_new_parser_ll2() {
test.src.c_str(), (int)test.deco, (int)deco, (long)__LINE__);
}
check_function_help<grammar::plain_statement>(L"function -h");
check_function_help<grammar::plain_statement>(L"function --help");
check_function_help<grammar::function_header>(L"function --foo; end");
check_function_help<grammar::function_header>(L"function foo; end");
check_function_help<ast::type_t::decorated_statement>(L"function -h");
check_function_help<ast::type_t::decorated_statement>(L"function --help");
check_function_help<ast::type_t::function_header>(L"function --foo; end");
check_function_help<ast::type_t::function_header>(L"function foo; end");
}
static void test_new_parser_ad_hoc() {
using namespace ast;
// Very ad-hoc tests for issues encountered.
say(L"Testing new parser ad hoc tests");
// Ensure that 'case' terminates a job list.
const wcstring src = L"switch foo ; case bar; case baz; end";
parse_node_tree_t parse_tree;
bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL);
if (!success) {
auto ast = ast_t::parse(src);
if (ast.errored()) {
err(L"Parsing failed");
}
// Expect three case_item_lists: one for each case, and a terminal one. The bug was that we'd
// Expect two case_item_lists. The bug was that we'd
// try to run a command 'case'.
tnode_t<grammar::job_list> root{&parse_tree, &parse_tree.at(0)};
auto node_list = root.descendants<grammar::case_item_list>();
if (node_list.size() != 3) {
err(L"Expected 3 case item nodes, found %lu", node_list.size());
int count = 0;
for (const auto &n : ast) {
count += (n.type == type_t::case_item);
}
if (count != 2) {
err(L"Expected 2 case item nodes, found %d", count);
}
}
@ -4507,7 +4565,9 @@ static void test_new_parser_errors() {
{L"if true ; end ; else", parse_error_unbalancing_else},
{L"case", parse_error_unbalancing_case},
{L"if true ; case ; end", parse_error_unbalancing_case},
{L"if true ; case ; end", parse_error_generic},
{L"true | and", parse_error_andor_in_pipeline},
};
for (const auto &test : tests) {
@ -4515,15 +4575,17 @@ static void test_new_parser_errors() {
parse_error_code_t expected_code = test.code;
parse_error_list_t errors;
parse_node_tree_t parse_tree;
bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors);
if (success) {
auto ast = ast::ast_t::parse(src, parse_flag_none, &errors);
if (!ast.errored()) {
err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str());
}
if (errors.size() != 1) {
err(L"Source '%ls' was expected to produce 1 error, but instead produced %lu errors",
src.c_str(), errors.size());
for (const auto &err : errors) {
fprintf(stderr, "%ls\n", err.describe(src, false).c_str());
}
} else if (errors.at(0).code != expected_code) {
err(L"Source '%ls' was expected to produce error code %lu, but instead produced error "
L"code %lu",
@ -4862,6 +4924,12 @@ static void test_highlighting() {
{L")", highlight_role_t::error},
});
highlight_tests.push_back({
{L"echo", highlight_role_t::command},
{L"stuff", highlight_role_t::param},
{L"# comment", highlight_role_t::comment},
});
auto &vars = parser_t::principal_parser().vars();
// Verify variables and wildcards in commands using /bin/cat.
vars.set(L"VARIABLE_IN_COMMAND", ENV_LOCAL, {L"a"});

View File

@ -64,9 +64,7 @@ class category_list_t {
category_t exec_fork{L"exec-fork", L"Calls to fork()"};
category_t output_invalid{L"output-invalid", L"Trying to print invalid output"};
category_t parse_productions{L"parse-productions", L"Resolving tokens"};
category_t parse_productions_chatty{L"parse-productions-chatty",
L"Resolving tokens (chatty messages)"};
category_t ast_construction{L"ast-construction", L"Parsing fish AST"};
category_t proc_job_run{L"proc-job-run", L"Jobs getting started or continued"};

View File

@ -224,17 +224,14 @@ bool function_get_definition(const wcstring &name, wcstring &out_definition) {
const function_info_t *func = funcset->get_info(name);
if (!func || !func->props) return false;
// We want to preserve comments that the AST attaches to the header (#5285).
// Take everything from the end of the header to the end of the body.
// Take everything from the end of the header to the 'end' keyword.
const auto &props = func->props;
namespace g = grammar;
tnode_t<g::block_header> header = props->func_node.child<0>();
tnode_t<g::job_list> jobs = props->func_node.child<1>();
auto header_src = header.source_range();
auto jobs_src = jobs.source_range();
if (header_src && jobs_src) {
auto header_src = props->func_node->header->try_source_range();
auto end_kw_src = props->func_node->end.try_source_range();
if (header_src && end_kw_src) {
uint32_t body_start = header_src->start + header_src->length;
uint32_t body_end = jobs_src->start + jobs_src->length;
assert(body_start <= jobs_src->start && "job list must come after header");
uint32_t body_end = end_kw_src->start;
assert(body_start <= body_end && "end keyword should come after header");
out_definition = wcstring(props->parsed_source->src, body_start, body_end - body_start);
}
return true;
@ -313,7 +310,7 @@ int function_get_definition_lineno(const wcstring &name) {
// return one plus the number of newlines at offsets less than the start of our function's
// statement (which includes the header).
// TODO: merge with line_offset_of_character_at_offset?
auto source_range = func->props->func_node.source_range();
auto source_range = func->props->func_node->try_source_range();
assert(source_range && "Function has no source range");
uint32_t func_start = source_range->start;
const wcstring &source = func->props->parsed_source->src;

View File

@ -11,10 +11,13 @@
#include "env.h"
#include "event.h"
#include "parse_tree.h"
#include "tnode.h"
class parser_t;
namespace ast {
struct block_statement_t;
}
/// A function's constant properties. These do not change once initialized.
struct function_properties_t {
/// Parsed source containing the function.
@ -23,7 +26,7 @@ struct function_properties_t {
/// Node containing the function statement, pointing into parsed_source.
/// We store block_statement, not job_list, so that comments attached to the header are
/// preserved.
tnode_t<grammar::block_statement> func_node;
const ast::block_statement_t *func_node;
/// List of all named arguments for this function.
wcstring_list_t named_arguments;

View File

@ -16,6 +16,7 @@
#include <unordered_set>
#include <utility>
#include "ast.h"
#include "builtin.h"
#include "color.h"
#include "common.h"
@ -31,14 +32,11 @@
#include "parse_util.h"
#include "parser.h"
#include "path.h"
#include "tnode.h"
#include "tokenizer.h"
#include "wcstringutil.h"
#include "wildcard.h"
#include "wutil.h" // IWYU pragma: keep
namespace g = grammar;
#define CURSOR_POSITION_INVALID static_cast<size_t>(-1)
static const wchar_t *get_highlight_var_name(highlight_role_t role) {
@ -338,12 +336,11 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d
// Given a plain statement node in a parse tree, get the command and return it, expanded
// appropriately for commands. If we succeed, return true.
static bool plain_statement_get_expanded_command(const wcstring &src,
tnode_t<g::plain_statement> stmt,
const operation_context_t &ctx,
wcstring *out_cmd) {
static bool statement_get_expanded_command(const wcstring &src,
const ast::decorated_statement_t &stmt,
const operation_context_t &ctx, wcstring *out_cmd) {
// Get the command. Try expanding it. If we cannot, it's an error.
maybe_t<wcstring> cmd = command_for_plain_statement(stmt, src);
maybe_t<wcstring> cmd = stmt.command.source(src);
if (!cmd) return false;
expand_result_t err = expand_to_command_and_args(*cmd, ctx, out_cmd, nullptr);
return err == expand_result_t::ok;
@ -384,6 +381,9 @@ rgb_color_t highlight_get_color(const highlight_spec_t &highlight, bool is_backg
return result;
}
static bool command_is_valid(const wcstring &cmd, enum parse_statement_decoration_t decoration,
const wcstring &working_directory, const environment_t &vars);
static bool has_expand_reserved(const wcstring &str) {
bool result = false;
for (auto wc : str) {
@ -399,27 +399,22 @@ static bool has_expand_reserved(const wcstring &str) {
// command (as a string), if any. This is used to validate autosuggestions.
static bool autosuggest_parse_command(const wcstring &buff, const operation_context_t &ctx,
wcstring *out_expanded_command, wcstring *out_arg) {
// Parse the buffer.
parse_node_tree_t parse_tree;
parse_tree_from_string(buff,
parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens,
&parse_tree, nullptr);
auto ast = ast::ast_t::parse(
buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens);
// Find the first statement.
tnode_t<g::plain_statement> first_statement{};
for (const auto &node : parse_tree) {
if (node.type == symbol_plain_statement) {
first_statement = tnode_t<g::plain_statement>(&parse_tree, &node);
break;
}
const ast::decorated_statement_t *first_statement = nullptr;
if (const ast::job_conjunction_t *jc = ast.top()->as<ast::job_list_t>()->at(0)) {
first_statement = jc->job.statement.contents->try_as<ast::decorated_statement_t>();
}
if (first_statement &&
plain_statement_get_expanded_command(buff, first_statement, ctx, out_expanded_command)) {
// Find the first argument.
auto args_and_redirs = first_statement.child<1>();
if (auto arg = args_and_redirs.next_in_list<grammar::argument>()) {
*out_arg = arg.get_source(buff);
statement_get_expanded_command(buff, *first_statement, ctx, out_expanded_command)) {
// Check if the first argument or redirection is, in fact, an argument.
if (const auto *arg_or_redir = first_statement->args_or_redirs.at(0)) {
if (arg_or_redir && arg_or_redir->is_argument()) {
*out_arg = arg_or_redir->argument().source(buff);
}
}
return true;
}
@ -775,31 +770,56 @@ class highlighter_t {
const bool io_ok;
// Working directory.
const wcstring working_directory;
// The ast we produced.
ast::ast_t ast;
// The resulting colors.
using color_array_t = std::vector<highlight_spec_t>;
color_array_t color_array;
// The parse tree of the buff.
parse_node_tree_t parse_tree;
// Flags we use for AST parsing.
static constexpr parse_tree_flags_t ast_flags =
parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated |
parse_flag_show_extra_semis;
// Color a command.
void color_command(tnode_t<g::tok_string> node);
// Color an argument.
void color_argument(tnode_t<g::tok_string> node);
void color_command(const ast::string_t &node);
// Color a node as if it were an argument.
void color_as_argument(const ast::node_t &node);
// Color a redirection.
void color_redirection(tnode_t<g::redirection> node);
// Color a list of arguments. If cmd_is_cd is true, then the arguments are for 'cd'; detect
// invalid directories.
void color_arguments(const std::vector<tnode_t<g::argument>> &args, bool cmd_is_cd = false);
// Color the redirections of the given node.
void color_redirections(tnode_t<g::arguments_or_redirections_list> list);
void color_redirection(const ast::redirection_t &node);
// Color all the children of the command with the given type.
void color_children(const parse_node_t &parent, parse_token_type_t type,
highlight_spec_t color);
void color_children(const ast::node_t &parent, ast::type_t type, highlight_spec_t color);
// Colors the source range of a node with a given color.
void color_node(const parse_node_t &node, highlight_spec_t color);
void color_node(const ast::node_t &node, highlight_spec_t color);
// Colors a range with a given color.
void color_range(source_range_t range, highlight_spec_t color);
// return whether a plain statement is 'cd'.
bool is_cd(tnode_t<g::plain_statement> stmt) const;
bool is_cd(const ast::decorated_statement_t &stmt) const;
/// \return a substring of our buffer.
wcstring get_source(source_range_t r) const;
public:
// Visit the children of a node.
void visit_children(const ast::node_t &node) {
ast::node_visitor(*this).accept_children_of(&node);
}
// AST visitor implementations.
void visit(const ast::keyword_base_t &kw);
void visit(const ast::token_base_t &tok);
void visit(const ast::redirection_t &redir);
void visit(const ast::variable_assignment_t &varas);
void visit(const ast::semi_nl_t &semi_nl);
void visit(const ast::decorated_statement_t &stmt);
// Visit an argument, perhaps knowing that our command is cd.
void visit(const ast::argument_t &arg, bool cmd_is_cd = false);
// Default implementation is to just visit children.
void visit(const ast::node_t &node) { visit_children(node); }
// Constructor
highlighter_t(const wcstring &str, size_t pos, const operation_context_t &ctx, wcstring wd,
bool can_do_io)
@ -808,52 +828,44 @@ class highlighter_t {
ctx(ctx),
io_ok(can_do_io),
working_directory(std::move(wd)),
color_array(str.size()) {
// Parse the tree.
parse_tree_from_string(buff,
parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens,
&this->parse_tree, nullptr);
}
ast(ast::ast_t::parse(buff, ast_flags)) {}
// Perform highlighting, returning an array of colors.
color_array_t highlight();
};
void highlighter_t::color_node(const parse_node_t &node, highlight_spec_t color) {
// Can only color nodes with valid source ranges.
if (!node.has_source() || node.source_length == 0) return;
// Fill the color array with our color in the corresponding range.
size_t source_end = node.source_start + node.source_length;
assert(source_end >= node.source_start);
assert(source_end <= color_array.size());
std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end,
color);
wcstring highlighter_t::get_source(source_range_t r) const {
assert(r.start + r.length >= r.start && "Overflow");
assert(r.start + r.length <= this->buff.size() && "Out of range");
return this->buff.substr(r.start, r.length);
}
void highlighter_t::color_command(tnode_t<g::tok_string> node) {
auto source_range = node.source_range();
if (!source_range) return;
void highlighter_t::color_node(const ast::node_t &node, highlight_spec_t color) {
color_range(node.source_range(), color);
}
const wcstring cmd_str = node.get_source(this->buff);
void highlighter_t::color_range(source_range_t range, highlight_spec_t color) {
assert(range.start + range.length <= this->color_array.size() && "Range out of bounds");
std::fill_n(this->color_array.begin() + range.start, range.length, color);
}
void highlighter_t::color_command(const ast::string_t &node) {
source_range_t source_range = node.source_range();
const wcstring cmd_str = get_source(source_range);
// Get an iterator to the colors associated with the argument.
const size_t arg_start = source_range->start;
const size_t arg_start = source_range.start;
const color_array_t::iterator colors = color_array.begin() + arg_start;
color_string_internal(cmd_str, highlight_role_t::command, colors);
}
// node does not necessarily have type symbol_argument here.
void highlighter_t::color_argument(tnode_t<g::tok_string> node) {
void highlighter_t::color_as_argument(const ast::node_t &node) {
auto source_range = node.source_range();
if (!source_range) return;
const wcstring arg_str = node.get_source(this->buff);
const wcstring arg_str = get_source(source_range);
// Get an iterator to the colors associated with the argument.
const size_t arg_start = source_range->start;
const size_t arg_start = source_range.start;
const color_array_t::iterator arg_colors = color_array.begin() + arg_start;
// Color this argument without concern for command substitutions.
@ -905,15 +917,13 @@ void highlighter_t::color_argument(tnode_t<g::tok_string> node) {
/// Indicates whether the source range of the given node forms a valid path in the given
/// working_directory.
static bool node_is_potential_path(const wcstring &src, const parse_node_t &node,
const operation_context_t &ctx,
const wcstring &working_directory) {
if (!node.has_source()) return false;
static bool range_is_potential_path(const wcstring &src, const source_range_t &range,
const operation_context_t &ctx,
const wcstring &working_directory) {
// Get the node source, unescape it, and then pass it to is_potential_path along with the
// working directory (as a one element list).
bool result = false;
wcstring token(src, node.source_start, node.source_length);
wcstring token = src.substr(range.start, range.length);
if (unescape_string_in_place(&token, UNESCAPE_SPECIAL)) {
// Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY.
// Put it back.
@ -925,172 +935,257 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node
return result;
}
bool highlighter_t::is_cd(tnode_t<g::plain_statement> stmt) const {
bool cmd_is_cd = false;
if (this->io_ok && stmt.has_source()) {
wcstring cmd_str;
if (plain_statement_get_expanded_command(this->buff, stmt, ctx, &cmd_str)) {
cmd_is_cd = (cmd_str == L"cd");
}
bool highlighter_t::is_cd(const ast::decorated_statement_t &stmt) const {
wcstring cmd_str;
if (this->io_ok && statement_get_expanded_command(this->buff, stmt, ctx, &cmd_str)) {
return cmd_str == L"cd";
}
return cmd_is_cd;
return false;
}
// Color all of the arguments of the given node list, which should be argument_list or
// argument_or_redirection_list.
void highlighter_t::color_arguments(const std::vector<tnode_t<g::argument>> &args, bool cmd_is_cd) {
// Find all the arguments of this list.
for (tnode_t<g::argument> arg : args) {
this->color_argument(arg.child<0>());
void highlighter_t::visit(const ast::keyword_base_t &kw) {
highlight_role_t role = highlight_role_t::normal;
switch (kw.kw) {
case parse_keyword_t::kw_begin:
case parse_keyword_t::kw_builtin:
case parse_keyword_t::kw_case:
case parse_keyword_t::kw_command:
case parse_keyword_t::kw_else:
case parse_keyword_t::kw_end:
case parse_keyword_t::kw_exec:
case parse_keyword_t::kw_for:
case parse_keyword_t::kw_function:
case parse_keyword_t::kw_if:
case parse_keyword_t::kw_in:
case parse_keyword_t::kw_switch:
case parse_keyword_t::kw_while:
role = highlight_role_t::command;
break;
if (cmd_is_cd) {
// Mark this as an error if it's not 'help' and not a valid cd path.
wcstring param = arg.get_source(this->buff);
if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) {
bool is_help = string_prefixes_string(param, L"--help") ||
string_prefixes_string(param, L"-h");
if (!is_help && this->io_ok &&
!is_potential_cd_path(param, working_directory, ctx, PATH_EXPAND_TILDE)) {
this->color_node(arg, highlight_role_t::error);
}
case parse_keyword_t::kw_and:
case parse_keyword_t::kw_or:
case parse_keyword_t::kw_not:
case parse_keyword_t::kw_exclam:
case parse_keyword_t::kw_time:
role = highlight_role_t::operat;
break;
case parse_keyword_t::none:
break;
}
color_node(kw, role);
}
void highlighter_t::visit(const ast::token_base_t &tok) {
maybe_t<highlight_role_t> role = highlight_role_t::normal;
switch (tok.type) {
case parse_token_type_end:
case parse_token_type_pipe:
case parse_token_type_background:
role = highlight_role_t::statement_terminator;
break;
case parse_token_type_andand:
case parse_token_type_oror:
role = highlight_role_t::operat;
break;
case parse_token_type_string:
// Assume all strings are params. This handles e.g. the variables a for header or
// function header. Other strings (like arguments to commands) need more complex
// handling, which occurs in their respective overrides of visit().
role = highlight_role_t::param;
default:
break;
}
if (role) color_node(tok, *role);
}
void highlighter_t::visit(const ast::semi_nl_t &semi_nl) {
color_node(semi_nl, highlight_role_t::statement_terminator);
}
void highlighter_t::visit(const ast::argument_t &arg, bool cmd_is_cd) {
color_as_argument(arg);
if (cmd_is_cd && io_ok) {
// Mark this as an error if it's not 'help' and not a valid cd path.
wcstring param = arg.source(this->buff);
if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) {
bool is_help =
string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h");
if (!is_help && this->io_ok &&
!is_potential_cd_path(param, working_directory, ctx, PATH_EXPAND_TILDE)) {
this->color_node(arg, highlight_role_t::error);
}
}
}
}
void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node) {
if (!redirection_node.has_source()) return;
void highlighter_t::visit(const ast::variable_assignment_t &varas) {
color_as_argument(varas);
// TODO: Color the '=' in the variable assignment as an operator, for fun.
// if (auto where = variable_assignment_equals_pos(varas.source(this->buff))) {
// this->color_array.at(*where) = highlight_role_t::operat;
// }
}
tnode_t<g::tok_redirection> redir_prim = redirection_node.child<0>(); // like 2>
tnode_t<g::tok_string> redir_target = redirection_node.child<1>(); // like &1 or file path
void highlighter_t::visit(const ast::decorated_statement_t &stmt) {
// Color any decoration.
if (stmt.opt_decoration) this->visit(*stmt.opt_decoration);
if (redir_prim) {
wcstring target;
const maybe_t<pipe_or_redir_t> redirect =
redirection_for_node(redirection_node, this->buff, &target);
// Color the command's source code.
// If we get no source back, there's nothing to color.
maybe_t<wcstring> cmd = stmt.command.try_source(this->buff);
if (!cmd.has_value()) return;
// We may get a missing redirection type if the redirection is invalid.
auto hl = (redirect && redirect->is_valid()) ? highlight_role_t::redirection
: highlight_role_t::error;
this->color_node(redir_prim, hl);
wcstring expanded_cmd;
bool is_valid_cmd = false;
if (!this->io_ok) {
// We cannot check if the command is invalid, so just assume it's valid.
is_valid_cmd = true;
} else if (variable_assignment_equals_pos(*cmd)) {
is_valid_cmd = true;
} else {
// Check to see if the command is valid.
// Try expanding it. If we cannot, it's an error.
bool expanded = statement_get_expanded_command(buff, stmt, ctx, &expanded_cmd);
if (expanded && !has_expand_reserved(expanded_cmd)) {
is_valid_cmd =
command_is_valid(expanded_cmd, stmt.decoration(), working_directory, ctx.vars);
}
}
// Check if the argument contains a command substitution. If so, highlight it as a param
// even though it's a command redirection, and don't try to do any other validation.
if (parse_util_locate_cmdsubst(target.c_str(), nullptr, nullptr, true) != 0) {
this->color_argument(redir_target);
// Color our statement.
if (is_valid_cmd) {
this->color_command(stmt.command);
} else {
this->color_node(stmt.command, highlight_role_t::error);
}
// Color arguments and redirections.
// Except if our command is 'cd' we have special logic for how arguments are colored.
bool is_cd = (expanded_cmd == L"cd");
for (const ast::argument_or_redirection_t &v : stmt.args_or_redirs) {
if (v.is_argument()) {
this->visit(v.argument(), is_cd);
} else {
// No command substitution, so we can highlight the target file or fd. For example,
// disallow redirections into a non-existent directory.
bool target_is_valid = true;
this->visit(v.redirection());
}
}
}
if (!redirect || !redirect->is_valid()) {
// not a valid redirection
target_is_valid = false;
} else if (!this->io_ok) {
// I/O is disallowed, so we don't have much hope of catching anything but gross
// errors. Assume it's valid.
target_is_valid = true;
} else if (!expand_one(target, expand_flag::skip_cmdsubst, ctx)) {
// Could not be expanded.
target_is_valid = false;
} else {
// Ok, we successfully expanded our target. Now verify that it works with this
// redirection. We will probably need it as a path (but not in the case of fd
// redirections). Note that the target is now unescaped.
const wcstring target_path =
path_apply_working_directory(target, this->working_directory);
switch (redirect->mode) {
case redirection_mode_t::fd: {
if (target == L"-") {
target_is_valid = true;
} else {
int fd = fish_wcstoi(target.c_str());
target_is_valid = !errno && fd >= 0;
}
break;
void highlighter_t::visit(const ast::redirection_t &redir) {
maybe_t<pipe_or_redir_t> oper =
pipe_or_redir_t::from_string(redir.oper.source(this->buff)); // like 2>
wcstring target = redir.target.source(this->buff); // like &1 or file path
assert(oper.has_value() &&
"Should have successfully parsed a pipe_or_redir_t since it was in our ast");
// Color the > part.
// It may have parsed successfully yet still be invalid (e.g. 9999999999999>&1)
// If so, color the whole thing invalid and stop.
if (!oper->is_valid()) {
this->color_node(redir, highlight_role_t::error);
return;
}
// Color the operator part like 2>.
this->color_node(redir.oper, highlight_role_t::redirection);
// Color the target part.
// Check if the argument contains a command substitution. If so, highlight it as a param
// even though it's a command redirection, and don't try to do any other validation.
if (parse_util_locate_cmdsubst(target.c_str(), nullptr, nullptr, true) != 0) {
this->color_as_argument(redir.target);
} else {
// No command substitution, so we can highlight the target file or fd. For example,
// disallow redirections into a non-existent directory.
bool target_is_valid = true;
if (!this->io_ok) {
// I/O is disallowed, so we don't have much hope of catching anything but gross
// errors. Assume it's valid.
target_is_valid = true;
} else if (!expand_one(target, expand_flag::skip_cmdsubst, ctx)) {
// Could not be expanded.
target_is_valid = false;
} else {
// Ok, we successfully expanded our target. Now verify that it works with this
// redirection. We will probably need it as a path (but not in the case of fd
// redirections). Note that the target is now unescaped.
const wcstring target_path =
path_apply_working_directory(target, this->working_directory);
switch (oper->mode) {
case redirection_mode_t::fd: {
if (target == L"-") {
target_is_valid = true;
} else {
int fd = fish_wcstoi(target.c_str());
target_is_valid = !errno && fd >= 0;
}
case redirection_mode_t::input: {
// Input redirections must have a readable non-directory.
struct stat buf = {};
target_is_valid = !waccess(target_path, R_OK) &&
!wstat(target_path, &buf) && !S_ISDIR(buf.st_mode);
break;
break;
}
case redirection_mode_t::input: {
// Input redirections must have a readable non-directory.
struct stat buf = {};
target_is_valid = !waccess(target_path, R_OK) && !wstat(target_path, &buf) &&
!S_ISDIR(buf.st_mode);
break;
}
case redirection_mode_t::overwrite:
case redirection_mode_t::append:
case redirection_mode_t::noclob: {
// Test whether the file exists, and whether it's writable (possibly after
// creating it). access() returns failure if the file does not exist.
bool file_exists = false, file_is_writable = false;
int err = 0;
struct stat buf = {};
if (wstat(target_path, &buf) < 0) {
err = errno;
}
case redirection_mode_t::overwrite:
case redirection_mode_t::append:
case redirection_mode_t::noclob: {
// Test whether the file exists, and whether it's writable (possibly after
// creating it). access() returns failure if the file does not exist.
bool file_exists = false, file_is_writable = false;
int err = 0;
struct stat buf = {};
if (wstat(target_path, &buf) < 0) {
err = errno;
}
if (string_suffixes_string(L"/", target)) {
// Redirections to things that are directories is definitely not
// allowed.
file_exists = false;
file_is_writable = false;
} else if (err == 0) {
// No err. We can write to it if it's not a directory and we have
// permission.
file_exists = true;
file_is_writable = !S_ISDIR(buf.st_mode) && !waccess(target_path, W_OK);
} else if (err == ENOENT) {
// File does not exist. Check if its parent directory is writable.
wcstring parent = wdirname(target_path);
if (string_suffixes_string(L"/", target)) {
// Redirections to things that are directories is definitely not
// allowed.
file_exists = false;
file_is_writable = false;
} else if (err == 0) {
// No err. We can write to it if it's not a directory and we have
// permission.
file_exists = true;
file_is_writable = !S_ISDIR(buf.st_mode) && !waccess(target_path, W_OK);
} else if (err == ENOENT) {
// File does not exist. Check if its parent directory is writable.
wcstring parent = wdirname(target_path);
// Ensure that the parent ends with the path separator. This will ensure
// that we get an error if the parent directory is not really a
// directory.
if (!string_suffixes_string(L"/", parent)) parent.push_back(L'/');
// Ensure that the parent ends with the path separator. This will ensure
// that we get an error if the parent directory is not really a
// directory.
if (!string_suffixes_string(L"/", parent)) parent.push_back(L'/');
// Now the file is considered writable if the parent directory is
// writable.
file_exists = false;
file_is_writable = (0 == waccess(parent, W_OK));
} else {
// Other errors we treat as not writable. This includes things like
// ENOTDIR.
file_exists = false;
file_is_writable = false;
}
// NOCLOB means that we must not overwrite files that exist.
target_is_valid =
file_is_writable &&
!(file_exists && redirect->mode == redirection_mode_t::noclob);
break;
// Now the file is considered writable if the parent directory is
// writable.
file_exists = false;
file_is_writable = (0 == waccess(parent, W_OK));
} else {
// Other errors we treat as not writable. This includes things like
// ENOTDIR.
file_exists = false;
file_is_writable = false;
}
// NOCLOB means that we must not overwrite files that exist.
target_is_valid =
file_is_writable &&
!(file_exists && oper->mode == redirection_mode_t::noclob);
break;
}
}
if (redir_target) {
auto hl = target_is_valid ? highlight_role_t::redirection : highlight_role_t::error;
this->color_node(redir_target, hl);
}
}
}
}
/// Color all of the redirections of the given command.
void highlighter_t::color_redirections(tnode_t<g::arguments_or_redirections_list> list) {
for (const auto &node : list.descendants<g::redirection>()) {
this->color_redirection(node);
}
}
/// Color all the children of the command with the given type.
void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type,
highlight_spec_t color) {
for (node_offset_t idx = 0; idx < parent.child_count; idx++) {
const parse_node_t *child = this->parse_tree.get_child(parent, idx);
if (child != nullptr && child->type == type) {
this->color_node(*child, color);
}
this->color_node(redir.target,
target_is_valid ? highlight_role_t::redirection : highlight_role_t::error);
}
}
@ -1145,171 +1240,42 @@ highlighter_t::color_array_t highlighter_t::highlight() {
ASSERT_IS_BACKGROUND_THREAD();
}
const size_t length = buff.size();
assert(this->buff.size() == this->color_array.size());
if (length == 0) return color_array;
// Start out at zero.
this->color_array.resize(this->buff.size());
std::fill(this->color_array.begin(), this->color_array.end(), highlight_spec_t{});
// Walk the node tree.
for (const parse_node_t &node : parse_tree) {
if (ctx.check_cancel()) return std::move(color_array);
switch (node.type) {
// Color direct string descendants, e.g. 'for' and 'in'.
case symbol_while_header:
case symbol_begin_header:
case symbol_function_header:
case symbol_if_clause:
case symbol_else_clause:
case symbol_case_item:
case symbol_decorated_statement:
case symbol_if_statement: {
this->color_children(node, parse_token_type_string, highlight_role_t::command);
break;
}
case symbol_switch_statement: {
tnode_t<g::switch_statement> switchn(&parse_tree, &node);
auto literal_switch = switchn.child<0>();
auto switch_arg = switchn.child<1>();
this->color_node(literal_switch, highlight_role_t::command);
this->color_node(switch_arg, highlight_role_t::param);
break;
}
case symbol_for_header: {
tnode_t<g::for_header> fhead(&parse_tree, &node);
// Color the 'for' and 'in' as commands.
auto literal_for = fhead.child<0>();
auto literal_in = fhead.child<2>();
this->color_node(literal_for, highlight_role_t::command);
this->color_node(literal_in, highlight_role_t::command);
this->visit_children(*ast.top());
if (ctx.check_cancel()) return std::move(color_array);
// Color the variable name as a parameter.
this->color_argument(fhead.child<1>());
break;
}
case parse_token_type_andand:
case parse_token_type_oror:
this->color_node(node, highlight_role_t::operat);
break;
case symbol_not_statement:
this->color_children(node, parse_token_type_string, highlight_role_t::operat);
break;
case symbol_job_decorator:
this->color_node(node, highlight_role_t::operat);
break;
case symbol_variable_assignment: {
tnode_t<g::variable_assignment> variable_assignment = {&parse_tree, &node};
this->color_argument(variable_assignment.child<0>());
break;
}
case parse_token_type_pipe:
case parse_token_type_background:
case parse_token_type_end:
case symbol_optional_background: {
this->color_node(node, highlight_role_t::statement_terminator);
break;
}
case symbol_optional_time: {
this->color_node(node, highlight_role_t::operat);
break;
}
case symbol_plain_statement: {
tnode_t<g::plain_statement> stmt(&parse_tree, &node);
// Get the decoration from the parent.
enum parse_statement_decoration_t decoration = get_decoration(stmt);
// Color the command.
tnode_t<g::tok_string> cmd_node = stmt.child<0>();
maybe_t<wcstring> cmd = cmd_node.get_source(buff);
if (!cmd) {
break; // not much as we can do without a node that has source text
}
bool is_valid_cmd = false;
if (!this->io_ok) {
// We cannot check if the command is invalid, so just assume it's valid.
is_valid_cmd = true;
} else if (variable_assignment_equals_pos(*cmd)) {
is_valid_cmd = true;
} else {
wcstring expanded_cmd;
// Check to see if the command is valid.
// Try expanding it. If we cannot, it's an error.
bool expanded =
plain_statement_get_expanded_command(buff, stmt, ctx, &expanded_cmd);
if (expanded && !has_expand_reserved(expanded_cmd)) {
is_valid_cmd =
command_is_valid(expanded_cmd, decoration, working_directory, ctx.vars);
}
}
if (!is_valid_cmd) {
this->color_node(*cmd_node, highlight_role_t::error);
} else {
this->color_command(cmd_node);
}
break;
}
// Only work on root lists, so that we don't re-color child lists.
case symbol_arguments_or_redirections_list: {
tnode_t<g::arguments_or_redirections_list> list(&parse_tree, &node);
if (argument_list_is_root(list)) {
bool cmd_is_cd = is_cd(list.try_get_parent<g::plain_statement>());
this->color_arguments(list.descendants<g::argument>(), cmd_is_cd);
this->color_redirections(list);
}
break;
}
case symbol_argument_list: {
tnode_t<g::argument_list> list(&parse_tree, &node);
if (argument_list_is_root(list)) {
this->color_arguments(list.descendants<g::argument>());
}
break;
}
case symbol_end_command: {
this->color_node(node, highlight_role_t::command);
break;
}
case parse_special_type_parse_error:
case parse_special_type_tokenizer_error: {
this->color_node(node, highlight_role_t::error);
break;
}
case parse_special_type_comment: {
this->color_node(node, highlight_role_t::comment);
break;
}
default: {
break;
}
}
// Color every comment.
const auto &extras = ast.extras();
for (const source_range_t &r : extras.comments) {
this->color_range(r, highlight_role_t::comment);
}
if (!this->io_ok || this->cursor_pos > this->buff.size()) {
return std::move(color_array);
// Color every extra semi.
for (const source_range_t &r : extras.semis) {
this->color_range(r, highlight_role_t::statement_terminator);
}
// If the cursor is over an argument, and that argument is a valid path, underline it.
for (const auto &node : parse_tree) {
// Must be an argument with source.
if (node.type != symbol_argument || !node.has_source()) continue;
// Color every error range.
for (const source_range_t &r : extras.errors) {
this->color_range(r, highlight_role_t::error);
}
if (ctx.check_cancel()) return std::move(color_array);
// Underline every valid path.
if (node_is_potential_path(buff, node, ctx, working_directory)) {
// It is, underline it.
for (size_t i = node.source_start; i < node.source_start + node.source_length; i++) {
// Underline every valid path.
if (io_ok) {
for (const ast::node_t &node : ast) {
const ast::argument_t *arg = node.try_as<ast::argument_t>();
if (!arg || arg->unsourced) continue;
if (ctx.check_cancel()) break;
if (range_is_potential_path(buff, arg->range, ctx, working_directory)) {
// Don't color highlight_role_t::error because it looks dorky. For example,
// trying to cd into a non-directory would show an underline and also red.
if (this->color_array.at(i).foreground != highlight_role_t::error) {
this->color_array.at(i).valid_path = true;
for (size_t i = arg->range.start, end = arg->range.start + arg->range.length;
i < end; i++) {
if (this->color_array.at(i).foreground != highlight_role_t::error) {
this->color_array.at(i).valid_path = true;
}
}
}
}

View File

@ -29,6 +29,7 @@
#include <type_traits>
#include <unordered_set>
#include "ast.h"
#include "common.h"
#include "env.h"
#include "fallback.h" // IWYU pragma: keep
@ -44,7 +45,6 @@
#include "parser.h"
#include "path.h"
#include "reader.h"
#include "tnode.h"
#include "wcstringutil.h"
#include "wildcard.h" // IWYU pragma: keep
#include "wutil.h" // IWYU pragma: keep
@ -1096,8 +1096,7 @@ void history_impl_t::populate_from_config_path() {
static bool should_import_bash_history_line(const wcstring &line) {
if (line.empty()) return false;
parse_node_tree_t parse_tree;
if (!parse_tree_from_string(line, parse_flag_none, &parse_tree, nullptr)) return false;
if (ast::ast_t::parse(line).errored()) return false;
// In doing this test do not allow incomplete strings. Hence the "false" argument.
parse_error_list_t errors;
@ -1274,38 +1273,33 @@ void history_t::add_pending_with_file_detection(const wcstring &str,
// Find all arguments that look like they could be file paths.
bool needs_sync_write = false;
parse_node_tree_t tree;
parse_tree_from_string(str, parse_flag_none, &tree, nullptr);
using namespace ast;
auto ast = ast_t::parse(str);
path_list_t potential_paths;
for (const parse_node_t &node : tree) {
if (!node.has_source()) {
continue;
}
if (node.type == symbol_argument) {
wcstring potential_path = node.get_source(str);
for (const node_t &node : ast) {
if (const argument_t *arg = node.try_as<argument_t>()) {
wcstring potential_path = arg->source(str);
bool unescaped = unescape_string_in_place(&potential_path, UNESCAPE_DEFAULT);
if (unescaped && string_could_be_path(potential_path)) {
potential_paths.push_back(potential_path);
}
} else if (node.type == symbol_plain_statement) {
} else if (const decorated_statement_t *stmt = node.try_as<decorated_statement_t>()) {
// Hack hack hack - if the command is likely to trigger an exit, then don't do
// background file detection, because we won't be able to write it to our history file
// before we exit.
// Also skip it for 'echo'. This is because echo doesn't take file paths, but also
// because the history file test wants to find the commands in the history file
// immediately after running them, so it can't tolerate the asynchronous file detection.
if (get_decoration({&tree, &node}) == parse_statement_decoration_exec) {
if (stmt->decoration() == parse_statement_decoration_exec) {
needs_sync_write = true;
}
if (maybe_t<wcstring> command = command_for_plain_statement({&tree, &node}, str)) {
unescape_string_in_place(&*command, UNESCAPE_DEFAULT);
if (*command == L"exit" || *command == L"reboot" || *command == L"restart" ||
*command == L"echo") {
needs_sync_write = true;
}
wcstring command = stmt->command.source(str);
unescape_string_in_place(&command, UNESCAPE_DEFAULT);
if (command == L"exit" || command == L"reboot" || command == L"restart" ||
command == L"echo") {
needs_sync_write = true;
}
}
}

View File

@ -6,54 +6,27 @@
#include "common.h"
#define PARSE_ASSERT(a) assert(a)
#define PARSER_DIE() \
do { \
FLOG(error, L"Parser dying!"); \
exit_without_destructors(-1); \
} while (0)
// A range of source code.
struct source_range_t {
uint32_t start;
uint32_t length;
uint32_t end() const {
assert(start + length >= start && "Overflow");
return start + length;
}
};
// IMPORTANT: If the following enum table is modified you must also update token_enum_map below.
enum parse_token_type_t : uint8_t {
token_type_invalid = 1,
// Non-terminal tokens
symbol_job_list,
symbol_job_conjunction,
symbol_job_conjunction_continuation,
symbol_job_decorator,
symbol_job,
symbol_job_continuation,
symbol_statement,
symbol_block_statement,
symbol_block_header,
symbol_for_header,
symbol_while_header,
symbol_begin_header,
symbol_function_header,
symbol_if_statement,
symbol_if_clause,
symbol_else_clause,
symbol_else_continuation,
symbol_switch_statement,
symbol_case_item_list,
symbol_case_item,
symbol_not_statement,
symbol_decorated_statement,
symbol_plain_statement,
symbol_variable_assignment,
symbol_variable_assignments,
symbol_arguments_or_redirections_list,
symbol_andor_job_list,
symbol_argument_list,
// Freestanding argument lists are parsed from the argument list supplied to 'complete -a'.
// They are not generated by parse trees rooted in symbol_job_list.
symbol_freestanding_argument_list,
symbol_argument,
symbol_redirection,
symbol_optional_background,
symbol_optional_newlines,
symbol_optional_time,
symbol_end_command,
// Terminal types.
parse_token_type_string,
parse_token_type_pipe,
@ -68,13 +41,6 @@ enum parse_token_type_t : uint8_t {
parse_special_type_parse_error,
parse_special_type_tokenizer_error,
parse_special_type_comment,
LAST_TOKEN_TYPE = parse_special_type_comment,
FIRST_TERMINAL_TYPE = parse_token_type_string,
LAST_TERMINAL_TYPE = parse_token_type_terminate,
LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string,
LAST_PARSE_TOKEN_TYPE = parse_token_type_end
};
const enum_map<parse_token_type_t> token_enum_map[] = {
@ -89,9 +55,6 @@ const enum_map<parse_token_type_t> token_enum_map[] = {
{parse_token_type_andand, L"parse_token_type_andand"},
{parse_token_type_oror, L"parse_token_type_oror"},
{parse_token_type_terminate, L"parse_token_type_terminate"},
// Define all symbols
#define ELEM(sym) {symbol_##sym, L"symbol_" #sym},
#include "parse_grammar_elements.inc"
{token_type_invalid, L"token_type_invalid"},
{token_type_invalid, nullptr}};
#define token_enum_map_len (sizeof token_enum_map / sizeof *token_enum_map)
@ -147,7 +110,7 @@ const enum_map<parse_keyword_t> keyword_enum_map[] = {{parse_keyword_t::kw_excla
// Node tag values.
// Statement decorations, stored in node tag.
// Statement decorations.
enum parse_statement_decoration_t {
parse_statement_decoration_none,
parse_statement_decoration_command,
@ -155,19 +118,6 @@ enum parse_statement_decoration_t {
parse_statement_decoration_exec,
};
// Job decorations, stored in node tag.
enum parse_job_decoration_t {
parse_job_decoration_none,
parse_job_decoration_and,
parse_job_decoration_or,
};
// Whether a statement is backgrounded.
enum parse_optional_background_t { parse_no_background, parse_background };
// Whether a job is prefixed with "time".
enum parse_optional_time_t { parse_optional_time_no_time, parse_optional_time_time };
// Parse error code list.
enum parse_error_code_t {
parse_error_none,
@ -193,6 +143,26 @@ enum parse_error_code_t {
parse_error_andor_in_pipeline, // "and" or "or" after a pipe
};
enum {
parse_flag_none = 0,
/// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
/// disconnected trees. This is intended to be used by syntax highlighting.
parse_flag_continue_after_error = 1 << 0,
/// Include comment tokens.
parse_flag_include_comments = 1 << 1,
/// Indicate that the tokenizer should accept incomplete tokens */
parse_flag_accept_incomplete_tokens = 1 << 2,
/// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
/// tree where some nodes may have no productions.
parse_flag_leave_unterminated = 1 << 3,
/// Indicate that the parser should generate job_list entries for blank lines.
parse_flag_show_blank_lines = 1 << 4,
/// Indicate that extra semis should be generated.
parse_flag_show_extra_semis = 1 << 5,
};
typedef unsigned int parse_tree_flags_t;
enum { PARSER_TEST_ERROR = 1, PARSER_TEST_INCOMPLETE = 2 };
typedef unsigned int parser_test_error_bits_t;
@ -214,6 +184,9 @@ struct parse_error_t {
};
typedef std::vector<parse_error_t> parse_error_list_t;
wcstring token_type_user_presentable_description(parse_token_type_t type,
parse_keyword_t keyword = parse_keyword_t::none);
// Special source_start value that means unknown.
#define SOURCE_LOCATION_UNKNOWN (static_cast<size_t>(-1))
@ -221,6 +194,13 @@ typedef std::vector<parse_error_t> parse_error_list_t;
/// errors in a substring of a larger source buffer.
void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt);
// The location of a pipeline.
enum class pipeline_position_t {
none, // not part of a pipeline
first, // first command in a pipeline
subsequent // second or further command in a pipeline
};
/// Maximum number of function calls.
#define FISH_MAX_STACK_DEPTH 128

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,10 @@
// Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.).
// Provides the "linkage" between an ast and actual execution structures (job_t, etc.).
#ifndef FISH_PARSE_EXECUTION_H
#define FISH_PARSE_EXECUTION_H
#include <stddef.h>
#include "ast.h"
#include "common.h"
#include "io.h"
#include "parse_constants.h"
@ -38,7 +39,7 @@ class parse_execution_context_t {
const operation_context_t &ctx;
// The currently executing job node, used to indicate the line number.
tnode_t<grammar::job> executing_job_node{};
const ast::job_t *executing_job_node{};
// Cached line number information.
size_t cached_lineno_offset = 0;
@ -59,88 +60,91 @@ class parse_execution_context_t {
// Report an error, setting $status to \p status. Always returns
// 'end_execution_reason_t::error'.
end_execution_reason_t report_error(int status, const parse_node_t &node, const wchar_t *fmt,
end_execution_reason_t report_error(int status, const ast::node_t &node, const wchar_t *fmt,
...) const;
end_execution_reason_t report_errors(int status, const parse_error_list_t &error_list) const;
/// Command not found support.
end_execution_reason_t handle_command_not_found(const wcstring &cmd,
tnode_t<grammar::plain_statement> statement,
const ast::decorated_statement_t &statement,
int err_code);
// Utilities
wcstring get_source(const parse_node_t &node) const;
tnode_t<grammar::plain_statement> infinite_recursive_statement_in_job_list(
tnode_t<grammar::job_list> job_list, wcstring *out_func_name) const;
wcstring get_source(const ast::node_t &node) const;
const ast::decorated_statement_t *infinite_recursive_statement_in_job_list(
const ast::job_list_t &job_list, wcstring *out_func_name) const;
// Expand a command which may contain variables, producing an expand command and possibly
// arguments. Prints an error message on error.
end_execution_reason_t expand_command(tnode_t<grammar::plain_statement> statement,
end_execution_reason_t expand_command(const ast::decorated_statement_t &statement,
wcstring *out_cmd, wcstring_list_t *out_args) const;
/// Return whether we should skip a job with the given bool statement type.
bool should_skip(parse_job_decoration_t type) const;
/// Indicates whether a job is a simple block (one block, no redirections).
bool job_is_simple_block(tnode_t<grammar::job> job) const;
bool job_is_simple_block(const ast::job_t &job) const;
enum process_type_t process_type_for_command(tnode_t<grammar::plain_statement> statement,
enum process_type_t process_type_for_command(const ast::decorated_statement_t &statement,
const wcstring &cmd) const;
end_execution_reason_t apply_variable_assignments(
process_t *proc, tnode_t<grammar::variable_assignments> variable_assignments,
process_t *proc, const ast::variable_assignment_list_t &variable_assignments,
const block_t **block);
// These create process_t structures from statements.
end_execution_reason_t populate_job_process(
job_t *job, process_t *proc, tnode_t<grammar::statement> statement,
tnode_t<grammar::variable_assignments> variable_assignments);
job_t *job, process_t *proc, const ast::statement_t &statement,
const ast::variable_assignment_list_t &variable_assignments_list_t);
end_execution_reason_t populate_not_process(job_t *job, process_t *proc,
tnode_t<grammar::not_statement> not_statement);
const ast::not_statement_t &not_statement);
end_execution_reason_t populate_plain_process(job_t *job, process_t *proc,
tnode_t<grammar::plain_statement> statement);
const ast::decorated_statement_t &statement);
template <typename Type>
end_execution_reason_t populate_block_process(job_t *job, process_t *proc,
tnode_t<grammar::statement> statement,
tnode_t<Type> specific_statement);
const ast::statement_t &statement,
const Type &specific_statement);
// These encapsulate the actual logic of various (block) statements.
end_execution_reason_t run_block_statement(tnode_t<grammar::block_statement> statement,
end_execution_reason_t run_block_statement(const ast::block_statement_t &statement,
const block_t *associated_block);
end_execution_reason_t run_for_statement(tnode_t<grammar::for_header> header,
tnode_t<grammar::job_list> contents);
end_execution_reason_t run_if_statement(tnode_t<grammar::if_statement> statement,
end_execution_reason_t run_for_statement(const ast::for_header_t &header,
const ast::job_list_t &contents);
end_execution_reason_t run_if_statement(const ast::if_statement_t &statement,
const block_t *associated_block);
end_execution_reason_t run_switch_statement(tnode_t<grammar::switch_statement> statement);
end_execution_reason_t run_while_statement(tnode_t<grammar::while_header> header,
tnode_t<grammar::job_list> contents,
end_execution_reason_t run_switch_statement(const ast::switch_statement_t &statement);
end_execution_reason_t run_while_statement(const ast::while_header_t &header,
const ast::job_list_t &contents,
const block_t *associated_block);
end_execution_reason_t run_function_statement(tnode_t<grammar::block_statement> statement,
tnode_t<grammar::function_header> header);
end_execution_reason_t run_begin_statement(tnode_t<grammar::job_list> contents);
end_execution_reason_t run_function_statement(const ast::block_statement_t &statement,
const ast::function_header_t &header);
end_execution_reason_t run_begin_statement(const ast::job_list_t &contents);
enum globspec_t { failglob, nullglob };
using argument_node_list_t = std::vector<tnode_t<grammar::argument>>;
end_execution_reason_t expand_arguments_from_nodes(const argument_node_list_t &argument_nodes,
using ast_args_list_t = std::vector<const ast::argument_t *>;
static ast_args_list_t get_argument_nodes(const ast::argument_list_t &args);
static ast_args_list_t get_argument_nodes(const ast::argument_or_redirection_list_t &args);
end_execution_reason_t expand_arguments_from_nodes(const ast_args_list_t &argument_nodes,
wcstring_list_t *out_arguments,
globspec_t glob_behavior);
// Determines the list of redirections for a node.
end_execution_reason_t determine_redirections(
tnode_t<grammar::arguments_or_redirections_list> node,
redirection_spec_list_t *out_redirections);
end_execution_reason_t determine_redirections(const ast::argument_or_redirection_list_t &list,
redirection_spec_list_t *out_redirections);
end_execution_reason_t run_1_job(tnode_t<grammar::job> job, const block_t *associated_block);
end_execution_reason_t run_job_conjunction(tnode_t<grammar::job_conjunction> job_expr,
end_execution_reason_t run_1_job(const ast::job_t &job, const block_t *associated_block);
end_execution_reason_t test_and_run_1_job_conjunction(const ast::job_conjunction_t &jc,
const block_t *associated_block);
end_execution_reason_t run_job_conjunction(const ast::job_conjunction_t &job_expr,
const block_t *associated_block);
template <typename Type>
end_execution_reason_t run_job_list(tnode_t<Type> job_list_node,
end_execution_reason_t run_job_list(const ast::job_list_t &job_list_node,
const block_t *associated_block);
end_execution_reason_t populate_job_from_job_node(job_t *j, tnode_t<grammar::job> job_node,
end_execution_reason_t run_job_list(const ast::andor_job_list_t &job_list_node,
const block_t *associated_block);
end_execution_reason_t populate_job_from_job_node(job_t *j, const ast::job_t &job_node,
const block_t *associated_block);
// Returns the line number of the node. Not const since it touches cached_lineno_offset.
int line_offset_of_node(tnode_t<grammar::job> node);
int line_offset_of_node(const ast::job_t *node);
int line_offset_of_character_at_offset(size_t offset);
public:
@ -159,14 +163,14 @@ class parse_execution_context_t {
/// Returns the source string.
const wcstring &get_source() const { return pstree->src; }
/// Return the parse tree.
const parse_node_tree_t &tree() const { return pstree->tree; }
/// Return the parsed ast.
const ast::ast_t &ast() const { return *pstree->ast; }
/// Start executing at the given node. Returns 0 if there was no error, 1 if there was an
/// error.
end_execution_reason_t eval_node(tnode_t<grammar::statement> statement,
end_execution_reason_t eval_node(const ast::statement_t &statement,
const block_t *associated_block);
end_execution_reason_t eval_node(tnode_t<grammar::job_list> job_list,
end_execution_reason_t eval_node(const ast::job_list_t &job_list,
const block_t *associated_block);
};

View File

@ -1,401 +0,0 @@
// Programmatic representation of fish grammar
#ifndef FISH_PARSE_GRAMMAR_H
#define FISH_PARSE_GRAMMAR_H
#include <array>
#include <tuple>
#include <type_traits>
#include "parse_constants.h"
#include "tokenizer.h"
struct parse_token_t;
typedef uint8_t parse_node_tag_t;
using parse_node_tag_t = uint8_t;
struct parse_token_t;
// The fish grammar, expressed as C++ template metaprogramming.
// Each grammar symbol is a struct; productions are encoded as compile-time arrays of
// production_element_t, and each symbol carries a resolve() function (defined in
// parse_productions.cpp for the non-trivial ones) that picks a production from two
// lookahead tokens.
namespace grammar {

// A production element is one byte: token/symbol values directly, keywords offset by
// LAST_TOKEN_OR_SYMBOL + 1 (see keyword::element() below).
using production_element_t = uint8_t;

enum {
    // The maximum length of any seq production.
    MAX_PRODUCTION_LENGTH = 6
};

// Define primitive types.
// A primitive wraps a single terminal token type.
template <enum parse_token_type_t Token>
struct primitive {
    using type_tuple = std::tuple<>;
    static constexpr parse_token_type_t token = Token;
    static constexpr production_element_t element() { return Token; }
};

using tok_end = primitive<parse_token_type_end>;
using tok_string = primitive<parse_token_type_string>;
using tok_pipe = primitive<parse_token_type_pipe>;
using tok_background = primitive<parse_token_type_background>;
using tok_redirection = primitive<parse_token_type_redirection>;
using tok_andand = primitive<parse_token_type_andand>;
using tok_oror = primitive<parse_token_type_oror>;

// Define keyword types.
// Keywords are string tokens with a specific keyword value.
template <parse_keyword_t Keyword>
struct keyword {
    using type_tuple = std::tuple<>;
    static constexpr parse_token_type_t token = parse_token_type_string;
    static constexpr production_element_t element() {
        // Convert a parse_keyword_t enum to a production_element_t enum.
        return static_cast<uint32_t>(Keyword) + LAST_TOKEN_OR_SYMBOL + 1;
    }
};

// Define special types.
// Comments are not emitted as part of productions, but specially by the parser.
struct comment {
    using type_tuple = std::tuple<>;
    static constexpr parse_token_type_t token = parse_special_type_comment;
};

// Forward declare all the symbol types.
// (X-macro expansion: one `struct T;` per entry in the .inc file.)
#define ELEM(T) struct T;
#include "parse_grammar_elements.inc"

// A production is a sequence of production elements.
// +1 to hold the terminating token_type_invalid
template <size_t Count>
using production_t = std::array<const production_element_t, Count + 1>;

// This is an ugly hack to avoid ODR violations
// Given some type, return a pointer to its production.
template <typename T>
const production_element_t *production_for() {
    static constexpr auto prod = T::production;
    return prod.data();
}

// Get some production element.
template <typename T>
constexpr production_element_t element() {
    return T::element();
}

// Template goo.
// Helpers for answering, at compile time, "could type Desired appear at index Index in
// any of these productions?" — used by ALT_BODY below.
namespace detail {
template <typename T, typename Tuple>
struct tuple_contains;

template <typename T>
struct tuple_contains<T, std::tuple<>> : std::false_type {};

template <typename T, typename U, typename... Ts>
struct tuple_contains<T, std::tuple<U, Ts...>> : tuple_contains<T, std::tuple<Ts...>> {};

template <typename T, typename... Ts>
struct tuple_contains<T, std::tuple<T, Ts...>> : std::true_type {};

struct void_type {
    using type = void;
};

// Support for checking whether the index N is valid for T::type_tuple.
template <size_t N, typename T>
static constexpr bool index_valid() {
    return N < std::tuple_size<typename T::type_tuple>::value;
}

// Get the Nth type of T::type_tuple.
template <size_t N, typename T>
using tuple_element = std::tuple_element<N, typename T::type_tuple>;

// Get the Nth type of T::type_tuple, or void if N is out of bounds.
template <size_t N, typename T>
using tuple_element_or_void =
    typename std::conditional<index_valid<N, T>(), tuple_element<N, T>, void_type>::type::type;

// Make a tuple by mapping the Nth item of a list of 'seq's.
template <size_t N, typename... Ts>
struct tuple_nther {
    // A tuple of the Nth types of tuples (or voids).
    using type = std::tuple<tuple_element_or_void<N, Ts>...>;
};

// Given a list of Options, each one a seq, check to see if any of them contain type Desired at
// index Index.
template <typename Desired, size_t Index, typename... Options>
inline constexpr bool type_possible() {
    using nths = typename tuple_nther<Index, Options...>::type;
    return tuple_contains<Desired, nths>::value;
}
}  // namespace detail

// Partial specialization hack.
// Maps each grammar symbol type T to its symbol_T enum value via element<T>().
#define ELEM(T) \
    template <> \
    constexpr production_element_t element<T>() { \
        return symbol_##T; \
    }
#include "parse_grammar_elements.inc"

// Empty produces nothing.
struct empty {
    using type_tuple = std::tuple<>;
    static constexpr production_t<0> production = {{token_type_invalid}};
    static const production_element_t *resolve(const parse_token_t &, const parse_token_t &,
                                               parse_node_tag_t *) {
        return production_for<empty>();
    }
};

// Sequence represents a list of (at least two) productions.
template <class T0, class... Ts>
struct seq {
    static constexpr production_t<1 + sizeof...(Ts)> production = {
        {element<T0>(), element<Ts>()..., token_type_invalid}};

    static_assert(1 + sizeof...(Ts) <= MAX_PRODUCTION_LENGTH, "MAX_PRODUCTION_LENGTH too small");

    using type_tuple = std::tuple<T0, Ts...>;

    template <typename Desired, size_t Index>
    static constexpr bool type_possible() {
        using element_t = detail::tuple_element_or_void<Index, seq>;
        return std::is_same<Desired, element_t>::value;
    }

    static const production_element_t *resolve(const parse_token_t &, const parse_token_t &,
                                               parse_node_tag_t *) {
        return production_for<seq>();
    }
};

template <class... Args>
using produces_sequence = seq<Args...>;

// Ergonomic way to create a production for a single element.
template <class T>
using single = seq<T>;

template <class T>
using produces_single = single<T>;

// Alternative represents a choice.
// Symbols deriving from it list their candidate productions and define resolve() in
// parse_productions.cpp to pick one from the lookahead tokens.
struct alternative {};

// Following are the grammar productions.
#define BODY(T) static constexpr parse_token_type_t token = symbol_##T;

#define DEF(T) struct T : public

#define DEF_ALT(T) struct T : public alternative
#define ALT_BODY(T, ...) \
    BODY(T) \
    using type_tuple = std::tuple<>; \
    template <typename Desired, size_t Index> \
    static constexpr bool type_possible() { \
        return detail::type_possible<Desired, Index, __VA_ARGS__>(); \
    } \
    static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, \
                                               parse_node_tag_t *)

// A job_list is a list of job_conjunctions, separated by semicolons or newlines
DEF_ALT(job_list) {
    using normal = seq<job_decorator, job_conjunction, job_list>;
    using empty_line = seq<tok_end, job_list>;
    using empty = grammar::empty;
    ALT_BODY(job_list, normal, empty_line, empty);
};

// Job decorators are 'and' and 'or'. These apply to the whole job.
DEF_ALT(job_decorator) {
    using ands = single<keyword<parse_keyword_t::kw_and>>;
    using ors = single<keyword<parse_keyword_t::kw_or>>;
    using empty = grammar::empty;
    ALT_BODY(job_decorator, ands, ors, empty);
};

// A job_conjunction is a job followed by a continuation.
DEF(job_conjunction) produces_sequence<job, job_conjunction_continuation>{BODY(job_conjunction)};

DEF_ALT(job_conjunction_continuation) {
    using andands = seq<tok_andand, optional_newlines, job_conjunction>;
    using orors = seq<tok_oror, optional_newlines, job_conjunction>;
    using empty = grammar::empty;
    ALT_BODY(job_conjunction_continuation, andands, orors, empty);
};

/// The time builtin.
DEF_ALT(optional_time) {
    using empty = grammar::empty;
    using time = single<keyword<parse_keyword_t::kw_time>>;
    ALT_BODY(optional_time, empty, time);
};

// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases
// like if statements, where we require a command). To represent "non-empty", we require a
// statement, followed by a possibly empty job_continuation, and then optionally a background
// specifier '&'
DEF(job)
produces_sequence<optional_time, variable_assignments, statement, job_continuation,
                  optional_background>{BODY(job)};

DEF_ALT(job_continuation) {
    using piped =
        seq<tok_pipe, optional_newlines, variable_assignments, statement, job_continuation>;
    using empty = grammar::empty;
    ALT_BODY(job_continuation, piped, empty);
};

// A list of assignments like HOME=$PWD
DEF_ALT(variable_assignments) {
    using empty = grammar::empty;
    using var = seq<variable_assignment, variable_assignments>;
    ALT_BODY(variable_assignments, empty, var);
};

// A string token like VAR=value
DEF(variable_assignment) produces_single<tok_string>{BODY(variable_assignment)};

// A statement is a normal command, or an if / while / and etc
DEF_ALT(statement) {
    using nots = single<not_statement>;
    using block = single<block_statement>;
    using ifs = single<if_statement>;
    using switchs = single<switch_statement>;
    using decorated = single<decorated_statement>;
    ALT_BODY(statement, nots, block, ifs, switchs, decorated);
};

// A block is a conditional, loop, or begin/end
DEF(if_statement)
produces_sequence<if_clause, else_clause, end_command, arguments_or_redirections_list>{
    BODY(if_statement)};

DEF(if_clause)
produces_sequence<keyword<parse_keyword_t::kw_if>, job_conjunction, tok_end, andor_job_list,
                  job_list>{BODY(if_clause)};

DEF_ALT(else_clause) {
    using empty = grammar::empty;
    using else_cont = seq<keyword<parse_keyword_t::kw_else>, else_continuation>;
    ALT_BODY(else_clause, empty, else_cont);
};

DEF_ALT(else_continuation) {
    using else_if = seq<if_clause, else_clause>;
    using else_only = seq<tok_end, job_list>;
    ALT_BODY(else_continuation, else_if, else_only);
};

DEF(switch_statement)
produces_sequence<keyword<parse_keyword_t::kw_switch>, argument, tok_end, case_item_list,
                  end_command, arguments_or_redirections_list>{BODY(switch_statement)};

DEF_ALT(case_item_list) {
    using empty = grammar::empty;
    using case_items = seq<case_item, case_item_list>;
    using blank_line = seq<tok_end, case_item_list>;
    ALT_BODY(case_item_list, empty, case_items, blank_line);
};

DEF(case_item)
produces_sequence<keyword<parse_keyword_t::kw_case>, argument_list, tok_end, job_list>{
    BODY(case_item)};

DEF(block_statement)
produces_sequence<block_header, job_list, end_command, arguments_or_redirections_list>{
    BODY(block_statement)};

DEF_ALT(block_header) {
    using forh = single<for_header>;
    using whileh = single<while_header>;
    using funch = single<function_header>;
    using beginh = single<begin_header>;
    ALT_BODY(block_header, forh, whileh, funch, beginh);
};

DEF(for_header)
produces_sequence<keyword<parse_keyword_t::kw_for>, tok_string, keyword<parse_keyword_t::kw_in>,
                  argument_list, tok_end>{BODY(for_header)};

DEF(while_header)
produces_sequence<keyword<parse_keyword_t::kw_while>, job_conjunction, tok_end, andor_job_list>{
    BODY(while_header)};

DEF(begin_header) produces_single<keyword<parse_keyword_t::kw_begin>>{BODY(begin_header)};

// Functions take arguments, and require at least one (the name). No redirections allowed.
DEF(function_header)
produces_sequence<keyword<parse_keyword_t::kw_function>, argument, argument_list, tok_end>{
    BODY(function_header)};

DEF_ALT(not_statement) {
    using nots =
        seq<keyword<parse_keyword_t::kw_not>, variable_assignments, optional_time, statement>;
    using exclams =
        seq<keyword<parse_keyword_t::kw_exclam>, variable_assignments, optional_time, statement>;
    ALT_BODY(not_statement, nots, exclams);
};

// An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean
// statement.
DEF_ALT(andor_job_list) {
    using empty = grammar::empty;
    using andor_job = seq<job_decorator, job_conjunction, andor_job_list>;
    using empty_line = seq<tok_end, andor_job_list>;
    ALT_BODY(andor_job_list, empty, andor_job, empty_line);
};

// A decorated_statement is a command with a list of arguments_or_redirections, possibly with
// "builtin" or "command" or "exec"
DEF_ALT(decorated_statement) {
    using plains = single<plain_statement>;
    using cmds = seq<keyword<parse_keyword_t::kw_command>, plain_statement>;
    using builtins = seq<keyword<parse_keyword_t::kw_builtin>, plain_statement>;
    using execs = seq<keyword<parse_keyword_t::kw_exec>, plain_statement>;
    ALT_BODY(decorated_statement, plains, cmds, builtins, execs);
};

DEF(plain_statement)
produces_sequence<tok_string, arguments_or_redirections_list>{BODY(plain_statement)};

DEF_ALT(argument_list) {
    using empty = grammar::empty;
    using arg = seq<argument, argument_list>;
    ALT_BODY(argument_list, empty, arg);
};

DEF_ALT(arguments_or_redirections_list) {
    using empty = grammar::empty;
    using arg = seq<argument, arguments_or_redirections_list>;
    using redir = seq<redirection, arguments_or_redirections_list>;
    ALT_BODY(arguments_or_redirections_list, empty, arg, redir);
};

DEF(argument) produces_single<tok_string>{BODY(argument)};
DEF(redirection) produces_sequence<tok_redirection, tok_string>{BODY(redirection)};

DEF_ALT(optional_background) {
    using empty = grammar::empty;
    using background = single<tok_background>;
    ALT_BODY(optional_background, empty, background);
};

DEF(end_command) produces_single<keyword<parse_keyword_t::kw_end>>{BODY(end_command)};

// Note optional_newlines only allows newline-style tok_end, not semicolons.
DEF_ALT(optional_newlines) {
    using empty = grammar::empty;
    using newlines = seq<tok_end, optional_newlines>;
    ALT_BODY(optional_newlines, empty, newlines);
};

// A freestanding_argument_list is equivalent to a normal argument list, except it may contain
// TOK_END (newlines, and even semicolons, for historical reasons)
DEF_ALT(freestanding_argument_list) {
    using empty = grammar::empty;
    using arg = seq<argument, freestanding_argument_list>;
    using semicolon = seq<tok_end, freestanding_argument_list>;
    ALT_BODY(freestanding_argument_list, empty, arg, semicolon);
};
}  // namespace grammar
#endif

View File

@ -1,37 +0,0 @@
// Define ELEM before including this file.
// This is an X-macro list of every grammar symbol. Each includer defines ELEM(T) to
// expand as it needs (forward declarations, element<T>() specializations, resolver
// dispatch cases, ...) and then includes this file.
ELEM(job_list)
ELEM(job)
ELEM(job_decorator)
ELEM(job_conjunction)
ELEM(job_conjunction_continuation)
ELEM(job_continuation)
ELEM(statement)
ELEM(if_statement)
ELEM(if_clause)
ELEM(else_clause)
ELEM(else_continuation)
ELEM(switch_statement)
ELEM(case_item_list)
ELEM(case_item)
ELEM(block_statement)
ELEM(block_header)
ELEM(for_header)
ELEM(while_header)
ELEM(begin_header)
ELEM(function_header)
ELEM(not_statement)
ELEM(andor_job_list)
ELEM(decorated_statement)
ELEM(variable_assignment)
ELEM(variable_assignments)
ELEM(plain_statement)
ELEM(argument_list)
ELEM(arguments_or_redirections_list)
ELEM(argument)
ELEM(redirection)
ELEM(optional_background)
ELEM(optional_newlines)
ELEM(optional_time)
ELEM(end_command)
ELEM(freestanding_argument_list)
// Undefine so the next includer can supply its own expansion.
#undef ELEM

View File

@ -1,466 +0,0 @@
#include "config.h" // IWYU pragma: keep
#include "parse_productions.h"
#include <stdio.h>
#include "common.h"
#include "flog.h"
#include "parse_constants.h"
#include "parse_grammar.h"
#include "parse_tree.h"
using namespace parse_productions;
using namespace grammar;
#define NO_PRODUCTION nullptr
// Herein are encoded the productions for our LL2 fish grammar.
//
// Each symbol (e.g. symbol_job_list) has a corresponding function (e.g. resolve_job_list). The
// function accepts two tokens, representing the first and second lookahead, and returns a
// production representing the rule, or NULL on error. There is also a tag value which is returned
// by reference; the tag is a sort of node annotation.
//
// Productions are generally a static const array, and we return a pointer to the array (yes,
// really).
#define RESOLVE(SYM) \
const production_element_t *SYM::resolve( \
const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag)
/// Resolve the production for a job_list: a list of jobs separated by semicolons or newlines.
RESOLVE(job_list) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.type == parse_token_type_string) {
        // Certain keywords close the enclosing construct and therefore end this job list.
        if (token1.keyword == parse_keyword_t::kw_end ||
            token1.keyword == parse_keyword_t::kw_else ||
            token1.keyword == parse_keyword_t::kw_case) {
            return production_for<empty>();  // end this job list
        }
        return production_for<normal>();  // normal string
    }
    if (token1.type == parse_token_type_pipe || token1.type == parse_token_type_redirection ||
        token1.type == parse_token_type_background) {
        return production_for<normal>();
    }
    if (token1.type == parse_token_type_end) {
        return production_for<empty_line>();
    }
    if (token1.type == parse_token_type_terminate) {
        return production_for<empty>();  // no more commands, just transition to empty
    }
    return NO_PRODUCTION;
}
// A job decorator is 'and' or 'or', applying to the whole job.
RESOLVE(job_decorator) {
    // An and/or followed by --help is a command invocation, not a decoration.
    if (token2.is_help_argument) {
        *out_tag = parse_job_decoration_none;
        return production_for<empty>();
    }
    if (token1.keyword == parse_keyword_t::kw_and) {
        *out_tag = parse_job_decoration_and;
        return production_for<ands>();
    }
    if (token1.keyword == parse_keyword_t::kw_or) {
        *out_tag = parse_job_decoration_or;
        return production_for<ors>();
    }
    // Not a decorator at all.
    *out_tag = parse_job_decoration_none;
    return production_for<empty>();
}
/// Resolve the continuation of a job conjunction: '&&' or '||' chains another job
/// conjunction onto this one; anything else ends the conjunction.
/// Sets *out_tag to the decoration when a chain operator is present.
RESOLVE(job_conjunction_continuation) {
    UNUSED(token2);
    // Note: out_tag is genuinely used below; the earlier spurious UNUSED(out_tag)
    // annotation was removed.
    switch (token1.type) {
        case parse_token_type_andand:
            *out_tag = parse_job_decoration_and;
            return production_for<andands>();
        case parse_token_type_oror:
            *out_tag = parse_job_decoration_or;
            return production_for<orors>();
        default:
            return production_for<empty>();
    }
}
/// Resolve a job continuation: a pipe continues the job; anything else ends it.
RESOLVE(job_continuation) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.type == parse_token_type_pipe) {
        return production_for<piped>();  // pipe, so the job continues
    }
    return production_for<empty>();  // not a pipe, no job continuation
}
// A statement is a normal command, or an if / while / and etc.
// Resolution uses both lookahead tokens: token2 decides whether a block keyword is
// actually being invoked as a plain (decorated) command, e.g. 'if --help' or a naked 'if'.
RESOLVE(statement) {
    UNUSED(out_tag);
    // The only block-like builtin that takes any parameters is 'function' So go to decorated
    // statements if the subsequent token looks like '--'. The logic here is subtle:
    //
    // If we are 'begin', then we expect to be invoked with no arguments.
    // If we are 'function', then we are a non-block if we are invoked with -h or --help
    // If we are anything else, we require an argument, so do the same thing if the subsequent token
    // is a statement terminator.
    if (token1.type == parse_token_type_string) {
        // If we are a function, then look for help arguments. Otherwise, if the next token looks
        // like an option (starts with a dash), then parse it as a decorated statement.
        if (token1.keyword == parse_keyword_t::kw_function && token2.is_help_argument) {
            return production_for<decorated>();
        } else if (token1.keyword != parse_keyword_t::kw_function && token2.has_dash_prefix) {
            return production_for<decorated>();
        }
        // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g.
        // a "naked if".
        bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_t::kw_begin &&
                                              token1.keyword != parse_keyword_t::kw_end);
        if (naked_invocation_invokes_help &&
            (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) {
            return production_for<decorated>();
        }
    }
    switch (token1.type) {
        case parse_token_type_string: {
            switch (token1.keyword) {
                case parse_keyword_t::kw_not:
                case parse_keyword_t::kw_exclam: {
                    return production_for<nots>();
                }
                case parse_keyword_t::kw_for:
                case parse_keyword_t::kw_while:
                case parse_keyword_t::kw_function:
                case parse_keyword_t::kw_begin: {
                    return production_for<block>();
                }
                case parse_keyword_t::kw_if: {
                    return production_for<ifs>();
                }
                case parse_keyword_t::kw_else: {
                    // 'else' cannot begin a statement; error.
                    return NO_PRODUCTION;
                }
                case parse_keyword_t::kw_switch: {
                    return production_for<switchs>();
                }
                case parse_keyword_t::kw_end: {
                    // 'end' cannot begin a statement; error.
                    return NO_PRODUCTION;
                }
                // All other keywords fall through to decorated statement.
                default: {
                    return production_for<decorated>();
                }
            }
        }
        case parse_token_type_pipe:
        case parse_token_type_redirection:
        case parse_token_type_background:
        case parse_token_type_terminate: {
            return NO_PRODUCTION;
        }
        default: {
            return NO_PRODUCTION;
        }
    }
}
/// Resolve an else clause: the 'else' keyword begins one; anything else means no clause.
RESOLVE(else_clause) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.keyword == parse_keyword_t::kw_else) {
        return production_for<else_cont>();
    }
    return production_for<empty>();
}
/// Resolve what follows 'else': 'if' makes it an else-if; anything else is a plain else.
RESOLVE(else_continuation) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.keyword == parse_keyword_t::kw_if) {
        return production_for<else_if>();
    }
    return production_for<else_only>();
}
/// Resolve a case item list: 'case' starts an item, a separator is a blank line, and
/// everything else terminates the list.
RESOLVE(case_item_list) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.keyword == parse_keyword_t::kw_case) {
        return production_for<case_items>();
    }
    if (token1.type == parse_token_type_end) {
        return production_for<blank_line>();
    }
    return production_for<empty>();
}
/// Resolve a negated statement: either the 'not' keyword or its '!' spelling.
RESOLVE(not_statement) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.keyword == parse_keyword_t::kw_not) {
        return production_for<nots>();
    }
    if (token1.keyword == parse_keyword_t::kw_exclam) {
        return production_for<exclams>();
    }
    return NO_PRODUCTION;
}
/// Resolve an andor_job_list: jobs that each begin with an 'and' or 'or' boolean statement.
RESOLVE(andor_job_list) {
    UNUSED(out_tag);
    if (token1.type == parse_token_type_end) {
        return production_for<empty_line>();
    }
    const bool is_andor = token1.keyword == parse_keyword_t::kw_and ||
                          token1.keyword == parse_keyword_t::kw_or;
    // The argument to and/or must be a non-help string; otherwise we have 'and --help' or a
    // naked 'and', which is not part of this list.
    if (is_andor && token2.type == parse_token_type_string && !token2.is_help_argument) {
        return production_for<andor_job>();
    }
    // All other cases end the list.
    return production_for<empty>();
}
/// Resolve an argument list: string tokens extend the list, anything else ends it.
RESOLVE(argument_list) {
    UNUSED(token2);
    UNUSED(out_tag);
    return token1.type == parse_token_type_string ? production_for<arg>()
                                                  : production_for<empty>();
}
/// Resolve a freestanding argument list: like an argument list, but separators
/// (newlines/semicolons) are also permitted within it.
RESOLVE(freestanding_argument_list) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.type == parse_token_type_string) {
        return production_for<arg>();
    }
    if (token1.type == parse_token_type_end) {
        return production_for<semicolon>();
    }
    return production_for<empty>();
}
/// Resolve which header opens this block: for, while, function, or begin.
RESOLVE(block_header) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.keyword == parse_keyword_t::kw_for) {
        return production_for<forh>();
    }
    if (token1.keyword == parse_keyword_t::kw_while) {
        return production_for<whileh>();
    }
    if (token1.keyword == parse_keyword_t::kw_function) {
        return production_for<funch>();
    }
    if (token1.keyword == parse_keyword_t::kw_begin) {
        return production_for<beginh>();
    }
    return NO_PRODUCTION;
}
/// Resolve a (possibly empty) list of leading variable assignments like FOO=bar.
RESOLVE(variable_assignments) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (!token1.may_be_variable_assignment) {
        return production_for<empty>();
    }
    // A variable assignment is always a string token.
    assert(token1.type == parse_token_type_string);
    return production_for<var>();
}
/// Resolve a decorated statement: a plain statement optionally preceded by 'command',
/// 'builtin' or 'exec'. Sets *out_tag to the decoration when one applies.
RESOLVE(decorated_statement) {
    // and/or are typically parsed in job_conjunction at the beginning of a job
    // However they may be reached here through e.g. true && and false.
    // Refuse to parse them as a command except for --help. See #6089.
    const bool is_andor = token1.keyword == parse_keyword_t::kw_and ||
                          token1.keyword == parse_keyword_t::kw_or;
    if (is_andor && !token2.is_help_argument) {
        return NO_PRODUCTION;
    }
    // If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the
    // second token is not a string, then this is a naked 'command' and we should execute it as
    // undecorated.
    if (token2.type != parse_token_type_string || token2.has_dash_prefix) {
        return production_for<plains>();
    }
    if (token1.keyword == parse_keyword_t::kw_command) {
        *out_tag = parse_statement_decoration_command;
        return production_for<cmds>();
    }
    if (token1.keyword == parse_keyword_t::kw_builtin) {
        *out_tag = parse_statement_decoration_builtin;
        return production_for<builtins>();
    }
    if (token1.keyword == parse_keyword_t::kw_exec) {
        *out_tag = parse_statement_decoration_exec;
        return production_for<execs>();
    }
    *out_tag = parse_statement_decoration_none;
    return production_for<plains>();
}
/// Resolve a mixed list of arguments and redirections; anything else ends the list.
RESOLVE(arguments_or_redirections_list) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.type == parse_token_type_string) {
        return production_for<arg>();
    }
    if (token1.type == parse_token_type_redirection) {
        return production_for<redir>();
    }
    return production_for<empty>();
}
/// Resolve optional newlines: consume newline-style separators only (not semicolons).
RESOLVE(optional_newlines) {
    UNUSED(token2);
    UNUSED(out_tag);
    return token1.is_newline ? production_for<newlines>() : production_for<empty>();
}
/// Resolve an optional trailing '&'; records the background status in *out_tag.
RESOLVE(optional_background) {
    UNUSED(token2);
    if (token1.type == parse_token_type_background) {
        *out_tag = parse_background;
        return production_for<background>();
    }
    *out_tag = parse_no_background;
    return production_for<empty>();
}
/// Resolve an optional 'time' prefix; 'time --help' is a command, not a prefix.
RESOLVE(optional_time) {
    const bool has_time =
        token1.keyword == parse_keyword_t::kw_time && !token2.is_help_argument;
    *out_tag = has_time ? parse_optional_time_time : parse_optional_time_no_time;
    return has_time ? production_for<time>() : production_for<empty>();
}
/// Fetch the production for the given non-terminal \p node_type, given the two lookahead
/// tokens \p input1 and \p input2. \p out_tag receives a node-type-specific annotation.
/// \return the production, or nullptr if the tokens permit no rule (a parse error).
/// Passing a terminal or special token type here is a programmer error and aborts via
/// PARSER_DIE().
const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type,
                                                                    const parse_token_t &input1,
                                                                    const parse_token_t &input2,
                                                                    parse_node_tag_t *out_tag) {
    // this is **extremely** chatty
    FLOGF(parse_productions_chatty, L"Resolving production for %ls with input token <%ls>",
          token_type_description(node_type), input1.describe().c_str());

    // Fetch the function to resolve the list of productions.
    const production_element_t *(*resolver)(const parse_token_t &input1,  //!OCLINT(unused param)
                                            const parse_token_t &input2,  //!OCLINT(unused param)
                                            parse_node_tag_t *out_tag) =  //!OCLINT(unused param)
        nullptr;
    switch (node_type) {
    // Handle all of our grammar elements
#define ELEM(SYM)                \
    case (symbol_##SYM):         \
        resolver = SYM::resolve; \
        break;
#include "parse_grammar_elements.inc"

        // Everything else is an error.
        case parse_token_type_string:
        case parse_token_type_pipe:
        case parse_token_type_redirection:
        case parse_token_type_background:
        case parse_token_type_andand:
        case parse_token_type_oror:
        case parse_token_type_end:
        case parse_token_type_terminate: {
            FLOGF(error, L"Terminal token type %ls passed to %s", token_type_description(node_type),
                  __FUNCTION__);
            PARSER_DIE();
            break;
        }
        case parse_special_type_parse_error:
        case parse_special_type_tokenizer_error:
        case parse_special_type_comment: {
            // Note: no trailing newline; FLOGF messages are single lines like the
            // sibling diagnostics above and below.
            FLOGF(error, L"Special type %ls passed to %s", token_type_description(node_type),
                  __FUNCTION__);
            PARSER_DIE();
            break;
        }
        case token_type_invalid: {
            FLOGF(error, L"token_type_invalid passed to %s", __FUNCTION__);
            PARSER_DIE();
            break;
        }
    }
    PARSE_ASSERT(resolver != nullptr);

    const production_element_t *result = resolver(input1, input2, out_tag);
    if (result == nullptr) {
        FLOGF(parse_productions, L"Node type '%ls' has no production for input '%ls' (in %s)",
              token_type_description(node_type), input1.describe().c_str(), __FUNCTION__);
    }
    return result;
}

View File

@ -1,49 +0,0 @@
// Programmatic representation of fish code.
#ifndef FISH_PARSE_TREE_CONSTRUCTION_H
#define FISH_PARSE_TREE_CONSTRUCTION_H
#include <sys/types.h>
#include "parse_constants.h"
struct parse_token_t;
namespace parse_productions {

// A production is an array of unsigned char. Symbols are encoded directly as their symbol value.
// Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together
// keywords and symbols.
// (Modernized from `typedef` to a `using` alias, matching the rest of the parser code.)
using production_element_t = uint8_t;

/// Resolve the type from a production element.
/// Values above LAST_TOKEN_OR_SYMBOL encode keywords, which are always string tokens.
inline parse_token_type_t production_element_type(production_element_t elem) {
    if (elem > LAST_TOKEN_OR_SYMBOL) {
        return parse_token_type_string;
    } else {
        return static_cast<parse_token_type_t>(elem);
    }
}

/// Resolve the keyword from a production element, or parse_keyword_t::none if the element
/// is a plain token or symbol.
inline parse_keyword_t production_element_keyword(production_element_t elem) {
    if (elem > LAST_TOKEN_OR_SYMBOL) {
        // First keyword is LAST_TOKEN_OR_SYMBOL + 1.
        return static_cast<parse_keyword_t>(elem - LAST_TOKEN_OR_SYMBOL - 1);
    } else {
        return parse_keyword_t::none;
    }
}

/// Check if an element is valid.
inline bool production_element_is_valid(production_element_t elem) {
    return elem != token_type_invalid;
}

/// Fetch a production. We are passed two input tokens. The first input token is guaranteed to not
/// be invalid; the second token may be invalid if there's no more tokens. We may also set flags.
const production_element_t *production_for_token(parse_token_type_t node_type,
                                                 const parse_token_t &input1,
                                                 const parse_token_t &input2, uint8_t *out_tag);

}  // namespace parse_productions
#endif

File diff suppressed because it is too large Load Diff

View File

@ -6,30 +6,19 @@
#include <stdint.h>
#include <sys/types.h>
#include <deque>
#include <memory>
#include <vector>
#include "common.h"
#include "maybe.h"
#include "parse_constants.h"
#include "parse_grammar.h"
#include "tokenizer.h"
class parse_node_tree_t;
typedef uint32_t node_offset_t;
#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))
typedef uint32_t source_offset_t;
constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
/// A range within a source string: a start offset plus a length.
struct source_range_t {
    uint32_t start;   // offset of the first character in the range
    uint32_t length;  // number of characters covered by the range
};
/// A struct representing the token type that we use internally.
struct parse_token_t {
enum parse_token_type_t type; // The type of the token as represented by the parser
@ -41,38 +30,35 @@ struct parse_token_t {
bool is_newline{false}; // Hackish: if TOK_END, whether the source is a newline.
bool preceding_escaped_nl{false}; // Whether there was an escaped newline preceding this token.
bool may_be_variable_assignment{false}; // Hackish: whether this token is a string like FOO=bar
tokenizer_error_t tok_error{tokenizer_error_t::none}; // If this is a tokenizer error, that error.
source_offset_t source_start{SOURCE_OFFSET_INVALID};
source_offset_t source_length{0};
/// \return the source range.
/// Note the start may be invalid.
source_range_t range() const {
return source_range_t{source_start, source_length};
}
/// \return whether we are a string with the dash prefix set.
bool is_dash_prefix_string() const {
return type == parse_token_type_string && has_dash_prefix;
}
wcstring describe() const;
wcstring user_presentable_description() const;
constexpr parse_token_t(parse_token_type_t type) : type(type) {}
};
/// Flags controlling parse-tree construction; combined as a bitmask in parse_tree_flags_t.
enum {
    parse_flag_none = 0,
    /// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
    /// disconnected trees. This is intended to be used by syntax highlighting.
    parse_flag_continue_after_error = 1 << 0,
    /// Include comment tokens.
    parse_flag_include_comments = 1 << 1,
    /// Indicate that the tokenizer should accept incomplete tokens.
    parse_flag_accept_incomplete_tokens = 1 << 2,
    /// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
    /// tree where some nodes may have no productions.
    parse_flag_leave_unterminated = 1 << 3,
    /// Indicate that the parser should generate job_list entries for blank lines.
    parse_flag_show_blank_lines = 1 << 4
};
/// A bitmask of the parse_flag_* values above.
typedef unsigned int parse_tree_flags_t;
wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src);
/// Return a new parse token, advancing the tokenizer.
parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token, wcstring *storage);
const wchar_t *token_type_description(parse_token_type_t type);
const wchar_t *keyword_description(parse_keyword_t type);
parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err);
// Node flags.
enum {
/// Flag indicating that the node has associated comment nodes.
@ -88,145 +74,29 @@ typedef uint8_t parse_node_flags_t;
/// Node-type specific tag value.
typedef uint8_t parse_node_tag_t;
/// Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields
/// is important.
class parse_node_t {
   public:
    // Start of this node's range in the source code; SOURCE_OFFSET_INVALID if unset.
    source_offset_t source_start{SOURCE_OFFSET_INVALID};
    // Length of our range in the source code.
    source_offset_t source_length{0};
    // Offset of our parent node within the tree; NODE_OFFSET_INVALID if unset.
    node_offset_t parent{NODE_OFFSET_INVALID};
    // Offset of our first child within the tree; children are stored contiguously
    // (see child_offset below).
    node_offset_t child_start{0};
    // Number of children.
    uint8_t child_count{0};
    // Type of the node.
    enum parse_token_type_t type;
    // Keyword associated with node.
    enum parse_keyword_t keyword { parse_keyword_t::none };
    // Node flags (parse_node_flag_* values), packed into 4 bits to keep the node small.
    parse_node_flags_t flags : 4;
    // This is used to store e.g. the statement decoration. Shares a byte with 'flags'.
    parse_node_tag_t tag : 4;
    // \return a textual description of this node, for debugging.
    wcstring describe() const;
    // Constructor. Only the type is required; everything else starts out empty/invalid.
    explicit parse_node_t(parse_token_type_t ty) : type(ty), flags(0), tag(0) {}

    // \return the tree offset of our child at index 'which'; asserts it is in range.
    node_offset_t child_offset(node_offset_t which) const {
        PARSE_ASSERT(which < child_count);
        return child_start + which;
    }

    /// Indicate if this node has a range of source code associated with it.
    bool has_source() const {
        // Should never have a nonempty range with an invalid offset.
        assert(this->source_start != SOURCE_OFFSET_INVALID || this->source_length == 0);
        return this->source_length > 0;
    }

    /// Indicate if the node has comment nodes.
    bool has_comments() const { return this->flags & parse_node_flag_has_comments; }

    /// Indicates if we have a preceding escaped newline.
    bool has_preceding_escaped_newline() const {
        return this->flags & parse_node_flag_preceding_escaped_nl;
    }

    /// \return our source range. Requires has_source().
    source_range_t source_range() const {
        assert(has_source());
        return {source_start, source_length};
    }

    /// Gets source for the node, or the empty string if it has no source.
    wcstring get_source(const wcstring &str) const {
        if (!has_source())
            return wcstring();
        else
            return wcstring(str, this->source_start, this->source_length);
    }

    /// Returns whether the given location is within the source range or at its end.
    bool location_in_or_at_end_of_source_range(size_t loc) const {
        return has_source() && source_start <= loc && loc - source_start <= source_length;
    }
};
template <typename Type>
class tnode_t;
/// The parse tree itself. A flat vector of nodes; nodes reference each other by offset
/// (see parse_node_t::parent / child_start).
class parse_node_tree_t : public std::vector<parse_node_t> {
   public:
    parse_node_tree_t() {}

    // Trees may be moved but not copied.
    parse_node_tree_t(parse_node_tree_t &&) = default;
    parse_node_tree_t &operator=(parse_node_tree_t &&) = default;
    parse_node_tree_t(const parse_node_tree_t &) = delete;             // no copying
    parse_node_tree_t &operator=(const parse_node_tree_t &) = delete;  // no copying

    // Get the node corresponding to a child of the given node, or NULL if there is no such child.
    // If expected_type is provided, assert that the node has that type.
    const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which,
                                  parse_token_type_t expected_type = token_type_invalid) const;

    // Find the first direct child of the given node of the given type. asserts on failure.
    const parse_node_t &find_child(const parse_node_t &parent, parse_token_type_t type) const;

    // Typed overload of find_child, returning a tnode wrapping the found child.
    template <typename Type>
    tnode_t<Type> find_child(const parse_node_t &parent) const;

    // Get the node corresponding to the parent of the given node, or NULL if there is no such
    // child. If expected_type is provided, only returns the parent if it is of that type. Note the
    // asymmetry: get_child asserts since the children are known, but get_parent does not, since the
    // parent may not be known.
    const parse_node_t *get_parent(const parse_node_t &node,
                                   parse_token_type_t expected_type = token_type_invalid) const;

    // Finds a node containing the given source location. If 'parent' is not NULL, it must be an
    // ancestor.
    const parse_node_t *find_node_matching_source_location(parse_token_type_t type,
                                                           size_t source_loc,
                                                           const parse_node_t *parent) const;

    // Utilities

    /// Given a node, return all of its comment nodes.
    std::vector<tnode_t<grammar::comment>> comment_nodes_for_node(const parse_node_t &parent) const;

   private:
    // tnode_t needs access to next_node_in_node_list for typed list iteration.
    template <typename Type>
    friend class tnode_t;

    /// Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return
    /// the next element of the given type in that list, and the tail (by reference). Returns NULL
    /// if we've exhausted the list.
    const parse_node_t *next_node_in_node_list(const parse_node_t &node_list,
                                               parse_token_type_t entry_type,
                                               const parse_node_t **list_tail) const;
};
/// The big entry point. Parse a string, attempting to produce a tree for the given goal type.
bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags,
parse_node_tree_t *output, parse_error_list_t *errors,
parse_token_type_t goal = symbol_job_list);
namespace ast {
class ast_t;
}
/// A type wrapping up a parse tree and the original source behind it.
struct parsed_source_t {
wcstring src;
parse_node_tree_t tree;
std::unique_ptr<ast::ast_t> ast;
parsed_source_t(wcstring s, parse_node_tree_t t) : src(std::move(s)), tree(std::move(t)) {}
parsed_source_t(wcstring s, ast::ast_t &&ast);
~parsed_source_t();
parsed_source_t(const parsed_source_t &) = delete;
void operator=(const parsed_source_t &) = delete;
parsed_source_t(parsed_source_t &&) = default;
parsed_source_t &operator=(parsed_source_t &&) = default;
parsed_source_t(parsed_source_t &&) = delete;
parsed_source_t &operator=(parsed_source_t &&) = delete;
};
/// Return a shared pointer to parsed_source_t, or null on failure.
/// If parse_flag_continue_after_error is not set, this will return null on any error.
using parsed_source_ref_t = std::shared_ptr<const parsed_source_t>;
parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, parse_error_list_t *errors,
parse_token_type_t goal = symbol_job_list);
parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags,
parse_error_list_t *errors);
/// Error message for improper use of the exec builtin.
#define EXEC_ERR_MSG _(L"The '%ls' command can not be used in a pipeline")

View File

@ -14,6 +14,7 @@
#include <string>
#include <type_traits>
#include "ast.h"
#include "builtin.h"
#include "common.h"
#include "expand.h"
@ -22,7 +23,6 @@
#include "parse_constants.h"
#include "parse_util.h"
#include "parser.h"
#include "tnode.h"
#include "tokenizer.h"
#include "wcstringutil.h"
#include "wildcard.h"
@ -565,121 +565,16 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote,
return result;
}
/// We are given a parse tree, the index of a node within the tree, its indent, and a vector of
/// indents the same size as the original source string. Set the indent correspdonding to the node's
/// source range, if appropriate.
///
/// trailing_indent is the indent for nodes with unrealized source, i.e. if I type 'if false <ret>'
/// then we have an if node with an empty job list (without source) but we want the last line to be
/// indented anyways.
///
/// switch statements also indent.
///
/// max_visited_node_idx is the largest index we visited.
static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx,
int node_indent, parse_token_type_t parent_type,
std::vector<int> *indents, int *trailing_indent,
node_offset_t *max_visited_node_idx) {
// Guard against incomplete trees.
if (node_idx > tree.size()) return;
// Update max_visited_node_idx.
if (node_idx > *max_visited_node_idx) *max_visited_node_idx = node_idx;
// We could implement this by utilizing the fish grammar. But there's an easy trick instead:
// almost everything that wraps a job list should be indented by 1. So just find all of the job
// lists. One exception is switch, which wraps a case_item_list instead of a job_list. The other
// exception is job_list itself: a job_list is a job and a job_list, and we want that child list
// to be indented the same as the parent. So just find all job_lists whose parent is not a
// job_list, and increment their indent by 1. We also want to treat andor_job_list like
// job_lists.
const parse_node_t &node = tree.at(node_idx);
const parse_token_type_t node_type = node.type;
// Increment the indent if we are either a root job_list, or root case_item_list.
const bool is_root_job_list = node_type != parent_type && (node_type == symbol_job_list ||
node_type == symbol_andor_job_list);
const bool is_root_case_item_list =
node_type == symbol_case_item_list && parent_type != symbol_case_item_list;
if (is_root_job_list || is_root_case_item_list) {
node_indent += 1;
}
// If we have source, store the trailing indent unconditionally. If we do not have source, store
// the trailing indent only if ours is bigger; this prevents the trailing "run" of terminal job
// lists from affecting the trailing indent. For example, code like this:
//
// if foo
//
// will be parsed as this:
//
// job_list
// job
// if_statement
// job [if]
// job_list [empty]
// job_list [empty]
//
// There's two "terminal" job lists, and we want the innermost one.
//
// Note we are relying on the fact that nodes are in the same order as the source, i.e. an
// in-order traversal of the node tree also traverses the source from beginning to end.
if (node.has_source() || node_indent > *trailing_indent) {
*trailing_indent = node_indent;
}
// Store the indent into the indent array.
if (node.source_start != SOURCE_OFFSET_INVALID && node.source_start < indents->size()) {
if (node.has_source()) {
// A normal non-empty node. Store the indent unconditionally.
indents->at(node.source_start) = node_indent;
} else {
// An empty node. We have a source offset but no source length. This can come about when
// a node is legitimately empty:
//
// while true; end
//
// The job_list inside the while loop is empty. It still has a source offset (at the end
// of the while statement) but no source extent. We still need to capture that indent,
// because there may be comments inside:
//
// while true
// # loop forever
// end
//
// The 'loop forever' comment must be indented, by virtue of storing the indent.
//
// Now consider what happens if we remove the end:
//
// while true
// # loop forever
//
// Now both the job_list and end_command are unmaterialized. However, we want the indent
// to be of the job_list and not the end_command. Therefore, we only store the indent
// if it's bigger.
if (node_indent > indents->at(node.source_start)) {
indents->at(node.source_start) = node_indent;
}
}
}
// Recursive to all our children.
for (node_offset_t idx = 0; idx < node.child_count; idx++) {
// Note we pass our type to our child, which becomes its parent node type.
compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents,
trailing_indent, max_visited_node_idx);
}
}
std::vector<int> parse_util_compute_indents(const wcstring &src) {
// Make a vector the same size as the input string, which contains the indents. Initialize them
// to -1.
// to 0.
static wcstring ssss;
ssss = src;
const size_t src_size = src.size();
std::vector<int> indents(src_size, -1);
std::vector<int> indents(src_size, 0);
// Simple trick: if our source does not contain a newline, then all indents are 0.
if (src.find('\n') == wcstring::npos) {
std::fill(indents.begin(), indents.end(), 0);
return indents;
}
@ -687,78 +582,141 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
// the last node we visited becomes the input indent of the next. I.e. in the case of 'switch
// foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
// were a case item list.
parse_node_tree_t tree;
parse_tree_from_string(src,
parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens,
&tree, nullptr /* errors */);
using namespace ast;
auto ast =
ast_t::parse(src, parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated);
// Start indenting at the first node. If we have a parse error, we'll have to start indenting
// from the top again.
node_offset_t start_node_idx = 0;
int last_trailing_indent = 0;
// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while
// visiting its children.
struct indent_visitor_t {
explicit indent_visitor_t(std::vector<int> &indents) : indents(indents) {}
while (start_node_idx < tree.size()) {
// The indent that we'll get for the last line.
int trailing_indent = 0;
void visit(const node_t &node) {
int inc = 0;
int dec = 0;
switch (node.type) {
case type_t::job_list:
case type_t::andor_job_list:
// Job lists are never unwound.
inc = 1;
dec = 1;
break;
// Biggest offset we visited.
node_offset_t max_visited_node_idx = 0;
// Increment indents for conditions in headers (#1665).
case type_t::job_conjunction:
if (node.parent->type == type_t::while_header ||
node.parent->type == type_t::if_clause) {
inc = 1;
dec = 1;
}
break;
// Invoke the recursive version. As a hack, pass job_list for the 'parent' token type, which
// will prevent the really-root job list from indenting.
compute_indents_recursive(tree, start_node_idx, last_trailing_indent, symbol_job_list,
&indents, &trailing_indent, &max_visited_node_idx);
// Increment indents for piped remainders.
case type_t::job_continuation_list:
if (node.as<job_continuation_list_t>()->count() > 0) {
inc = 1;
dec = 1;
}
break;
// We may have more to indent. The trailing indent becomes our current indent. Start at the
// node after the last we visited.
last_trailing_indent = trailing_indent;
start_node_idx = max_visited_node_idx + 1;
}
case type_t::case_item_list:
// Here's a hack. Consider:
// switch abc
// cas
//
// fish will see that 'cas' is not valid inside a switch statement because it is
// not "case". It will then unwind back to the top level job list, producing a
// parse tree like:
//
// job_list
// switch_job
// <err>
// normal_job
// cas
//
// And so we will think that the 'cas' job is at the same level as the switch.
// To address this, if we see that the switch statement was not closed, do not
// decrement the indent afterwards.
inc = 1;
dec = node.parent->as<switch_statement_t>()->end.unsourced ? 0 : 1;
break;
// Handle comments. Each comment node has a parent (which is whatever the top of the symbol
// stack was when the comment was encountered). So the source range of the comment has the same
// indent as its parent.
const size_t tree_size = tree.size();
for (node_offset_t i = 0; i < tree_size; i++) {
const parse_node_t &node = tree.at(i);
if (node.type == parse_special_type_comment && node.has_source() &&
node.parent < tree_size) {
const parse_node_t &parent = tree.at(node.parent);
if (parent.source_start != SOURCE_OFFSET_INVALID) {
indents.at(node.source_start) = indents.at(parent.source_start);
default:
break;
}
}
}
indent += inc;
// Now apply the indents. The indents array has -1 for places where the indent does not change,
// so start at each value and extend it along the run of -1s.
int last_indent = 0;
for (size_t i = 0; i < src_size; i++) {
int this_indent = indents.at(i);
if (this_indent < 0) {
indents.at(i) = last_indent;
// If we increased the indentation, apply it to the remainder of the string, even if the
// list is empty. For example (where _ represents the cursor):
//
// if foo
// _
//
// we want to indent the newline.
if (inc) {
std::fill(indents.begin() + last_leaf_end, indents.end(), indent);
last_indent = indent;
}
// If this is a leaf node, apply the current indentation.
if (node.category == category_t::leaf) {
auto range = node.source_range();
if (range.length > 0) {
// Fill to the end.
// Later nodes will come along and overwrite these.
std::fill(indents.begin() + range.start, indents.end(), indent);
last_leaf_end = range.start + range.length;
last_indent = indent;
}
}
node_visitor(*this).accept_children_of(&node);
indent -= dec;
}
// The one-past-the-last index of the most recently encountered leaf node.
// We use this to populate the indents even if there's no tokens in the range.
size_t last_leaf_end{0};
// The last indent which we assigned.
int last_indent{-1};
// List of indents, which we populate.
std::vector<int> &indents;
// Initialize our starting indent to -1, as our top-level node is a job list which
// will immediately increment it.
int indent{-1};
};
indent_visitor_t iv(indents);
node_visitor(iv).accept(ast.top());
// All newlines now get the *next* indent.
// For example, in this code:
// if true
// stuff
// the newline "belongs" to the if statement as it ends its job.
// But when rendered, it visually belongs to the job list.
// FIXME: if there's a middle newline, we will indent it wrongly.
// For example:
// if true
//
// end
// Here the middle newline should be indented by 1.
size_t idx = src_size;
int next_indent = iv.last_indent;
while (idx--) {
if (src.at(idx) == L'\n') {
indents.at(idx) = next_indent;
} else {
// New indent level.
last_indent = this_indent;
// Make all whitespace before a token have the new level. This avoid using the wrong
// indentation level if a new line starts with whitespace.
size_t prev_char_idx = i;
while (prev_char_idx--) {
if (!std::wcschr(L" \n\t\r", src.at(prev_char_idx))) break;
indents.at(prev_char_idx) = last_indent;
}
next_indent = indents.at(idx);
}
}
// Ensure trailing whitespace has the trailing indent. This makes sure a new line is correctly
// indented even if it is empty.
size_t suffix_idx = src_size;
while (suffix_idx--) {
if (!std::wcschr(L" \n\t\r", src.at(suffix_idx))) break;
indents.at(suffix_idx) = last_trailing_indent;
}
return indents;
}
@ -790,17 +748,13 @@ bool parse_util_argument_is_help(const wchar_t *s) {
return std::wcscmp(L"-h", s) == 0 || std::wcscmp(L"--help", s) == 0;
}
/// Check if the first argument under the given node is --help.
static bool first_argument_is_help(tnode_t<grammar::plain_statement> statement,
const wcstring &src) {
bool is_help = false;
auto arg_nodes = get_argument_nodes(statement.child<1>());
if (!arg_nodes.empty()) {
// Check the first argument only.
wcstring first_arg_src = arg_nodes.front().get_source(src);
is_help = parse_util_argument_is_help(first_arg_src.c_str());
// \return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if
// there are no arguments.
const ast::argument_t *get_first_arg(const ast::argument_or_redirection_list_t &list) {
    // Arguments and redirections are interleaved in the list; skip over redirections.
    for (const ast::argument_or_redirection_t &v : list) {
        if (v.is_argument()) return &v.argument();
    }
    // No argument found. (A stray 'return is_help;' residue line referencing an undeclared
    // variable was removed here; the sole fall-through result is nullptr.)
    return nullptr;
}
/// Given a wide character immediately after a dollar sign, return the appropriate error message.
@ -956,11 +910,13 @@ static parser_test_error_bits_t detect_dollar_cmdsub_errors(size_t arg_src_offse
/// Test if this argument contains any errors. Detected errors include syntax errors in command
/// substitutions, improperly escaped characters and improper use of the variable expansion
/// operator.
parser_test_error_bits_t parse_util_detect_errors_in_argument(tnode_t<grammar::argument> node,
parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argument_t &arg,
const wcstring &arg_src,
parse_error_list_t *out_errors) {
assert(node.has_source() && "argument has no source");
auto source_start = node.source_range()->start;
maybe_t<source_range_t> source_range = arg.try_source_range();
if (!source_range.has_value()) return 0;
size_t source_start = source_range->start;
int err = 0;
wchar_t *paran_begin, *paran_end;
int do_loop = 1;
@ -1054,10 +1010,10 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(tnode_t<grammar::a
}
/// Given that the job given by node should be backgrounded, return true if we detect any errors.
static bool detect_errors_in_backgrounded_job(tnode_t<grammar::job> job,
static bool detect_errors_in_backgrounded_job(const ast::job_t &job,
parse_error_list_t *parse_errors) {
namespace g = grammar;
auto source_range = job.source_range();
using namespace ast;
auto source_range = job.try_source_range();
if (!source_range) return false;
bool errored = false;
@ -1066,54 +1022,77 @@ static bool detect_errors_in_backgrounded_job(tnode_t<grammar::job> job,
// foo & ; or bar
// if foo & ; end
// while foo & ; end
auto job_conj = job.try_get_parent<g::job_conjunction>();
if (job_conj.try_get_parent<g::if_clause>()) {
errored = append_syntax_error(parse_errors, source_range->start,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
} else if (job_conj.try_get_parent<g::while_header>()) {
errored = append_syntax_error(parse_errors, source_range->start,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
} else if (auto jlist = job_conj.try_get_parent<g::job_list>()) {
// This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
// Fetch the job list and then advance it by one.
auto first_jconj = jlist.next_in_list<g::job_conjunction>();
assert(first_jconj == job.try_get_parent<g::job_conjunction>() &&
"Expected first job to be the node we found");
(void)first_jconj;
const job_conjunction_t *job_conj = job.parent->try_as<job_conjunction_t>();
if (!job_conj) return false;
// Try getting the next job's decorator.
if (auto next_job_dec = jlist.next_in_list<g::job_decorator>()) {
// The next job is indeed a boolean statement.
parse_job_decoration_t bool_type = bool_statement_type(next_job_dec);
if (bool_type == parse_job_decoration_and) {
errored = append_syntax_error(parse_errors, next_job_dec.source_range()->start,
BOOL_AFTER_BACKGROUND_ERROR_MSG, L"and");
} else if (bool_type == parse_job_decoration_or) {
errored = append_syntax_error(parse_errors, next_job_dec.source_range()->start,
BOOL_AFTER_BACKGROUND_ERROR_MSG, L"or");
if (job_conj->parent->try_as<if_clause_t>()) {
errored = append_syntax_error(parse_errors, source_range->start,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
} else if (job_conj->parent->try_as<while_header_t>()) {
errored = append_syntax_error(parse_errors, source_range->start,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
} else if (const ast::job_list_t *jlist = job_conj->parent->try_as<ast::job_list_t>()) {
// This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
// Find the index of ourselves in the job list.
size_t index;
for (index = 0; index < jlist->count(); index++) {
if (jlist->at(index) == job_conj) break;
}
assert(index < jlist->count() && "Should have found the job in the list");
// Try getting the next job and check its decorator.
if (const job_conjunction_t *next = jlist->at(index + 1)) {
if (const keyword_base_t *deco = next->decorator.contents.get()) {
assert(
(deco->kw == parse_keyword_t::kw_and || deco->kw == parse_keyword_t::kw_or) &&
"Unexpected decorator keyword");
const wchar_t *deco_name = (deco->kw == parse_keyword_t::kw_and ? L"and" : L"or");
errored = append_syntax_error(parse_errors, deco->source_range().start,
BOOL_AFTER_BACKGROUND_ERROR_MSG, deco_name);
}
}
}
return errored;
}
static bool detect_errors_in_plain_statement(const wcstring &buff_src,
const parse_node_tree_t &node_tree,
tnode_t<grammar::plain_statement> pst,
parse_error_list_t *parse_errors) {
using namespace grammar;
static bool detect_errors_in_decorated_statement(const wcstring &buff_src,
const ast::decorated_statement_t &dst,
parse_error_list_t *parse_errors) {
using namespace ast;
bool errored = false;
auto source_start = pst.source_range()->start;
auto source_start = dst.source_range().start;
const parse_statement_decoration_t decoration = dst.decoration();
// In a few places below, we want to know if we are in a pipeline.
tnode_t<statement> st = pst.try_get_parent<decorated_statement>().try_get_parent<statement>();
pipeline_position_t pipe_pos = get_pipeline_position(st);
bool is_in_pipeline = (pipe_pos != pipeline_position_t::none);
// Determine if the first argument is help.
bool first_arg_is_help = false;
if (const auto *arg = get_first_arg(dst.args_or_redirs)) {
wcstring arg_src = arg->source(buff_src);
first_arg_is_help = parse_util_argument_is_help(arg_src.c_str());
}
// We need to know the decoration.
const enum parse_statement_decoration_t decoration = get_decoration(pst);
// Get the statement we are part of.
const statement_t *st = dst.parent->as<statement_t>();
// Walk up to the job.
const ast::job_t *job = nullptr;
for (const node_t *cursor = st; job == nullptr; cursor = cursor->parent) {
assert(cursor && "Reached root without finding a job");
job = cursor->try_as<ast::job_t>();
}
assert(job && "Should have found the job");
// Check our pipeline position.
pipeline_position_t pipe_pos;
if (job->continuation.empty()) {
pipe_pos = pipeline_position_t::none;
} else if (&job->statement == st) {
pipe_pos = pipeline_position_t::first;
} else {
pipe_pos = pipeline_position_t::subsequent;
}
// Check that we don't try to pipe through exec.
bool is_in_pipeline = (pipe_pos != pipeline_position_t::none);
if (is_in_pipeline && decoration == parse_statement_decoration_exec) {
errored = append_syntax_error(parse_errors, source_start, EXEC_ERR_MSG, L"exec");
}
@ -1124,14 +1103,14 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
if (pipe_pos == pipeline_position_t::subsequent) {
// check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted
// commands.
wcstring command = pst.child<0>().get_source(buff_src);
wcstring command = dst.command.source(buff_src);
if (command == L"and" || command == L"or") {
errored =
append_syntax_error(parse_errors, source_start, EXEC_ERR_MSG, command.c_str());
}
}
if (maybe_t<wcstring> unexp_command = command_for_plain_statement(pst, buff_src)) {
if (maybe_t<wcstring> unexp_command = dst.command.try_source(buff_src)) {
wcstring command;
// Check that we can expand the command.
if (expand_to_command_and_args(*unexp_command, operation_context_t::empty(), &command,
@ -1148,40 +1127,40 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
// Check that we don't return from outside a function. But we allow it if it's
// 'return --help'.
if (!errored && command == L"return") {
if (!errored && command == L"return" && !first_arg_is_help) {
// See if we are in a function.
bool found_function = false;
for (const parse_node_t *ancestor = pst.node(); ancestor != nullptr;
ancestor = node_tree.get_parent(*ancestor)) {
auto fh = tnode_t<block_statement>::try_create(&node_tree, ancestor)
.child<0>()
.try_get_child<function_header, 0>();
if (fh) {
found_function = true;
break;
for (const node_t *cursor = &dst; cursor != nullptr; cursor = cursor->parent) {
if (const auto *bs = cursor->try_as<block_statement_t>()) {
if (bs->header->type == type_t::function_header) {
found_function = true;
break;
}
}
}
if (!found_function && !first_argument_is_help(pst, buff_src)) {
if (!found_function) {
errored = append_syntax_error(parse_errors, source_start, INVALID_RETURN_ERR_MSG);
}
}
// Check that we don't break or continue from outside a loop.
if (!errored && (command == L"break" || command == L"continue")) {
if (!errored && (command == L"break" || command == L"continue") && !first_arg_is_help) {
// Walk up until we hit a 'for' or 'while' loop. If we hit a function first,
// stop the search; we can't break an outer loop from inside a function.
// This is a little funny because we can't tell if it's a 'for' or 'while'
// loop from the ancestor alone; we need the header. That is, we hit a
// block_statement, and have to check its header.
bool found_loop = false;
for (const parse_node_t *ancestor = pst.node(); ancestor != nullptr;
ancestor = node_tree.get_parent(*ancestor)) {
tnode_t<block_header> bh =
tnode_t<block_statement>::try_create(&node_tree, ancestor).child<0>();
if (bh.try_get_child<while_header, 0>() || bh.try_get_child<for_header, 0>()) {
for (const node_t *ancestor = &dst; ancestor != nullptr; ancestor = ancestor->parent) {
const auto *block = ancestor->try_as<block_statement_t>();
if (!block) continue;
if (block->header->type == type_t::for_header ||
block->header->type == type_t::while_header) {
// This is a loop header, so we can break or continue.
found_loop = true;
break;
} else if (bh.try_get_child<function_header, 0>()) {
} else if (block->header->type == type_t::function_header) {
// This is a function header, so we cannot break or
// continue. We stop our search here.
found_loop = false;
@ -1189,7 +1168,7 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
}
}
if (!found_loop && !first_argument_is_help(pst, buff_src)) {
if (!found_loop) {
errored = append_syntax_error(
parse_errors, source_start,
(command == L"break" ? INVALID_BREAK_ERR_MSG : INVALID_CONTINUE_ERR_MSG));
@ -1208,12 +1187,21 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
return errored;
}
// Given we have a trailing argument_or_redirection_list, like `begin; end > /dev/null`, verify that
// there are no arguments in the list.
// \return true if an error was appended to \p out_errors, false if the list is clean.
static bool detect_errors_in_block_redirection_list(
    const ast::argument_or_redirection_list_t &args_or_redirs, parse_error_list_t *out_errors) {
    if (const auto *first_arg = get_first_arg(args_or_redirs)) {
        // Arguments after the block terminator are invalid (#986): report END_ARG_ERR_MSG,
        // not the background-in-conditional message, at the offending argument's position.
        return append_syntax_error(out_errors, first_arg->source_range().start, END_ARG_ERR_MSG);
    }
    return false;
}
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
parse_error_list_t *out_errors,
bool allow_incomplete,
parsed_source_ref_t *out_pstree) {
namespace g = grammar;
parse_node_tree_t node_tree;
parse_error_list_t parse_errors;
parser_test_error_bits_t res = 0;
@ -1233,12 +1221,15 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
// allow_incomplete is set.
bool has_unclosed_quote_or_subshell = false;
// Parse the input string into a parse tree. Some errors are detected here.
bool parsed = parse_tree_from_string(
buff_src, allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none, &node_tree,
&parse_errors);
const parse_tree_flags_t parse_flags =
allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none;
// Parse the input string into an ast. Some errors are detected here.
using namespace ast;
auto ast = ast_t::parse(buff_src, parse_flags, &parse_errors);
if (allow_incomplete) {
// Issue #1238: If the only error was unterminated quote, then consider this to have parsed
// successfully.
size_t idx = parse_errors.size();
while (idx--) {
if (parse_errors.at(idx).code == parse_error_tokenizer_unterminated_quote ||
@ -1250,19 +1241,14 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
}
}
// Issue #1238: If the only error was unterminated quote, then consider this to have parsed
// successfully. A better fix would be to have parse_tree_from_string return this information
// directly (but it would be a shame to munge up its nice bool return).
if (parse_errors.empty() && has_unclosed_quote_or_subshell) {
parsed = true;
}
if (!parsed) {
errored = true;
}
// has_unclosed_quote_or_subshell may only be set if allow_incomplete is true.
assert(!has_unclosed_quote_or_subshell || allow_incomplete);
if (has_unclosed_quote_or_subshell) {
// We do not bother to validate the rest of the tree in this case.
return PARSER_TEST_INCOMPLETE;
}
errored = !parse_errors.empty();
// Expand all commands.
// Verify 'or' and 'and' not used inside pipelines.
@ -1271,21 +1257,17 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
// Verify no variable expansions.
if (!errored) {
for (const parse_node_t &node : node_tree) {
if (node.type == symbol_end_command && !node.has_source()) {
// An 'end' without source is an unclosed block.
has_unclosed_block = true;
} else if (node.type == symbol_statement && !node.has_source()) {
// Check for a statement without source in a pipeline, i.e. unterminated pipeline.
auto pipe_pos = get_pipeline_position({&node_tree, &node});
if (pipe_pos != pipeline_position_t::none) {
for (const node_t &node : ast) {
if (const job_continuation_t *jc = node.try_as<job_continuation_t>()) {
// Somewhat clumsy way of checking for a statement without source in a pipeline.
// See if our pipe has source but our statement does not.
if (!jc->pipe.unsourced && !jc->statement.try_source_range().has_value()) {
has_unclosed_pipe = true;
}
} else if (node.type == symbol_argument) {
tnode_t<g::argument> arg{&node_tree, &node};
const wcstring arg_src = node.get_source(buff_src);
res |= parse_util_detect_errors_in_argument(arg, arg_src, &parse_errors);
} else if (node.type == symbol_job) {
} else if (const argument_t *arg = node.try_as<argument_t>()) {
wcstring arg_src = arg->source(buff_src);
res |= parse_util_detect_errors_in_argument(*arg, arg_src, &parse_errors);
} else if (const ast::job_t *job = node.try_as<ast::job_t>()) {
// Disallow background in the following cases:
//
// foo & ; and bar
@ -1293,25 +1275,27 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
// if foo & ; end
// while foo & ; end
// If it's not a background job, nothing to do.
auto job = tnode_t<g::job>{&node_tree, &node};
if (job_node_is_background(job)) {
errored |= detect_errors_in_backgrounded_job(job, &parse_errors);
if (job->bg) {
errored |= detect_errors_in_backgrounded_job(*job, &parse_errors);
}
} else if (node.type == symbol_arguments_or_redirections_list) {
// verify no arguments to the end command of if, switch, begin (#986).
auto list = tnode_t<g::arguments_or_redirections_list>{&node_tree, &node};
if (list.try_get_parent<g::if_statement>() ||
list.try_get_parent<g::switch_statement>() ||
list.try_get_parent<g::block_statement>()) {
if (auto arg = list.next_in_list<g::argument>()) {
errored = append_syntax_error(&parse_errors, arg.source_range()->start,
END_ARG_ERR_MSG);
}
}
} else if (node.type == symbol_plain_statement) {
tnode_t<grammar::plain_statement> pst{&node_tree, &node};
} else if (const ast::decorated_statement_t *stmt =
node.try_as<decorated_statement_t>()) {
errored |= detect_errors_in_decorated_statement(buff_src, *stmt, &parse_errors);
} else if (const auto *block = node.try_as<block_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (block->end.unsourced) has_unclosed_block = true;
errored |=
detect_errors_in_plain_statement(buff_src, node_tree, pst, &parse_errors);
detect_errors_in_block_redirection_list(block->args_or_redirs, &parse_errors);
} else if (const auto *ifs = node.try_as<if_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (ifs->end.unsourced) has_unclosed_block = true;
errored |=
detect_errors_in_block_redirection_list(ifs->args_or_redirs, &parse_errors);
} else if (const auto *switchs = node.try_as<switch_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (switchs->end.unsourced) has_unclosed_block = true;
errored |=
detect_errors_in_block_redirection_list(switchs->args_or_redirs, &parse_errors);
}
}
}
@ -1325,8 +1309,9 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
*out_errors = std::move(parse_errors);
}
// \return the ast to our caller if requested.
if (out_pstree != nullptr) {
*out_pstree = std::make_shared<parsed_source_t>(buff_src, std::move(node_tree));
*out_pstree = std::make_shared<parsed_source_t>(buff_src, std::move(ast));
}
return res;
@ -1341,25 +1326,21 @@ maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_
false /* don't skip caret */);
};
// Parse the string as an argument list.
// Parse the string as a freestanding argument list.
using namespace ast;
parse_error_list_t errors;
parse_node_tree_t tree;
if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, &errors,
symbol_freestanding_argument_list)) {
// Failed to parse.
auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &errors);
if (!errors.empty()) {
return get_error_text(errors);
}
// Get the root argument list and extract arguments from it.
// Test each of these.
assert(!tree.empty() && "Should have parsed a tree");
tnode_t<grammar::freestanding_argument_list> arg_list(&tree, &tree.at(0));
while (auto arg = arg_list.next_in_list<grammar::argument>()) {
const wcstring arg_src = arg.get_source(arg_list_src);
for (const argument_t &arg : ast.top()->as<freestanding_argument_list_t>()->arguments) {
const wcstring arg_src = arg.source(arg_list_src);
if (parse_util_detect_errors_in_argument(arg, arg_src, &errors)) {
return get_error_text(errors);
}
}
return none();
}

View File

@ -10,6 +10,10 @@
#include "parse_tree.h"
#include "tokenizer.h"
namespace ast {
struct argument_t;
}
/// Find the beginning and end of the first subshell in the specified string.
///
/// \param in the string to search for subshells
@ -127,7 +131,6 @@ std::vector<int> parse_util_compute_indents(const wcstring &src);
/// incomplete (e.g. an unclosed quote), an error is not returned and the PARSER_TEST_INCOMPLETE bit
/// is set in the return value. If allow_incomplete is not set, then incomplete strings result in an
/// error. If out_pstree is not NULL, the resulting tree is returned by reference.
class parse_node_tree_t;
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
parse_error_list_t *out_errors = nullptr,
bool allow_incomplete = true,
@ -141,10 +144,9 @@ maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_
/// Test if this argument contains any errors. Detected errors include syntax errors in command
/// substitutions, improperly escaped characters and improper use of the variable expansion
/// operator. This does NOT currently detect unterminated quotes.
class parse_node_t;
parser_test_error_bits_t parse_util_detect_errors_in_argument(
tnode_t<grammar::argument> node, const wcstring &arg_src,
parse_error_list_t *out_errors = nullptr);
const ast::argument_t &arg, const wcstring &arg_src, parse_error_list_t *out_errors = nullptr);
/// Given a string containing a variable expansion error, append an appropriate error to the errors
/// list. The global_token_pos is the offset of the token in the larger source, and the dollar_pos

View File

@ -11,6 +11,7 @@
#include <memory>
#include <utility>
#include "ast.h"
#include "common.h"
#include "env.h"
#include "event.h"
@ -25,7 +26,6 @@
#include "proc.h"
#include "reader.h"
#include "sanity.h"
#include "tnode.h"
#include "wutil.h" // IWYU pragma: keep
class io_chain_t;
@ -328,19 +328,18 @@ completion_list_t parser_t::expand_argument_list(const wcstring &arg_list_src,
expand_flags_t eflags,
const operation_context_t &ctx) {
// Parse the string as an argument list.
parse_node_tree_t tree;
if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, nullptr /* errors */,
symbol_freestanding_argument_list)) {
auto ast = ast::ast_t::parse_argument_list(arg_list_src);
if (ast.errored()) {
// Failed to parse. Here we expect to have reported any errors in test_args.
return {};
}
// Get the root argument list and extract arguments from it.
completion_list_t result;
assert(!tree.empty());
tnode_t<grammar::freestanding_argument_list> arg_list(&tree, &tree.at(0));
while (auto arg = arg_list.next_in_list<grammar::argument>()) {
const wcstring arg_src = arg.get_source(arg_list_src);
const ast::freestanding_argument_list_t *list =
ast.top()->as<ast::freestanding_argument_list_t>();
for (const ast::argument_t &arg : list->arguments) {
wcstring arg_src = arg.source(arg_list_src);
if (expand_string(arg_src, &result, eflags, ctx) == expand_result_t::error) {
break; // failed to expand a string
}
@ -656,10 +655,10 @@ eval_res_t parser_t::eval(const wcstring &cmd, const io_chain_t &io,
eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io,
const job_group_ref_t &job_group, enum block_type_t block_type) {
assert(block_type == block_type_t::top || block_type == block_type_t::subst);
if (!ps->tree.empty()) {
// Execute the first node.
tnode_t<grammar::job_list> start{&ps->tree, &ps->tree.front()};
return this->eval_node(ps, start, io, job_group, block_type);
const auto *job_list = ps->ast->top()->as<ast::job_list_t>();
if (!job_list->empty()) {
// Execute the top job list.
return this->eval_node(ps, *job_list, io, job_group, block_type);
} else {
auto status = proc_status_t::from_exit_code(get_last_status());
bool break_expand = false;
@ -669,11 +668,11 @@ eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io,
}
template <typename T>
eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, tnode_t<T> node,
eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, const T &node,
const io_chain_t &block_io, const job_group_ref_t &job_group,
block_type_t block_type) {
static_assert(
std::is_same<T, grammar::statement>::value || std::is_same<T, grammar::job_list>::value,
std::is_same<T, ast::statement_t>::value || std::is_same<T, ast::job_list_t>::value,
"Unexpected node type");
// Handle cancellation requests. If our block stack is currently empty, then we already did
// successfully cancel (or there was nothing to cancel); clear the flag. If our block stack is
@ -725,9 +724,9 @@ eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, tnode_t<T> node,
}
// Explicit instantiations. TODO: use overloads instead?
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, tnode_t<grammar::statement>,
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast::statement_t &,
const io_chain_t &, const job_group_ref_t &, block_type_t);
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, tnode_t<grammar::job_list>,
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast::job_list_t &,
const io_chain_t &, const job_group_ref_t &, block_type_t);
void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &errors,

View File

@ -300,9 +300,9 @@ class parser_t : public std::enable_shared_from_this<parser_t> {
block_type_t block_type = block_type_t::top);
/// Evaluates a node.
/// The node type must be grammar::statement or grammar::job_list.
/// The node type must be ast_t::statement_t or ast::job_list_t.
template <typename T>
eval_res_t eval_node(const parsed_source_ref_t &ps, tnode_t<T> node, const io_chain_t &block_io,
eval_res_t eval_node(const parsed_source_ref_t &ps, const T &node, const io_chain_t &block_io,
const job_group_ref_t &job_group,
block_type_t block_type = block_type_t::top);

View File

@ -21,7 +21,6 @@
#include "global_safety.h"
#include "io.h"
#include "parse_tree.h"
#include "tnode.h"
#include "topic_monitor.h"
/// Types of processes.
@ -44,6 +43,10 @@ enum class job_control_t {
none,
};
namespace ast {
struct statement_t;
}
/// A proc_status_t is a value type that encapsulates logic around exited vs stopped vs signaled,
/// etc.
class proc_status_t {
@ -261,10 +264,10 @@ class process_t {
/// Type of process.
process_type_t type{process_type_t::external};
/// For internal block processes only, the node offset of the statement.
/// For internal block processes only, the node of the statement.
/// This is always either block, ifs, or switchs, never boolean or decorated.
parsed_source_ref_t block_node_source{};
tnode_t<grammar::statement> internal_block_node{};
const ast::statement_t *internal_block_node{};
struct concrete_assignment {
wcstring variable_name;

View File

@ -44,6 +44,7 @@
#include <set>
#include <stack>
#include "ast.h"
#include "color.h"
#include "common.h"
#include "complete.h"
@ -74,7 +75,6 @@
#include "screen.h"
#include "signal.h"
#include "termsize.h"
#include "tnode.h"
#include "tokenizer.h"
#include "wutil.h" // IWYU pragma: keep
@ -935,33 +935,29 @@ maybe_t<edit_t> reader_expand_abbreviation_in_command(const wcstring &cmdline, s
const size_t subcmd_cursor_pos = cursor_pos - subcmd_offset;
// Parse this subcmd.
parse_node_tree_t parse_tree;
parse_tree_from_string(subcmd,
parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens,
&parse_tree, nullptr);
using namespace ast;
auto ast =
ast_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens |
parse_flag_leave_unterminated);
// Look for plain statements where the cursor is at the end of the command.
using namespace grammar;
tnode_t<tok_string> matching_cmd_node;
for (const parse_node_t &node : parse_tree) {
// Only interested in plain statements with source.
if (node.type != symbol_plain_statement || !node.has_source()) continue;
const ast::string_t *matching_cmd_node = nullptr;
for (const node_t &n : ast) {
const decorated_statement_t *stmt = n.try_as<decorated_statement_t>();
if (!stmt) continue;
// Get the command node. Skip it if we can't or it has no source.
tnode_t<plain_statement> statement(&parse_tree, &node);
tnode_t<tok_string> cmd_node = statement.child<0>();
// Skip if we have a decoration.
if (stmt->opt_decoration) continue;
// Skip decorated statements.
if (get_decoration(statement) != parse_statement_decoration_none) continue;
auto msource = cmd_node.source_range();
// See if the command's source range range contains our cursor, including at the end.
auto msource = stmt->command.try_source_range();
if (!msource) continue;
// Now see if its source range contains our cursor, including at the end.
if (subcmd_cursor_pos >= msource->start &&
subcmd_cursor_pos <= msource->start + msource->length) {
// Success!
matching_cmd_node = cmd_node;
matching_cmd_node = &stmt->command;
break;
}
}
@ -969,11 +965,12 @@ maybe_t<edit_t> reader_expand_abbreviation_in_command(const wcstring &cmdline, s
// Now if we found a command node, expand it.
maybe_t<edit_t> result{};
if (matching_cmd_node) {
const wcstring token = matching_cmd_node.get_source(subcmd);
assert(!matching_cmd_node->unsourced && "Should not be unsourced");
const wcstring token = matching_cmd_node->source(subcmd);
if (auto abbreviation = expand_abbreviation(token, vars)) {
// There was an abbreviation! Replace the token in the full command. Maintain the
// relative position of the cursor.
source_range_t r = *matching_cmd_node.source_range();
source_range_t r = matching_cmd_node->source_range();
result = edit_t(subcmd_offset + r.start, r.length, std::move(*abbreviation));
}
}

View File

@ -1,152 +0,0 @@
#include "tnode.h"
const parse_node_t *parse_node_tree_t::next_node_in_node_list(
const parse_node_t &node_list, parse_token_type_t entry_type,
const parse_node_t **out_list_tail) const {
parse_token_type_t list_type = node_list.type;
// Paranoia - it doesn't make sense for a list type to contain itself.
assert(list_type != entry_type);
const parse_node_t *list_cursor = &node_list;
const parse_node_t *list_entry = nullptr;
// Loop while we don't have an item but do have a list. Note that some nodes may contain
// nothing; e.g. job_list contains blank lines as a production.
while (list_entry == nullptr && list_cursor != nullptr) {
const parse_node_t *next_cursor = nullptr;
// Walk through the children.
for (node_offset_t i = 0; i < list_cursor->child_count; i++) {
const parse_node_t *child = this->get_child(*list_cursor, i);
if (child->type == entry_type) {
// This is the list entry.
list_entry = child;
} else if (child->type == list_type) {
// This is the next in the list.
next_cursor = child;
}
}
// Go to the next entry, even if it's NULL.
list_cursor = next_cursor;
}
// Return what we got.
assert(list_cursor == nullptr || list_cursor->type == list_type);
assert(list_entry == nullptr || list_entry->type == entry_type);
if (out_list_tail != nullptr) *out_list_tail = list_cursor;
return list_entry;
}
enum parse_statement_decoration_t get_decoration(tnode_t<grammar::plain_statement> stmt) {
parse_statement_decoration_t decoration = parse_statement_decoration_none;
if (auto decorated_statement = stmt.try_get_parent<grammar::decorated_statement>()) {
decoration = static_cast<parse_statement_decoration_t>(decorated_statement.tag());
}
return decoration;
}
enum parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_decorator> stmt) {
return static_cast<parse_job_decoration_t>(stmt.tag());
}
enum parse_job_decoration_t bool_statement_type(
tnode_t<grammar::job_conjunction_continuation> cont) {
return static_cast<parse_job_decoration_t>(cont.tag());
}
maybe_t<pipe_or_redir_t> redirection_for_node(tnode_t<grammar::redirection> redirection,
const wcstring &src, wcstring *out_target) {
assert(redirection && "redirection is missing");
tnode_t<grammar::tok_redirection> prim = redirection.child<0>(); // like 2>
assert(prim && "expected to have primitive");
maybe_t<pipe_or_redir_t> result{};
if (prim.has_source()) {
result = pipe_or_redir_t::from_string(prim.get_source(src));
assert(result.has_value() && "Failed to parse valid redirection");
assert(!result->is_pipe && "Should not be a pipe");
}
if (out_target != nullptr) {
tnode_t<grammar::tok_string> target = redirection.child<1>(); // like 1 or file path
*out_target = target.has_source() ? target.get_source(src) : wcstring();
}
return result;
}
std::vector<tnode_t<grammar::comment>> parse_node_tree_t::comment_nodes_for_node(
const parse_node_t &parent) const {
std::vector<tnode_t<grammar::comment>> result;
if (parent.has_comments()) {
// Walk all our nodes, looking for comment nodes that have the given node as a parent.
for (size_t i = 0; i < this->size(); i++) {
const parse_node_t &potential_comment = this->at(i);
if (potential_comment.type == parse_special_type_comment &&
this->get_parent(potential_comment) == &parent) {
result.emplace_back(this, &potential_comment);
}
}
}
return result;
}
variable_assignment_node_list_t get_variable_assignment_nodes(
tnode_t<grammar::variable_assignments> list, size_t max) {
return list.descendants<grammar::variable_assignment>(max);
}
maybe_t<wcstring> command_for_plain_statement(tnode_t<grammar::plain_statement> stmt,
const wcstring &src) {
tnode_t<grammar::tok_string> cmd = stmt.child<0>();
if (cmd && cmd.has_source()) {
return cmd.get_source(src);
}
return none();
}
arguments_node_list_t get_argument_nodes(tnode_t<grammar::argument_list> list, size_t max) {
return list.descendants<grammar::argument>(max);
}
arguments_node_list_t get_argument_nodes(tnode_t<grammar::arguments_or_redirections_list> list,
size_t max) {
return list.descendants<grammar::argument>(max);
}
bool job_node_is_background(tnode_t<grammar::job> job) {
tnode_t<grammar::optional_background> bg = job.child<4>();
return bg.tag() == parse_background;
}
parse_job_decoration_t get_decorator(tnode_t<grammar::job_conjunction> conj) {
using namespace grammar;
tnode_t<job_decorator> dec;
// We have two possible parents: job_list and andor_job_list.
if (auto p = conj.try_get_parent<job_list>()) {
dec = p.require_get_child<job_decorator, 0>();
} else if (auto p = conj.try_get_parent<andor_job_list>()) {
dec = p.require_get_child<job_decorator, 0>();
}
// note this returns 0 (none) if dec is empty.
return bool_statement_type(dec);
}
pipeline_position_t get_pipeline_position(tnode_t<grammar::statement> st) {
using namespace grammar;
if (!st) {
return pipeline_position_t::none;
}
// If we're part of a job continuation, we're definitely in a pipeline.
if (st.try_get_parent<job_continuation>()) {
return pipeline_position_t::subsequent;
}
// Check if we're the beginning of a job, and if so, whether that job
// has a non-empty continuation.
tnode_t<job_continuation> jc = st.try_get_parent<job>().child<3>();
if (jc.try_get_child<statement, 3>()) {
return pipeline_position_t::first;
}
return pipeline_position_t::none;
}

View File

@ -1,278 +0,0 @@
// Type-safe access to fish parse trees.
#ifndef FISH_TNODE_H
#define FISH_TNODE_H
#include "parse_grammar.h"
#include "parse_tree.h"
// Check if a child type is possible for a parent type at a given index.
template <typename Parent, typename Child, size_t Index>
constexpr bool child_type_possible_at_index() {
return Parent::template type_possible<Child, Index>();
}
// Check if a child type is possible for a parent type at any index.
// The number of cases here should match MAX_PRODUCTION_LENGTH.
template <typename Parent, typename Child>
constexpr bool child_type_possible() {
return child_type_possible_at_index<Parent, Child, 0>() ||
child_type_possible_at_index<Parent, Child, 1>() ||
child_type_possible_at_index<Parent, Child, 2>() ||
child_type_possible_at_index<Parent, Child, 3>() ||
child_type_possible_at_index<Parent, Child, 4>() ||
child_type_possible_at_index<Parent, Child, 5>();
}
/// tnode_t ("typed node") is type-safe access to a parse_tree. A tnode_t holds both a pointer to a
/// parse_node_tree_t and a pointer to a parse_node_t. (Note that the parse_node_tree_t is unowned;
/// the caller must ensure that the tnode does not outlive the tree.
///
/// tnode_t is a lightweight value-type class. It ought to be passed by value. A tnode_t may also be
/// "missing", associated with a null parse_node_t pointer. operator bool() may be used to check if
/// a tnode_t is misisng.
///
/// A tnode_t is parametrized by a grammar element, and uses the fish grammar to statically
/// type-check accesses to children and parents. Any particular tnode either corresponds to a
/// sequence (a single child) or an alternation (multiple possible children). A sequence may have
/// its children accessed directly via child(), which is templated on the index (and returns a
/// tnode of the proper type). Alternations may be disambiguated via try_get_child(), which returns
/// an empty child if the child has the wrong type, or require_get_child() which aborts if the child
/// has the wrong type.
template <typename Type>
class tnode_t {
/// The tree containing our node.
const parse_node_tree_t *tree = nullptr;
/// The node in the tree
const parse_node_t *nodeptr = nullptr;
// Helper to get a child type at a given index.
template <class Element, uint32_t Index>
using child_at = typename std::tuple_element<Index, typename Element::type_tuple>::type;
public:
tnode_t() = default;
tnode_t(const parse_node_tree_t *t, const parse_node_t *n) : tree(t), nodeptr(n) {
assert(t && "tree cannot be null in this constructor");
assert((!n || n->type == Type::token) && "node has wrong type");
}
// Try to create a tnode from the given tree and parse node.
// Returns an empty node if the parse node is null, or has the wrong type.
static tnode_t try_create(const parse_node_tree_t *tree, const parse_node_t *node) {
assert(tree && "tree cannot be null");
return tnode_t(tree, node && node->type == Type::token ? node : nullptr);
}
/// Temporary conversion to parse_node_t to assist in migration.
/* implicit */ operator const parse_node_t &() const {
assert(nodeptr && "Empty tnode_t");
return *nodeptr;
}
/* implicit */ operator const parse_node_t *() const { return nodeptr; }
/// \return the underlying (type-erased) node.
const parse_node_t *node() const { return nodeptr; }
/// Check whether we're populated.
explicit operator bool() const { return nodeptr != nullptr; }
bool operator==(const tnode_t &rhs) const { return tree == rhs.tree && nodeptr == rhs.nodeptr; }
bool operator!=(const tnode_t &rhs) const { return !(*this == rhs); }
// Helper to return whether the given tree is the same as ours.
bool matches_node_tree(const parse_node_tree_t &t) const { return &t == tree; }
const parse_node_tree_t *get_tree() const { return tree; }
bool has_source() const { return nodeptr && nodeptr->has_source(); }
// return the tag, or 0 if missing.
parse_node_tag_t tag() const { return nodeptr ? nodeptr->tag : 0; }
// return the number of children, or 0 if missing.
uint8_t child_count() const { return nodeptr ? nodeptr->child_count : 0; }
maybe_t<source_range_t> source_range() const {
if (!nodeptr || nodeptr->source_start == NODE_OFFSET_INVALID) return none();
return source_range_t{nodeptr->source_start, nodeptr->source_length};
}
wcstring get_source(const wcstring &str) const {
if (!nodeptr) {
return L"";
}
return nodeptr->get_source(str);
}
bool location_in_or_at_end_of_source_range(size_t loc) const {
return nodeptr && nodeptr->location_in_or_at_end_of_source_range(loc);
}
static tnode_t find_node_matching_source_location(const parse_node_tree_t *tree,
size_t source_loc,
const parse_node_t *parent) {
assert(tree && "null tree");
return tnode_t{tree,
tree->find_node_matching_source_location(Type::token, source_loc, parent)};
}
/// Type-safe access to a child at the given index.
template <node_offset_t Index>
tnode_t<child_at<Type, Index>> child() const {
using child_type = child_at<Type, Index>;
const parse_node_t *child = nullptr;
if (nodeptr) child = tree->get_child(*nodeptr, Index, child_type::token);
return tnode_t<child_type>{tree, child};
}
/// Return a parse_node_t for a child.
/// This is used to disambiguate alts.
template <node_offset_t Index>
const parse_node_t &get_child_node() const {
assert(nodeptr && "receiver is missing in get_child_node");
return *tree->get_child(*nodeptr, Index);
}
/// If the child at the given index has the given type, return it; otherwise return an empty
/// child. Note this will refuse to compile if the child type is not possible.
/// This is used for e.g. alternations.
template <class ChildType, node_offset_t Index>
tnode_t<ChildType> try_get_child() const {
static_assert(child_type_possible_at_index<Type, ChildType, Index>(),
"Cannot contain a child of this type");
const parse_node_t *child = nullptr;
if (nodeptr) child = tree->get_child(*nodeptr, Index);
if (child && child->type == ChildType::token) return {tree, child};
return {tree, nullptr};
}
/// assert that this is not empty and that the child at index Index has the given type, then
/// return that child. Note this will refuse to compile if the child type is not possible.
template <class ChildType, node_offset_t Index>
tnode_t<ChildType> require_get_child() const {
assert(nodeptr && "receiver is missing in require_get_child()");
auto result = try_get_child<ChildType, Index>();
assert(result && "require_get_child(): wrong child type");
return result;
}
/// Find the first direct child of the given node of the given type. asserts on failure.
template <class ChildType>
tnode_t<ChildType> find_child() const {
static_assert(child_type_possible<Type, ChildType>(), "Cannot have that type as a child");
assert(nodeptr && "receiver is missing in find_child()");
tnode_t<ChildType> result{tree, &tree->find_child(*nodeptr, ChildType::token)};
assert(result && "cannot find child");
return result;
}
/// Type-safe access to a node's parent.
/// If the parent exists and has type ParentType, return it.
/// Otherwise return a missing tnode.
template <class ParentType>
tnode_t<ParentType> try_get_parent() const {
static_assert(child_type_possible<ParentType, Type>(), "Parent cannot have us as a child");
if (!nodeptr) return {};
return {tree, tree->get_parent(*nodeptr, ParentType::token)};
}
/// Finds all descendants (up to max_count) under this node of the given type.
template <typename DescendantType>
std::vector<tnode_t<DescendantType>> descendants(size_t max_count = -1) const {
if (!nodeptr) return {};
std::vector<tnode_t<DescendantType>> result;
std::vector<const parse_node_t *> stack{nodeptr};
while (!stack.empty() && result.size() < max_count) {
const parse_node_t *node = stack.back();
if (node->type == DescendantType::token) result.emplace_back(tree, node);
stack.pop_back();
node_offset_t index = node->child_count;
while (index--) {
stack.push_back(tree->get_child(*node, index));
}
}
return result;
}
/// Given that we are a list type, \return the next node of some Item in some node list,
/// adjusting 'this' to be the remainder of the list.
/// Returns an empty item on failure.
template <class ItemType>
tnode_t<ItemType> next_in_list() {
// We require that we can contain ourselves, and ItemType as well.
static_assert(child_type_possible<Type, Type>(), "Is not a list");
static_assert(child_type_possible<Type, ItemType>(), "Is not a list of that type");
if (!nodeptr) return {tree, nullptr};
const parse_node_t *next =
tree->next_node_in_node_list(*nodeptr, ItemType::token, &nodeptr);
return {tree, next};
}
};
template <typename Type>
tnode_t<Type> parse_node_tree_t::find_child(const parse_node_t &parent) const {
return tnode_t<Type>(this, &this->find_child(parent, Type::token));
}
/// Return the arguments under an arguments_list or arguments_or_redirection_list
/// Do not return more than max.
using variable_assignment_node_list_t = std::vector<tnode_t<grammar::variable_assignment>>;
variable_assignment_node_list_t get_variable_assignment_nodes(
tnode_t<grammar::variable_assignments>, size_t max = -1);
/// Given a plain statement, get the command from the child node. Returns the command string on
/// success, none on failure.
maybe_t<wcstring> command_for_plain_statement(tnode_t<grammar::plain_statement> stmt,
const wcstring &src);
/// Return the decoration for a plain statement.
parse_statement_decoration_t get_decoration(tnode_t<grammar::plain_statement> stmt);
/// Return the type for a boolean statement.
parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_decorator> stmt);
parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_conjunction_continuation> cont);
/// Given a redirection node, get the parsed redirection and target of the redirection (file path,
/// or fd).
maybe_t<pipe_or_redir_t> redirection_for_node(tnode_t<grammar::redirection> redirection,
const wcstring &src, wcstring *out_target);
/// Return the arguments under an arguments_list or arguments_or_redirection_list
/// Do not return more than max.
using arguments_node_list_t = std::vector<tnode_t<grammar::argument>>;
arguments_node_list_t get_argument_nodes(tnode_t<grammar::argument_list>, size_t max = -1);
arguments_node_list_t get_argument_nodes(tnode_t<grammar::arguments_or_redirections_list>,
size_t max = -1);
/// Return whether the given job is background because it has a & symbol.
bool job_node_is_background(tnode_t<grammar::job>);
/// If the conjunction is has a decorator (and/or), return it; otherwise return none. This only
/// considers the leading conjunction, e.g. in `and true || false` only the 'true' conjunction will
/// return 'and'.
parse_job_decoration_t get_decorator(tnode_t<grammar::job_conjunction>);
/// Return whether the statement is part of a pipeline.
/// This doesn't detect e.g. pipelines involving our parent's block statements.
enum class pipeline_position_t {
none, // not part of a pipeline
first, // first command in a pipeline
subsequent // second or further command in a pipeline
};
pipeline_position_t get_pipeline_position(tnode_t<grammar::statement> st);
/// Check whether an argument_list is a root list.
inline bool argument_list_is_root(tnode_t<grammar::argument_list> list) {
return !list.try_get_parent<grammar::argument_list>();
}
inline bool argument_list_is_root(tnode_t<grammar::arguments_or_redirections_list> list) {
return !list.try_get_parent<grammar::arguments_or_redirections_list>();
}
#endif

View File

@ -49,7 +49,7 @@ end' | $fish_indent
#CHECK: c
#CHECK: echo thing
#CHECK: end
echo 'echo foo |
echo banana' | $fish_indent
#CHECK: echo foo |
@ -57,12 +57,11 @@ echo banana' | $fish_indent
echo 'echo foo \\
;' | $fish_indent
#CHECK: echo foo \
#CHECK:
#CHECK: echo foo
echo 'echo foo \\
' | $fish_indent
#CHECK: echo foo \
#CHECK: echo foo
echo -n '
begin
@ -201,9 +200,9 @@ end; echo alpha "
#CHECK: begin
#CHECK: {{ }}echo hi
#CHECK: else
#CHECK:
#CHECK: {{^}}echo bye
#CHECK: end
#CHECK: echo alpha "
#CHECK: end; echo alpha "
# issue 1665
echo -n '
@ -285,7 +284,7 @@ echo bye
#CHECK:
#CHECK: echo hi |
#CHECK:
#CHECK: echo bye
#CHECK: {{ }}echo bye
echo 'a;;;;;;' | $fish_indent
#CHECK: a