mirror of
https://github.com/fish-shell/fish-shell.git
synced 2025-02-21 11:01:26 +08:00
Merge branch 'parser_cleanup_3'
This merges a sequence of changes which eliminates the "parse tree" construct and replaces it with a new abstract syntax tree implementation. This is simpler and easier to understand/use.
This commit is contained in:
commit
44944146e2
@ -114,14 +114,14 @@ set(FISH_SRCS
|
||||
src/fallback.cpp src/fish_version.cpp src/function.cpp src/highlight.cpp
|
||||
src/history.cpp src/history_file.cpp src/input.cpp src/input_common.cpp src/intern.cpp
|
||||
src/io.cpp src/iothread.cpp src/kill.cpp src/output.cpp src/pager.cpp
|
||||
src/parse_execution.cpp src/parse_productions.cpp src/parse_tree.cpp
|
||||
src/parse_execution.cpp src/parse_tree.cpp
|
||||
src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp
|
||||
src/postfork.cpp src/proc.cpp src/reader.cpp src/sanity.cpp src/screen.cpp
|
||||
src/signal.cpp src/tinyexpr.cpp src/tnode.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp
|
||||
src/signal.cpp src/tinyexpr.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp
|
||||
src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp src/wutil.cpp
|
||||
src/future_feature_flags.cpp src/redirection.cpp src/topic_monitor.cpp
|
||||
src/flog.cpp src/trace.cpp src/timer.cpp src/null_terminated_array.cpp
|
||||
src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp
|
||||
src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp src/ast.cpp
|
||||
)
|
||||
|
||||
# Header files are just globbed.
|
||||
|
1206
src/ast.cpp
Normal file
1206
src/ast.cpp
Normal file
File diff suppressed because it is too large
Load Diff
60
src/ast_node_types.inc
Normal file
60
src/ast_node_types.inc
Normal file
@ -0,0 +1,60 @@
|
||||
// Define ELEM and optionally ELEMLIST before including this file.
|
||||
// ELEM is for ordinary nodes.
|
||||
// ELEMLIST(x, y) marks list nodes and the type they contain.
|
||||
#ifndef ELEMLIST
|
||||
#define ELEMLIST(x, y) ELEM(x)
|
||||
#endif
|
||||
|
||||
ELEM(keyword_base)
|
||||
ELEM(token_base)
|
||||
ELEM(maybe_newlines)
|
||||
|
||||
ELEM(argument)
|
||||
ELEMLIST(argument_list, argument)
|
||||
|
||||
ELEM(redirection)
|
||||
ELEM(argument_or_redirection)
|
||||
ELEMLIST(argument_or_redirection_list, argument_or_redirection)
|
||||
|
||||
ELEM(variable_assignment)
|
||||
ELEMLIST(variable_assignment_list, variable_assignment)
|
||||
|
||||
ELEM(job)
|
||||
ELEM(job_conjunction)
|
||||
// For historical reasons, a job list is a list of job *conjunctions*. This should be fixed.
|
||||
ELEMLIST(job_list, job_conjunction)
|
||||
ELEM(job_conjunction_continuation)
|
||||
ELEMLIST(job_conjunction_continuation_list, job_conjunction_continuation)
|
||||
|
||||
ELEM(job_continuation)
|
||||
ELEMLIST(job_continuation_list, job_continuation)
|
||||
|
||||
ELEM(andor_job)
|
||||
ELEMLIST(andor_job_list, andor_job)
|
||||
|
||||
ELEM(statement)
|
||||
|
||||
ELEM(not_statement)
|
||||
|
||||
ELEM(block_statement)
|
||||
ELEM(for_header)
|
||||
ELEM(while_header)
|
||||
ELEM(function_header)
|
||||
ELEM(begin_header)
|
||||
|
||||
ELEM(if_statement)
|
||||
ELEM(if_clause)
|
||||
ELEM(elseif_clause)
|
||||
ELEMLIST(elseif_clause_list, elseif_clause)
|
||||
ELEM(else_clause)
|
||||
|
||||
ELEM(switch_statement)
|
||||
ELEM(case_item)
|
||||
ELEMLIST(case_item_list, case_item)
|
||||
|
||||
ELEM(decorated_statement)
|
||||
|
||||
ELEM(freestanding_argument_list)
|
||||
|
||||
#undef ELEM
|
||||
#undef ELEMLIST
|
@ -200,8 +200,7 @@ static int validate_function_name(int argc, const wchar_t *const *argv, wcstring
|
||||
/// Define a function. Calls into `function.cpp` to perform the heavy lifting of defining a
|
||||
/// function.
|
||||
int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args,
|
||||
const parsed_source_ref_t &source,
|
||||
tnode_t<grammar::block_statement> func_node) {
|
||||
const parsed_source_ref_t &source, const ast::block_statement_t &func_node) {
|
||||
assert(source && "Missing source in builtin_function");
|
||||
// The wgetopt function expects 'function' as the first argument. Make a new wcstring_list with
|
||||
// that property. This is needed because this builtin has a different signature than the other
|
||||
@ -252,7 +251,7 @@ int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_lis
|
||||
props->shadow_scope = opts.shadow_scope;
|
||||
props->named_arguments = std::move(opts.named_arguments);
|
||||
props->parsed_source = source;
|
||||
props->func_node = func_node;
|
||||
props->func_node = &func_node;
|
||||
|
||||
// Populate inherit_vars.
|
||||
for (const wcstring &name : opts.inherit_vars) {
|
||||
|
@ -8,7 +8,10 @@
|
||||
class parser_t;
|
||||
struct io_streams_t;
|
||||
|
||||
namespace ast {
|
||||
struct block_statement_t;
|
||||
}
|
||||
|
||||
int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args,
|
||||
const parsed_source_ref_t &source,
|
||||
tnode_t<grammar::block_statement> func_node);
|
||||
const parsed_source_ref_t &source, const ast::block_statement_t &func_node);
|
||||
#endif
|
||||
|
@ -45,7 +45,6 @@
|
||||
#include "path.h"
|
||||
#include "proc.h"
|
||||
#include "reader.h"
|
||||
#include "tnode.h"
|
||||
#include "util.h"
|
||||
#include "wcstringutil.h"
|
||||
#include "wildcard.h"
|
||||
|
@ -623,10 +623,10 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job,
|
||||
|
||||
if (p->type == process_type_t::block_node) {
|
||||
const parsed_source_ref_t &source = p->block_node_source;
|
||||
tnode_t<grammar::statement> node = p->internal_block_node;
|
||||
const ast::statement_t *node = p->internal_block_node;
|
||||
assert(source && node && "Process is missing node info");
|
||||
return [=](parser_t &parser) {
|
||||
return parser.eval_node(source, node, io_chain, job_group).status;
|
||||
return parser.eval_node(source, *node, io_chain, job_group).status;
|
||||
};
|
||||
} else {
|
||||
assert(p->type == process_type_t::function);
|
||||
@ -638,7 +638,7 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job,
|
||||
auto argv = move_to_sharedptr(p->get_argv_array().to_list());
|
||||
return [=](parser_t &parser) {
|
||||
// Pull out the job list from the function.
|
||||
tnode_t<grammar::job_list> body = props->func_node.child<1>();
|
||||
const ast::job_list_t &body = props->func_node->jobs;
|
||||
const block_t *fb = function_prepare_environment(parser, *argv, *props);
|
||||
auto res = parser.eval_node(props->parsed_source, body, io_chain, job_group);
|
||||
function_restore_environment(parser, fb);
|
||||
|
@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "ast.h"
|
||||
#include "color.h"
|
||||
#include "common.h"
|
||||
#include "env.h"
|
||||
@ -43,16 +44,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
#include "operation_context.h"
|
||||
#include "output.h"
|
||||
#include "parse_constants.h"
|
||||
#include "parse_util.h"
|
||||
#include "print_help.h"
|
||||
#include "tnode.h"
|
||||
#include "wutil.h" // IWYU pragma: keep
|
||||
|
||||
// The number of spaces per indent isn't supposed to be configurable.
|
||||
// See discussion at https://github.com/fish-shell/fish-shell/pull/6790
|
||||
#define SPACES_PER_INDENT 4
|
||||
|
||||
// An indent_t represents an abstract indent depth. 2 means we are in a doubly-nested block, etc.
|
||||
using indent_t = unsigned int;
|
||||
static bool dump_parse_tree = false;
|
||||
static int ret = 0;
|
||||
|
||||
@ -83,232 +82,539 @@ static wcstring read_file(FILE *f) {
|
||||
return result;
|
||||
}
|
||||
|
||||
struct prettifier_t {
|
||||
namespace {
|
||||
/// From C++14.
|
||||
template <bool B, typename T = void>
|
||||
using enable_if_t = typename std::enable_if<B, T>::type;
|
||||
|
||||
/// \return the number of escaping backslashes before a character.
|
||||
/// \p idx may be "one past the end."
|
||||
size_t count_preceding_backslashes(const wcstring &text, size_t idx) {
|
||||
assert(idx <= text.size() && "Out of bounds");
|
||||
size_t backslashes = 0;
|
||||
while (backslashes < idx && text.at(idx - backslashes - 1) == L'\\') {
|
||||
backslashes++;
|
||||
}
|
||||
return backslashes;
|
||||
}
|
||||
|
||||
/// \return whether a character at a given index is escaped.
|
||||
/// A character is escaped if it has an odd number of backslashes.
|
||||
bool char_is_escaped(const wcstring &text, size_t idx) {
|
||||
return count_preceding_backslashes(text, idx) % 2 == 1;
|
||||
}
|
||||
|
||||
using namespace ast;
|
||||
struct pretty_printer_t {
|
||||
// Note: this got somewhat more complicated after introducing the new AST, because that AST no
|
||||
// longer encodes detailed lexical information (e.g. every newline). This feels more complex
|
||||
// than necessary and would probably benefit from a more layered approach where we identify
|
||||
// certain runs, weight line breaks, have a cost model, etc.
|
||||
pretty_printer_t(const wcstring &src, bool do_indent)
|
||||
: source(src),
|
||||
indents(do_indent ? parse_util_compute_indents(source) : std::vector<int>(src.size(), 0)),
|
||||
ast(ast_t::parse(src, parse_flags())),
|
||||
do_indent(do_indent),
|
||||
gaps(compute_gaps()),
|
||||
preferred_semi_locations(compute_preferred_semi_locations()) {
|
||||
assert(indents.size() == source.size() && "indents and source should be same length");
|
||||
}
|
||||
|
||||
// Original source.
|
||||
const wcstring &source;
|
||||
|
||||
// The indents of our string.
|
||||
// This has the same length as 'source' and describes the indentation level.
|
||||
const std::vector<int> indents;
|
||||
|
||||
// The parsed ast.
|
||||
const ast_t ast;
|
||||
|
||||
// The prettifier output.
|
||||
wcstring output;
|
||||
|
||||
// The indent of the source range which we are currently emitting.
|
||||
int current_indent{0};
|
||||
|
||||
// Whether to indent, or just insert spaces.
|
||||
const bool do_indent;
|
||||
|
||||
// Whether we are at the beginning of a new line.
|
||||
bool has_new_line = true;
|
||||
// Whether the next gap text should hide the first newline.
|
||||
bool gap_text_mask_newline{false};
|
||||
|
||||
// Whether the last token was a semicolon.
|
||||
bool last_was_semicolon = false;
|
||||
// The "gaps": a sorted set of ranges between tokens.
|
||||
// These contain whitespace, comments, semicolons, and other lexical elements which are not
|
||||
// present in the ast.
|
||||
const std::vector<source_range_t> gaps;
|
||||
|
||||
// Whether we need to append a continuation new line before continuing.
|
||||
bool needs_continuation_newline = false;
|
||||
// The sorted set of source offsets of nl_semi_t which should be set as semis, not newlines.
|
||||
// This is computed ahead of time for convenience.
|
||||
const std::vector<uint32_t> preferred_semi_locations;
|
||||
|
||||
// Additional indentation due to line continuation (escaped newline)
|
||||
uint32_t line_continuation_indent = 0;
|
||||
// Flags we support.
|
||||
using gap_flags_t = uint32_t;
|
||||
enum {
|
||||
default_flags = 0,
|
||||
|
||||
prettifier_t(const wcstring &source, bool do_indent) : source(source), do_indent(do_indent) {}
|
||||
// Whether to allow line splitting via escaped newlines.
|
||||
// For example, in argument lists:
|
||||
//
|
||||
// echo a \
|
||||
// b
|
||||
//
|
||||
// If this is not set, then split-lines will be joined.
|
||||
allow_escaped_newlines = 1 << 0,
|
||||
|
||||
void prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx, indent_t node_indent,
|
||||
parse_token_type_t parent_type);
|
||||
// Whether to require a space before this token.
|
||||
// This is used when emitting semis:
|
||||
// echo a; echo b;
|
||||
// No space required between 'a' and ';', or 'b' and ';'.
|
||||
skip_space = 1 << 1,
|
||||
};
|
||||
|
||||
void maybe_prepend_escaped_newline(const parse_node_t &node) {
|
||||
if (node.has_preceding_escaped_newline()) {
|
||||
output.append(L" \\");
|
||||
append_newline(true);
|
||||
// \return gap text flags for the gap text that comes *before* a given node type.
|
||||
static gap_flags_t gap_text_flags_before_node(const node_t &node) {
|
||||
gap_flags_t result = default_flags;
|
||||
switch (node.type) {
|
||||
// Allow escaped newlines in argument and redirection lists.
|
||||
case type_t::argument:
|
||||
case type_t::redirection:
|
||||
result |= allow_escaped_newlines;
|
||||
break;
|
||||
|
||||
case type_t::token_base:
|
||||
// Allow escaped newlines before && and ||, and also pipes.
|
||||
switch (node.as<token_base_t>()->type) {
|
||||
case parse_token_type_andand:
|
||||
case parse_token_type_oror:
|
||||
case parse_token_type_pipe:
|
||||
result |= allow_escaped_newlines;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// \return whether we are at the start of a new line.
|
||||
bool at_line_start() const { return output.empty() || output.back() == L'\n'; }
|
||||
|
||||
// \return whether we have a space before the output.
|
||||
// This ignores escaped spaces and escaped newlines.
|
||||
bool has_preceding_space() const {
|
||||
long idx = static_cast<long>(output.size()) - 1;
|
||||
// Skip escaped newlines.
|
||||
// This is historical. Example:
|
||||
//
|
||||
// cmd1 \
|
||||
// | cmd2
|
||||
//
|
||||
// we want the pipe to "see" the space after cmd1.
|
||||
// TODO: this is too tricky, we should factor this better.
|
||||
while (idx >= 0 && output.at(idx) == L'\n') {
|
||||
size_t backslashes = count_preceding_backslashes(source, idx);
|
||||
if (backslashes % 2 == 0) {
|
||||
// Not escaped.
|
||||
return false;
|
||||
}
|
||||
idx -= (1 + backslashes);
|
||||
}
|
||||
return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx);
|
||||
}
|
||||
|
||||
// Entry point. Prettify our source code and return it.
|
||||
wcstring prettify() {
|
||||
output = wcstring{};
|
||||
node_visitor(*this).accept(ast.top());
|
||||
|
||||
// Trailing gap text.
|
||||
emit_gap_text_before(source_range_t{(uint32_t)source.size(), 0}, default_flags);
|
||||
|
||||
// Replace all trailing newlines with just a single one.
|
||||
while (!output.empty() && at_line_start()) {
|
||||
output.pop_back();
|
||||
}
|
||||
emit_newline();
|
||||
|
||||
wcstring result = std::move(output);
|
||||
return result;
|
||||
}
|
||||
|
||||
// \return a substring of source.
|
||||
wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); }
|
||||
|
||||
// Return the gap ranges from our ast.
|
||||
std::vector<source_range_t> compute_gaps() const {
|
||||
auto range_compare = [](source_range_t r1, source_range_t r2) {
|
||||
if (r1.start != r2.start) return r1.start < r2.start;
|
||||
return r1.length < r2.length;
|
||||
};
|
||||
// Collect the token ranges into a list.
|
||||
std::vector<source_range_t> tok_ranges;
|
||||
for (const node_t &node : ast) {
|
||||
if (node.category == category_t::leaf) {
|
||||
auto r = node.source_range();
|
||||
if (r.length > 0) tok_ranges.push_back(r);
|
||||
}
|
||||
}
|
||||
// Place a zero length range at end to aid in our inverting.
|
||||
tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0});
|
||||
|
||||
// Our tokens should be sorted.
|
||||
assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare));
|
||||
|
||||
// For each range, add a gap range between the previous range and this range.
|
||||
std::vector<source_range_t> gaps;
|
||||
uint32_t prev_end = 0;
|
||||
for (source_range_t tok_range : tok_ranges) {
|
||||
assert(tok_range.start >= prev_end &&
|
||||
"Token range should not overlap or be out of order");
|
||||
if (tok_range.start >= prev_end) {
|
||||
gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end});
|
||||
}
|
||||
prev_end = tok_range.start + tok_range.length;
|
||||
}
|
||||
return gaps;
|
||||
}
|
||||
|
||||
// Return sorted list of semi-preferring semi_nl nodes.
|
||||
std::vector<uint32_t> compute_preferred_semi_locations() const {
|
||||
std::vector<uint32_t> result;
|
||||
auto mark_as_semi = [&result](const optional_t<semi_nl_t> &n) {
|
||||
if (n && n->has_source()) result.push_back(n->range.start);
|
||||
};
|
||||
|
||||
// andor_job_lists get semis if they are short enough.
|
||||
for (const auto &node : ast) {
|
||||
// See if we have a condition and an andor_job_list.
|
||||
const optional_t<semi_nl_t> *condition = nullptr;
|
||||
const andor_job_list_t *andors = nullptr;
|
||||
if (const auto *ifc = node.try_as<if_clause_t>()) {
|
||||
condition = &ifc->condition.semi_nl;
|
||||
andors = &ifc->andor_tail;
|
||||
} else if (const auto *wc = node.try_as<while_header_t>()) {
|
||||
condition = &wc->condition.semi_nl;
|
||||
andors = &wc->andor_tail;
|
||||
}
|
||||
|
||||
// This describes the heuristic of when to place and_or job lists on separate lines.
|
||||
// That is, do we want:
|
||||
// if true; and false
|
||||
// or do we want:
|
||||
// if true
|
||||
// and false
|
||||
// Lists with two or fewer get semis.
|
||||
// Note the effective count is then three, because this list does not include the main
|
||||
// condition.
|
||||
if (andors && andors->count() > 0 && andors->count() <= 2) {
|
||||
if (condition) mark_as_semi(*condition);
|
||||
// Mark all but last of the andor list.
|
||||
for (uint32_t i = 0; i + 1 < andors->count(); i++) {
|
||||
mark_as_semi(andors->at(i)->job.semi_nl);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// `x ; and y` gets semis if it has them already, and they are on the same line.
|
||||
for (const auto &node : ast) {
|
||||
if (const auto *job_list = node.try_as<job_list_t>()) {
|
||||
const semi_nl_t *prev_job_semi_nl = nullptr;
|
||||
for (const job_conjunction_t &job : *job_list) {
|
||||
// Set up prev_job_semi_nl for the next iteration to make control flow easier.
|
||||
const semi_nl_t *prev = prev_job_semi_nl;
|
||||
prev_job_semi_nl = job.semi_nl.contents.get();
|
||||
|
||||
// Is this an 'and' or 'or' job?
|
||||
if (!job.decorator) continue;
|
||||
|
||||
// Now see if we want to mark 'prev' as allowing a semi.
|
||||
// Did we have a previous semi_nl which was a newline?
|
||||
if (!prev || substr(prev->range) != L";") continue;
|
||||
|
||||
// Is there a newline between them?
|
||||
assert(prev->range.start <= job.decorator->range.start &&
|
||||
"Ranges out of order");
|
||||
auto start = source.begin() + prev->range.start;
|
||||
auto end = source.begin() + job.decorator->range.end();
|
||||
if (std::find(start, end, L'\n') == end) {
|
||||
// We're going to allow the previous semi_nl to be a semi.
|
||||
result.push_back(prev->range.start);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
std::sort(result.begin(), result.end());
|
||||
return result;
|
||||
}
|
||||
|
||||
// Emit a space or indent as necessary, depending on the previous output.
|
||||
void emit_space_or_indent(gap_flags_t flags = default_flags) {
|
||||
if (at_line_start()) {
|
||||
output.append(SPACES_PER_INDENT * current_indent, L' ');
|
||||
} else if (!(flags & skip_space) && !has_preceding_space()) {
|
||||
output.append(1, L' ');
|
||||
}
|
||||
}
|
||||
|
||||
void append_newline(bool is_continuation = false) {
|
||||
output.push_back('\n');
|
||||
has_new_line = true;
|
||||
needs_continuation_newline = false;
|
||||
line_continuation_indent = is_continuation ? 1 : 0;
|
||||
// Emit "gap text:" newlines and comments from the original source.
|
||||
// Gap text may be a few things:
|
||||
//
|
||||
// 1. Just a space is common. We will trim the spaces to be empty.
|
||||
//
|
||||
// Here the gap text is the comment, followed by the newline:
|
||||
//
|
||||
// echo abc # arg
|
||||
// echo def
|
||||
//
|
||||
// 2. It may also be an escaped newline:
|
||||
// Here the gap text is a space, backslash, newline, space.
|
||||
//
|
||||
// echo \
|
||||
// hi
|
||||
//
|
||||
// 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe:
|
||||
//
|
||||
// begin | stuff
|
||||
//
|
||||
// We do not handle errors here - instead our caller does.
|
||||
void emit_gap_text(const wcstring &gap_text, gap_flags_t flags) {
|
||||
// Common case: if we are only spaces, do nothing.
|
||||
if (gap_text.find_first_not_of(L' ') == wcstring::npos) return;
|
||||
|
||||
// Look to see if there is an escaped newline.
|
||||
// Emit it if either we allow it, or it comes before the first comment.
|
||||
// Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap
|
||||
// text - we already know it has no semantic significance.
|
||||
size_t escaped_nl = gap_text.find(L"\\\n");
|
||||
if (escaped_nl != wcstring::npos) {
|
||||
size_t comment_idx = gap_text.find(L'#');
|
||||
if ((flags & allow_escaped_newlines) ||
|
||||
(comment_idx != wcstring::npos && escaped_nl < comment_idx)) {
|
||||
// Emit a space before the escaped newline.
|
||||
if (!at_line_start() && !has_preceding_space()) {
|
||||
output.append(L" ");
|
||||
}
|
||||
output.append(L"\\\n");
|
||||
}
|
||||
}
|
||||
|
||||
// It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we
|
||||
// always emit one.
|
||||
bool needs_nl = false;
|
||||
|
||||
tokenizer_t tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES);
|
||||
while (maybe_t<tok_t> tok = tokenizer.next()) {
|
||||
wcstring tok_text = tokenizer.text_of(*tok);
|
||||
|
||||
if (needs_nl) {
|
||||
emit_newline();
|
||||
needs_nl = false;
|
||||
if (tok_text == L"\n") continue;
|
||||
} else if (gap_text_mask_newline) {
|
||||
// We only respect mask_newline the first time through the loop.
|
||||
gap_text_mask_newline = false;
|
||||
if (tok_text == L"\n") continue;
|
||||
}
|
||||
|
||||
if (tok->type == token_type_t::comment) {
|
||||
emit_space_or_indent();
|
||||
output.append(tok_text);
|
||||
needs_nl = true;
|
||||
} else if (tok->type == token_type_t::end) {
|
||||
// This may be either a newline or semicolon.
|
||||
// Semicolons found here are not part of the ast and can simply be removed.
|
||||
// Newlines are preserved unless mask_newline is set.
|
||||
if (tok_text == L"\n") {
|
||||
emit_newline();
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"Gap text should only have comments and newlines - instead found token "
|
||||
"type %d with text: %ls\n",
|
||||
(int)tok->type, tok_text.c_str());
|
||||
DIE("Gap text should only have comments and newlines");
|
||||
}
|
||||
}
|
||||
if (needs_nl) emit_newline();
|
||||
}
|
||||
|
||||
// Append whitespace as necessary. If we have a newline, append the appropriate indent.
|
||||
// Otherwise, append a space.
|
||||
void append_whitespace(indent_t node_indent) {
|
||||
if (needs_continuation_newline) {
|
||||
append_newline(true);
|
||||
/// \return the gap text ending at a given index into the string, or empty if none.
|
||||
source_range_t gap_text_to(uint32_t end) const {
|
||||
auto where = std::lower_bound(
|
||||
gaps.begin(), gaps.end(), end,
|
||||
[](source_range_t r, uint32_t end) { return r.start + r.length < end; });
|
||||
if (where == gaps.end() || where->start + where->length != end) {
|
||||
// Not found.
|
||||
return source_range_t{0, 0};
|
||||
} else {
|
||||
return *where;
|
||||
}
|
||||
if (!has_new_line) {
|
||||
output.push_back(L' ');
|
||||
} else if (do_indent) {
|
||||
output.append((node_indent + line_continuation_indent) * SPACES_PER_INDENT, L' ');
|
||||
}
|
||||
|
||||
/// \return whether a range \p r overlaps an error range from our ast.
|
||||
bool range_contained_error(source_range_t r) const {
|
||||
const auto &errs = ast.extras().errors;
|
||||
auto range_is_before = [](source_range_t x, source_range_t y) {
|
||||
return x.start + x.length <= y.start;
|
||||
};
|
||||
assert(std::is_sorted(errs.begin(), errs.end(), range_is_before) &&
|
||||
"Error ranges should be sorted");
|
||||
return std::binary_search(errs.begin(), errs.end(), r, range_is_before);
|
||||
}
|
||||
|
||||
// Emit the gap text before a source range.
|
||||
void emit_gap_text_before(source_range_t r, gap_flags_t flags) {
|
||||
assert(r.start <= source.size() && "source out of bounds");
|
||||
uint32_t start = r.start;
|
||||
if (start < indents.size()) current_indent = indents.at(start);
|
||||
|
||||
// Find the gap text which ends at start.
|
||||
source_range_t range = gap_text_to(start);
|
||||
if (range.length > 0) {
|
||||
// If this range contained an error, append the gap text without modification.
|
||||
// For example in: echo foo "
|
||||
// We don't want to mess with the quote.
|
||||
if (range_contained_error(range)) {
|
||||
output.append(substr(range));
|
||||
} else {
|
||||
emit_gap_text(substr(range), flags);
|
||||
}
|
||||
}
|
||||
// Always clear gap_text_mask_newline after emitting even empty gap text.
|
||||
gap_text_mask_newline = false;
|
||||
}
|
||||
|
||||
/// Given a string \p input, remove unnecessary quotes, etc.
|
||||
wcstring clean_text(const wcstring &input) {
|
||||
// Unescape the string - this leaves special markers around if there are any
|
||||
// expansions or anything. We specifically tell it to not compute backslash-escapes
|
||||
// like \U or \x, because we want to leave them intact.
|
||||
wcstring unescaped = input;
|
||||
unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);
|
||||
|
||||
// Remove INTERNAL_SEPARATOR because that's a quote.
|
||||
auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
|
||||
unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), unescaped.end());
|
||||
|
||||
// If no non-"good" char is left, use the unescaped version.
|
||||
// This can be extended to other characters, but giving the precise list is tough,
|
||||
// can change over time (see "^", "%" and "?", in some cases "{}") and it just makes
|
||||
// people feel more at ease.
|
||||
auto goodchars = [](wchar_t ch) {
|
||||
return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
|
||||
};
|
||||
if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == unescaped.end() &&
|
||||
!unescaped.empty()) {
|
||||
return unescaped;
|
||||
} else {
|
||||
return input;
|
||||
}
|
||||
}
|
||||
|
||||
// Emit a range of original text. This indents as needed, and also inserts preceding gap text.
|
||||
// If \p tolerate_line_splitting is set, then permit escaped newlines; otherwise collapse such
|
||||
// lines.
|
||||
void emit_text(source_range_t r, gap_flags_t flags) {
|
||||
emit_gap_text_before(r, flags);
|
||||
current_indent = indents.at(r.start);
|
||||
if (r.length > 0) {
|
||||
emit_space_or_indent(flags);
|
||||
output.append(clean_text(substr(r)));
|
||||
}
|
||||
}
|
||||
|
||||
template <type_t Type>
|
||||
void emit_node_text(const leaf_t<Type> &node) {
|
||||
emit_text(node.range, gap_text_flags_before_node(node));
|
||||
}
|
||||
|
||||
// Emit one newline.
|
||||
void emit_newline() { output.push_back(L'\n'); }
|
||||
|
||||
// Emit a semicolon.
|
||||
void emit_semi() { output.push_back(L';'); }
|
||||
|
||||
// For branch and list nodes, default is to visit their children.
|
||||
template <typename Node>
|
||||
enable_if_t<Node::Category == category_t::branch> visit(const Node &node) {
|
||||
node_visitor(*this).accept_children_of(node);
|
||||
}
|
||||
|
||||
template <typename Node>
|
||||
enable_if_t<Node::Category == ast::category_t::list> visit(const Node &node) {
|
||||
node_visitor(*this).accept_children_of(node);
|
||||
}
|
||||
|
||||
// Leaf nodes we just visit their text.
|
||||
void visit(const keyword_base_t &node) { emit_node_text(node); }
|
||||
void visit(const token_base_t &node) { emit_node_text(node); }
|
||||
void visit(const argument_t &node) { emit_node_text(node); }
|
||||
void visit(const variable_assignment_t &node) { emit_node_text(node); }
|
||||
|
||||
void visit(const semi_nl_t &node) {
|
||||
// These are semicolons or newlines which are part of the ast. That means it includes e.g.
|
||||
// ones terminating a job or 'if' header, but not random semis in job lists. We respect
|
||||
// preferred_semi_locations to decide whether or not these should stay as newlines or
|
||||
// become semicolons.
|
||||
|
||||
// Check if we should prefer a semicolon.
|
||||
bool prefer_semi = node.range.length > 0 &&
|
||||
std::binary_search(preferred_semi_locations.begin(),
|
||||
preferred_semi_locations.end(), node.range.start);
|
||||
emit_gap_text_before(node.range, gap_text_flags_before_node(node));
|
||||
|
||||
// Don't emit anything if the gap text put us on a newline (because it had a comment).
|
||||
if (!at_line_start()) {
|
||||
prefer_semi ? emit_semi() : emit_newline();
|
||||
|
||||
// If it was a semi but we emitted a newline, swallow a subsequent newline.
|
||||
if (!prefer_semi && substr(node.range) == L";") {
|
||||
gap_text_mask_newline = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void visit(const redirection_t &node) {
|
||||
// No space between a redirection operator and its target (#2899).
|
||||
emit_text(node.oper.range, default_flags);
|
||||
emit_text(node.target.range, skip_space);
|
||||
}
|
||||
|
||||
void visit(const maybe_newlines_t &node) {
|
||||
// Our newlines may have comments embedded in them, example:
|
||||
// cmd |
|
||||
// # something
|
||||
// cmd2
|
||||
// Treat it as gap text.
|
||||
if (node.range.length > 0) {
|
||||
auto flags = gap_text_flags_before_node(node);
|
||||
current_indent = indents.at(node.range.start);
|
||||
emit_gap_text_before(node.range, flags);
|
||||
wcstring text = source.substr(node.range.start, node.range.length);
|
||||
emit_gap_text(text, flags);
|
||||
}
|
||||
}
|
||||
|
||||
void visit(const begin_header_t &node) {
|
||||
// 'begin' does not require a newline after it, but we insert one.
|
||||
node_visitor(*this).accept_children_of(node);
|
||||
if (!at_line_start()) {
|
||||
emit_newline();
|
||||
}
|
||||
}
|
||||
|
||||
// The flags we use to parse.
|
||||
static parse_tree_flags_t parse_flags() {
|
||||
return parse_flag_continue_after_error | parse_flag_include_comments |
|
||||
parse_flag_leave_unterminated | parse_flag_show_blank_lines;
|
||||
}
|
||||
};
|
||||
|
||||
// Dump a parse tree node in a form helpful to someone debugging the behavior of this program.
|
||||
static void dump_node(indent_t node_indent, const parse_node_t &node, const wcstring &source) {
|
||||
wchar_t nextc = L' ';
|
||||
wchar_t prevc = L' ';
|
||||
wcstring source_txt;
|
||||
if (node.source_start != SOURCE_OFFSET_INVALID && node.source_length != SOURCE_OFFSET_INVALID) {
|
||||
int nextc_idx = node.source_start + node.source_length;
|
||||
if (static_cast<size_t>(nextc_idx) < source.size()) {
|
||||
nextc = source[node.source_start + node.source_length];
|
||||
}
|
||||
if (node.source_start > 0) prevc = source[node.source_start - 1];
|
||||
source_txt = source.substr(node.source_start, node.source_length);
|
||||
}
|
||||
wchar_t prevc_str[4] = {prevc, 0, 0, 0};
|
||||
wchar_t nextc_str[4] = {nextc, 0, 0, 0};
|
||||
if (prevc < L' ') {
|
||||
prevc_str[0] = L'\\';
|
||||
prevc_str[1] = L'c';
|
||||
prevc_str[2] = prevc + '@';
|
||||
}
|
||||
if (nextc < L' ') {
|
||||
nextc_str[0] = L'\\';
|
||||
nextc_str[1] = L'c';
|
||||
nextc_str[2] = nextc + '@';
|
||||
}
|
||||
std::fwprintf(stderr, L"{off %4u, len %4u, indent %2u, kw %ls, %ls} [%ls|%ls|%ls]\n",
|
||||
node.source_start, node.source_length, node_indent,
|
||||
keyword_description(node.keyword), token_type_description(node.type), prevc_str,
|
||||
source_txt.c_str(), nextc_str);
|
||||
}
|
||||
|
||||
// Append the prettified text for the subtree rooted at node_idx to 'output'.
// \param tree the parse tree that contains the node.
// \param node_idx index of the subtree's root within 'tree'.
// \param node_indent indent level in effect when the root is reached.
// \param parent_type type of the root's parent; together with the node's own type it decides
//        whether the node is a root job list, a root case item list, or an if/while header,
//        each of which increases the indent by one.
// The walk is iterative: nodes waiting to be processed are kept on an explicit stack, and a
// node's children are pushed in reverse so that they are popped in source order.
void prettifier_t::prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx,
|
||||
indent_t node_indent, parse_token_type_t parent_type) {
|
||||
// Use an explicit stack to avoid stack overflow.
|
||||
struct pending_node_t {
|
||||
node_offset_t index;
|
||||
indent_t indent;
|
||||
parse_token_type_t parent_type;
|
||||
};
|
||||
std::stack<pending_node_t> pending_node_stack;
|
||||
|
||||
// Seed the stack with the requested root node.
pending_node_stack.push({node_idx, node_indent, parent_type});
|
||||
while (!pending_node_stack.empty()) {
|
||||
pending_node_t args = pending_node_stack.top();
|
||||
pending_node_stack.pop();
|
||||
// Note: these locals intentionally shadow the function parameters of the same names.
auto node_idx = args.index;
|
||||
auto node_indent = args.indent;
|
||||
auto parent_type = args.parent_type;
|
||||
|
||||
const parse_node_t &node = tree.at(node_idx);
|
||||
const parse_token_type_t node_type = node.type;
|
||||
// Type of the node stored immediately before this one in the tree (by index), if any.
const parse_token_type_t prev_node_type =
|
||||
node_idx > 0 ? tree.at(node_idx - 1).type : token_type_invalid;
|
||||
|
||||
// Increment the indent if we are either a root job_list, or root case_item_list, or in an
|
||||
// if or while header (#1665).
|
||||
const bool is_root_job_list =
|
||||
node_type == symbol_job_list && parent_type != symbol_job_list;
|
||||
const bool is_root_case_list =
|
||||
node_type == symbol_case_item_list && parent_type != symbol_case_item_list;
|
||||
const bool is_if_while_header =
|
||||
(node_type == symbol_job_conjunction || node_type == symbol_andor_job_list) &&
|
||||
(parent_type == symbol_if_clause || parent_type == symbol_while_header);
|
||||
|
||||
if (is_root_job_list || is_root_case_list || is_if_while_header) {
|
||||
node_indent += 1;
|
||||
}
|
||||
|
||||
// When tree dumping is enabled, describe every visited node via dump_node.
if (dump_parse_tree) dump_node(node_indent, node, source);
|
||||
|
||||
// Prepend any escaped newline, but only for certain cases.
|
||||
// We allow it to split arguments (including at the end - this is like trailing commas in
|
||||
// lists, makes for better diffs), to separate pipelines (but it has to be *before* the
|
||||
// pipe, so the pipe symbol is the first thing on the new line after the indent) and to
|
||||
// separate &&/|| job lists (`and` and `or` are handled separately below, as they *allow*
|
||||
// semicolons)
|
||||
// TODO: Handle
|
||||
// foo | \
|
||||
// bar
|
||||
// so it just removes the escape - pipes don't need it. This was changed in some fish
|
||||
// version, figure out which it was and if it is worth supporting.
|
||||
if (prev_node_type == symbol_arguments_or_redirections_list ||
|
||||
prev_node_type == symbol_argument_list || node_type == parse_token_type_andand ||
|
||||
node_type == parse_token_type_pipe || node_type == parse_token_type_end) {
|
||||
maybe_prepend_escaped_newline(node);
|
||||
}
|
||||
|
||||
// Handle comments, which come before the text. Each comment is copied verbatim from the
// source after the current indent.
|
||||
if (node.has_comments()) {
|
||||
auto comment_nodes = tree.comment_nodes_for_node(node);
|
||||
for (const auto &comment : comment_nodes) {
|
||||
maybe_prepend_escaped_newline(*comment.node());
|
||||
append_whitespace(node_indent);
|
||||
auto source_range = comment.source_range();
|
||||
output.append(source, source_range->start, source_range->length);
|
||||
needs_continuation_newline = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (node_type == parse_token_type_end) {
|
||||
// For historical reasons, semicolons also get "TOK_END".
|
||||
// We need to distinguish between them, because otherwise `a;;;;` gets extra lines
|
||||
// instead of the semicolons. Semicolons are just ignored, unless they are followed by a
|
||||
// command. So `echo;` removes the semicolon, but `echo; echo` removes it and adds a
|
||||
// newline.
|
||||
last_was_semicolon = false;
|
||||
if (node.get_source(source) == L"\n") {
|
||||
append_newline();
|
||||
} else if (!has_new_line) {
|
||||
// The semicolon is only useful if we haven't just had a newline.
|
||||
last_was_semicolon = true;
|
||||
}
|
||||
} else if ((node_type >= FIRST_PARSE_TOKEN_TYPE && node_type <= LAST_PARSE_TOKEN_TYPE) ||
|
||||
node_type == parse_special_type_parse_error) {
|
||||
if (last_was_semicolon) {
|
||||
// We keep the semicolon for `; and` and `; or`,
|
||||
// others we turn into newlines.
|
||||
if (node.keyword != parse_keyword_t::kw_and &&
|
||||
node.keyword != parse_keyword_t::kw_or) {
|
||||
append_newline();
|
||||
} else {
|
||||
output.push_back(L';');
|
||||
}
|
||||
last_was_semicolon = false;
|
||||
}
|
||||
|
||||
if (node.has_source()) {
|
||||
// Some type representing a particular token.
|
||||
// No leading whitespace is emitted when the preceding node is a redirection token.
if (prev_node_type != parse_token_type_redirection) {
|
||||
append_whitespace(node_indent);
|
||||
}
|
||||
wcstring unescaped{source, node.source_start, node.source_length};
|
||||
// Unescape the string - this leaves special markers around if there are any
|
||||
// expansions or anything. We specifically tell it to not compute backslash-escapes
|
||||
// like \U or \x, because we want to leave them intact.
|
||||
unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);
|
||||
|
||||
// Remove INTERNAL_SEPARATOR because that's a quote.
|
||||
auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
|
||||
unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote),
|
||||
unescaped.end());
|
||||
|
||||
// If no non-"good" char is left, use the unescaped version.
|
||||
// This can be extended to other characters, but giving the precise list is tough,
|
||||
// can change over time (see "^", "%" and "?", in some cases "{}") and it just makes
|
||||
// people feel more at ease.
|
||||
auto goodchars = [](wchar_t ch) {
|
||||
return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
|
||||
};
|
||||
// Otherwise (or if unescaping left nothing) the original source text is emitted unchanged.
if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) ==
|
||||
unescaped.end() &&
|
||||
!unescaped.empty()) {
|
||||
output.append(unescaped);
|
||||
} else {
|
||||
output.append(source, node.source_start, node.source_length);
|
||||
}
|
||||
has_new_line = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Put all children in stack in reversed order
|
||||
// This way they will be processed in correct order.
|
||||
for (node_offset_t idx = node.child_count; idx > 0; idx--) {
|
||||
// Note: We pass our type to our child, which becomes its parent node type.
|
||||
// Note: While node.child_start could be -1 (NODE_OFFSET_INVALID) the addition is safe
|
||||
// because we won't execute this call in that case since node.child_count should be
|
||||
// zero.
|
||||
pending_node_stack.push({node.child_start + (idx - 1), node_indent, node_type});
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
static const char *highlight_role_to_string(highlight_role_t role) {
|
||||
#define TEST_ROLE(x) \
|
||||
@ -394,29 +700,17 @@ static std::string make_pygments_csv(const wcstring &src) {
|
||||
|
||||
// Entry point for prettification.
|
||||
static wcstring prettify(const wcstring &src, bool do_indent) {
|
||||
parse_node_tree_t parse_tree;
|
||||
int parse_flags = (parse_flag_continue_after_error | parse_flag_include_comments |
|
||||
parse_flag_leave_unterminated | parse_flag_show_blank_lines);
|
||||
if (!parse_tree_from_string(src, parse_flags, &parse_tree, nullptr)) {
|
||||
return src; // we return the original string on failure
|
||||
}
|
||||
|
||||
if (dump_parse_tree) {
|
||||
const wcstring dump = parse_dump_tree(parse_tree, src);
|
||||
std::fwprintf(stderr, L"%ls\n", dump.c_str());
|
||||
auto ast =
|
||||
ast::ast_t::parse(src, parse_flag_leave_unterminated | parse_flag_include_comments |
|
||||
parse_flag_show_extra_semis);
|
||||
wcstring ast_dump = ast.dump(src);
|
||||
std::fwprintf(stderr, L"%ls\n", ast_dump.c_str());
|
||||
}
|
||||
|
||||
// We may have a forest of disconnected trees on a parse failure. We have to handle all nodes
|
||||
// that have no parent, and all parse errors.
|
||||
prettifier_t prettifier{src, do_indent};
|
||||
for (node_offset_t i = 0; i < parse_tree.size(); i++) {
|
||||
const parse_node_t &node = parse_tree.at(i);
|
||||
if (node.parent == NODE_OFFSET_INVALID || node.type == parse_special_type_parse_error) {
|
||||
// A root node.
|
||||
prettifier.prettify_node(parse_tree, i, 0, symbol_job_list);
|
||||
}
|
||||
}
|
||||
return std::move(prettifier.output);
|
||||
pretty_printer_t printer{src, do_indent};
|
||||
wcstring output = printer.prettify();
|
||||
return output;
|
||||
}
|
||||
|
||||
/// Given a string and list of colors of the same size, return the string with HTML span elements
|
||||
|
@ -40,6 +40,7 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ast.h"
|
||||
#include "autoload.h"
|
||||
#include "builtin.h"
|
||||
#include "color.h"
|
||||
@ -75,7 +76,6 @@
|
||||
#include "signal.h"
|
||||
#include "termsize.h"
|
||||
#include "timer.h"
|
||||
#include "tnode.h"
|
||||
#include "tokenizer.h"
|
||||
#include "topic_monitor.h"
|
||||
#include "utf8.h"
|
||||
@ -978,15 +978,18 @@ static void test_debounce_timeout() {
|
||||
}
|
||||
|
||||
static parser_test_error_bits_t detect_argument_errors(const wcstring &src) {
|
||||
parse_node_tree_t tree;
|
||||
if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL, symbol_argument_list)) {
|
||||
using namespace ast;
|
||||
auto ast = ast_t::parse_argument_list(src, parse_flag_none);
|
||||
if (ast.errored()) {
|
||||
return PARSER_TEST_ERROR;
|
||||
}
|
||||
|
||||
assert(!tree.empty()); //!OCLINT(multiple unary operator)
|
||||
tnode_t<grammar::argument_list> arg_list{&tree, &tree.at(0)};
|
||||
auto first_arg = arg_list.next_in_list<grammar::argument>();
|
||||
return parse_util_detect_errors_in_argument(first_arg, first_arg.get_source(src));
|
||||
const ast::argument_t *first_arg =
|
||||
ast.top()->as<freestanding_argument_list_t>()->arguments.at(0);
|
||||
if (!first_arg) {
|
||||
err(L"Failed to parse an argument");
|
||||
return 0;
|
||||
}
|
||||
return parse_util_detect_errors_in_argument(*first_arg, first_arg->source(src));
|
||||
}
|
||||
|
||||
/// Test the parser.
|
||||
@ -1084,7 +1087,7 @@ static void test_parser() {
|
||||
}
|
||||
|
||||
if (parse_util_detect_errors(L"echo (\nfoo\n bar") != PARSER_TEST_INCOMPLETE) {
|
||||
err(L"unterminated multiline subhsell not reported properly");
|
||||
err(L"unterminated multiline subshell not reported properly");
|
||||
}
|
||||
|
||||
if (parse_util_detect_errors(L"begin ; true ; end | ") != PARSER_TEST_INCOMPLETE) {
|
||||
@ -1268,75 +1271,121 @@ static void test_cancellation() {
|
||||
parser.clear_cancel();
|
||||
}
|
||||
|
||||
namespace indent_tests {
|
||||
// A struct which is either text or a new indent.
|
||||
struct segment_t {
|
||||
// The indent to set
|
||||
int indent{0};
|
||||
const char *text{nullptr};
|
||||
|
||||
/* implicit */ segment_t(int indent) : indent(indent) {}
|
||||
/* implicit */ segment_t(const char *text) : text(text) {}
|
||||
};
|
||||
|
||||
using test_t = std::vector<segment_t>;
|
||||
using test_list_t = std::vector<test_t>;
|
||||
|
||||
// Add a new test to a test list based on a series of ints and texts.
|
||||
template <typename... Types>
|
||||
void add_test(test_list_t *v, const Types &... types) {
|
||||
segment_t segments[] = {types...};
|
||||
v->emplace_back(std::begin(segments), std::end(segments));
|
||||
}
|
||||
} // namespace indent_tests
|
||||
|
||||
static void test_indents() {
|
||||
say(L"Testing indents");
|
||||
using namespace indent_tests;
|
||||
|
||||
// Here are the components of our source and the indents we expect those to be.
|
||||
struct indent_component_t {
|
||||
const wchar_t *txt;
|
||||
int indent;
|
||||
};
|
||||
test_list_t tests;
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
0, "\nend");
|
||||
|
||||
const indent_component_t components1[] = {{L"if foo", 0}, {L"end", 0}, {NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
1, "\nfoo", //
|
||||
0, "\nend");
|
||||
|
||||
const indent_component_t components2[] = {{L"if foo", 0},
|
||||
{L"", 1}, // trailing newline!
|
||||
{NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
1, "\nif", 2, " bar", //
|
||||
1, "\nend", //
|
||||
0, "\nend");
|
||||
|
||||
const indent_component_t components3[] = {{L"if foo", 0},
|
||||
{L"foo", 1},
|
||||
{L"end", 0}, // trailing newline!
|
||||
{NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
1, "\nif", 2, " bar", //
|
||||
1, "\n", // FIXME: this should be 2 but parse_util_compute_indents has a bug
|
||||
1, "\nend\n");
|
||||
|
||||
const indent_component_t components4[] = {{L"if foo", 0}, {L"if bar", 1}, {L"end", 1},
|
||||
{L"end", 0}, {L"", 0}, {NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
1, "\nif", 2, " bar", //
|
||||
2, "\n");
|
||||
|
||||
const indent_component_t components5[] = {{L"if foo", 0}, {L"if bar", 1}, {L"", 2}, {NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "begin", //
|
||||
1, "\nfoo", //
|
||||
1, "\n");
|
||||
|
||||
const indent_component_t components6[] = {{L"begin", 0}, {L"foo", 1}, {L"", 1}, {NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "begin", //
|
||||
1, "\n;", //
|
||||
0, "end", //
|
||||
0, "\nfoo", 0, "\n");
|
||||
|
||||
const indent_component_t components7[] = {{L"begin", 0}, {L";", 1}, {L"end", 0},
|
||||
{L"foo", 0}, {L"", 0}, {NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "begin", //
|
||||
1, "\n;", //
|
||||
0, "end", //
|
||||
0, "\nfoo", 0, "\n");
|
||||
|
||||
const indent_component_t components8[] = {{L"if foo", 0}, {L"if bar", 1}, {L"baz", 2},
|
||||
{L"end", 1}, {L"", 1}, {NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "if", 1, " foo", //
|
||||
1, "\nif", 2, " bar", //
|
||||
2, "\nbaz", //
|
||||
1, "\nend", 1, "\n");
|
||||
|
||||
const indent_component_t components9[] = {{L"switch foo", 0}, {L"", 1}, {NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "switch foo", //
|
||||
1, "\n" //
|
||||
);
|
||||
|
||||
const indent_component_t components10[] = {
|
||||
{L"switch foo", 0}, {L"case bar", 1}, {L"case baz", 1}, {L"quux", 2}, {L"", 2}, {NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "switch foo", //
|
||||
1, "\ncase bar", //
|
||||
1, "\ncase baz", //
|
||||
2, "\nquux", //
|
||||
2, "\nquux" //
|
||||
);
|
||||
|
||||
const indent_component_t components11[] = {{L"switch foo", 0},
|
||||
{L"cas", 1}, // parse error indentation handling
|
||||
{NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "switch foo", //
|
||||
1, "\ncas" // parse error indentation handling
|
||||
);
|
||||
|
||||
const indent_component_t components12[] = {{L"while false", 0},
|
||||
{L"# comment", 1}, // comment indentation handling
|
||||
{L"command", 1}, // comment indentation handling
|
||||
{L"# comment2", 1}, // comment indentation handling
|
||||
{NULL, -1}};
|
||||
add_test(&tests, //
|
||||
0, "while", 1, " false", //
|
||||
1, "\n# comment", // comment indentation handling
|
||||
1, "\ncommand", //
|
||||
1, "\n# comment 2" //
|
||||
);
|
||||
|
||||
const indent_component_t *tests[] = {components1, components2, components3, components4,
|
||||
components5, components6, components7, components8,
|
||||
components9, components10, components11, components12};
|
||||
for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) {
|
||||
const indent_component_t *components = tests[which];
|
||||
// Count how many we have.
|
||||
size_t component_count = 0;
|
||||
while (components[component_count].txt != NULL) {
|
||||
component_count++;
|
||||
}
|
||||
|
||||
// Generate the expected indents.
|
||||
int test_idx = 0;
|
||||
for (const test_t &test : tests) {
|
||||
// Construct the input text and expected indents.
|
||||
wcstring text;
|
||||
std::vector<int> expected_indents;
|
||||
for (size_t i = 0; i < component_count; i++) {
|
||||
if (i > 0) {
|
||||
text.push_back(L'\n');
|
||||
expected_indents.push_back(components[i].indent);
|
||||
int current_indent = 0;
|
||||
for (const segment_t &segment : test) {
|
||||
if (!segment.text) {
|
||||
current_indent = segment.indent;
|
||||
} else {
|
||||
wcstring tmp = str2wcstring(segment.text);
|
||||
text.append(tmp);
|
||||
expected_indents.insert(expected_indents.end(), tmp.size(), current_indent);
|
||||
}
|
||||
text.append(components[i].txt);
|
||||
expected_indents.resize(text.size(), components[i].indent);
|
||||
}
|
||||
do_test(expected_indents.size() == text.size());
|
||||
|
||||
@ -1350,11 +1399,13 @@ static void test_indents() {
|
||||
do_test(expected_indents.size() == indents.size());
|
||||
for (size_t i = 0; i < text.size(); i++) {
|
||||
if (expected_indents.at(i) != indents.at(i)) {
|
||||
err(L"Wrong indent at index %lu in test #%lu (expected %d, actual %d):\n%ls\n", i,
|
||||
which + 1, expected_indents.at(i), indents.at(i), text.c_str());
|
||||
break; // don't keep showing errors for the rest of the line
|
||||
err(L"Wrong indent at index %lu (char 0x%02x) in test #%lu (expected %d, actual "
|
||||
L"%d):\n%ls\n",
|
||||
i, text.at(i), test_idx, expected_indents.at(i), indents.at(i), text.c_str());
|
||||
break; // don't keep showing errors for the rest of the test
|
||||
}
|
||||
}
|
||||
test_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -4298,12 +4349,12 @@ static void test_new_parser_correctness() {
|
||||
{L"true || false; and true", true},
|
||||
{L"true || ||", false},
|
||||
{L"|| true", false},
|
||||
{L"true || \n\n false", true},
|
||||
{L"true || \n\n false", false},
|
||||
};
|
||||
|
||||
for (const auto &test : parser_tests) {
|
||||
parse_node_tree_t parse_tree;
|
||||
bool success = parse_tree_from_string(test.src, parse_flag_none, &parse_tree, NULL);
|
||||
auto ast = ast::ast_t::parse(test.src);
|
||||
bool success = !ast.errored();
|
||||
if (success && !test.ok) {
|
||||
err(L"\"%ls\" should NOT have parsed, but did", test.src);
|
||||
} else if (!success && test.ok) {
|
||||
@ -4332,7 +4383,7 @@ static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_co
|
||||
}
|
||||
|
||||
static void test_new_parser_fuzzing() {
|
||||
say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t));
|
||||
say(L"Fuzzing parser");
|
||||
const wcstring fuzzes[] = {
|
||||
L"if", L"else", L"for", L"in", L"while", L"begin", L"function",
|
||||
L"switch", L"case", L"end", L"and", L"or", L"not", L"command",
|
||||
@ -4343,7 +4394,6 @@ static void test_new_parser_fuzzing() {
|
||||
wcstring src;
|
||||
src.reserve(128);
|
||||
|
||||
parse_node_tree_t node_tree;
|
||||
parse_error_list_t errors;
|
||||
|
||||
double start = timef();
|
||||
@ -4357,7 +4407,7 @@ static void test_new_parser_fuzzing() {
|
||||
unsigned long permutation = 0;
|
||||
while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++,
|
||||
&src)) {
|
||||
parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors);
|
||||
ast::ast_t::parse(src);
|
||||
}
|
||||
if (log_it) std::fwprintf(stderr, L"done (%lu)\n", permutation);
|
||||
}
|
||||
@ -4369,33 +4419,36 @@ static void test_new_parser_fuzzing() {
|
||||
// true if successful.
|
||||
static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *out_joined_args,
|
||||
enum parse_statement_decoration_t *out_deco) {
|
||||
using namespace ast;
|
||||
out_cmd->clear();
|
||||
out_joined_args->clear();
|
||||
*out_deco = parse_statement_decoration_none;
|
||||
|
||||
parse_node_tree_t tree;
|
||||
if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL)) {
|
||||
return false;
|
||||
}
|
||||
auto ast = ast_t::parse(src);
|
||||
if (ast.errored()) return false;
|
||||
|
||||
// Get the statement. Should only have one.
|
||||
tnode_t<grammar::job_list> job_list{&tree, &tree.at(0)};
|
||||
auto stmts = job_list.descendants<grammar::plain_statement>();
|
||||
if (stmts.size() != 1) {
|
||||
say(L"Unexpected number of statements (%lu) found in '%ls'", stmts.size(), src.c_str());
|
||||
return false;
|
||||
const decorated_statement_t *statement = nullptr;
|
||||
for (const auto &n : ast) {
|
||||
if (const auto *tmp = n.try_as<decorated_statement_t>()) {
|
||||
if (statement) {
|
||||
say(L"More than one decorated statement found in '%ls'", src.c_str());
|
||||
return false;
|
||||
}
|
||||
statement = tmp;
|
||||
}
|
||||
}
|
||||
tnode_t<grammar::plain_statement> stmt = stmts.at(0);
|
||||
|
||||
// Return its decoration and command.
|
||||
*out_deco = get_decoration(stmt);
|
||||
*out_cmd = *command_for_plain_statement(stmt, src);
|
||||
*out_deco = statement->decoration();
|
||||
*out_cmd = statement->command.source(src);
|
||||
|
||||
// Return arguments separated by spaces.
|
||||
bool first = true;
|
||||
for (auto arg_node : stmt.descendants<grammar::argument>()) {
|
||||
for (const ast::argument_or_redirection_t &arg : statement->args_or_redirs) {
|
||||
if (!arg.is_argument()) continue;
|
||||
if (!first) out_joined_args->push_back(L' ');
|
||||
out_joined_args->append(arg_node.get_source(src));
|
||||
out_joined_args->append(arg.source(src));
|
||||
first = false;
|
||||
}
|
||||
|
||||
@ -4404,19 +4457,22 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o
|
||||
|
||||
// Verify that 'function -h' and 'function --help' are plain statements but 'function --foo' is
|
||||
// not (issue #1240).
|
||||
template <typename Type>
|
||||
template <ast::type_t Type>
|
||||
static void check_function_help(const wchar_t *src) {
|
||||
parse_node_tree_t tree;
|
||||
if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL)) {
|
||||
using namespace ast;
|
||||
auto ast = ast_t::parse(src);
|
||||
if (ast.errored()) {
|
||||
err(L"Failed to parse '%ls'", src);
|
||||
}
|
||||
|
||||
tnode_t<grammar::job_list> node{&tree, &tree.at(0)};
|
||||
auto node_list = node.descendants<Type>();
|
||||
if (node_list.size() == 0) {
|
||||
err(L"Failed to find node of type '%ls'", token_type_description(Type::token));
|
||||
} else if (node_list.size() > 1) {
|
||||
err(L"Found too many nodes of type '%ls'", token_type_description(Type::token));
|
||||
int count = 0;
|
||||
for (const node_t &node : ast) {
|
||||
count += (node.type == Type);
|
||||
}
|
||||
if (count == 0) {
|
||||
err(L"Failed to find node of type '%ls'", ast_type_to_string(Type));
|
||||
} else if (count > 1) {
|
||||
err(L"Found too many nodes of type '%ls'", ast_type_to_string(Type));
|
||||
}
|
||||
}
|
||||
|
||||
@ -4463,30 +4519,32 @@ static void test_new_parser_ll2() {
|
||||
test.src.c_str(), (int)test.deco, (int)deco, (long)__LINE__);
|
||||
}
|
||||
|
||||
check_function_help<grammar::plain_statement>(L"function -h");
|
||||
check_function_help<grammar::plain_statement>(L"function --help");
|
||||
check_function_help<grammar::function_header>(L"function --foo; end");
|
||||
check_function_help<grammar::function_header>(L"function foo; end");
|
||||
check_function_help<ast::type_t::decorated_statement>(L"function -h");
|
||||
check_function_help<ast::type_t::decorated_statement>(L"function --help");
|
||||
check_function_help<ast::type_t::function_header>(L"function --foo; end");
|
||||
check_function_help<ast::type_t::function_header>(L"function foo; end");
|
||||
}
|
||||
|
||||
static void test_new_parser_ad_hoc() {
|
||||
using namespace ast;
|
||||
// Very ad-hoc tests for issues encountered.
|
||||
say(L"Testing new parser ad hoc tests");
|
||||
|
||||
// Ensure that 'case' terminates a job list.
|
||||
const wcstring src = L"switch foo ; case bar; case baz; end";
|
||||
parse_node_tree_t parse_tree;
|
||||
bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL);
|
||||
if (!success) {
|
||||
auto ast = ast_t::parse(src);
|
||||
if (ast.errored()) {
|
||||
err(L"Parsing failed");
|
||||
}
|
||||
|
||||
// Expect three case_item_lists: one for each case, and a terminal one. The bug was that we'd
|
||||
// Expect two case_item_lists. The bug was that we'd
|
||||
// try to run a command 'case'.
|
||||
tnode_t<grammar::job_list> root{&parse_tree, &parse_tree.at(0)};
|
||||
auto node_list = root.descendants<grammar::case_item_list>();
|
||||
if (node_list.size() != 3) {
|
||||
err(L"Expected 3 case item nodes, found %lu", node_list.size());
|
||||
int count = 0;
|
||||
for (const auto &n : ast) {
|
||||
count += (n.type == type_t::case_item);
|
||||
}
|
||||
if (count != 2) {
|
||||
err(L"Expected 2 case item nodes, found %d", count);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4507,7 +4565,9 @@ static void test_new_parser_errors() {
|
||||
{L"if true ; end ; else", parse_error_unbalancing_else},
|
||||
|
||||
{L"case", parse_error_unbalancing_case},
|
||||
{L"if true ; case ; end", parse_error_unbalancing_case},
|
||||
{L"if true ; case ; end", parse_error_generic},
|
||||
|
||||
{L"true | and", parse_error_andor_in_pipeline},
|
||||
};
|
||||
|
||||
for (const auto &test : tests) {
|
||||
@ -4515,15 +4575,17 @@ static void test_new_parser_errors() {
|
||||
parse_error_code_t expected_code = test.code;
|
||||
|
||||
parse_error_list_t errors;
|
||||
parse_node_tree_t parse_tree;
|
||||
bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors);
|
||||
if (success) {
|
||||
auto ast = ast::ast_t::parse(src, parse_flag_none, &errors);
|
||||
if (!ast.errored()) {
|
||||
err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str());
|
||||
}
|
||||
|
||||
if (errors.size() != 1) {
|
||||
err(L"Source '%ls' was expected to produce 1 error, but instead produced %lu errors",
|
||||
src.c_str(), errors.size());
|
||||
for (const auto &err : errors) {
|
||||
fprintf(stderr, "%ls\n", err.describe(src, false).c_str());
|
||||
}
|
||||
} else if (errors.at(0).code != expected_code) {
|
||||
err(L"Source '%ls' was expected to produce error code %lu, but instead produced error "
|
||||
L"code %lu",
|
||||
@ -4862,6 +4924,12 @@ static void test_highlighting() {
|
||||
{L")", highlight_role_t::error},
|
||||
});
|
||||
|
||||
highlight_tests.push_back({
|
||||
{L"echo", highlight_role_t::command},
|
||||
{L"stuff", highlight_role_t::param},
|
||||
{L"# comment", highlight_role_t::comment},
|
||||
});
|
||||
|
||||
auto &vars = parser_t::principal_parser().vars();
|
||||
// Verify variables and wildcards in commands using /bin/cat.
|
||||
vars.set(L"VARIABLE_IN_COMMAND", ENV_LOCAL, {L"a"});
|
||||
|
@ -64,9 +64,7 @@ class category_list_t {
|
||||
category_t exec_fork{L"exec-fork", L"Calls to fork()"};
|
||||
|
||||
category_t output_invalid{L"output-invalid", L"Trying to print invalid output"};
|
||||
category_t parse_productions{L"parse-productions", L"Resolving tokens"};
|
||||
category_t parse_productions_chatty{L"parse-productions-chatty",
|
||||
L"Resolving tokens (chatty messages)"};
|
||||
category_t ast_construction{L"ast-construction", L"Parsing fish AST"};
|
||||
|
||||
category_t proc_job_run{L"proc-job-run", L"Jobs getting started or continued"};
|
||||
|
||||
|
@ -224,17 +224,14 @@ bool function_get_definition(const wcstring &name, wcstring &out_definition) {
|
||||
const function_info_t *func = funcset->get_info(name);
|
||||
if (!func || !func->props) return false;
|
||||
// We want to preserve comments that the AST attaches to the header (#5285).
|
||||
// Take everything from the end of the header to the end of the body.
|
||||
// Take everything from the end of the header to the 'end' keyword.
|
||||
const auto &props = func->props;
|
||||
namespace g = grammar;
|
||||
tnode_t<g::block_header> header = props->func_node.child<0>();
|
||||
tnode_t<g::job_list> jobs = props->func_node.child<1>();
|
||||
auto header_src = header.source_range();
|
||||
auto jobs_src = jobs.source_range();
|
||||
if (header_src && jobs_src) {
|
||||
auto header_src = props->func_node->header->try_source_range();
|
||||
auto end_kw_src = props->func_node->end.try_source_range();
|
||||
if (header_src && end_kw_src) {
|
||||
uint32_t body_start = header_src->start + header_src->length;
|
||||
uint32_t body_end = jobs_src->start + jobs_src->length;
|
||||
assert(body_start <= jobs_src->start && "job list must come after header");
|
||||
uint32_t body_end = end_kw_src->start;
|
||||
assert(body_start <= body_end && "end keyword should come after header");
|
||||
out_definition = wcstring(props->parsed_source->src, body_start, body_end - body_start);
|
||||
}
|
||||
return true;
|
||||
@ -313,7 +310,7 @@ int function_get_definition_lineno(const wcstring &name) {
|
||||
// return one plus the number of newlines at offsets less than the start of our function's
|
||||
// statement (which includes the header).
|
||||
// TODO: merge with line_offset_of_character_at_offset?
|
||||
auto source_range = func->props->func_node.source_range();
|
||||
auto source_range = func->props->func_node->try_source_range();
|
||||
assert(source_range && "Function has no source range");
|
||||
uint32_t func_start = source_range->start;
|
||||
const wcstring &source = func->props->parsed_source->src;
|
||||
|
@ -11,10 +11,13 @@
|
||||
#include "env.h"
|
||||
#include "event.h"
|
||||
#include "parse_tree.h"
|
||||
#include "tnode.h"
|
||||
|
||||
class parser_t;
|
||||
|
||||
namespace ast {
|
||||
struct block_statement_t;
|
||||
}
|
||||
|
||||
/// A function's constant properties. These do not change once initialized.
|
||||
struct function_properties_t {
|
||||
/// Parsed source containing the function.
|
||||
@ -23,7 +26,7 @@ struct function_properties_t {
|
||||
/// Node containing the function statement, pointing into parsed_source.
|
||||
/// We store block_statement, not job_list, so that comments attached to the header are
|
||||
/// preserved.
|
||||
tnode_t<grammar::block_statement> func_node;
|
||||
const ast::block_statement_t *func_node;
|
||||
|
||||
/// List of all named arguments for this function.
|
||||
wcstring_list_t named_arguments;
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
#include "ast.h"
|
||||
#include "builtin.h"
|
||||
#include "color.h"
|
||||
#include "common.h"
|
||||
@ -31,14 +32,11 @@
|
||||
#include "parse_util.h"
|
||||
#include "parser.h"
|
||||
#include "path.h"
|
||||
#include "tnode.h"
|
||||
#include "tokenizer.h"
|
||||
#include "wcstringutil.h"
|
||||
#include "wildcard.h"
|
||||
#include "wutil.h" // IWYU pragma: keep
|
||||
|
||||
namespace g = grammar;
|
||||
|
||||
#define CURSOR_POSITION_INVALID static_cast<size_t>(-1)
|
||||
|
||||
static const wchar_t *get_highlight_var_name(highlight_role_t role) {
|
||||
@ -338,12 +336,11 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d
|
||||
|
||||
// Given a plain statement node in a parse tree, get the command and return it, expanded
|
||||
// appropriately for commands. If we succeed, return true.
|
||||
static bool plain_statement_get_expanded_command(const wcstring &src,
|
||||
tnode_t<g::plain_statement> stmt,
|
||||
const operation_context_t &ctx,
|
||||
wcstring *out_cmd) {
|
||||
static bool statement_get_expanded_command(const wcstring &src,
|
||||
const ast::decorated_statement_t &stmt,
|
||||
const operation_context_t &ctx, wcstring *out_cmd) {
|
||||
// Get the command. Try expanding it. If we cannot, it's an error.
|
||||
maybe_t<wcstring> cmd = command_for_plain_statement(stmt, src);
|
||||
maybe_t<wcstring> cmd = stmt.command.source(src);
|
||||
if (!cmd) return false;
|
||||
expand_result_t err = expand_to_command_and_args(*cmd, ctx, out_cmd, nullptr);
|
||||
return err == expand_result_t::ok;
|
||||
@ -384,6 +381,9 @@ rgb_color_t highlight_get_color(const highlight_spec_t &highlight, bool is_backg
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool command_is_valid(const wcstring &cmd, enum parse_statement_decoration_t decoration,
|
||||
const wcstring &working_directory, const environment_t &vars);
|
||||
|
||||
static bool has_expand_reserved(const wcstring &str) {
|
||||
bool result = false;
|
||||
for (auto wc : str) {
|
||||
@ -399,27 +399,22 @@ static bool has_expand_reserved(const wcstring &str) {
|
||||
// command (as a string), if any. This is used to validate autosuggestions.
|
||||
static bool autosuggest_parse_command(const wcstring &buff, const operation_context_t &ctx,
|
||||
wcstring *out_expanded_command, wcstring *out_arg) {
|
||||
// Parse the buffer.
|
||||
parse_node_tree_t parse_tree;
|
||||
parse_tree_from_string(buff,
|
||||
parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens,
|
||||
&parse_tree, nullptr);
|
||||
auto ast = ast::ast_t::parse(
|
||||
buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens);
|
||||
|
||||
// Find the first statement.
|
||||
tnode_t<g::plain_statement> first_statement{};
|
||||
for (const auto &node : parse_tree) {
|
||||
if (node.type == symbol_plain_statement) {
|
||||
first_statement = tnode_t<g::plain_statement>(&parse_tree, &node);
|
||||
break;
|
||||
}
|
||||
const ast::decorated_statement_t *first_statement = nullptr;
|
||||
if (const ast::job_conjunction_t *jc = ast.top()->as<ast::job_list_t>()->at(0)) {
|
||||
first_statement = jc->job.statement.contents->try_as<ast::decorated_statement_t>();
|
||||
}
|
||||
|
||||
if (first_statement &&
|
||||
plain_statement_get_expanded_command(buff, first_statement, ctx, out_expanded_command)) {
|
||||
// Find the first argument.
|
||||
auto args_and_redirs = first_statement.child<1>();
|
||||
if (auto arg = args_and_redirs.next_in_list<grammar::argument>()) {
|
||||
*out_arg = arg.get_source(buff);
|
||||
statement_get_expanded_command(buff, *first_statement, ctx, out_expanded_command)) {
|
||||
// Check if the first argument or redirection is, in fact, an argument.
|
||||
if (const auto *arg_or_redir = first_statement->args_or_redirs.at(0)) {
|
||||
if (arg_or_redir && arg_or_redir->is_argument()) {
|
||||
*out_arg = arg_or_redir->argument().source(buff);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -775,31 +770,56 @@ class highlighter_t {
|
||||
const bool io_ok;
|
||||
// Working directory.
|
||||
const wcstring working_directory;
|
||||
// The ast we produced.
|
||||
ast::ast_t ast;
|
||||
// The resulting colors.
|
||||
using color_array_t = std::vector<highlight_spec_t>;
|
||||
color_array_t color_array;
|
||||
// The parse tree of the buff.
|
||||
parse_node_tree_t parse_tree;
|
||||
|
||||
// Flags we use for AST parsing.
|
||||
static constexpr parse_tree_flags_t ast_flags =
|
||||
parse_flag_continue_after_error | parse_flag_include_comments |
|
||||
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated |
|
||||
parse_flag_show_extra_semis;
|
||||
|
||||
// Color a command.
|
||||
void color_command(tnode_t<g::tok_string> node);
|
||||
// Color an argument.
|
||||
void color_argument(tnode_t<g::tok_string> node);
|
||||
void color_command(const ast::string_t &node);
|
||||
// Color a node as if it were an argument.
|
||||
void color_as_argument(const ast::node_t &node);
|
||||
// Color a redirection.
|
||||
void color_redirection(tnode_t<g::redirection> node);
|
||||
// Color a list of arguments. If cmd_is_cd is true, then the arguments are for 'cd'; detect
|
||||
// invalid directories.
|
||||
void color_arguments(const std::vector<tnode_t<g::argument>> &args, bool cmd_is_cd = false);
|
||||
// Color the redirections of the given node.
|
||||
void color_redirections(tnode_t<g::arguments_or_redirections_list> list);
|
||||
void color_redirection(const ast::redirection_t &node);
|
||||
// Color all the children of the command with the given type.
|
||||
void color_children(const parse_node_t &parent, parse_token_type_t type,
|
||||
highlight_spec_t color);
|
||||
void color_children(const ast::node_t &parent, ast::type_t type, highlight_spec_t color);
|
||||
// Colors the source range of a node with a given color.
|
||||
void color_node(const parse_node_t &node, highlight_spec_t color);
|
||||
void color_node(const ast::node_t &node, highlight_spec_t color);
|
||||
// Colors a range with a given color.
|
||||
void color_range(source_range_t range, highlight_spec_t color);
|
||||
// return whether a plain statement is 'cd'.
|
||||
bool is_cd(tnode_t<g::plain_statement> stmt) const;
|
||||
bool is_cd(const ast::decorated_statement_t &stmt) const;
|
||||
|
||||
/// \return a substring of our buffer.
|
||||
wcstring get_source(source_range_t r) const;
|
||||
|
||||
public:
|
||||
// Visit the children of a node.
|
||||
void visit_children(const ast::node_t &node) {
|
||||
ast::node_visitor(*this).accept_children_of(&node);
|
||||
}
|
||||
|
||||
// AST visitor implementations.
|
||||
void visit(const ast::keyword_base_t &kw);
|
||||
void visit(const ast::token_base_t &tok);
|
||||
void visit(const ast::redirection_t &redir);
|
||||
void visit(const ast::variable_assignment_t &varas);
|
||||
void visit(const ast::semi_nl_t &semi_nl);
|
||||
void visit(const ast::decorated_statement_t &stmt);
|
||||
|
||||
// Visit an argument, perhaps knowing that our command is cd.
|
||||
void visit(const ast::argument_t &arg, bool cmd_is_cd = false);
|
||||
|
||||
// Default implementation is to just visit children.
|
||||
void visit(const ast::node_t &node) { visit_children(node); }
|
||||
|
||||
// Constructor
|
||||
highlighter_t(const wcstring &str, size_t pos, const operation_context_t &ctx, wcstring wd,
|
||||
bool can_do_io)
|
||||
@ -808,52 +828,44 @@ class highlighter_t {
|
||||
ctx(ctx),
|
||||
io_ok(can_do_io),
|
||||
working_directory(std::move(wd)),
|
||||
color_array(str.size()) {
|
||||
// Parse the tree.
|
||||
parse_tree_from_string(buff,
|
||||
parse_flag_continue_after_error | parse_flag_include_comments |
|
||||
parse_flag_accept_incomplete_tokens,
|
||||
&this->parse_tree, nullptr);
|
||||
}
|
||||
ast(ast::ast_t::parse(buff, ast_flags)) {}
|
||||
|
||||
// Perform highlighting, returning an array of colors.
|
||||
color_array_t highlight();
|
||||
};
|
||||
|
||||
void highlighter_t::color_node(const parse_node_t &node, highlight_spec_t color) {
|
||||
// Can only color nodes with valid source ranges.
|
||||
if (!node.has_source() || node.source_length == 0) return;
|
||||
|
||||
// Fill the color array with our color in the corresponding range.
|
||||
size_t source_end = node.source_start + node.source_length;
|
||||
assert(source_end >= node.source_start);
|
||||
assert(source_end <= color_array.size());
|
||||
|
||||
std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end,
|
||||
color);
|
||||
/// \return the portion of our buffer covered by the source range \p r.
/// The range is asserted to not wrap around and to lie within the buffer.
wcstring highlighter_t::get_source(source_range_t r) const {
    const auto range_end = r.start + r.length;
    assert(range_end >= r.start && "Overflow");
    assert(range_end <= this->buff.size() && "Out of range");
    return wcstring(this->buff, r.start, r.length);
}
|
||||
|
||||
void highlighter_t::color_command(tnode_t<g::tok_string> node) {
|
||||
auto source_range = node.source_range();
|
||||
if (!source_range) return;
|
||||
// Apply a color to the whole source range reported by an AST node.
void highlighter_t::color_node(const ast::node_t &node, highlight_spec_t color) {
    const source_range_t node_range = node.source_range();
    this->color_range(node_range, color);
}
|
||||
|
||||
const wcstring cmd_str = node.get_source(this->buff);
|
||||
// Fill the entries of color_array covered by `range` with `color`.
// The range must lie entirely within the color array.
void highlighter_t::color_range(source_range_t range, highlight_spec_t color) {
    // Phrase the bounds check without adding start and length together, so that a range whose
    // end would wrap around cannot slip past the assertion (get_source guards against the same
    // kind of overflow).
    const size_t color_count = this->color_array.size();
    assert(range.start <= color_count && range.length <= color_count - range.start &&
           "Range out of bounds");
    std::fill_n(this->color_array.begin() + range.start, range.length, color);
}
|
||||
|
||||
void highlighter_t::color_command(const ast::string_t &node) {
|
||||
source_range_t source_range = node.source_range();
|
||||
const wcstring cmd_str = get_source(source_range);
|
||||
|
||||
// Get an iterator to the colors associated with the argument.
|
||||
const size_t arg_start = source_range->start;
|
||||
const size_t arg_start = source_range.start;
|
||||
const color_array_t::iterator colors = color_array.begin() + arg_start;
|
||||
color_string_internal(cmd_str, highlight_role_t::command, colors);
|
||||
}
|
||||
|
||||
// node does not necessarily have type symbol_argument here.
|
||||
void highlighter_t::color_argument(tnode_t<g::tok_string> node) {
|
||||
void highlighter_t::color_as_argument(const ast::node_t &node) {
|
||||
auto source_range = node.source_range();
|
||||
if (!source_range) return;
|
||||
|
||||
const wcstring arg_str = node.get_source(this->buff);
|
||||
const wcstring arg_str = get_source(source_range);
|
||||
|
||||
// Get an iterator to the colors associated with the argument.
|
||||
const size_t arg_start = source_range->start;
|
||||
const size_t arg_start = source_range.start;
|
||||
const color_array_t::iterator arg_colors = color_array.begin() + arg_start;
|
||||
|
||||
// Color this argument without concern for command substitutions.
|
||||
@ -905,15 +917,13 @@ void highlighter_t::color_argument(tnode_t<g::tok_string> node) {
|
||||
|
||||
/// Indicates whether the source range of the given node forms a valid path in the given
|
||||
/// working_directory.
|
||||
static bool node_is_potential_path(const wcstring &src, const parse_node_t &node,
|
||||
const operation_context_t &ctx,
|
||||
const wcstring &working_directory) {
|
||||
if (!node.has_source()) return false;
|
||||
|
||||
static bool range_is_potential_path(const wcstring &src, const source_range_t &range,
|
||||
const operation_context_t &ctx,
|
||||
const wcstring &working_directory) {
|
||||
// Get the node source, unescape it, and then pass it to is_potential_path along with the
|
||||
// working directory (as a one element list).
|
||||
bool result = false;
|
||||
wcstring token(src, node.source_start, node.source_length);
|
||||
wcstring token = src.substr(range.start, range.length);
|
||||
if (unescape_string_in_place(&token, UNESCAPE_SPECIAL)) {
|
||||
// Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY.
|
||||
// Put it back.
|
||||
@ -925,172 +935,257 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node
|
||||
return result;
|
||||
}
|
||||
|
||||
bool highlighter_t::is_cd(tnode_t<g::plain_statement> stmt) const {
|
||||
bool cmd_is_cd = false;
|
||||
if (this->io_ok && stmt.has_source()) {
|
||||
wcstring cmd_str;
|
||||
if (plain_statement_get_expanded_command(this->buff, stmt, ctx, &cmd_str)) {
|
||||
cmd_is_cd = (cmd_str == L"cd");
|
||||
}
|
||||
bool highlighter_t::is_cd(const ast::decorated_statement_t &stmt) const {
|
||||
wcstring cmd_str;
|
||||
if (this->io_ok && statement_get_expanded_command(this->buff, stmt, ctx, &cmd_str)) {
|
||||
return cmd_str == L"cd";
|
||||
}
|
||||
return cmd_is_cd;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Color all of the arguments of the given node list, which should be argument_list or
|
||||
// argument_or_redirection_list.
|
||||
void highlighter_t::color_arguments(const std::vector<tnode_t<g::argument>> &args, bool cmd_is_cd) {
|
||||
// Find all the arguments of this list.
|
||||
for (tnode_t<g::argument> arg : args) {
|
||||
this->color_argument(arg.child<0>());
|
||||
void highlighter_t::visit(const ast::keyword_base_t &kw) {
|
||||
highlight_role_t role = highlight_role_t::normal;
|
||||
switch (kw.kw) {
|
||||
case parse_keyword_t::kw_begin:
|
||||
case parse_keyword_t::kw_builtin:
|
||||
case parse_keyword_t::kw_case:
|
||||
case parse_keyword_t::kw_command:
|
||||
case parse_keyword_t::kw_else:
|
||||
case parse_keyword_t::kw_end:
|
||||
case parse_keyword_t::kw_exec:
|
||||
case parse_keyword_t::kw_for:
|
||||
case parse_keyword_t::kw_function:
|
||||
case parse_keyword_t::kw_if:
|
||||
case parse_keyword_t::kw_in:
|
||||
case parse_keyword_t::kw_switch:
|
||||
case parse_keyword_t::kw_while:
|
||||
role = highlight_role_t::command;
|
||||
break;
|
||||
|
||||
if (cmd_is_cd) {
|
||||
// Mark this as an error if it's not 'help' and not a valid cd path.
|
||||
wcstring param = arg.get_source(this->buff);
|
||||
if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) {
|
||||
bool is_help = string_prefixes_string(param, L"--help") ||
|
||||
string_prefixes_string(param, L"-h");
|
||||
if (!is_help && this->io_ok &&
|
||||
!is_potential_cd_path(param, working_directory, ctx, PATH_EXPAND_TILDE)) {
|
||||
this->color_node(arg, highlight_role_t::error);
|
||||
}
|
||||
case parse_keyword_t::kw_and:
|
||||
case parse_keyword_t::kw_or:
|
||||
case parse_keyword_t::kw_not:
|
||||
case parse_keyword_t::kw_exclam:
|
||||
case parse_keyword_t::kw_time:
|
||||
role = highlight_role_t::operat;
|
||||
break;
|
||||
|
||||
case parse_keyword_t::none:
|
||||
break;
|
||||
}
|
||||
color_node(kw, role);
|
||||
}
|
||||
|
||||
// Color a token node based on its token type. Token types without a dedicated role are colored
// with the normal role.
void highlighter_t::visit(const ast::token_base_t &tok) {
    highlight_role_t role = highlight_role_t::normal;
    switch (tok.type) {
        case parse_token_type_end:
        case parse_token_type_pipe:
        case parse_token_type_background:
            role = highlight_role_t::statement_terminator;
            break;

        case parse_token_type_andand:
        case parse_token_type_oror:
            role = highlight_role_t::operat;
            break;

        case parse_token_type_string:
            // Assume all strings are params. This handles e.g. the variables in a for header or
            // function header. Other strings (like arguments to commands) need more complex
            // handling, which occurs in their respective overrides of visit().
            role = highlight_role_t::param;
            break;

        default:
            break;
    }
    color_node(tok, role);
}
|
||||
|
||||
// Color a semi_nl node as a statement terminator.
void highlighter_t::visit(const ast::semi_nl_t &semi_nl) {
    const highlight_spec_t terminator_color = highlight_role_t::statement_terminator;
    this->color_node(semi_nl, terminator_color);
}
|
||||
|
||||
// Color an argument node. When the caller reports that the command is 'cd' and I/O is allowed,
// additionally mark the argument as an error if it expands, fails the --help / -h prefix test,
// and is not a potential cd path relative to the working directory.
void highlighter_t::visit(const ast::argument_t &arg, bool cmd_is_cd) {
    color_as_argument(arg);

    // The extra validation is only performed for cd, and only when I/O is permitted.
    if (!cmd_is_cd || !io_ok) return;

    // expand_one rewrites the string in place; nothing more to do if expansion fails.
    wcstring expanded = arg.source(this->buff);
    if (!expand_one(expanded, expand_flag::skip_cmdsubst, ctx)) return;

    const bool matches_help = string_prefixes_string(expanded, L"--help") ||
                              string_prefixes_string(expanded, L"-h");
    if (matches_help) return;

    // Mark this as an error since it's not 'help' and not a valid cd path.
    if (!is_potential_cd_path(expanded, working_directory, ctx, PATH_EXPAND_TILDE)) {
        this->color_node(arg, highlight_role_t::error);
    }
}
|
||||
|
||||
void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node) {
|
||||
if (!redirection_node.has_source()) return;
|
||||
// Color a variable assignment node; the whole node is colored the same way an argument is.
void highlighter_t::visit(const ast::variable_assignment_t &varas) {
    this->color_as_argument(varas);

    // TODO: Color the '=' in the variable assignment as an operator, for fun.
    // Sketch, left disabled:
    // if (auto where = variable_assignment_equals_pos(varas.source(this->buff))) {
    //     this->color_array.at(*where) = highlight_role_t::operat;
    // }
}
|
||||
|
||||
tnode_t<g::tok_redirection> redir_prim = redirection_node.child<0>(); // like 2>
|
||||
tnode_t<g::tok_string> redir_target = redirection_node.child<1>(); // like &1 or file path
|
||||
void highlighter_t::visit(const ast::decorated_statement_t &stmt) {
|
||||
// Color any decoration.
|
||||
if (stmt.opt_decoration) this->visit(*stmt.opt_decoration);
|
||||
|
||||
if (redir_prim) {
|
||||
wcstring target;
|
||||
const maybe_t<pipe_or_redir_t> redirect =
|
||||
redirection_for_node(redirection_node, this->buff, &target);
|
||||
// Color the command's source code.
|
||||
// If we get no source back, there's nothing to color.
|
||||
maybe_t<wcstring> cmd = stmt.command.try_source(this->buff);
|
||||
if (!cmd.has_value()) return;
|
||||
|
||||
// We may get a missing redirection type if the redirection is invalid.
|
||||
auto hl = (redirect && redirect->is_valid()) ? highlight_role_t::redirection
|
||||
: highlight_role_t::error;
|
||||
this->color_node(redir_prim, hl);
|
||||
wcstring expanded_cmd;
|
||||
bool is_valid_cmd = false;
|
||||
if (!this->io_ok) {
|
||||
// We cannot check if the command is invalid, so just assume it's valid.
|
||||
is_valid_cmd = true;
|
||||
} else if (variable_assignment_equals_pos(*cmd)) {
|
||||
is_valid_cmd = true;
|
||||
} else {
|
||||
// Check to see if the command is valid.
|
||||
// Try expanding it. If we cannot, it's an error.
|
||||
bool expanded = statement_get_expanded_command(buff, stmt, ctx, &expanded_cmd);
|
||||
if (expanded && !has_expand_reserved(expanded_cmd)) {
|
||||
is_valid_cmd =
|
||||
command_is_valid(expanded_cmd, stmt.decoration(), working_directory, ctx.vars);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the argument contains a command substitution. If so, highlight it as a param
|
||||
// even though it's a command redirection, and don't try to do any other validation.
|
||||
if (parse_util_locate_cmdsubst(target.c_str(), nullptr, nullptr, true) != 0) {
|
||||
this->color_argument(redir_target);
|
||||
// Color our statement.
|
||||
if (is_valid_cmd) {
|
||||
this->color_command(stmt.command);
|
||||
} else {
|
||||
this->color_node(stmt.command, highlight_role_t::error);
|
||||
}
|
||||
|
||||
// Color arguments and redirections.
|
||||
// Except if our command is 'cd' we have special logic for how arguments are colored.
|
||||
bool is_cd = (expanded_cmd == L"cd");
|
||||
for (const ast::argument_or_redirection_t &v : stmt.args_or_redirs) {
|
||||
if (v.is_argument()) {
|
||||
this->visit(v.argument(), is_cd);
|
||||
} else {
|
||||
// No command substitution, so we can highlight the target file or fd. For example,
|
||||
// disallow redirections into a non-existent directory.
|
||||
bool target_is_valid = true;
|
||||
this->visit(v.redirection());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!redirect || !redirect->is_valid()) {
|
||||
// not a valid redirection
|
||||
target_is_valid = false;
|
||||
} else if (!this->io_ok) {
|
||||
// I/O is disallowed, so we don't have much hope of catching anything but gross
|
||||
// errors. Assume it's valid.
|
||||
target_is_valid = true;
|
||||
} else if (!expand_one(target, expand_flag::skip_cmdsubst, ctx)) {
|
||||
// Could not be expanded.
|
||||
target_is_valid = false;
|
||||
} else {
|
||||
// Ok, we successfully expanded our target. Now verify that it works with this
|
||||
// redirection. We will probably need it as a path (but not in the case of fd
|
||||
// redirections). Note that the target is now unescaped.
|
||||
const wcstring target_path =
|
||||
path_apply_working_directory(target, this->working_directory);
|
||||
switch (redirect->mode) {
|
||||
case redirection_mode_t::fd: {
|
||||
if (target == L"-") {
|
||||
target_is_valid = true;
|
||||
} else {
|
||||
int fd = fish_wcstoi(target.c_str());
|
||||
target_is_valid = !errno && fd >= 0;
|
||||
}
|
||||
break;
|
||||
void highlighter_t::visit(const ast::redirection_t &redir) {
|
||||
maybe_t<pipe_or_redir_t> oper =
|
||||
pipe_or_redir_t::from_string(redir.oper.source(this->buff)); // like 2>
|
||||
wcstring target = redir.target.source(this->buff); // like &1 or file path
|
||||
|
||||
assert(oper.has_value() &&
|
||||
"Should have successfully parsed a pipe_or_redir_t since it was in our ast");
|
||||
|
||||
// Color the > part.
|
||||
// It may have parsed successfully yet still be invalid (e.g. 9999999999999>&1)
|
||||
// If so, color the whole thing invalid and stop.
|
||||
if (!oper->is_valid()) {
|
||||
this->color_node(redir, highlight_role_t::error);
|
||||
return;
|
||||
}
|
||||
|
||||
// Color the operator part like 2>.
|
||||
this->color_node(redir.oper, highlight_role_t::redirection);
|
||||
|
||||
// Color the target part.
|
||||
// Check if the argument contains a command substitution. If so, highlight it as a param
|
||||
// even though it's a command redirection, and don't try to do any other validation.
|
||||
if (parse_util_locate_cmdsubst(target.c_str(), nullptr, nullptr, true) != 0) {
|
||||
this->color_as_argument(redir.target);
|
||||
} else {
|
||||
// No command substitution, so we can highlight the target file or fd. For example,
|
||||
// disallow redirections into a non-existent directory.
|
||||
bool target_is_valid = true;
|
||||
if (!this->io_ok) {
|
||||
// I/O is disallowed, so we don't have much hope of catching anything but gross
|
||||
// errors. Assume it's valid.
|
||||
target_is_valid = true;
|
||||
} else if (!expand_one(target, expand_flag::skip_cmdsubst, ctx)) {
|
||||
// Could not be expanded.
|
||||
target_is_valid = false;
|
||||
} else {
|
||||
// Ok, we successfully expanded our target. Now verify that it works with this
|
||||
// redirection. We will probably need it as a path (but not in the case of fd
|
||||
// redirections). Note that the target is now unescaped.
|
||||
const wcstring target_path =
|
||||
path_apply_working_directory(target, this->working_directory);
|
||||
switch (oper->mode) {
|
||||
case redirection_mode_t::fd: {
|
||||
if (target == L"-") {
|
||||
target_is_valid = true;
|
||||
} else {
|
||||
int fd = fish_wcstoi(target.c_str());
|
||||
target_is_valid = !errno && fd >= 0;
|
||||
}
|
||||
case redirection_mode_t::input: {
|
||||
// Input redirections must have a readable non-directory.
|
||||
struct stat buf = {};
|
||||
target_is_valid = !waccess(target_path, R_OK) &&
|
||||
!wstat(target_path, &buf) && !S_ISDIR(buf.st_mode);
|
||||
break;
|
||||
break;
|
||||
}
|
||||
case redirection_mode_t::input: {
|
||||
// Input redirections must have a readable non-directory.
|
||||
struct stat buf = {};
|
||||
target_is_valid = !waccess(target_path, R_OK) && !wstat(target_path, &buf) &&
|
||||
!S_ISDIR(buf.st_mode);
|
||||
break;
|
||||
}
|
||||
case redirection_mode_t::overwrite:
|
||||
case redirection_mode_t::append:
|
||||
case redirection_mode_t::noclob: {
|
||||
// Test whether the file exists, and whether it's writable (possibly after
|
||||
// creating it). access() returns failure if the file does not exist.
|
||||
bool file_exists = false, file_is_writable = false;
|
||||
int err = 0;
|
||||
|
||||
struct stat buf = {};
|
||||
if (wstat(target_path, &buf) < 0) {
|
||||
err = errno;
|
||||
}
|
||||
case redirection_mode_t::overwrite:
|
||||
case redirection_mode_t::append:
|
||||
case redirection_mode_t::noclob: {
|
||||
// Test whether the file exists, and whether it's writable (possibly after
|
||||
// creating it). access() returns failure if the file does not exist.
|
||||
bool file_exists = false, file_is_writable = false;
|
||||
int err = 0;
|
||||
|
||||
struct stat buf = {};
|
||||
if (wstat(target_path, &buf) < 0) {
|
||||
err = errno;
|
||||
}
|
||||
if (string_suffixes_string(L"/", target)) {
|
||||
// Redirections to things that are directories is definitely not
|
||||
// allowed.
|
||||
file_exists = false;
|
||||
file_is_writable = false;
|
||||
} else if (err == 0) {
|
||||
// No err. We can write to it if it's not a directory and we have
|
||||
// permission.
|
||||
file_exists = true;
|
||||
file_is_writable = !S_ISDIR(buf.st_mode) && !waccess(target_path, W_OK);
|
||||
} else if (err == ENOENT) {
|
||||
// File does not exist. Check if its parent directory is writable.
|
||||
wcstring parent = wdirname(target_path);
|
||||
|
||||
if (string_suffixes_string(L"/", target)) {
|
||||
// Redirections to things that are directories is definitely not
|
||||
// allowed.
|
||||
file_exists = false;
|
||||
file_is_writable = false;
|
||||
} else if (err == 0) {
|
||||
// No err. We can write to it if it's not a directory and we have
|
||||
// permission.
|
||||
file_exists = true;
|
||||
file_is_writable = !S_ISDIR(buf.st_mode) && !waccess(target_path, W_OK);
|
||||
} else if (err == ENOENT) {
|
||||
// File does not exist. Check if its parent directory is writable.
|
||||
wcstring parent = wdirname(target_path);
|
||||
// Ensure that the parent ends with the path separator. This will ensure
|
||||
// that we get an error if the parent directory is not really a
|
||||
// directory.
|
||||
if (!string_suffixes_string(L"/", parent)) parent.push_back(L'/');
|
||||
|
||||
// Ensure that the parent ends with the path separator. This will ensure
|
||||
// that we get an error if the parent directory is not really a
|
||||
// directory.
|
||||
if (!string_suffixes_string(L"/", parent)) parent.push_back(L'/');
|
||||
|
||||
// Now the file is considered writable if the parent directory is
|
||||
// writable.
|
||||
file_exists = false;
|
||||
file_is_writable = (0 == waccess(parent, W_OK));
|
||||
} else {
|
||||
// Other errors we treat as not writable. This includes things like
|
||||
// ENOTDIR.
|
||||
file_exists = false;
|
||||
file_is_writable = false;
|
||||
}
|
||||
|
||||
// NOCLOB means that we must not overwrite files that exist.
|
||||
target_is_valid =
|
||||
file_is_writable &&
|
||||
!(file_exists && redirect->mode == redirection_mode_t::noclob);
|
||||
break;
|
||||
// Now the file is considered writable if the parent directory is
|
||||
// writable.
|
||||
file_exists = false;
|
||||
file_is_writable = (0 == waccess(parent, W_OK));
|
||||
} else {
|
||||
// Other errors we treat as not writable. This includes things like
|
||||
// ENOTDIR.
|
||||
file_exists = false;
|
||||
file_is_writable = false;
|
||||
}
|
||||
|
||||
// NOCLOB means that we must not overwrite files that exist.
|
||||
target_is_valid =
|
||||
file_is_writable &&
|
||||
!(file_exists && oper->mode == redirection_mode_t::noclob);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (redir_target) {
|
||||
auto hl = target_is_valid ? highlight_role_t::redirection : highlight_role_t::error;
|
||||
this->color_node(redir_target, hl);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Color all of the redirections of the given command.
|
||||
void highlighter_t::color_redirections(tnode_t<g::arguments_or_redirections_list> list) {
|
||||
for (const auto &node : list.descendants<g::redirection>()) {
|
||||
this->color_redirection(node);
|
||||
}
|
||||
}
|
||||
|
||||
/// Color all the children of the command with the given type.
|
||||
void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type,
|
||||
highlight_spec_t color) {
|
||||
for (node_offset_t idx = 0; idx < parent.child_count; idx++) {
|
||||
const parse_node_t *child = this->parse_tree.get_child(parent, idx);
|
||||
if (child != nullptr && child->type == type) {
|
||||
this->color_node(*child, color);
|
||||
}
|
||||
this->color_node(redir.target,
|
||||
target_is_valid ? highlight_role_t::redirection : highlight_role_t::error);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1145,171 +1240,42 @@ highlighter_t::color_array_t highlighter_t::highlight() {
|
||||
ASSERT_IS_BACKGROUND_THREAD();
|
||||
}
|
||||
|
||||
const size_t length = buff.size();
|
||||
assert(this->buff.size() == this->color_array.size());
|
||||
if (length == 0) return color_array;
|
||||
|
||||
// Start out at zero.
|
||||
this->color_array.resize(this->buff.size());
|
||||
std::fill(this->color_array.begin(), this->color_array.end(), highlight_spec_t{});
|
||||
|
||||
// Walk the node tree.
|
||||
for (const parse_node_t &node : parse_tree) {
|
||||
if (ctx.check_cancel()) return std::move(color_array);
|
||||
switch (node.type) {
|
||||
// Color direct string descendants, e.g. 'for' and 'in'.
|
||||
case symbol_while_header:
|
||||
case symbol_begin_header:
|
||||
case symbol_function_header:
|
||||
case symbol_if_clause:
|
||||
case symbol_else_clause:
|
||||
case symbol_case_item:
|
||||
case symbol_decorated_statement:
|
||||
case symbol_if_statement: {
|
||||
this->color_children(node, parse_token_type_string, highlight_role_t::command);
|
||||
break;
|
||||
}
|
||||
case symbol_switch_statement: {
|
||||
tnode_t<g::switch_statement> switchn(&parse_tree, &node);
|
||||
auto literal_switch = switchn.child<0>();
|
||||
auto switch_arg = switchn.child<1>();
|
||||
this->color_node(literal_switch, highlight_role_t::command);
|
||||
this->color_node(switch_arg, highlight_role_t::param);
|
||||
break;
|
||||
}
|
||||
case symbol_for_header: {
|
||||
tnode_t<g::for_header> fhead(&parse_tree, &node);
|
||||
// Color the 'for' and 'in' as commands.
|
||||
auto literal_for = fhead.child<0>();
|
||||
auto literal_in = fhead.child<2>();
|
||||
this->color_node(literal_for, highlight_role_t::command);
|
||||
this->color_node(literal_in, highlight_role_t::command);
|
||||
this->visit_children(*ast.top());
|
||||
if (ctx.check_cancel()) return std::move(color_array);
|
||||
|
||||
// Color the variable name as a parameter.
|
||||
this->color_argument(fhead.child<1>());
|
||||
break;
|
||||
}
|
||||
|
||||
case parse_token_type_andand:
|
||||
case parse_token_type_oror:
|
||||
this->color_node(node, highlight_role_t::operat);
|
||||
break;
|
||||
|
||||
case symbol_not_statement:
|
||||
this->color_children(node, parse_token_type_string, highlight_role_t::operat);
|
||||
break;
|
||||
|
||||
case symbol_job_decorator:
|
||||
this->color_node(node, highlight_role_t::operat);
|
||||
break;
|
||||
|
||||
case symbol_variable_assignment: {
|
||||
tnode_t<g::variable_assignment> variable_assignment = {&parse_tree, &node};
|
||||
this->color_argument(variable_assignment.child<0>());
|
||||
break;
|
||||
}
|
||||
|
||||
case parse_token_type_pipe:
|
||||
case parse_token_type_background:
|
||||
case parse_token_type_end:
|
||||
case symbol_optional_background: {
|
||||
this->color_node(node, highlight_role_t::statement_terminator);
|
||||
break;
|
||||
}
|
||||
case symbol_optional_time: {
|
||||
this->color_node(node, highlight_role_t::operat);
|
||||
break;
|
||||
}
|
||||
case symbol_plain_statement: {
|
||||
tnode_t<g::plain_statement> stmt(&parse_tree, &node);
|
||||
// Get the decoration from the parent.
|
||||
enum parse_statement_decoration_t decoration = get_decoration(stmt);
|
||||
|
||||
// Color the command.
|
||||
tnode_t<g::tok_string> cmd_node = stmt.child<0>();
|
||||
maybe_t<wcstring> cmd = cmd_node.get_source(buff);
|
||||
if (!cmd) {
|
||||
break; // not much as we can do without a node that has source text
|
||||
}
|
||||
|
||||
bool is_valid_cmd = false;
|
||||
if (!this->io_ok) {
|
||||
// We cannot check if the command is invalid, so just assume it's valid.
|
||||
is_valid_cmd = true;
|
||||
} else if (variable_assignment_equals_pos(*cmd)) {
|
||||
is_valid_cmd = true;
|
||||
} else {
|
||||
wcstring expanded_cmd;
|
||||
// Check to see if the command is valid.
|
||||
// Try expanding it. If we cannot, it's an error.
|
||||
bool expanded =
|
||||
plain_statement_get_expanded_command(buff, stmt, ctx, &expanded_cmd);
|
||||
if (expanded && !has_expand_reserved(expanded_cmd)) {
|
||||
is_valid_cmd =
|
||||
command_is_valid(expanded_cmd, decoration, working_directory, ctx.vars);
|
||||
}
|
||||
}
|
||||
if (!is_valid_cmd) {
|
||||
this->color_node(*cmd_node, highlight_role_t::error);
|
||||
} else {
|
||||
this->color_command(cmd_node);
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Only work on root lists, so that we don't re-color child lists.
|
||||
case symbol_arguments_or_redirections_list: {
|
||||
tnode_t<g::arguments_or_redirections_list> list(&parse_tree, &node);
|
||||
if (argument_list_is_root(list)) {
|
||||
bool cmd_is_cd = is_cd(list.try_get_parent<g::plain_statement>());
|
||||
this->color_arguments(list.descendants<g::argument>(), cmd_is_cd);
|
||||
this->color_redirections(list);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case symbol_argument_list: {
|
||||
tnode_t<g::argument_list> list(&parse_tree, &node);
|
||||
if (argument_list_is_root(list)) {
|
||||
this->color_arguments(list.descendants<g::argument>());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case symbol_end_command: {
|
||||
this->color_node(node, highlight_role_t::command);
|
||||
break;
|
||||
}
|
||||
case parse_special_type_parse_error:
|
||||
case parse_special_type_tokenizer_error: {
|
||||
this->color_node(node, highlight_role_t::error);
|
||||
break;
|
||||
}
|
||||
case parse_special_type_comment: {
|
||||
this->color_node(node, highlight_role_t::comment);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Color every comment.
|
||||
const auto &extras = ast.extras();
|
||||
for (const source_range_t &r : extras.comments) {
|
||||
this->color_range(r, highlight_role_t::comment);
|
||||
}
|
||||
|
||||
if (!this->io_ok || this->cursor_pos > this->buff.size()) {
|
||||
return std::move(color_array);
|
||||
// Color every extra semi.
|
||||
for (const source_range_t &r : extras.semis) {
|
||||
this->color_range(r, highlight_role_t::statement_terminator);
|
||||
}
|
||||
|
||||
// If the cursor is over an argument, and that argument is a valid path, underline it.
|
||||
for (const auto &node : parse_tree) {
|
||||
// Must be an argument with source.
|
||||
if (node.type != symbol_argument || !node.has_source()) continue;
|
||||
// Color every error range.
|
||||
for (const source_range_t &r : extras.errors) {
|
||||
this->color_range(r, highlight_role_t::error);
|
||||
}
|
||||
|
||||
if (ctx.check_cancel()) return std::move(color_array);
|
||||
|
||||
// Underline every valid path.
|
||||
if (node_is_potential_path(buff, node, ctx, working_directory)) {
|
||||
// It is, underline it.
|
||||
for (size_t i = node.source_start; i < node.source_start + node.source_length; i++) {
|
||||
// Underline every valid path.
|
||||
if (io_ok) {
|
||||
for (const ast::node_t &node : ast) {
|
||||
const ast::argument_t *arg = node.try_as<ast::argument_t>();
|
||||
if (!arg || arg->unsourced) continue;
|
||||
if (ctx.check_cancel()) break;
|
||||
if (range_is_potential_path(buff, arg->range, ctx, working_directory)) {
|
||||
// Don't color highlight_role_t::error because it looks dorky. For example,
|
||||
// trying to cd into a non-directory would show an underline and also red.
|
||||
if (this->color_array.at(i).foreground != highlight_role_t::error) {
|
||||
this->color_array.at(i).valid_path = true;
|
||||
for (size_t i = arg->range.start, end = arg->range.start + arg->range.length;
|
||||
i < end; i++) {
|
||||
if (this->color_array.at(i).foreground != highlight_role_t::error) {
|
||||
this->color_array.at(i).valid_path = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <type_traits>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "ast.h"
|
||||
#include "common.h"
|
||||
#include "env.h"
|
||||
#include "fallback.h" // IWYU pragma: keep
|
||||
@ -44,7 +45,6 @@
|
||||
#include "parser.h"
|
||||
#include "path.h"
|
||||
#include "reader.h"
|
||||
#include "tnode.h"
|
||||
#include "wcstringutil.h"
|
||||
#include "wildcard.h" // IWYU pragma: keep
|
||||
#include "wutil.h" // IWYU pragma: keep
|
||||
@ -1096,8 +1096,7 @@ void history_impl_t::populate_from_config_path() {
|
||||
static bool should_import_bash_history_line(const wcstring &line) {
|
||||
if (line.empty()) return false;
|
||||
|
||||
parse_node_tree_t parse_tree;
|
||||
if (!parse_tree_from_string(line, parse_flag_none, &parse_tree, nullptr)) return false;
|
||||
if (ast::ast_t::parse(line).errored()) return false;
|
||||
|
||||
// In doing this test do not allow incomplete strings. Hence the "false" argument.
|
||||
parse_error_list_t errors;
|
||||
@ -1274,38 +1273,33 @@ void history_t::add_pending_with_file_detection(const wcstring &str,
|
||||
|
||||
// Find all arguments that look like they could be file paths.
|
||||
bool needs_sync_write = false;
|
||||
parse_node_tree_t tree;
|
||||
parse_tree_from_string(str, parse_flag_none, &tree, nullptr);
|
||||
using namespace ast;
|
||||
auto ast = ast_t::parse(str);
|
||||
|
||||
path_list_t potential_paths;
|
||||
for (const parse_node_t &node : tree) {
|
||||
if (!node.has_source()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (node.type == symbol_argument) {
|
||||
wcstring potential_path = node.get_source(str);
|
||||
for (const node_t &node : ast) {
|
||||
if (const argument_t *arg = node.try_as<argument_t>()) {
|
||||
wcstring potential_path = arg->source(str);
|
||||
bool unescaped = unescape_string_in_place(&potential_path, UNESCAPE_DEFAULT);
|
||||
if (unescaped && string_could_be_path(potential_path)) {
|
||||
potential_paths.push_back(potential_path);
|
||||
}
|
||||
} else if (node.type == symbol_plain_statement) {
|
||||
} else if (const decorated_statement_t *stmt = node.try_as<decorated_statement_t>()) {
|
||||
// Hack hack hack - if the command is likely to trigger an exit, then don't do
|
||||
// background file detection, because we won't be able to write it to our history file
|
||||
// before we exit.
|
||||
// Also skip it for 'echo'. This is because echo doesn't take file paths, but also
|
||||
// because the history file test wants to find the commands in the history file
|
||||
// immediately after running them, so it can't tolerate the asynchronous file detection.
|
||||
if (get_decoration({&tree, &node}) == parse_statement_decoration_exec) {
|
||||
if (stmt->decoration() == parse_statement_decoration_exec) {
|
||||
needs_sync_write = true;
|
||||
}
|
||||
|
||||
if (maybe_t<wcstring> command = command_for_plain_statement({&tree, &node}, str)) {
|
||||
unescape_string_in_place(&*command, UNESCAPE_DEFAULT);
|
||||
if (*command == L"exit" || *command == L"reboot" || *command == L"restart" ||
|
||||
*command == L"echo") {
|
||||
needs_sync_write = true;
|
||||
}
|
||||
wcstring command = stmt->command.source(str);
|
||||
unescape_string_in_place(&command, UNESCAPE_DEFAULT);
|
||||
if (command == L"exit" || command == L"reboot" || command == L"restart" ||
|
||||
command == L"echo") {
|
||||
needs_sync_write = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6,54 +6,27 @@
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#define PARSE_ASSERT(a) assert(a)
|
||||
#define PARSER_DIE() \
|
||||
do { \
|
||||
FLOG(error, L"Parser dying!"); \
|
||||
exit_without_destructors(-1); \
|
||||
} while (0)
|
||||
|
||||
// A range of source code.
|
||||
struct source_range_t {
|
||||
uint32_t start;
|
||||
uint32_t length;
|
||||
|
||||
uint32_t end() const {
|
||||
assert(start + length >= start && "Overflow");
|
||||
return start + length;
|
||||
}
|
||||
};
|
||||
|
||||
// IMPORTANT: If the following enum table is modified you must also update token_enum_map below.
|
||||
enum parse_token_type_t : uint8_t {
|
||||
token_type_invalid = 1,
|
||||
// Non-terminal tokens
|
||||
symbol_job_list,
|
||||
symbol_job_conjunction,
|
||||
symbol_job_conjunction_continuation,
|
||||
symbol_job_decorator,
|
||||
symbol_job,
|
||||
symbol_job_continuation,
|
||||
symbol_statement,
|
||||
symbol_block_statement,
|
||||
symbol_block_header,
|
||||
symbol_for_header,
|
||||
symbol_while_header,
|
||||
symbol_begin_header,
|
||||
symbol_function_header,
|
||||
symbol_if_statement,
|
||||
symbol_if_clause,
|
||||
symbol_else_clause,
|
||||
symbol_else_continuation,
|
||||
symbol_switch_statement,
|
||||
symbol_case_item_list,
|
||||
symbol_case_item,
|
||||
symbol_not_statement,
|
||||
symbol_decorated_statement,
|
||||
symbol_plain_statement,
|
||||
symbol_variable_assignment,
|
||||
symbol_variable_assignments,
|
||||
symbol_arguments_or_redirections_list,
|
||||
symbol_andor_job_list,
|
||||
symbol_argument_list,
|
||||
// Freestanding argument lists are parsed from the argument list supplied to 'complete -a'.
|
||||
// They are not generated by parse trees rooted in symbol_job_list.
|
||||
symbol_freestanding_argument_list,
|
||||
symbol_argument,
|
||||
symbol_redirection,
|
||||
symbol_optional_background,
|
||||
symbol_optional_newlines,
|
||||
symbol_optional_time,
|
||||
symbol_end_command,
|
||||
|
||||
// Terminal types.
|
||||
parse_token_type_string,
|
||||
parse_token_type_pipe,
|
||||
@ -68,13 +41,6 @@ enum parse_token_type_t : uint8_t {
|
||||
parse_special_type_parse_error,
|
||||
parse_special_type_tokenizer_error,
|
||||
parse_special_type_comment,
|
||||
|
||||
LAST_TOKEN_TYPE = parse_special_type_comment,
|
||||
FIRST_TERMINAL_TYPE = parse_token_type_string,
|
||||
LAST_TERMINAL_TYPE = parse_token_type_terminate,
|
||||
LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
|
||||
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string,
|
||||
LAST_PARSE_TOKEN_TYPE = parse_token_type_end
|
||||
};
|
||||
|
||||
const enum_map<parse_token_type_t> token_enum_map[] = {
|
||||
@ -89,9 +55,6 @@ const enum_map<parse_token_type_t> token_enum_map[] = {
|
||||
{parse_token_type_andand, L"parse_token_type_andand"},
|
||||
{parse_token_type_oror, L"parse_token_type_oror"},
|
||||
{parse_token_type_terminate, L"parse_token_type_terminate"},
|
||||
// Define all symbols
|
||||
#define ELEM(sym) {symbol_##sym, L"symbol_" #sym},
|
||||
#include "parse_grammar_elements.inc"
|
||||
{token_type_invalid, L"token_type_invalid"},
|
||||
{token_type_invalid, nullptr}};
|
||||
#define token_enum_map_len (sizeof token_enum_map / sizeof *token_enum_map)
|
||||
@ -147,7 +110,7 @@ const enum_map<parse_keyword_t> keyword_enum_map[] = {{parse_keyword_t::kw_excla
|
||||
|
||||
// Node tag values.
|
||||
|
||||
// Statement decorations, stored in node tag.
|
||||
// Statement decorations.
|
||||
enum parse_statement_decoration_t {
|
||||
parse_statement_decoration_none,
|
||||
parse_statement_decoration_command,
|
||||
@ -155,19 +118,6 @@ enum parse_statement_decoration_t {
|
||||
parse_statement_decoration_exec,
|
||||
};
|
||||
|
||||
// Job decorations, stored in node tag.
|
||||
enum parse_job_decoration_t {
|
||||
parse_job_decoration_none,
|
||||
parse_job_decoration_and,
|
||||
parse_job_decoration_or,
|
||||
};
|
||||
|
||||
// Whether a statement is backgrounded.
|
||||
enum parse_optional_background_t { parse_no_background, parse_background };
|
||||
|
||||
// Whether a job is prefixed with "time".
|
||||
enum parse_optional_time_t { parse_optional_time_no_time, parse_optional_time_time };
|
||||
|
||||
// Parse error code list.
|
||||
enum parse_error_code_t {
|
||||
parse_error_none,
|
||||
@ -193,6 +143,26 @@ enum parse_error_code_t {
|
||||
parse_error_andor_in_pipeline, // "and" or "or" after a pipe
|
||||
};
|
||||
|
||||
enum {
|
||||
parse_flag_none = 0,
|
||||
|
||||
/// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
|
||||
/// disconnected trees. This is intended to be used by syntax highlighting.
|
||||
parse_flag_continue_after_error = 1 << 0,
|
||||
/// Include comment tokens.
|
||||
parse_flag_include_comments = 1 << 1,
|
||||
/// Indicate that the tokenizer should accept incomplete tokens */
|
||||
parse_flag_accept_incomplete_tokens = 1 << 2,
|
||||
/// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
|
||||
/// tree where some nodes may have no productions.
|
||||
parse_flag_leave_unterminated = 1 << 3,
|
||||
/// Indicate that the parser should generate job_list entries for blank lines.
|
||||
parse_flag_show_blank_lines = 1 << 4,
|
||||
/// Indicate that extra semis should be generated.
|
||||
parse_flag_show_extra_semis = 1 << 5,
|
||||
};
|
||||
typedef unsigned int parse_tree_flags_t;
|
||||
|
||||
enum { PARSER_TEST_ERROR = 1, PARSER_TEST_INCOMPLETE = 2 };
|
||||
typedef unsigned int parser_test_error_bits_t;
|
||||
|
||||
@ -214,6 +184,9 @@ struct parse_error_t {
|
||||
};
|
||||
typedef std::vector<parse_error_t> parse_error_list_t;
|
||||
|
||||
wcstring token_type_user_presentable_description(parse_token_type_t type,
|
||||
parse_keyword_t keyword = parse_keyword_t::none);
|
||||
|
||||
// Special source_start value that means unknown.
|
||||
#define SOURCE_LOCATION_UNKNOWN (static_cast<size_t>(-1))
|
||||
|
||||
@ -221,6 +194,13 @@ typedef std::vector<parse_error_t> parse_error_list_t;
|
||||
/// errors in a substring of a larger source buffer.
|
||||
void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt);
|
||||
|
||||
// The location of a pipeline.
|
||||
enum class pipeline_position_t {
|
||||
none, // not part of a pipeline
|
||||
first, // first command in a pipeline
|
||||
subsequent // second or further command in a pipeline
|
||||
};
|
||||
|
||||
/// Maximum number of function calls.
|
||||
#define FISH_MAX_STACK_DEPTH 128
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,9 +1,10 @@
|
||||
// Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.).
|
||||
// Provides the "linkage" between an ast and actual execution structures (job_t, etc.).
|
||||
#ifndef FISH_PARSE_EXECUTION_H
|
||||
#define FISH_PARSE_EXECUTION_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "ast.h"
|
||||
#include "common.h"
|
||||
#include "io.h"
|
||||
#include "parse_constants.h"
|
||||
@ -38,7 +39,7 @@ class parse_execution_context_t {
|
||||
const operation_context_t &ctx;
|
||||
|
||||
// The currently executing job node, used to indicate the line number.
|
||||
tnode_t<grammar::job> executing_job_node{};
|
||||
const ast::job_t *executing_job_node{};
|
||||
|
||||
// Cached line number information.
|
||||
size_t cached_lineno_offset = 0;
|
||||
@ -59,88 +60,91 @@ class parse_execution_context_t {
|
||||
|
||||
// Report an error, setting $status to \p status. Always returns
|
||||
// 'end_execution_reason_t::error'.
|
||||
end_execution_reason_t report_error(int status, const parse_node_t &node, const wchar_t *fmt,
|
||||
end_execution_reason_t report_error(int status, const ast::node_t &node, const wchar_t *fmt,
|
||||
...) const;
|
||||
end_execution_reason_t report_errors(int status, const parse_error_list_t &error_list) const;
|
||||
|
||||
/// Command not found support.
|
||||
end_execution_reason_t handle_command_not_found(const wcstring &cmd,
|
||||
tnode_t<grammar::plain_statement> statement,
|
||||
const ast::decorated_statement_t &statement,
|
||||
int err_code);
|
||||
|
||||
// Utilities
|
||||
wcstring get_source(const parse_node_t &node) const;
|
||||
tnode_t<grammar::plain_statement> infinite_recursive_statement_in_job_list(
|
||||
tnode_t<grammar::job_list> job_list, wcstring *out_func_name) const;
|
||||
wcstring get_source(const ast::node_t &node) const;
|
||||
const ast::decorated_statement_t *infinite_recursive_statement_in_job_list(
|
||||
const ast::job_list_t &job_list, wcstring *out_func_name) const;
|
||||
|
||||
// Expand a command which may contain variables, producing an expand command and possibly
|
||||
// arguments. Prints an error message on error.
|
||||
end_execution_reason_t expand_command(tnode_t<grammar::plain_statement> statement,
|
||||
end_execution_reason_t expand_command(const ast::decorated_statement_t &statement,
|
||||
wcstring *out_cmd, wcstring_list_t *out_args) const;
|
||||
|
||||
/// Return whether we should skip a job with the given bool statement type.
|
||||
bool should_skip(parse_job_decoration_t type) const;
|
||||
|
||||
/// Indicates whether a job is a simple block (one block, no redirections).
|
||||
bool job_is_simple_block(tnode_t<grammar::job> job) const;
|
||||
bool job_is_simple_block(const ast::job_t &job) const;
|
||||
|
||||
enum process_type_t process_type_for_command(tnode_t<grammar::plain_statement> statement,
|
||||
enum process_type_t process_type_for_command(const ast::decorated_statement_t &statement,
|
||||
const wcstring &cmd) const;
|
||||
end_execution_reason_t apply_variable_assignments(
|
||||
process_t *proc, tnode_t<grammar::variable_assignments> variable_assignments,
|
||||
process_t *proc, const ast::variable_assignment_list_t &variable_assignments,
|
||||
const block_t **block);
|
||||
|
||||
// These create process_t structures from statements.
|
||||
end_execution_reason_t populate_job_process(
|
||||
job_t *job, process_t *proc, tnode_t<grammar::statement> statement,
|
||||
tnode_t<grammar::variable_assignments> variable_assignments);
|
||||
job_t *job, process_t *proc, const ast::statement_t &statement,
|
||||
const ast::variable_assignment_list_t &variable_assignments_list_t);
|
||||
end_execution_reason_t populate_not_process(job_t *job, process_t *proc,
|
||||
tnode_t<grammar::not_statement> not_statement);
|
||||
const ast::not_statement_t ¬_statement);
|
||||
end_execution_reason_t populate_plain_process(job_t *job, process_t *proc,
|
||||
tnode_t<grammar::plain_statement> statement);
|
||||
const ast::decorated_statement_t &statement);
|
||||
|
||||
template <typename Type>
|
||||
end_execution_reason_t populate_block_process(job_t *job, process_t *proc,
|
||||
tnode_t<grammar::statement> statement,
|
||||
tnode_t<Type> specific_statement);
|
||||
const ast::statement_t &statement,
|
||||
const Type &specific_statement);
|
||||
|
||||
// These encapsulate the actual logic of various (block) statements.
|
||||
end_execution_reason_t run_block_statement(tnode_t<grammar::block_statement> statement,
|
||||
end_execution_reason_t run_block_statement(const ast::block_statement_t &statement,
|
||||
const block_t *associated_block);
|
||||
end_execution_reason_t run_for_statement(tnode_t<grammar::for_header> header,
|
||||
tnode_t<grammar::job_list> contents);
|
||||
end_execution_reason_t run_if_statement(tnode_t<grammar::if_statement> statement,
|
||||
end_execution_reason_t run_for_statement(const ast::for_header_t &header,
|
||||
const ast::job_list_t &contents);
|
||||
end_execution_reason_t run_if_statement(const ast::if_statement_t &statement,
|
||||
const block_t *associated_block);
|
||||
end_execution_reason_t run_switch_statement(tnode_t<grammar::switch_statement> statement);
|
||||
end_execution_reason_t run_while_statement(tnode_t<grammar::while_header> header,
|
||||
tnode_t<grammar::job_list> contents,
|
||||
end_execution_reason_t run_switch_statement(const ast::switch_statement_t &statement);
|
||||
end_execution_reason_t run_while_statement(const ast::while_header_t &header,
|
||||
const ast::job_list_t &contents,
|
||||
const block_t *associated_block);
|
||||
end_execution_reason_t run_function_statement(tnode_t<grammar::block_statement> statement,
|
||||
tnode_t<grammar::function_header> header);
|
||||
end_execution_reason_t run_begin_statement(tnode_t<grammar::job_list> contents);
|
||||
end_execution_reason_t run_function_statement(const ast::block_statement_t &statement,
|
||||
const ast::function_header_t &header);
|
||||
end_execution_reason_t run_begin_statement(const ast::job_list_t &contents);
|
||||
|
||||
enum globspec_t { failglob, nullglob };
|
||||
using argument_node_list_t = std::vector<tnode_t<grammar::argument>>;
|
||||
end_execution_reason_t expand_arguments_from_nodes(const argument_node_list_t &argument_nodes,
|
||||
using ast_args_list_t = std::vector<const ast::argument_t *>;
|
||||
|
||||
static ast_args_list_t get_argument_nodes(const ast::argument_list_t &args);
|
||||
static ast_args_list_t get_argument_nodes(const ast::argument_or_redirection_list_t &args);
|
||||
|
||||
end_execution_reason_t expand_arguments_from_nodes(const ast_args_list_t &argument_nodes,
|
||||
wcstring_list_t *out_arguments,
|
||||
globspec_t glob_behavior);
|
||||
|
||||
// Determines the list of redirections for a node.
|
||||
end_execution_reason_t determine_redirections(
|
||||
tnode_t<grammar::arguments_or_redirections_list> node,
|
||||
redirection_spec_list_t *out_redirections);
|
||||
end_execution_reason_t determine_redirections(const ast::argument_or_redirection_list_t &list,
|
||||
redirection_spec_list_t *out_redirections);
|
||||
|
||||
end_execution_reason_t run_1_job(tnode_t<grammar::job> job, const block_t *associated_block);
|
||||
end_execution_reason_t run_job_conjunction(tnode_t<grammar::job_conjunction> job_expr,
|
||||
end_execution_reason_t run_1_job(const ast::job_t &job, const block_t *associated_block);
|
||||
end_execution_reason_t test_and_run_1_job_conjunction(const ast::job_conjunction_t &jc,
|
||||
const block_t *associated_block);
|
||||
end_execution_reason_t run_job_conjunction(const ast::job_conjunction_t &job_expr,
|
||||
const block_t *associated_block);
|
||||
template <typename Type>
|
||||
end_execution_reason_t run_job_list(tnode_t<Type> job_list_node,
|
||||
end_execution_reason_t run_job_list(const ast::job_list_t &job_list_node,
|
||||
const block_t *associated_block);
|
||||
end_execution_reason_t populate_job_from_job_node(job_t *j, tnode_t<grammar::job> job_node,
|
||||
end_execution_reason_t run_job_list(const ast::andor_job_list_t &job_list_node,
|
||||
const block_t *associated_block);
|
||||
end_execution_reason_t populate_job_from_job_node(job_t *j, const ast::job_t &job_node,
|
||||
const block_t *associated_block);
|
||||
|
||||
// Returns the line number of the node. Not const since it touches cached_lineno_offset.
|
||||
int line_offset_of_node(tnode_t<grammar::job> node);
|
||||
int line_offset_of_node(const ast::job_t *node);
|
||||
int line_offset_of_character_at_offset(size_t offset);
|
||||
|
||||
public:
|
||||
@ -159,14 +163,14 @@ class parse_execution_context_t {
|
||||
/// Returns the source string.
|
||||
const wcstring &get_source() const { return pstree->src; }
|
||||
|
||||
/// Return the parse tree.
|
||||
const parse_node_tree_t &tree() const { return pstree->tree; }
|
||||
/// Return the parsed ast.
|
||||
const ast::ast_t &ast() const { return *pstree->ast; }
|
||||
|
||||
/// Start executing at the given node. Returns 0 if there was no error, 1 if there was an
|
||||
/// error.
|
||||
end_execution_reason_t eval_node(tnode_t<grammar::statement> statement,
|
||||
end_execution_reason_t eval_node(const ast::statement_t &statement,
|
||||
const block_t *associated_block);
|
||||
end_execution_reason_t eval_node(tnode_t<grammar::job_list> job_list,
|
||||
end_execution_reason_t eval_node(const ast::job_list_t &job_list,
|
||||
const block_t *associated_block);
|
||||
};
|
||||
|
||||
|
@ -1,401 +0,0 @@
|
||||
// Programmatic representation of fish grammar
|
||||
#ifndef FISH_PARSE_GRAMMAR_H
|
||||
#define FISH_PARSE_GRAMMAR_H
|
||||
|
||||
#include <array>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
#include "parse_constants.h"
|
||||
#include "tokenizer.h"
|
||||
|
||||
struct parse_token_t;
|
||||
typedef uint8_t parse_node_tag_t;
|
||||
|
||||
using parse_node_tag_t = uint8_t;
|
||||
struct parse_token_t;
|
||||
namespace grammar {
|
||||
|
||||
using production_element_t = uint8_t;
|
||||
|
||||
enum {
|
||||
// The maximum length of any seq production.
|
||||
MAX_PRODUCTION_LENGTH = 6
|
||||
};
|
||||
|
||||
// Define primitive types.
|
||||
template <enum parse_token_type_t Token>
|
||||
struct primitive {
|
||||
using type_tuple = std::tuple<>;
|
||||
static constexpr parse_token_type_t token = Token;
|
||||
static constexpr production_element_t element() { return Token; }
|
||||
};
|
||||
|
||||
using tok_end = primitive<parse_token_type_end>;
|
||||
using tok_string = primitive<parse_token_type_string>;
|
||||
using tok_pipe = primitive<parse_token_type_pipe>;
|
||||
using tok_background = primitive<parse_token_type_background>;
|
||||
using tok_redirection = primitive<parse_token_type_redirection>;
|
||||
using tok_andand = primitive<parse_token_type_andand>;
|
||||
using tok_oror = primitive<parse_token_type_oror>;
|
||||
|
||||
// Define keyword types.
|
||||
template <parse_keyword_t Keyword>
|
||||
struct keyword {
|
||||
using type_tuple = std::tuple<>;
|
||||
static constexpr parse_token_type_t token = parse_token_type_string;
|
||||
static constexpr production_element_t element() {
|
||||
// Convert a parse_keyword_t enum to a production_element_t enum.
|
||||
return static_cast<uint32_t>(Keyword) + LAST_TOKEN_OR_SYMBOL + 1;
|
||||
}
|
||||
};
|
||||
|
||||
// Define special types.
|
||||
// Comments are not emitted as part of productions, but specially by the parser.
|
||||
struct comment {
|
||||
using type_tuple = std::tuple<>;
|
||||
static constexpr parse_token_type_t token = parse_special_type_comment;
|
||||
};
|
||||
|
||||
// Forward declare all the symbol types.
|
||||
#define ELEM(T) struct T;
|
||||
#include "parse_grammar_elements.inc"
|
||||
|
||||
// A production is a sequence of production elements.
|
||||
// +1 to hold the terminating token_type_invalid
|
||||
template <size_t Count>
|
||||
using production_t = std::array<const production_element_t, Count + 1>;
|
||||
|
||||
// This is an ugly hack to avoid ODR violations
|
||||
// Given some type, return a pointer to its production.
|
||||
template <typename T>
|
||||
const production_element_t *production_for() {
|
||||
static constexpr auto prod = T::production;
|
||||
return prod.data();
|
||||
}
|
||||
|
||||
// Get some production element.
|
||||
template <typename T>
|
||||
constexpr production_element_t element() {
|
||||
return T::element();
|
||||
}
|
||||
|
||||
// Template goo.
|
||||
namespace detail {
|
||||
template <typename T, typename Tuple>
|
||||
struct tuple_contains;
|
||||
|
||||
template <typename T>
|
||||
struct tuple_contains<T, std::tuple<>> : std::false_type {};
|
||||
|
||||
template <typename T, typename U, typename... Ts>
|
||||
struct tuple_contains<T, std::tuple<U, Ts...>> : tuple_contains<T, std::tuple<Ts...>> {};
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
struct tuple_contains<T, std::tuple<T, Ts...>> : std::true_type {};
|
||||
|
||||
struct void_type {
|
||||
using type = void;
|
||||
};
|
||||
|
||||
// Support for checking whether the index N is valid for T::type_tuple.
|
||||
template <size_t N, typename T>
|
||||
static constexpr bool index_valid() {
|
||||
return N < std::tuple_size<typename T::type_tuple>::value;
|
||||
}
|
||||
|
||||
// Get the Nth type of T::type_tuple.
|
||||
template <size_t N, typename T>
|
||||
using tuple_element = std::tuple_element<N, typename T::type_tuple>;
|
||||
|
||||
// Get the Nth type of T::type_tuple, or void if N is out of bounds.
|
||||
template <size_t N, typename T>
|
||||
using tuple_element_or_void =
|
||||
typename std::conditional<index_valid<N, T>(), tuple_element<N, T>, void_type>::type::type;
|
||||
|
||||
// Make a tuple by mapping the Nth item of a list of 'seq's.
|
||||
template <size_t N, typename... Ts>
|
||||
struct tuple_nther {
|
||||
// A tuple of the Nth types of tuples (or voids).
|
||||
using type = std::tuple<tuple_element_or_void<N, Ts>...>;
|
||||
};
|
||||
|
||||
// Given a list of Options, each one a seq, check to see if any of them contain type Desired at
|
||||
// index Index.
|
||||
template <typename Desired, size_t Index, typename... Options>
|
||||
inline constexpr bool type_possible() {
|
||||
using nths = typename tuple_nther<Index, Options...>::type;
|
||||
return tuple_contains<Desired, nths>::value;
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
// Partial specialization hack.
|
||||
#define ELEM(T) \
|
||||
template <> \
|
||||
constexpr production_element_t element<T>() { \
|
||||
return symbol_##T; \
|
||||
}
|
||||
#include "parse_grammar_elements.inc"
|
||||
|
||||
// Empty produces nothing.
|
||||
struct empty {
|
||||
using type_tuple = std::tuple<>;
|
||||
static constexpr production_t<0> production = {{token_type_invalid}};
|
||||
static const production_element_t *resolve(const parse_token_t &, const parse_token_t &,
|
||||
parse_node_tag_t *) {
|
||||
return production_for<empty>();
|
||||
}
|
||||
};
|
||||
|
||||
// Sequence represents a list of (at least two) productions.
|
||||
template <class T0, class... Ts>
|
||||
struct seq {
|
||||
static constexpr production_t<1 + sizeof...(Ts)> production = {
|
||||
{element<T0>(), element<Ts>()..., token_type_invalid}};
|
||||
|
||||
static_assert(1 + sizeof...(Ts) <= MAX_PRODUCTION_LENGTH, "MAX_PRODUCTION_LENGTH too small");
|
||||
|
||||
using type_tuple = std::tuple<T0, Ts...>;
|
||||
|
||||
template <typename Desired, size_t Index>
|
||||
static constexpr bool type_possible() {
|
||||
using element_t = detail::tuple_element_or_void<Index, seq>;
|
||||
return std::is_same<Desired, element_t>::value;
|
||||
}
|
||||
|
||||
static const production_element_t *resolve(const parse_token_t &, const parse_token_t &,
|
||||
parse_node_tag_t *) {
|
||||
return production_for<seq>();
|
||||
}
|
||||
};
|
||||
|
||||
template <class... Args>
|
||||
using produces_sequence = seq<Args...>;
|
||||
|
||||
// Ergonomic way to create a production for a single element.
|
||||
template <class T>
|
||||
using single = seq<T>;
|
||||
|
||||
template <class T>
|
||||
using produces_single = single<T>;
|
||||
|
||||
// Alternative represents a choice.
|
||||
struct alternative {};
|
||||
|
||||
// Following are the grammar productions.
|
||||
#define BODY(T) static constexpr parse_token_type_t token = symbol_##T;
|
||||
|
||||
#define DEF(T) struct T : public
|
||||
|
||||
#define DEF_ALT(T) struct T : public alternative
|
||||
#define ALT_BODY(T, ...) \
|
||||
BODY(T) \
|
||||
using type_tuple = std::tuple<>; \
|
||||
template <typename Desired, size_t Index> \
|
||||
static constexpr bool type_possible() { \
|
||||
return detail::type_possible<Desired, Index, __VA_ARGS__>(); \
|
||||
} \
|
||||
static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, \
|
||||
parse_node_tag_t *)
|
||||
|
||||
// A job_list is a list of job_conjunctions, separated by semicolons or newlines
|
||||
DEF_ALT(job_list) {
|
||||
using normal = seq<job_decorator, job_conjunction, job_list>;
|
||||
using empty_line = seq<tok_end, job_list>;
|
||||
using empty = grammar::empty;
|
||||
ALT_BODY(job_list, normal, empty_line, empty);
|
||||
};
|
||||
|
||||
// Job decorators are 'and' and 'or'. These apply to the whole job.
|
||||
DEF_ALT(job_decorator) {
|
||||
using ands = single<keyword<parse_keyword_t::kw_and>>;
|
||||
using ors = single<keyword<parse_keyword_t::kw_or>>;
|
||||
using empty = grammar::empty;
|
||||
ALT_BODY(job_decorator, ands, ors, empty);
|
||||
};
|
||||
|
||||
// A job_conjunction is a job followed by a continuation.
|
||||
DEF(job_conjunction) produces_sequence<job, job_conjunction_continuation>{BODY(job_conjunction)};
|
||||
|
||||
DEF_ALT(job_conjunction_continuation) {
|
||||
using andands = seq<tok_andand, optional_newlines, job_conjunction>;
|
||||
using orors = seq<tok_oror, optional_newlines, job_conjunction>;
|
||||
using empty = grammar::empty;
|
||||
ALT_BODY(job_conjunction_continuation, andands, orors, empty);
|
||||
};
|
||||
|
||||
/// The time builtin.
|
||||
DEF_ALT(optional_time) {
|
||||
using empty = grammar::empty;
|
||||
using time = single<keyword<parse_keyword_t::kw_time>>;
|
||||
ALT_BODY(optional_time, empty, time);
|
||||
};
|
||||
|
||||
// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases
|
||||
// like if statements, where we require a command). To represent "non-empty", we require a
|
||||
// statement, followed by a possibly empty job_continuation, and then optionally a background
|
||||
// specifier '&'
|
||||
DEF(job)
|
||||
produces_sequence<optional_time, variable_assignments, statement, job_continuation,
|
||||
optional_background>{BODY(job)};
|
||||
|
||||
DEF_ALT(job_continuation) {
|
||||
using piped =
|
||||
seq<tok_pipe, optional_newlines, variable_assignments, statement, job_continuation>;
|
||||
using empty = grammar::empty;
|
||||
ALT_BODY(job_continuation, piped, empty);
|
||||
};
|
||||
|
||||
// A list of assignments like HOME=$PWD
|
||||
DEF_ALT(variable_assignments) {
|
||||
using empty = grammar::empty;
|
||||
using var = seq<variable_assignment, variable_assignments>;
|
||||
ALT_BODY(variable_assignments, empty, var);
|
||||
};
|
||||
// A string token like VAR=value
|
||||
DEF(variable_assignment) produces_single<tok_string>{BODY(variable_assignment)};
|
||||
|
||||
// A statement is a normal command, or an if / while / and etc
|
||||
DEF_ALT(statement) {
|
||||
using nots = single<not_statement>;
|
||||
using block = single<block_statement>;
|
||||
using ifs = single<if_statement>;
|
||||
using switchs = single<switch_statement>;
|
||||
using decorated = single<decorated_statement>;
|
||||
ALT_BODY(statement, nots, block, ifs, switchs, decorated);
|
||||
};
|
||||
|
||||
// A block is a conditional, loop, or begin/end
|
||||
DEF(if_statement)
|
||||
produces_sequence<if_clause, else_clause, end_command, arguments_or_redirections_list>{
|
||||
BODY(if_statement)};
|
||||
|
||||
DEF(if_clause)
|
||||
produces_sequence<keyword<parse_keyword_t::kw_if>, job_conjunction, tok_end, andor_job_list,
|
||||
job_list>{BODY(if_clause)};
|
||||
|
||||
DEF_ALT(else_clause) {
|
||||
using empty = grammar::empty;
|
||||
using else_cont = seq<keyword<parse_keyword_t::kw_else>, else_continuation>;
|
||||
ALT_BODY(else_clause, empty, else_cont);
|
||||
};
|
||||
|
||||
DEF_ALT(else_continuation) {
|
||||
using else_if = seq<if_clause, else_clause>;
|
||||
using else_only = seq<tok_end, job_list>;
|
||||
ALT_BODY(else_continuation, else_if, else_only);
|
||||
};
|
||||
|
||||
DEF(switch_statement)
|
||||
produces_sequence<keyword<parse_keyword_t::kw_switch>, argument, tok_end, case_item_list,
|
||||
end_command, arguments_or_redirections_list>{BODY(switch_statement)};
|
||||
|
||||
DEF_ALT(case_item_list) {
|
||||
using empty = grammar::empty;
|
||||
using case_items = seq<case_item, case_item_list>;
|
||||
using blank_line = seq<tok_end, case_item_list>;
|
||||
ALT_BODY(case_item_list, empty, case_items, blank_line);
|
||||
};
|
||||
|
||||
DEF(case_item)
|
||||
produces_sequence<keyword<parse_keyword_t::kw_case>, argument_list, tok_end, job_list>{
|
||||
BODY(case_item)};
|
||||
|
||||
DEF(block_statement)
|
||||
produces_sequence<block_header, job_list, end_command, arguments_or_redirections_list>{
|
||||
BODY(block_statement)};
|
||||
|
||||
DEF_ALT(block_header) {
|
||||
using forh = single<for_header>;
|
||||
using whileh = single<while_header>;
|
||||
using funch = single<function_header>;
|
||||
using beginh = single<begin_header>;
|
||||
ALT_BODY(block_header, forh, whileh, funch, beginh);
|
||||
};
|
||||
|
||||
DEF(for_header)
|
||||
produces_sequence<keyword<parse_keyword_t::kw_for>, tok_string, keyword<parse_keyword_t::kw_in>,
|
||||
argument_list, tok_end>{BODY(for_header)};
|
||||
|
||||
DEF(while_header)
|
||||
produces_sequence<keyword<parse_keyword_t::kw_while>, job_conjunction, tok_end, andor_job_list>{
|
||||
BODY(while_header)};
|
||||
|
||||
DEF(begin_header) produces_single<keyword<parse_keyword_t::kw_begin>>{BODY(begin_header)};
|
||||
|
||||
// Functions take arguments, and require at least one (the name). No redirections allowed.
|
||||
DEF(function_header)
|
||||
produces_sequence<keyword<parse_keyword_t::kw_function>, argument, argument_list, tok_end>{
|
||||
BODY(function_header)};
|
||||
|
||||
DEF_ALT(not_statement) {
|
||||
using nots =
|
||||
seq<keyword<parse_keyword_t::kw_not>, variable_assignments, optional_time, statement>;
|
||||
using exclams =
|
||||
seq<keyword<parse_keyword_t::kw_exclam>, variable_assignments, optional_time, statement>;
|
||||
ALT_BODY(not_statement, nots, exclams);
|
||||
};
|
||||
|
||||
// An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean
|
||||
// statement.
|
||||
DEF_ALT(andor_job_list) {
|
||||
using empty = grammar::empty;
|
||||
using andor_job = seq<job_decorator, job_conjunction, andor_job_list>;
|
||||
using empty_line = seq<tok_end, andor_job_list>;
|
||||
ALT_BODY(andor_job_list, empty, andor_job, empty_line);
|
||||
};
|
||||
|
||||
// A decorated_statement is a command with a list of arguments_or_redirections, possibly with
|
||||
// "builtin" or "command" or "exec"
|
||||
DEF_ALT(decorated_statement) {
|
||||
using plains = single<plain_statement>;
|
||||
using cmds = seq<keyword<parse_keyword_t::kw_command>, plain_statement>;
|
||||
using builtins = seq<keyword<parse_keyword_t::kw_builtin>, plain_statement>;
|
||||
using execs = seq<keyword<parse_keyword_t::kw_exec>, plain_statement>;
|
||||
ALT_BODY(decorated_statement, plains, cmds, builtins, execs);
|
||||
};
|
||||
|
||||
DEF(plain_statement)
|
||||
produces_sequence<tok_string, arguments_or_redirections_list>{BODY(plain_statement)};
|
||||
|
||||
DEF_ALT(argument_list) {
|
||||
using empty = grammar::empty;
|
||||
using arg = seq<argument, argument_list>;
|
||||
ALT_BODY(argument_list, empty, arg);
|
||||
};
|
||||
|
||||
DEF_ALT(arguments_or_redirections_list) {
|
||||
using empty = grammar::empty;
|
||||
using arg = seq<argument, arguments_or_redirections_list>;
|
||||
using redir = seq<redirection, arguments_or_redirections_list>;
|
||||
ALT_BODY(arguments_or_redirections_list, empty, arg, redir);
|
||||
};
|
||||
|
||||
DEF(argument) produces_single<tok_string>{BODY(argument)};
|
||||
DEF(redirection) produces_sequence<tok_redirection, tok_string>{BODY(redirection)};
|
||||
|
||||
DEF_ALT(optional_background) {
|
||||
using empty = grammar::empty;
|
||||
using background = single<tok_background>;
|
||||
ALT_BODY(optional_background, empty, background);
|
||||
};
|
||||
|
||||
DEF(end_command) produces_single<keyword<parse_keyword_t::kw_end>>{BODY(end_command)};
|
||||
|
||||
// Note optional_newlines only allows newline-style tok_end, not semicolons.
|
||||
DEF_ALT(optional_newlines) {
|
||||
using empty = grammar::empty;
|
||||
using newlines = seq<tok_end, optional_newlines>;
|
||||
ALT_BODY(optional_newlines, empty, newlines);
|
||||
};
|
||||
|
||||
// A freestanding_argument_list is equivalent to a normal argument list, except it may contain
|
||||
// TOK_END (newlines, and even semicolons, for historical reasons)
|
||||
DEF_ALT(freestanding_argument_list) {
|
||||
using empty = grammar::empty;
|
||||
using arg = seq<argument, freestanding_argument_list>;
|
||||
using semicolon = seq<tok_end, freestanding_argument_list>;
|
||||
ALT_BODY(freestanding_argument_list, empty, arg, semicolon);
|
||||
};
|
||||
} // namespace grammar
|
||||
#endif
|
@ -1,37 +0,0 @@
|
||||
// Define ELEM before including this file.
|
||||
ELEM(job_list)
|
||||
ELEM(job)
|
||||
ELEM(job_decorator)
|
||||
ELEM(job_conjunction)
|
||||
ELEM(job_conjunction_continuation)
|
||||
ELEM(job_continuation)
|
||||
ELEM(statement)
|
||||
ELEM(if_statement)
|
||||
ELEM(if_clause)
|
||||
ELEM(else_clause)
|
||||
ELEM(else_continuation)
|
||||
ELEM(switch_statement)
|
||||
ELEM(case_item_list)
|
||||
ELEM(case_item)
|
||||
ELEM(block_statement)
|
||||
ELEM(block_header)
|
||||
ELEM(for_header)
|
||||
ELEM(while_header)
|
||||
ELEM(begin_header)
|
||||
ELEM(function_header)
|
||||
ELEM(not_statement)
|
||||
ELEM(andor_job_list)
|
||||
ELEM(decorated_statement)
|
||||
ELEM(variable_assignment)
|
||||
ELEM(variable_assignments)
|
||||
ELEM(plain_statement)
|
||||
ELEM(argument_list)
|
||||
ELEM(arguments_or_redirections_list)
|
||||
ELEM(argument)
|
||||
ELEM(redirection)
|
||||
ELEM(optional_background)
|
||||
ELEM(optional_newlines)
|
||||
ELEM(optional_time)
|
||||
ELEM(end_command)
|
||||
ELEM(freestanding_argument_list)
|
||||
#undef ELEM
|
@ -1,466 +0,0 @@
|
||||
#include "config.h" // IWYU pragma: keep
|
||||
|
||||
#include "parse_productions.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "flog.h"
|
||||
#include "parse_constants.h"
|
||||
#include "parse_grammar.h"
|
||||
#include "parse_tree.h"
|
||||
|
||||
using namespace parse_productions;
|
||||
using namespace grammar;
|
||||
|
||||
#define NO_PRODUCTION nullptr
|
||||
|
||||
// Herein are encoded the productions for our LL2 fish grammar.
|
||||
//
|
||||
// Each symbol (e.g. symbol_job_list) has a corresponding function (e.g. resolve_job_list). The
|
||||
// function accepts two tokens, representing the first and second lookahead, and returns a
|
||||
// production representing the rule, or NULL on error. There is also a tag value which is returned
|
||||
// by reference; the tag is a sort of node annotation.
|
||||
//
|
||||
// Productions are generally a static const array, and we return a pointer to the array (yes,
|
||||
// really).
|
||||
|
||||
#define RESOLVE(SYM) \
|
||||
const production_element_t *SYM::resolve( \
|
||||
const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag)
|
||||
|
||||
/// A job_list is a list of jobs, separated by semicolons or newlines.
|
||||
RESOLVE(job_list) {
|
||||
UNUSED(token2);
|
||||
UNUSED(out_tag);
|
||||
|
||||
switch (token1.type) {
|
||||
case parse_token_type_string: {
|
||||
// Some keywords are special.
|
||||
switch (token1.keyword) {
|
||||
case parse_keyword_t::kw_end:
|
||||
case parse_keyword_t::kw_else:
|
||||
case parse_keyword_t::kw_case: {
|
||||
return production_for<empty>(); // end this job list
|
||||
}
|
||||
default: {
|
||||
return production_for<normal>(); // normal string
|
||||
}
|
||||
}
|
||||
}
|
||||
case parse_token_type_pipe:
|
||||
case parse_token_type_redirection:
|
||||
case parse_token_type_background: {
|
||||
return production_for<normal>();
|
||||
}
|
||||
case parse_token_type_end: {
|
||||
return production_for<empty_line>();
|
||||
}
|
||||
case parse_token_type_terminate: {
|
||||
return production_for<empty>(); // no more commands, just transition to empty
|
||||
}
|
||||
default: {
|
||||
return NO_PRODUCTION;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A job decorator is AND or OR
|
||||
RESOLVE(job_decorator) {
|
||||
// If it's followed by --help, it's not a decoration.
|
||||
if (token2.is_help_argument) {
|
||||
*out_tag = parse_job_decoration_none;
|
||||
return production_for<empty>();
|
||||
}
|
||||
|
||||
switch (token1.keyword) {
|
||||
case parse_keyword_t::kw_and: {
|
||||
*out_tag = parse_job_decoration_and;
|
||||
return production_for<ands>();
|
||||
}
|
||||
case parse_keyword_t::kw_or: {
|
||||
*out_tag = parse_job_decoration_or;
|
||||
return production_for<ors>();
|
||||
}
|
||||
default: {
|
||||
*out_tag = parse_job_decoration_none;
|
||||
return production_for<empty>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve a job_conjunction_continuation from the first lookahead token.
// An andand token selects the 'andands' production and an oror token selects 'orors'; in both
// cases the matching job decoration is stored through out_tag. Any other token selects the empty
// production and leaves *out_tag untouched.
RESOLVE(job_conjunction_continuation) {
    // Note out_tag is written below, so only token2 is genuinely unused here.
    UNUSED(token2);
    switch (token1.type) {
        case parse_token_type_andand:
            *out_tag = parse_job_decoration_and;
            return production_for<andands>();
        case parse_token_type_oror:
            *out_tag = parse_job_decoration_or;
            return production_for<orors>();
        default:
            return production_for<empty>();
    }
}
|
||||
|
||||
// Resolve a job_continuation: a pipe token continues the job, anything else ends it.
RESOLVE(job_continuation) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.type == parse_token_type_pipe) {
        return production_for<piped>();  // pipe, continuation
    }
    return production_for<empty>();  // not a pipe, no job continuation
}
|
||||
|
||||
// A statement is a normal command, or an if / while / and etc.
|
||||
RESOLVE(statement) {
|
||||
UNUSED(out_tag);
|
||||
|
||||
// The only block-like builtin that takes any parameters is 'function' So go to decorated
|
||||
// statements if the subsequent token looks like '--'. The logic here is subtle:
|
||||
//
|
||||
// If we are 'begin', then we expect to be invoked with no arguments.
|
||||
// If we are 'function', then we are a non-block if we are invoked with -h or --help
|
||||
// If we are anything else, we require an argument, so do the same thing if the subsequent token
|
||||
// is a statement terminator.
|
||||
if (token1.type == parse_token_type_string) {
|
||||
// If we are a function, then look for help arguments. Otherwise, if the next token looks
|
||||
// like an option (starts with a dash), then parse it as a decorated statement.
|
||||
if (token1.keyword == parse_keyword_t::kw_function && token2.is_help_argument) {
|
||||
return production_for<decorated>();
|
||||
} else if (token1.keyword != parse_keyword_t::kw_function && token2.has_dash_prefix) {
|
||||
return production_for<decorated>();
|
||||
}
|
||||
|
||||
// Likewise if the next token doesn't look like an argument at all. This corresponds to e.g.
|
||||
// a "naked if".
|
||||
bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_t::kw_begin &&
|
||||
token1.keyword != parse_keyword_t::kw_end);
|
||||
if (naked_invocation_invokes_help &&
|
||||
(token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) {
|
||||
return production_for<decorated>();
|
||||
}
|
||||
}
|
||||
|
||||
switch (token1.type) {
|
||||
case parse_token_type_string: {
|
||||
switch (token1.keyword) {
|
||||
case parse_keyword_t::kw_not:
|
||||
case parse_keyword_t::kw_exclam: {
|
||||
return production_for<nots>();
|
||||
}
|
||||
case parse_keyword_t::kw_for:
|
||||
case parse_keyword_t::kw_while:
|
||||
case parse_keyword_t::kw_function:
|
||||
case parse_keyword_t::kw_begin: {
|
||||
return production_for<block>();
|
||||
}
|
||||
case parse_keyword_t::kw_if: {
|
||||
return production_for<ifs>();
|
||||
}
|
||||
case parse_keyword_t::kw_else: {
|
||||
return NO_PRODUCTION;
|
||||
}
|
||||
case parse_keyword_t::kw_switch: {
|
||||
return production_for<switchs>();
|
||||
}
|
||||
case parse_keyword_t::kw_end: {
|
||||
return NO_PRODUCTION;
|
||||
}
|
||||
// All other keywords fall through to decorated statement.
|
||||
default: {
|
||||
return production_for<decorated>();
|
||||
}
|
||||
}
|
||||
}
|
||||
case parse_token_type_pipe:
|
||||
case parse_token_type_redirection:
|
||||
case parse_token_type_background:
|
||||
case parse_token_type_terminate: {
|
||||
return NO_PRODUCTION;
|
||||
}
|
||||
default: {
|
||||
return NO_PRODUCTION;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve an else_clause: the 'else' keyword starts an else continuation, otherwise the clause
// is empty.
RESOLVE(else_clause) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.keyword == parse_keyword_t::kw_else) {
        return production_for<else_cont>();
    }
    return production_for<empty>();
}
|
||||
|
||||
// Resolve an else_continuation: the 'if' keyword selects the else_if production, and anything
// else selects else_only.
RESOLVE(else_continuation) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.keyword == parse_keyword_t::kw_if) {
        return production_for<else_if>();
    }
    return production_for<else_only>();
}
|
||||
|
||||
// Resolve a case_item_list: the 'case' keyword starts another item, an end token is a blank
// line, and anything else finishes the list.
RESOLVE(case_item_list) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.keyword == parse_keyword_t::kw_case) {
        return production_for<case_items>();
    }
    if (token1.type == parse_token_type_end) {
        return production_for<blank_line>();
    }
    return production_for<empty>();
}
|
||||
|
||||
// Resolve a not_statement, which must begin with the 'not' or '!' keyword; any other keyword
// has no production.
RESOLVE(not_statement) {
    UNUSED(token2);
    UNUSED(out_tag);

    const auto kw = token1.keyword;
    if (kw == parse_keyword_t::kw_not) return production_for<nots>();
    if (kw == parse_keyword_t::kw_exclam) return production_for<exclams>();
    return NO_PRODUCTION;
}
|
||||
|
||||
// Resolve an andor_job_list using both lookahead tokens.
RESOLVE(andor_job_list) {
    UNUSED(out_tag);

    if (token1.type == parse_token_type_end) return production_for<empty_line>();

    const bool is_andor = (token1.keyword == parse_keyword_t::kw_and ||
                           token1.keyword == parse_keyword_t::kw_or);
    // Check that the argument to and/or is a string that's not help. Otherwise it's either
    // 'and --help' or a naked 'and', and not part of this list.
    const bool has_plain_argument =
        (token2.type == parse_token_type_string && !token2.is_help_argument);
    if (is_andor && has_plain_argument) return production_for<andor_job>();

    // All other cases end the list.
    return production_for<empty>();
}
|
||||
|
||||
// Resolve an argument_list: a string token is another argument, anything else ends the list.
RESOLVE(argument_list) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (token1.type == parse_token_type_string) {
        return production_for<arg>();
    }
    return production_for<empty>();
}
|
||||
|
||||
// Resolve a freestanding_argument_list: string tokens are arguments, end tokens are skipped via
// the 'semicolon' production, and anything else ends the list.
RESOLVE(freestanding_argument_list) {
    UNUSED(token2);
    UNUSED(out_tag);

    const auto type = token1.type;
    if (type == parse_token_type_string) return production_for<arg>();
    if (type == parse_token_type_end) return production_for<semicolon>();
    return production_for<empty>();
}
|
||||
|
||||
// Resolve a block_header from the keyword of the first token: for, while, function or begin.
// Any other keyword has no production.
RESOLVE(block_header) {
    UNUSED(token2);
    UNUSED(out_tag);

    const auto kw = token1.keyword;
    if (kw == parse_keyword_t::kw_for) return production_for<forh>();
    if (kw == parse_keyword_t::kw_while) return production_for<whileh>();
    if (kw == parse_keyword_t::kw_function) return production_for<funch>();
    if (kw == parse_keyword_t::kw_begin) return production_for<beginh>();
    return NO_PRODUCTION;
}
|
||||
|
||||
// Resolve variable_assignments: continue the list while the first token may be a variable
// assignment, otherwise end it.
RESOLVE(variable_assignments) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (!token1.may_be_variable_assignment) {
        return production_for<empty>();
    }
    // Only string tokens are expected to carry the may_be_variable_assignment flag.
    assert(token1.type == parse_token_type_string);
    return production_for<var>();
}
|
||||
|
||||
// Resolve a decorated_statement: a plain statement, optionally prefixed by 'command', 'builtin'
// or 'exec'. The decoration is reported via out_tag, except on the two early-return paths below,
// which leave *out_tag untouched.
RESOLVE(decorated_statement) {
    const auto kw = token1.keyword;

    // and/or are typically parsed in job_conjunction at the beginning of a job.
    // However they may be reached here through e.g. true && and false.
    // Refuse to parse them as a command except for --help. See #6089.
    const bool is_andor = (kw == parse_keyword_t::kw_and || kw == parse_keyword_t::kw_or);
    if (is_andor && !token2.is_help_argument) {
        return NO_PRODUCTION;
    }

    // If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the
    // second token is not a string, then this is a naked 'command' and we should execute it as
    // undecorated.
    const bool next_is_plain_string =
        (token2.type == parse_token_type_string && !token2.has_dash_prefix);
    if (!next_is_plain_string) {
        return production_for<plains>();
    }

    if (kw == parse_keyword_t::kw_command) {
        *out_tag = parse_statement_decoration_command;
        return production_for<cmds>();
    }
    if (kw == parse_keyword_t::kw_builtin) {
        *out_tag = parse_statement_decoration_builtin;
        return production_for<builtins>();
    }
    if (kw == parse_keyword_t::kw_exec) {
        *out_tag = parse_statement_decoration_exec;
        return production_for<execs>();
    }

    *out_tag = parse_statement_decoration_none;
    return production_for<plains>();
}
|
||||
|
||||
// Resolve an arguments_or_redirections_list: a string token is an argument, a redirection token
// is a redirection, and anything else ends the list.
RESOLVE(arguments_or_redirections_list) {
    UNUSED(token2);
    UNUSED(out_tag);

    const auto type = token1.type;
    if (type == parse_token_type_string) return production_for<arg>();
    if (type == parse_token_type_redirection) return production_for<redir>();
    return production_for<empty>();
}
|
||||
|
||||
// Resolve optional_newlines: consume the first token only if it is flagged as a newline.
RESOLVE(optional_newlines) {
    UNUSED(token2);
    UNUSED(out_tag);

    if (!token1.is_newline) {
        return production_for<empty>();
    }
    return production_for<newlines>();
}
|
||||
|
||||
// Resolve optional_background: a background token selects the 'background' production, anything
// else the empty one. out_tag always records which of the two was chosen.
RESOLVE(optional_background) {
    UNUSED(token2);

    if (token1.type == parse_token_type_background) {
        *out_tag = parse_background;
        return production_for<background>();
    }
    *out_tag = parse_no_background;
    return production_for<empty>();
}
|
||||
|
||||
// Resolve optional_time: the 'time' keyword selects the 'time' production unless it is followed
// by a help argument. out_tag always records which choice was made.
RESOLVE(optional_time) {
    const bool is_time_prefix =
        (token1.keyword == parse_keyword_t::kw_time && !token2.is_help_argument);
    if (!is_time_prefix) {
        *out_tag = parse_optional_time_no_time;
        return production_for<empty>();
    }
    *out_tag = parse_optional_time_time;
    return production_for<time>();
}
|
||||
|
||||
/// Fetch the production for the grammar symbol \p node_type given two tokens of lookahead.
/// \param node_type the symbol to resolve; terminal tokens, special types and
///        token_type_invalid are logged as errors and passed to PARSER_DIE().
/// \param input1 the first lookahead token.
/// \param input2 the second lookahead token.
/// \param out_tag forwarded to the symbol's resolve function, which may store a node tag in it.
/// \return the production chosen by the symbol's resolve function, or nullptr if that function
///         found no production for the input (this is logged).
const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type,
                                                                    const parse_token_t &input1,
                                                                    const parse_token_t &input2,
                                                                    parse_node_tag_t *out_tag) {
    // this is **extremely** chatty
    FLOGF(parse_productions_chatty, L"Resolving production for %ls with input token <%ls>",
          token_type_description(node_type), input1.describe().c_str());

    // Signature shared by every SYM::resolve function.
    using resolver_t = const production_element_t *(*)(const parse_token_t &,
                                                       const parse_token_t &, parse_node_tag_t *);

    // Fetch the function to resolve the list of productions.
    resolver_t resolver = nullptr;
    switch (node_type) {
        // Handle all of our grammar elements: the included file expands ELEM once per symbol,
        // producing one case label for each.
#define ELEM(SYM)                \
    case (symbol_##SYM):         \
        resolver = SYM::resolve; \
        break;
#include "parse_grammar_elements.inc"

        // Everything else is an error.
        case parse_token_type_string:
        case parse_token_type_pipe:
        case parse_token_type_redirection:
        case parse_token_type_background:
        case parse_token_type_andand:
        case parse_token_type_oror:
        case parse_token_type_end:
        case parse_token_type_terminate: {
            FLOGF(error, L"Terminal token type %ls passed to %s", token_type_description(node_type),
                  __FUNCTION__);
            PARSER_DIE();
            break;
        }
        case parse_special_type_parse_error:
        case parse_special_type_tokenizer_error:
        case parse_special_type_comment: {
            // No trailing newline in the format, matching the other FLOGF calls here.
            FLOGF(error, L"Special type %ls passed to %s", token_type_description(node_type),
                  __FUNCTION__);
            PARSER_DIE();
            break;
        }
        case token_type_invalid: {
            FLOGF(error, L"token_type_invalid passed to %s", __FUNCTION__);
            PARSER_DIE();
            break;
        }
    }
    PARSE_ASSERT(resolver != nullptr);

    const production_element_t *result = resolver(input1, input2, out_tag);
    if (result == nullptr) {
        FLOGF(parse_productions, L"Node type '%ls' has no production for input '%ls' (in %s)",
              token_type_description(node_type), input1.describe().c_str(), __FUNCTION__);
    }

    return result;
}
|
@ -1,49 +0,0 @@
|
||||
// Programmatic representation of fish code.
|
||||
#ifndef FISH_PARSE_TREE_CONSTRUCTION_H
|
||||
#define FISH_PARSE_TREE_CONSTRUCTION_H
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "parse_constants.h"
|
||||
|
||||
struct parse_token_t;
|
||||
|
||||
namespace parse_productions {
|
||||
|
||||
// A production is an array of unsigned char. Symbols are encoded directly as their symbol value.
|
||||
// Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together
|
||||
// keywords and symbols.
|
||||
typedef uint8_t production_element_t;
|
||||
|
||||
/// Resolve the type from a production element.
|
||||
inline parse_token_type_t production_element_type(production_element_t elem) {
|
||||
if (elem > LAST_TOKEN_OR_SYMBOL) {
|
||||
return parse_token_type_string;
|
||||
} else {
|
||||
return static_cast<parse_token_type_t>(elem);
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve the keyword from a production element.
|
||||
inline parse_keyword_t production_element_keyword(production_element_t elem) {
|
||||
if (elem > LAST_TOKEN_OR_SYMBOL) {
|
||||
// First keyword is LAST_TOKEN_OR_SYMBOL + 1.
|
||||
return static_cast<parse_keyword_t>(elem - LAST_TOKEN_OR_SYMBOL - 1);
|
||||
} else {
|
||||
return parse_keyword_t::none;
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if an element is valid.
|
||||
inline bool production_element_is_valid(production_element_t elem) {
|
||||
return elem != token_type_invalid;
|
||||
}
|
||||
|
||||
/// Fetch a production. We are passed two input tokens. The first input token is guaranteed to not
|
||||
/// be invalid; the second token may be invalid if there's no more tokens. We may also set flags.
|
||||
const production_element_t *production_for_token(parse_token_type_t node_type,
|
||||
const parse_token_t &input1,
|
||||
const parse_token_t &input2, uint8_t *out_tag);
|
||||
} // namespace parse_productions
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
188
src/parse_tree.h
188
src/parse_tree.h
@ -6,30 +6,19 @@
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common.h"
|
||||
#include "maybe.h"
|
||||
#include "parse_constants.h"
|
||||
#include "parse_grammar.h"
|
||||
#include "tokenizer.h"
|
||||
|
||||
class parse_node_tree_t;
|
||||
|
||||
typedef uint32_t node_offset_t;
|
||||
|
||||
#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))
|
||||
|
||||
typedef uint32_t source_offset_t;
|
||||
|
||||
constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
|
||||
|
||||
struct source_range_t {
|
||||
uint32_t start;
|
||||
uint32_t length;
|
||||
};
|
||||
|
||||
/// A struct representing the token type that we use internally.
|
||||
struct parse_token_t {
|
||||
enum parse_token_type_t type; // The type of the token as represented by the parser
|
||||
@ -41,38 +30,35 @@ struct parse_token_t {
|
||||
bool is_newline{false}; // Hackish: if TOK_END, whether the source is a newline.
|
||||
bool preceding_escaped_nl{false}; // Whether there was an escaped newline preceding this token.
|
||||
bool may_be_variable_assignment{false}; // Hackish: whether this token is a string like FOO=bar
|
||||
tokenizer_error_t tok_error{tokenizer_error_t::none}; // If this is a tokenizer error, that error.
|
||||
source_offset_t source_start{SOURCE_OFFSET_INVALID};
|
||||
source_offset_t source_length{0};
|
||||
|
||||
/// \return the source range.
|
||||
/// Note the start may be invalid.
|
||||
source_range_t range() const {
|
||||
return source_range_t{source_start, source_length};
|
||||
}
|
||||
|
||||
/// \return whether we are a string with the dash prefix set.
|
||||
bool is_dash_prefix_string() const {
|
||||
return type == parse_token_type_string && has_dash_prefix;
|
||||
}
|
||||
|
||||
wcstring describe() const;
|
||||
wcstring user_presentable_description() const;
|
||||
|
||||
constexpr parse_token_t(parse_token_type_t type) : type(type) {}
|
||||
};
|
||||
|
||||
enum {
|
||||
parse_flag_none = 0,
|
||||
|
||||
/// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
|
||||
/// disconnected trees. This is intended to be used by syntax highlighting.
|
||||
parse_flag_continue_after_error = 1 << 0,
|
||||
/// Include comment tokens.
|
||||
parse_flag_include_comments = 1 << 1,
|
||||
/// Indicate that the tokenizer should accept incomplete tokens.
|
||||
parse_flag_accept_incomplete_tokens = 1 << 2,
|
||||
/// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
|
||||
/// tree where some nodes may have no productions.
|
||||
parse_flag_leave_unterminated = 1 << 3,
|
||||
/// Indicate that the parser should generate job_list entries for blank lines.
|
||||
parse_flag_show_blank_lines = 1 << 4
|
||||
};
|
||||
typedef unsigned int parse_tree_flags_t;
|
||||
|
||||
wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src);
|
||||
/// Return a new parse token, advancing the tokenizer.
|
||||
parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token, wcstring *storage);
|
||||
|
||||
const wchar_t *token_type_description(parse_token_type_t type);
|
||||
const wchar_t *keyword_description(parse_keyword_t type);
|
||||
|
||||
parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err);
|
||||
|
||||
// Node flags.
|
||||
enum {
|
||||
/// Flag indicating that the node has associated comment nodes.
|
||||
@ -88,145 +74,29 @@ typedef uint8_t parse_node_flags_t;
|
||||
/// Node-type specific tag value.
|
||||
typedef uint8_t parse_node_tag_t;
|
||||
|
||||
/// Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields
|
||||
/// is important.
|
||||
class parse_node_t {
|
||||
public:
|
||||
// Start in the source code.
|
||||
source_offset_t source_start{SOURCE_OFFSET_INVALID};
|
||||
// Length of our range in the source code.
|
||||
source_offset_t source_length{0};
|
||||
// Parent
|
||||
node_offset_t parent{NODE_OFFSET_INVALID};
|
||||
// Children
|
||||
node_offset_t child_start{0};
|
||||
// Number of children.
|
||||
uint8_t child_count{0};
|
||||
// Type of the node.
|
||||
enum parse_token_type_t type;
|
||||
// Keyword associated with node.
|
||||
enum parse_keyword_t keyword { parse_keyword_t::none };
|
||||
// Node flags.
|
||||
parse_node_flags_t flags : 4;
|
||||
// This is used to store e.g. the statement decoration.
|
||||
parse_node_tag_t tag : 4;
|
||||
// Description
|
||||
wcstring describe() const;
|
||||
|
||||
// Constructor
|
||||
explicit parse_node_t(parse_token_type_t ty) : type(ty), flags(0), tag(0) {}
|
||||
|
||||
node_offset_t child_offset(node_offset_t which) const {
|
||||
PARSE_ASSERT(which < child_count);
|
||||
return child_start + which;
|
||||
}
|
||||
|
||||
/// Indicate if this node has a range of source code associated with it.
|
||||
bool has_source() const {
|
||||
// Should never have a nonempty range with an invalid offset.
|
||||
assert(this->source_start != SOURCE_OFFSET_INVALID || this->source_length == 0);
|
||||
return this->source_length > 0;
|
||||
}
|
||||
|
||||
/// Indicate if the node has comment nodes.
|
||||
bool has_comments() const { return this->flags & parse_node_flag_has_comments; }
|
||||
|
||||
/// Indicates if we have a preceding escaped newline.
|
||||
bool has_preceding_escaped_newline() const {
|
||||
return this->flags & parse_node_flag_preceding_escaped_nl;
|
||||
}
|
||||
|
||||
source_range_t source_range() const {
|
||||
assert(has_source());
|
||||
return {source_start, source_length};
|
||||
}
|
||||
|
||||
/// Gets source for the node, or the empty string if it has no source.
|
||||
wcstring get_source(const wcstring &str) const {
|
||||
if (!has_source())
|
||||
return wcstring();
|
||||
else
|
||||
return wcstring(str, this->source_start, this->source_length);
|
||||
}
|
||||
|
||||
/// Returns whether the given location is within the source range or at its end.
|
||||
bool location_in_or_at_end_of_source_range(size_t loc) const {
|
||||
return has_source() && source_start <= loc && loc - source_start <= source_length;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Type>
|
||||
class tnode_t;
|
||||
|
||||
/// The parse tree itself.
|
||||
class parse_node_tree_t : public std::vector<parse_node_t> {
|
||||
public:
|
||||
parse_node_tree_t() {}
|
||||
parse_node_tree_t(parse_node_tree_t &&) = default;
|
||||
parse_node_tree_t &operator=(parse_node_tree_t &&) = default;
|
||||
parse_node_tree_t(const parse_node_tree_t &) = delete; // no copying
|
||||
parse_node_tree_t &operator=(const parse_node_tree_t &) = delete; // no copying
|
||||
|
||||
// Get the node corresponding to a child of the given node, or NULL if there is no such child.
|
||||
// If expected_type is provided, assert that the node has that type.
|
||||
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which,
|
||||
parse_token_type_t expected_type = token_type_invalid) const;
|
||||
|
||||
// Find the first direct child of the given node of the given type. asserts on failure.
|
||||
const parse_node_t &find_child(const parse_node_t &parent, parse_token_type_t type) const;
|
||||
|
||||
template <typename Type>
|
||||
tnode_t<Type> find_child(const parse_node_t &parent) const;
|
||||
|
||||
// Get the node corresponding to the parent of the given node, or NULL if there is no such
|
||||
// parent. If expected_type is provided, only returns the parent if it is of that type. Note the
|
||||
// asymmetry: get_child asserts since the children are known, but get_parent does not, since the
|
||||
// parent may not be known.
|
||||
const parse_node_t *get_parent(const parse_node_t &node,
|
||||
parse_token_type_t expected_type = token_type_invalid) const;
|
||||
|
||||
// Finds a node containing the given source location. If 'parent' is not NULL, it must be an
|
||||
// ancestor.
|
||||
const parse_node_t *find_node_matching_source_location(parse_token_type_t type,
|
||||
size_t source_loc,
|
||||
const parse_node_t *parent) const;
|
||||
// Utilities
|
||||
|
||||
/// Given a node, return all of its comment nodes.
|
||||
std::vector<tnode_t<grammar::comment>> comment_nodes_for_node(const parse_node_t &parent) const;
|
||||
|
||||
private:
|
||||
template <typename Type>
|
||||
friend class tnode_t;
|
||||
/// Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return
|
||||
/// the next element of the given type in that list, and the tail (by reference). Returns NULL
|
||||
/// if we've exhausted the list.
|
||||
const parse_node_t *next_node_in_node_list(const parse_node_t &node_list,
|
||||
parse_token_type_t entry_type,
|
||||
const parse_node_t **list_tail) const;
|
||||
};
|
||||
|
||||
/// The big entry point. Parse a string, attempting to produce a tree for the given goal type.
|
||||
bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags,
|
||||
parse_node_tree_t *output, parse_error_list_t *errors,
|
||||
parse_token_type_t goal = symbol_job_list);
|
||||
namespace ast {
|
||||
class ast_t;
|
||||
}
|
||||
|
||||
/// A type wrapping up a parse tree and the original source behind it.
|
||||
struct parsed_source_t {
|
||||
wcstring src;
|
||||
parse_node_tree_t tree;
|
||||
std::unique_ptr<ast::ast_t> ast;
|
||||
|
||||
parsed_source_t(wcstring s, parse_node_tree_t t) : src(std::move(s)), tree(std::move(t)) {}
|
||||
parsed_source_t(wcstring s, ast::ast_t &&ast);
|
||||
~parsed_source_t();
|
||||
|
||||
parsed_source_t(const parsed_source_t &) = delete;
|
||||
void operator=(const parsed_source_t &) = delete;
|
||||
parsed_source_t(parsed_source_t &&) = default;
|
||||
parsed_source_t &operator=(parsed_source_t &&) = default;
|
||||
parsed_source_t(parsed_source_t &&) = delete;
|
||||
parsed_source_t &operator=(parsed_source_t &&) = delete;
|
||||
};
|
||||
|
||||
/// Return a shared pointer to parsed_source_t, or null on failure.
|
||||
/// If parse_flag_continue_after_error is not set, this will return null on any error.
|
||||
using parsed_source_ref_t = std::shared_ptr<const parsed_source_t>;
|
||||
parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, parse_error_list_t *errors,
|
||||
parse_token_type_t goal = symbol_job_list);
|
||||
parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags,
|
||||
parse_error_list_t *errors);
|
||||
|
||||
/// Error message for improper use of the exec builtin.
|
||||
#define EXEC_ERR_MSG _(L"The '%ls' command can not be used in a pipeline")
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
#include "ast.h"
|
||||
#include "builtin.h"
|
||||
#include "common.h"
|
||||
#include "expand.h"
|
||||
@ -22,7 +23,6 @@
|
||||
#include "parse_constants.h"
|
||||
#include "parse_util.h"
|
||||
#include "parser.h"
|
||||
#include "tnode.h"
|
||||
#include "tokenizer.h"
|
||||
#include "wcstringutil.h"
|
||||
#include "wildcard.h"
|
||||
@ -565,121 +565,16 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote,
|
||||
return result;
|
||||
}
|
||||
|
||||
/// We are given a parse tree, the index of a node within the tree, its indent, and a vector of
|
||||
/// indents the same size as the original source string. Set the indent correspdonding to the node's
|
||||
/// source range, if appropriate.
|
||||
///
|
||||
/// trailing_indent is the indent for nodes with unrealized source, i.e. if I type 'if false <ret>'
|
||||
/// then we have an if node with an empty job list (without source) but we want the last line to be
|
||||
/// indented anyways.
|
||||
///
|
||||
/// switch statements also indent.
|
||||
///
|
||||
/// max_visited_node_idx is the largest index we visited.
|
||||
static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx,
|
||||
int node_indent, parse_token_type_t parent_type,
|
||||
std::vector<int> *indents, int *trailing_indent,
|
||||
node_offset_t *max_visited_node_idx) {
|
||||
// Guard against incomplete trees.
|
||||
if (node_idx > tree.size()) return;
|
||||
|
||||
// Update max_visited_node_idx.
|
||||
if (node_idx > *max_visited_node_idx) *max_visited_node_idx = node_idx;
|
||||
|
||||
// We could implement this by utilizing the fish grammar. But there's an easy trick instead:
|
||||
// almost everything that wraps a job list should be indented by 1. So just find all of the job
|
||||
// lists. One exception is switch, which wraps a case_item_list instead of a job_list. The other
|
||||
// exception is job_list itself: a job_list is a job and a job_list, and we want that child list
|
||||
// to be indented the same as the parent. So just find all job_lists whose parent is not a
|
||||
// job_list, and increment their indent by 1. We also want to treat andor_job_list like
|
||||
// job_lists.
|
||||
const parse_node_t &node = tree.at(node_idx);
|
||||
const parse_token_type_t node_type = node.type;
|
||||
|
||||
// Increment the indent if we are either a root job_list, or root case_item_list.
|
||||
const bool is_root_job_list = node_type != parent_type && (node_type == symbol_job_list ||
|
||||
node_type == symbol_andor_job_list);
|
||||
const bool is_root_case_item_list =
|
||||
node_type == symbol_case_item_list && parent_type != symbol_case_item_list;
|
||||
if (is_root_job_list || is_root_case_item_list) {
|
||||
node_indent += 1;
|
||||
}
|
||||
|
||||
// If we have source, store the trailing indent unconditionally. If we do not have source, store
|
||||
// the trailing indent only if ours is bigger; this prevents the trailing "run" of terminal job
|
||||
// lists from affecting the trailing indent. For example, code like this:
|
||||
//
|
||||
// if foo
|
||||
//
|
||||
// will be parsed as this:
|
||||
//
|
||||
// job_list
|
||||
// job
|
||||
// if_statement
|
||||
// job [if]
|
||||
// job_list [empty]
|
||||
// job_list [empty]
|
||||
//
|
||||
// There's two "terminal" job lists, and we want the innermost one.
|
||||
//
|
||||
// Note we are relying on the fact that nodes are in the same order as the source, i.e. an
|
||||
// in-order traversal of the node tree also traverses the source from beginning to end.
|
||||
if (node.has_source() || node_indent > *trailing_indent) {
|
||||
*trailing_indent = node_indent;
|
||||
}
|
||||
|
||||
// Store the indent into the indent array.
|
||||
if (node.source_start != SOURCE_OFFSET_INVALID && node.source_start < indents->size()) {
|
||||
if (node.has_source()) {
|
||||
// A normal non-empty node. Store the indent unconditionally.
|
||||
indents->at(node.source_start) = node_indent;
|
||||
} else {
|
||||
// An empty node. We have a source offset but no source length. This can come about when
|
||||
// a node is legitimately empty:
|
||||
//
|
||||
// while true; end
|
||||
//
|
||||
// The job_list inside the while loop is empty. It still has a source offset (at the end
|
||||
// of the while statement) but no source extent. We still need to capture that indent,
|
||||
// because there may be comments inside:
|
||||
//
|
||||
// while true
|
||||
// # loop forever
|
||||
// end
|
||||
//
|
||||
// The 'loop forever' comment must be indented, by virtue of storing the indent.
|
||||
//
|
||||
// Now consider what happens if we remove the end:
|
||||
//
|
||||
// while true
|
||||
// # loop forever
|
||||
//
|
||||
// Now both the job_list and end_command are unmaterialized. However, we want the indent
|
||||
// to be of the job_list and not the end_command. Therefore, we only store the indent
|
||||
// if it's bigger.
|
||||
if (node_indent > indents->at(node.source_start)) {
|
||||
indents->at(node.source_start) = node_indent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Recursive to all our children.
|
||||
for (node_offset_t idx = 0; idx < node.child_count; idx++) {
|
||||
// Note we pass our type to our child, which becomes its parent node type.
|
||||
compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents,
|
||||
trailing_indent, max_visited_node_idx);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> parse_util_compute_indents(const wcstring &src) {
|
||||
// Make a vector the same size as the input string, which contains the indents. Initialize them
|
||||
// to -1.
|
||||
// to 0.
|
||||
static wcstring ssss;
|
||||
ssss = src;
|
||||
const size_t src_size = src.size();
|
||||
std::vector<int> indents(src_size, -1);
|
||||
std::vector<int> indents(src_size, 0);
|
||||
|
||||
// Simple trick: if our source does not contain a newline, then all indents are 0.
|
||||
if (src.find('\n') == wcstring::npos) {
|
||||
std::fill(indents.begin(), indents.end(), 0);
|
||||
return indents;
|
||||
}
|
||||
|
||||
@ -687,78 +582,141 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
|
||||
// the last node we visited becomes the input indent of the next. I.e. in the case of 'switch
|
||||
// foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
|
||||
// were a case item list.
|
||||
parse_node_tree_t tree;
|
||||
parse_tree_from_string(src,
|
||||
parse_flag_continue_after_error | parse_flag_include_comments |
|
||||
parse_flag_accept_incomplete_tokens,
|
||||
&tree, nullptr /* errors */);
|
||||
using namespace ast;
|
||||
auto ast =
|
||||
ast_t::parse(src, parse_flag_continue_after_error | parse_flag_include_comments |
|
||||
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated);
|
||||
|
||||
// Start indenting at the first node. If we have a parse error, we'll have to start indenting
|
||||
// from the top again.
|
||||
node_offset_t start_node_idx = 0;
|
||||
int last_trailing_indent = 0;
|
||||
// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while
|
||||
// visiting its children.
|
||||
struct indent_visitor_t {
|
||||
explicit indent_visitor_t(std::vector<int> &indents) : indents(indents) {}
|
||||
|
||||
while (start_node_idx < tree.size()) {
|
||||
// The indent that we'll get for the last line.
|
||||
int trailing_indent = 0;
|
||||
void visit(const node_t &node) {
|
||||
int inc = 0;
|
||||
int dec = 0;
|
||||
switch (node.type) {
|
||||
case type_t::job_list:
|
||||
case type_t::andor_job_list:
|
||||
// Job lists are never unwound.
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
break;
|
||||
|
||||
// Biggest offset we visited.
|
||||
node_offset_t max_visited_node_idx = 0;
|
||||
// Increment indents for conditions in headers (#1665).
|
||||
case type_t::job_conjunction:
|
||||
if (node.parent->type == type_t::while_header ||
|
||||
node.parent->type == type_t::if_clause) {
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
// Invoke the recursive version. As a hack, pass job_list for the 'parent' token type, which
|
||||
// will prevent the really-root job list from indenting.
|
||||
compute_indents_recursive(tree, start_node_idx, last_trailing_indent, symbol_job_list,
|
||||
&indents, &trailing_indent, &max_visited_node_idx);
|
||||
// Increment indents for piped remainders.
|
||||
case type_t::job_continuation_list:
|
||||
if (node.as<job_continuation_list_t>()->count() > 0) {
|
||||
inc = 1;
|
||||
dec = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
// We may have more to indent. The trailing indent becomes our current indent. Start at the
|
||||
// node after the last we visited.
|
||||
last_trailing_indent = trailing_indent;
|
||||
start_node_idx = max_visited_node_idx + 1;
|
||||
}
|
||||
case type_t::case_item_list:
|
||||
// Here's a hack. Consider:
|
||||
// switch abc
|
||||
// cas
|
||||
//
|
||||
// fish will see that 'cas' is not valid inside a switch statement because it is
|
||||
// not "case". It will then unwind back to the top level job list, producing a
|
||||
// parse tree like:
|
||||
//
|
||||
// job_list
|
||||
// switch_job
|
||||
// <err>
|
||||
// normal_job
|
||||
// cas
|
||||
//
|
||||
// And so we will think that the 'cas' job is at the same level as the switch.
|
||||
// To address this, if we see that the switch statement was not closed, do not
|
||||
// decrement the indent afterwards.
|
||||
inc = 1;
|
||||
dec = node.parent->as<switch_statement_t>()->end.unsourced ? 0 : 1;
|
||||
break;
|
||||
|
||||
// Handle comments. Each comment node has a parent (which is whatever the top of the symbol
|
||||
// stack was when the comment was encountered). So the source range of the comment has the same
|
||||
// indent as its parent.
|
||||
const size_t tree_size = tree.size();
|
||||
for (node_offset_t i = 0; i < tree_size; i++) {
|
||||
const parse_node_t &node = tree.at(i);
|
||||
if (node.type == parse_special_type_comment && node.has_source() &&
|
||||
node.parent < tree_size) {
|
||||
const parse_node_t &parent = tree.at(node.parent);
|
||||
if (parent.source_start != SOURCE_OFFSET_INVALID) {
|
||||
indents.at(node.source_start) = indents.at(parent.source_start);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
indent += inc;
|
||||
|
||||
// Now apply the indents. The indents array has -1 for places where the indent does not change,
|
||||
// so start at each value and extend it along the run of -1s.
|
||||
int last_indent = 0;
|
||||
for (size_t i = 0; i < src_size; i++) {
|
||||
int this_indent = indents.at(i);
|
||||
if (this_indent < 0) {
|
||||
indents.at(i) = last_indent;
|
||||
// If we increased the indentation, apply it to the remainder of the string, even if the
|
||||
// list is empty. For example (where _ represents the cursor):
|
||||
//
|
||||
// if foo
|
||||
// _
|
||||
//
|
||||
// we want to indent the newline.
|
||||
if (inc) {
|
||||
std::fill(indents.begin() + last_leaf_end, indents.end(), indent);
|
||||
last_indent = indent;
|
||||
}
|
||||
|
||||
// If this is a leaf node, apply the current indentation.
|
||||
if (node.category == category_t::leaf) {
|
||||
auto range = node.source_range();
|
||||
if (range.length > 0) {
|
||||
// Fill to the end.
|
||||
// Later nodes will come along and overwrite these.
|
||||
std::fill(indents.begin() + range.start, indents.end(), indent);
|
||||
last_leaf_end = range.start + range.length;
|
||||
last_indent = indent;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
node_visitor(*this).accept_children_of(&node);
|
||||
indent -= dec;
|
||||
}
|
||||
|
||||
// The one-past-the-last index of the most recently encountered leaf node.
|
||||
// We use this to populate the indents even if there's no tokens in the range.
|
||||
size_t last_leaf_end{0};
|
||||
|
||||
// The last indent which we assigned.
|
||||
int last_indent{-1};
|
||||
|
||||
// List of indents, which we populate.
|
||||
std::vector<int> &indents;
|
||||
|
||||
// Initialize our starting indent to -1, as our top-level node is a job list which
|
||||
// will immediately increment it.
|
||||
int indent{-1};
|
||||
};
|
||||
|
||||
indent_visitor_t iv(indents);
|
||||
node_visitor(iv).accept(ast.top());
|
||||
|
||||
// All newlines now get the *next* indent.
|
||||
// For example, in this code:
|
||||
// if true
|
||||
// stuff
|
||||
// the newline "belongs" to the if statement as it ends its job.
|
||||
// But when rendered, it visually belongs to the job list.
|
||||
|
||||
// FIXME: if there's a middle newline, we will indent it wrongly.
|
||||
// For example:
|
||||
// if true
|
||||
//
|
||||
// end
|
||||
// Here the middle newline should be indented by 1.
|
||||
|
||||
size_t idx = src_size;
|
||||
int next_indent = iv.last_indent;
|
||||
while (idx--) {
|
||||
if (src.at(idx) == L'\n') {
|
||||
indents.at(idx) = next_indent;
|
||||
} else {
|
||||
// New indent level.
|
||||
last_indent = this_indent;
|
||||
// Make all whitespace before a token have the new level. This avoids using the wrong
|
||||
// indentation level if a new line starts with whitespace.
|
||||
size_t prev_char_idx = i;
|
||||
while (prev_char_idx--) {
|
||||
if (!std::wcschr(L" \n\t\r", src.at(prev_char_idx))) break;
|
||||
indents.at(prev_char_idx) = last_indent;
|
||||
}
|
||||
next_indent = indents.at(idx);
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure trailing whitespace has the trailing indent. This makes sure a new line is correctly
|
||||
// indented even if it is empty.
|
||||
size_t suffix_idx = src_size;
|
||||
while (suffix_idx--) {
|
||||
if (!std::wcschr(L" \n\t\r", src.at(suffix_idx))) break;
|
||||
indents.at(suffix_idx) = last_trailing_indent;
|
||||
}
|
||||
|
||||
return indents;
|
||||
}
|
||||
|
||||
@ -790,17 +748,13 @@ bool parse_util_argument_is_help(const wchar_t *s) {
|
||||
return std::wcscmp(L"-h", s) == 0 || std::wcscmp(L"--help", s) == 0;
|
||||
}
|
||||
|
||||
/// Check if the first argument under the given node is --help.
|
||||
static bool first_argument_is_help(tnode_t<grammar::plain_statement> statement,
|
||||
const wcstring &src) {
|
||||
bool is_help = false;
|
||||
auto arg_nodes = get_argument_nodes(statement.child<1>());
|
||||
if (!arg_nodes.empty()) {
|
||||
// Check the first argument only.
|
||||
wcstring first_arg_src = arg_nodes.front().get_source(src);
|
||||
is_help = parse_util_argument_is_help(first_arg_src.c_str());
|
||||
// \return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if
|
||||
// there are no arguments.
|
||||
const ast::argument_t *get_first_arg(const ast::argument_or_redirection_list_t &list) {
|
||||
for (const ast::argument_or_redirection_t &v : list) {
|
||||
if (v.is_argument()) return &v.argument();
|
||||
}
|
||||
return is_help;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Given a wide character immediately after a dollar sign, return the appropriate error message.
|
||||
@ -956,11 +910,13 @@ static parser_test_error_bits_t detect_dollar_cmdsub_errors(size_t arg_src_offse
|
||||
/// Test if this argument contains any errors. Detected errors include syntax errors in command
|
||||
/// substitutions, improperly escaped characters and improper use of the variable expansion
|
||||
/// operator.
|
||||
parser_test_error_bits_t parse_util_detect_errors_in_argument(tnode_t<grammar::argument> node,
|
||||
parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argument_t &arg,
|
||||
const wcstring &arg_src,
|
||||
parse_error_list_t *out_errors) {
|
||||
assert(node.has_source() && "argument has no source");
|
||||
auto source_start = node.source_range()->start;
|
||||
maybe_t<source_range_t> source_range = arg.try_source_range();
|
||||
if (!source_range.has_value()) return 0;
|
||||
|
||||
size_t source_start = source_range->start;
|
||||
int err = 0;
|
||||
wchar_t *paran_begin, *paran_end;
|
||||
int do_loop = 1;
|
||||
@ -1054,10 +1010,10 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(tnode_t<grammar::a
|
||||
}
|
||||
|
||||
/// Given that the job given by node should be backgrounded, return true if we detect any errors.
|
||||
static bool detect_errors_in_backgrounded_job(tnode_t<grammar::job> job,
|
||||
static bool detect_errors_in_backgrounded_job(const ast::job_t &job,
|
||||
parse_error_list_t *parse_errors) {
|
||||
namespace g = grammar;
|
||||
auto source_range = job.source_range();
|
||||
using namespace ast;
|
||||
auto source_range = job.try_source_range();
|
||||
if (!source_range) return false;
|
||||
|
||||
bool errored = false;
|
||||
@ -1066,54 +1022,77 @@ static bool detect_errors_in_backgrounded_job(tnode_t<grammar::job> job,
|
||||
// foo & ; or bar
|
||||
// if foo & ; end
|
||||
// while foo & ; end
|
||||
auto job_conj = job.try_get_parent<g::job_conjunction>();
|
||||
if (job_conj.try_get_parent<g::if_clause>()) {
|
||||
errored = append_syntax_error(parse_errors, source_range->start,
|
||||
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
|
||||
} else if (job_conj.try_get_parent<g::while_header>()) {
|
||||
errored = append_syntax_error(parse_errors, source_range->start,
|
||||
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
|
||||
} else if (auto jlist = job_conj.try_get_parent<g::job_list>()) {
|
||||
// This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
|
||||
// Fetch the job list and then advance it by one.
|
||||
auto first_jconj = jlist.next_in_list<g::job_conjunction>();
|
||||
assert(first_jconj == job.try_get_parent<g::job_conjunction>() &&
|
||||
"Expected first job to be the node we found");
|
||||
(void)first_jconj;
|
||||
const job_conjunction_t *job_conj = job.parent->try_as<job_conjunction_t>();
|
||||
if (!job_conj) return false;
|
||||
|
||||
// Try getting the next job's decorator.
|
||||
if (auto next_job_dec = jlist.next_in_list<g::job_decorator>()) {
|
||||
// The next job is indeed a boolean statement.
|
||||
parse_job_decoration_t bool_type = bool_statement_type(next_job_dec);
|
||||
if (bool_type == parse_job_decoration_and) {
|
||||
errored = append_syntax_error(parse_errors, next_job_dec.source_range()->start,
|
||||
BOOL_AFTER_BACKGROUND_ERROR_MSG, L"and");
|
||||
} else if (bool_type == parse_job_decoration_or) {
|
||||
errored = append_syntax_error(parse_errors, next_job_dec.source_range()->start,
|
||||
BOOL_AFTER_BACKGROUND_ERROR_MSG, L"or");
|
||||
if (job_conj->parent->try_as<if_clause_t>()) {
|
||||
errored = append_syntax_error(parse_errors, source_range->start,
|
||||
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
|
||||
} else if (job_conj->parent->try_as<while_header_t>()) {
|
||||
errored = append_syntax_error(parse_errors, source_range->start,
|
||||
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
|
||||
} else if (const ast::job_list_t *jlist = job_conj->parent->try_as<ast::job_list_t>()) {
|
||||
// This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
|
||||
// Find the index of ourselves in the job list.
|
||||
size_t index;
|
||||
for (index = 0; index < jlist->count(); index++) {
|
||||
if (jlist->at(index) == job_conj) break;
|
||||
}
|
||||
assert(index < jlist->count() && "Should have found the job in the list");
|
||||
|
||||
// Try getting the next job and check its decorator.
|
||||
if (const job_conjunction_t *next = jlist->at(index + 1)) {
|
||||
if (const keyword_base_t *deco = next->decorator.contents.get()) {
|
||||
assert(
|
||||
(deco->kw == parse_keyword_t::kw_and || deco->kw == parse_keyword_t::kw_or) &&
|
||||
"Unexpected decorator keyword");
|
||||
const wchar_t *deco_name = (deco->kw == parse_keyword_t::kw_and ? L"and" : L"or");
|
||||
errored = append_syntax_error(parse_errors, deco->source_range().start,
|
||||
BOOL_AFTER_BACKGROUND_ERROR_MSG, deco_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
return errored;
|
||||
}
|
||||
|
||||
static bool detect_errors_in_plain_statement(const wcstring &buff_src,
|
||||
const parse_node_tree_t &node_tree,
|
||||
tnode_t<grammar::plain_statement> pst,
|
||||
parse_error_list_t *parse_errors) {
|
||||
using namespace grammar;
|
||||
static bool detect_errors_in_decorated_statement(const wcstring &buff_src,
|
||||
const ast::decorated_statement_t &dst,
|
||||
parse_error_list_t *parse_errors) {
|
||||
using namespace ast;
|
||||
bool errored = false;
|
||||
auto source_start = pst.source_range()->start;
|
||||
auto source_start = dst.source_range().start;
|
||||
const parse_statement_decoration_t decoration = dst.decoration();
|
||||
|
||||
// In a few places below, we want to know if we are in a pipeline.
|
||||
tnode_t<statement> st = pst.try_get_parent<decorated_statement>().try_get_parent<statement>();
|
||||
pipeline_position_t pipe_pos = get_pipeline_position(st);
|
||||
bool is_in_pipeline = (pipe_pos != pipeline_position_t::none);
|
||||
// Determine if the first argument is help.
|
||||
bool first_arg_is_help = false;
|
||||
if (const auto *arg = get_first_arg(dst.args_or_redirs)) {
|
||||
wcstring arg_src = arg->source(buff_src);
|
||||
first_arg_is_help = parse_util_argument_is_help(arg_src.c_str());
|
||||
}
|
||||
|
||||
// We need to know the decoration.
|
||||
const enum parse_statement_decoration_t decoration = get_decoration(pst);
|
||||
// Get the statement we are part of.
|
||||
const statement_t *st = dst.parent->as<statement_t>();
|
||||
|
||||
// Walk up to the job.
|
||||
const ast::job_t *job = nullptr;
|
||||
for (const node_t *cursor = st; job == nullptr; cursor = cursor->parent) {
|
||||
assert(cursor && "Reached root without finding a job");
|
||||
job = cursor->try_as<ast::job_t>();
|
||||
}
|
||||
assert(job && "Should have found the job");
|
||||
|
||||
// Check our pipeline position.
|
||||
pipeline_position_t pipe_pos;
|
||||
if (job->continuation.empty()) {
|
||||
pipe_pos = pipeline_position_t::none;
|
||||
} else if (&job->statement == st) {
|
||||
pipe_pos = pipeline_position_t::first;
|
||||
} else {
|
||||
pipe_pos = pipeline_position_t::subsequent;
|
||||
}
|
||||
|
||||
// Check that we don't try to pipe through exec.
|
||||
bool is_in_pipeline = (pipe_pos != pipeline_position_t::none);
|
||||
if (is_in_pipeline && decoration == parse_statement_decoration_exec) {
|
||||
errored = append_syntax_error(parse_errors, source_start, EXEC_ERR_MSG, L"exec");
|
||||
}
|
||||
@ -1124,14 +1103,14 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
|
||||
if (pipe_pos == pipeline_position_t::subsequent) {
|
||||
// check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted
|
||||
// commands.
|
||||
wcstring command = pst.child<0>().get_source(buff_src);
|
||||
wcstring command = dst.command.source(buff_src);
|
||||
if (command == L"and" || command == L"or") {
|
||||
errored =
|
||||
append_syntax_error(parse_errors, source_start, EXEC_ERR_MSG, command.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (maybe_t<wcstring> unexp_command = command_for_plain_statement(pst, buff_src)) {
|
||||
if (maybe_t<wcstring> unexp_command = dst.command.try_source(buff_src)) {
|
||||
wcstring command;
|
||||
// Check that we can expand the command.
|
||||
if (expand_to_command_and_args(*unexp_command, operation_context_t::empty(), &command,
|
||||
@ -1148,40 +1127,40 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
|
||||
|
||||
// Check that we don't return from outside a function. But we allow it if it's
|
||||
// 'return --help'.
|
||||
if (!errored && command == L"return") {
|
||||
if (!errored && command == L"return" && !first_arg_is_help) {
|
||||
// See if we are in a function.
|
||||
bool found_function = false;
|
||||
for (const parse_node_t *ancestor = pst.node(); ancestor != nullptr;
|
||||
ancestor = node_tree.get_parent(*ancestor)) {
|
||||
auto fh = tnode_t<block_statement>::try_create(&node_tree, ancestor)
|
||||
.child<0>()
|
||||
.try_get_child<function_header, 0>();
|
||||
if (fh) {
|
||||
found_function = true;
|
||||
break;
|
||||
for (const node_t *cursor = &dst; cursor != nullptr; cursor = cursor->parent) {
|
||||
if (const auto *bs = cursor->try_as<block_statement_t>()) {
|
||||
if (bs->header->type == type_t::function_header) {
|
||||
found_function = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!found_function && !first_argument_is_help(pst, buff_src)) {
|
||||
|
||||
if (!found_function) {
|
||||
errored = append_syntax_error(parse_errors, source_start, INVALID_RETURN_ERR_MSG);
|
||||
}
|
||||
}
|
||||
|
||||
// Check that we don't break or continue from outside a loop.
|
||||
if (!errored && (command == L"break" || command == L"continue")) {
|
||||
if (!errored && (command == L"break" || command == L"continue") && !first_arg_is_help) {
|
||||
// Walk up until we hit a 'for' or 'while' loop. If we hit a function first,
|
||||
// stop the search; we can't break an outer loop from inside a function.
|
||||
// This is a little funny because we can't tell if it's a 'for' or 'while'
|
||||
// loop from the ancestor alone; we need the header. That is, we hit a
|
||||
// block_statement, and have to check its header.
|
||||
bool found_loop = false;
|
||||
for (const parse_node_t *ancestor = pst.node(); ancestor != nullptr;
|
||||
ancestor = node_tree.get_parent(*ancestor)) {
|
||||
tnode_t<block_header> bh =
|
||||
tnode_t<block_statement>::try_create(&node_tree, ancestor).child<0>();
|
||||
if (bh.try_get_child<while_header, 0>() || bh.try_get_child<for_header, 0>()) {
|
||||
for (const node_t *ancestor = &dst; ancestor != nullptr; ancestor = ancestor->parent) {
|
||||
const auto *block = ancestor->try_as<block_statement_t>();
|
||||
if (!block) continue;
|
||||
if (block->header->type == type_t::for_header ||
|
||||
block->header->type == type_t::while_header) {
|
||||
// This is a loop header, so we can break or continue.
|
||||
found_loop = true;
|
||||
break;
|
||||
} else if (bh.try_get_child<function_header, 0>()) {
|
||||
} else if (block->header->type == type_t::function_header) {
|
||||
// This is a function header, so we cannot break or
|
||||
// continue. We stop our search here.
|
||||
found_loop = false;
|
||||
@ -1189,7 +1168,7 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
|
||||
}
|
||||
}
|
||||
|
||||
if (!found_loop && !first_argument_is_help(pst, buff_src)) {
|
||||
if (!found_loop) {
|
||||
errored = append_syntax_error(
|
||||
parse_errors, source_start,
|
||||
(command == L"break" ? INVALID_BREAK_ERR_MSG : INVALID_CONTINUE_ERR_MSG));
|
||||
@ -1208,12 +1187,21 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
|
||||
return errored;
|
||||
}
|
||||
|
||||
// Given we have a trailing argument_or_redirection_list, like `begin; end > /dev/null`, verify that
|
||||
// there are no arguments in the list.
|
||||
static bool detect_errors_in_block_redirection_list(
|
||||
const ast::argument_or_redirection_list_t &args_or_redirs, parse_error_list_t *out_errors) {
|
||||
if (const auto *first_arg = get_first_arg(args_or_redirs)) {
|
||||
return append_syntax_error(out_errors, first_arg->source_range().start,
|
||||
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
|
||||
parse_error_list_t *out_errors,
|
||||
bool allow_incomplete,
|
||||
parsed_source_ref_t *out_pstree) {
|
||||
namespace g = grammar;
|
||||
parse_node_tree_t node_tree;
|
||||
parse_error_list_t parse_errors;
|
||||
|
||||
parser_test_error_bits_t res = 0;
|
||||
@ -1233,12 +1221,15 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
|
||||
// allow_incomplete is set.
|
||||
bool has_unclosed_quote_or_subshell = false;
|
||||
|
||||
// Parse the input string into a parse tree. Some errors are detected here.
|
||||
bool parsed = parse_tree_from_string(
|
||||
buff_src, allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none, &node_tree,
|
||||
&parse_errors);
|
||||
const parse_tree_flags_t parse_flags =
|
||||
allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none;
|
||||
|
||||
// Parse the input string into an ast. Some errors are detected here.
|
||||
using namespace ast;
|
||||
auto ast = ast_t::parse(buff_src, parse_flags, &parse_errors);
|
||||
if (allow_incomplete) {
|
||||
// Issue #1238: If the only error was unterminated quote, then consider this to have parsed
|
||||
// successfully.
|
||||
size_t idx = parse_errors.size();
|
||||
while (idx--) {
|
||||
if (parse_errors.at(idx).code == parse_error_tokenizer_unterminated_quote ||
|
||||
@ -1250,19 +1241,14 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
|
||||
}
|
||||
}
|
||||
|
||||
// Issue #1238: If the only error was unterminated quote, then consider this to have parsed
|
||||
// successfully. A better fix would be to have parse_tree_from_string return this information
|
||||
// directly (but it would be a shame to munge up its nice bool return).
|
||||
if (parse_errors.empty() && has_unclosed_quote_or_subshell) {
|
||||
parsed = true;
|
||||
}
|
||||
|
||||
if (!parsed) {
|
||||
errored = true;
|
||||
}
|
||||
|
||||
// has_unclosed_quote_or_subshell may only be set if allow_incomplete is true.
|
||||
assert(!has_unclosed_quote_or_subshell || allow_incomplete);
|
||||
if (has_unclosed_quote_or_subshell) {
|
||||
// We do not bother to validate the rest of the tree in this case.
|
||||
return PARSER_TEST_INCOMPLETE;
|
||||
}
|
||||
|
||||
errored = !parse_errors.empty();
|
||||
|
||||
// Expand all commands.
|
||||
// Verify 'or' and 'and' not used inside pipelines.
|
||||
@ -1271,21 +1257,17 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
|
||||
// Verify no variable expansions.
|
||||
|
||||
if (!errored) {
|
||||
for (const parse_node_t &node : node_tree) {
|
||||
if (node.type == symbol_end_command && !node.has_source()) {
|
||||
// An 'end' without source is an unclosed block.
|
||||
has_unclosed_block = true;
|
||||
} else if (node.type == symbol_statement && !node.has_source()) {
|
||||
// Check for a statement without source in a pipeline, i.e. unterminated pipeline.
|
||||
auto pipe_pos = get_pipeline_position({&node_tree, &node});
|
||||
if (pipe_pos != pipeline_position_t::none) {
|
||||
for (const node_t &node : ast) {
|
||||
if (const job_continuation_t *jc = node.try_as<job_continuation_t>()) {
|
||||
// Somewhat clumsy way of checking for a statement without source in a pipeline.
|
||||
// See if our pipe has source but our statement does not.
|
||||
if (!jc->pipe.unsourced && !jc->statement.try_source_range().has_value()) {
|
||||
has_unclosed_pipe = true;
|
||||
}
|
||||
} else if (node.type == symbol_argument) {
|
||||
tnode_t<g::argument> arg{&node_tree, &node};
|
||||
const wcstring arg_src = node.get_source(buff_src);
|
||||
res |= parse_util_detect_errors_in_argument(arg, arg_src, &parse_errors);
|
||||
} else if (node.type == symbol_job) {
|
||||
} else if (const argument_t *arg = node.try_as<argument_t>()) {
|
||||
wcstring arg_src = arg->source(buff_src);
|
||||
res |= parse_util_detect_errors_in_argument(*arg, arg_src, &parse_errors);
|
||||
} else if (const ast::job_t *job = node.try_as<ast::job_t>()) {
|
||||
// Disallow background in the following cases:
|
||||
//
|
||||
// foo & ; and bar
|
||||
@ -1293,25 +1275,27 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
|
||||
// if foo & ; end
|
||||
// while foo & ; end
|
||||
// If it's not a background job, nothing to do.
|
||||
auto job = tnode_t<g::job>{&node_tree, &node};
|
||||
if (job_node_is_background(job)) {
|
||||
errored |= detect_errors_in_backgrounded_job(job, &parse_errors);
|
||||
if (job->bg) {
|
||||
errored |= detect_errors_in_backgrounded_job(*job, &parse_errors);
|
||||
}
|
||||
} else if (node.type == symbol_arguments_or_redirections_list) {
|
||||
// verify no arguments to the end command of if, switch, begin (#986).
|
||||
auto list = tnode_t<g::arguments_or_redirections_list>{&node_tree, &node};
|
||||
if (list.try_get_parent<g::if_statement>() ||
|
||||
list.try_get_parent<g::switch_statement>() ||
|
||||
list.try_get_parent<g::block_statement>()) {
|
||||
if (auto arg = list.next_in_list<g::argument>()) {
|
||||
errored = append_syntax_error(&parse_errors, arg.source_range()->start,
|
||||
END_ARG_ERR_MSG);
|
||||
}
|
||||
}
|
||||
} else if (node.type == symbol_plain_statement) {
|
||||
tnode_t<grammar::plain_statement> pst{&node_tree, &node};
|
||||
} else if (const ast::decorated_statement_t *stmt =
|
||||
node.try_as<decorated_statement_t>()) {
|
||||
errored |= detect_errors_in_decorated_statement(buff_src, *stmt, &parse_errors);
|
||||
} else if (const auto *block = node.try_as<block_statement_t>()) {
|
||||
// If our 'end' had no source, we are unsourced.
|
||||
if (block->end.unsourced) has_unclosed_block = true;
|
||||
errored |=
|
||||
detect_errors_in_plain_statement(buff_src, node_tree, pst, &parse_errors);
|
||||
detect_errors_in_block_redirection_list(block->args_or_redirs, &parse_errors);
|
||||
} else if (const auto *ifs = node.try_as<if_statement_t>()) {
|
||||
// If our 'end' had no source, we are unsourced.
|
||||
if (ifs->end.unsourced) has_unclosed_block = true;
|
||||
errored |=
|
||||
detect_errors_in_block_redirection_list(ifs->args_or_redirs, &parse_errors);
|
||||
} else if (const auto *switchs = node.try_as<switch_statement_t>()) {
|
||||
// If our 'end' had no source, we are unsourced.
|
||||
if (switchs->end.unsourced) has_unclosed_block = true;
|
||||
errored |=
|
||||
detect_errors_in_block_redirection_list(switchs->args_or_redirs, &parse_errors);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1325,8 +1309,9 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
|
||||
*out_errors = std::move(parse_errors);
|
||||
}
|
||||
|
||||
// \return the ast to our caller if requested.
|
||||
if (out_pstree != nullptr) {
|
||||
*out_pstree = std::make_shared<parsed_source_t>(buff_src, std::move(node_tree));
|
||||
*out_pstree = std::make_shared<parsed_source_t>(buff_src, std::move(ast));
|
||||
}
|
||||
|
||||
return res;
|
||||
@ -1341,25 +1326,21 @@ maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_
|
||||
false /* don't skip caret */);
|
||||
};
|
||||
|
||||
// Parse the string as an argument list.
|
||||
// Parse the string as a freestanding argument list.
|
||||
using namespace ast;
|
||||
parse_error_list_t errors;
|
||||
parse_node_tree_t tree;
|
||||
if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, &errors,
|
||||
symbol_freestanding_argument_list)) {
|
||||
// Failed to parse.
|
||||
auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &errors);
|
||||
if (!errors.empty()) {
|
||||
return get_error_text(errors);
|
||||
}
|
||||
|
||||
// Get the root argument list and extract arguments from it.
|
||||
// Test each of these.
|
||||
assert(!tree.empty() && "Should have parsed a tree");
|
||||
tnode_t<grammar::freestanding_argument_list> arg_list(&tree, &tree.at(0));
|
||||
while (auto arg = arg_list.next_in_list<grammar::argument>()) {
|
||||
const wcstring arg_src = arg.get_source(arg_list_src);
|
||||
for (const argument_t &arg : ast.top()->as<freestanding_argument_list_t>()->arguments) {
|
||||
const wcstring arg_src = arg.source(arg_list_src);
|
||||
if (parse_util_detect_errors_in_argument(arg, arg_src, &errors)) {
|
||||
return get_error_text(errors);
|
||||
}
|
||||
}
|
||||
|
||||
return none();
|
||||
}
|
||||
|
@ -10,6 +10,10 @@
|
||||
#include "parse_tree.h"
|
||||
#include "tokenizer.h"
|
||||
|
||||
namespace ast {
|
||||
struct argument_t;
|
||||
}
|
||||
|
||||
/// Find the beginning and end of the first subshell in the specified string.
|
||||
///
|
||||
/// \param in the string to search for subshells
|
||||
@ -127,7 +131,6 @@ std::vector<int> parse_util_compute_indents(const wcstring &src);
|
||||
/// incomplete (e.g. an unclosed quote), an error is not returned and the PARSER_TEST_INCOMPLETE bit
|
||||
/// is set in the return value. If allow_incomplete is not set, then incomplete strings result in an
|
||||
/// error. If out_pstree is not NULL, the resulting tree is returned by reference.
|
||||
class parse_node_tree_t;
|
||||
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
|
||||
parse_error_list_t *out_errors = nullptr,
|
||||
bool allow_incomplete = true,
|
||||
@ -141,10 +144,9 @@ maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_
|
||||
/// Test if this argument contains any errors. Detected errors include syntax errors in command
|
||||
/// substitutions, improperly escaped characters and improper use of the variable expansion
|
||||
/// operator. This does NOT currently detect unterminated quotes.
|
||||
class parse_node_t;
|
||||
|
||||
parser_test_error_bits_t parse_util_detect_errors_in_argument(
|
||||
tnode_t<grammar::argument> node, const wcstring &arg_src,
|
||||
parse_error_list_t *out_errors = nullptr);
|
||||
const ast::argument_t &arg, const wcstring &arg_src, parse_error_list_t *out_errors = nullptr);
|
||||
|
||||
/// Given a string containing a variable expansion error, append an appropriate error to the errors
|
||||
/// list. The global_token_pos is the offset of the token in the larger source, and the dollar_pos
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "ast.h"
|
||||
#include "common.h"
|
||||
#include "env.h"
|
||||
#include "event.h"
|
||||
@ -25,7 +26,6 @@
|
||||
#include "proc.h"
|
||||
#include "reader.h"
|
||||
#include "sanity.h"
|
||||
#include "tnode.h"
|
||||
#include "wutil.h" // IWYU pragma: keep
|
||||
|
||||
class io_chain_t;
|
||||
@ -328,19 +328,18 @@ completion_list_t parser_t::expand_argument_list(const wcstring &arg_list_src,
|
||||
expand_flags_t eflags,
|
||||
const operation_context_t &ctx) {
|
||||
// Parse the string as an argument list.
|
||||
parse_node_tree_t tree;
|
||||
if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, nullptr /* errors */,
|
||||
symbol_freestanding_argument_list)) {
|
||||
auto ast = ast::ast_t::parse_argument_list(arg_list_src);
|
||||
if (ast.errored()) {
|
||||
// Failed to parse. Here we expect to have reported any errors in test_args.
|
||||
return {};
|
||||
}
|
||||
|
||||
// Get the root argument list and extract arguments from it.
|
||||
completion_list_t result;
|
||||
assert(!tree.empty());
|
||||
tnode_t<grammar::freestanding_argument_list> arg_list(&tree, &tree.at(0));
|
||||
while (auto arg = arg_list.next_in_list<grammar::argument>()) {
|
||||
const wcstring arg_src = arg.get_source(arg_list_src);
|
||||
const ast::freestanding_argument_list_t *list =
|
||||
ast.top()->as<ast::freestanding_argument_list_t>();
|
||||
for (const ast::argument_t &arg : list->arguments) {
|
||||
wcstring arg_src = arg.source(arg_list_src);
|
||||
if (expand_string(arg_src, &result, eflags, ctx) == expand_result_t::error) {
|
||||
break; // failed to expand a string
|
||||
}
|
||||
@ -656,10 +655,10 @@ eval_res_t parser_t::eval(const wcstring &cmd, const io_chain_t &io,
|
||||
eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io,
|
||||
const job_group_ref_t &job_group, enum block_type_t block_type) {
|
||||
assert(block_type == block_type_t::top || block_type == block_type_t::subst);
|
||||
if (!ps->tree.empty()) {
|
||||
// Execute the first node.
|
||||
tnode_t<grammar::job_list> start{&ps->tree, &ps->tree.front()};
|
||||
return this->eval_node(ps, start, io, job_group, block_type);
|
||||
const auto *job_list = ps->ast->top()->as<ast::job_list_t>();
|
||||
if (!job_list->empty()) {
|
||||
// Execute the top job list.
|
||||
return this->eval_node(ps, *job_list, io, job_group, block_type);
|
||||
} else {
|
||||
auto status = proc_status_t::from_exit_code(get_last_status());
|
||||
bool break_expand = false;
|
||||
@ -669,11 +668,11 @@ eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, tnode_t<T> node,
|
||||
eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, const T &node,
|
||||
const io_chain_t &block_io, const job_group_ref_t &job_group,
|
||||
block_type_t block_type) {
|
||||
static_assert(
|
||||
std::is_same<T, grammar::statement>::value || std::is_same<T, grammar::job_list>::value,
|
||||
std::is_same<T, ast::statement_t>::value || std::is_same<T, ast::job_list_t>::value,
|
||||
"Unexpected node type");
|
||||
// Handle cancellation requests. If our block stack is currently empty, then we already did
|
||||
// successfully cancel (or there was nothing to cancel); clear the flag. If our block stack is
|
||||
@ -725,9 +724,9 @@ eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, tnode_t<T> node,
|
||||
}
|
||||
|
||||
// Explicit instantiations. TODO: use overloads instead?
|
||||
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, tnode_t<grammar::statement>,
|
||||
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast::statement_t &,
|
||||
const io_chain_t &, const job_group_ref_t &, block_type_t);
|
||||
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, tnode_t<grammar::job_list>,
|
||||
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast::job_list_t &,
|
||||
const io_chain_t &, const job_group_ref_t &, block_type_t);
|
||||
|
||||
void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &errors,
|
||||
|
@ -300,9 +300,9 @@ class parser_t : public std::enable_shared_from_this<parser_t> {
|
||||
block_type_t block_type = block_type_t::top);
|
||||
|
||||
/// Evaluates a node.
|
||||
/// The node type must be grammar::statement or grammar::job_list.
|
||||
/// The node type must be ast::statement_t or ast::job_list_t.
|
||||
template <typename T>
|
||||
eval_res_t eval_node(const parsed_source_ref_t &ps, tnode_t<T> node, const io_chain_t &block_io,
|
||||
eval_res_t eval_node(const parsed_source_ref_t &ps, const T &node, const io_chain_t &block_io,
|
||||
const job_group_ref_t &job_group,
|
||||
block_type_t block_type = block_type_t::top);
|
||||
|
||||
|
@ -21,7 +21,6 @@
|
||||
#include "global_safety.h"
|
||||
#include "io.h"
|
||||
#include "parse_tree.h"
|
||||
#include "tnode.h"
|
||||
#include "topic_monitor.h"
|
||||
|
||||
/// Types of processes.
|
||||
@ -44,6 +43,10 @@ enum class job_control_t {
|
||||
none,
|
||||
};
|
||||
|
||||
namespace ast {
|
||||
struct statement_t;
|
||||
}
|
||||
|
||||
/// A proc_status_t is a value type that encapsulates logic around exited vs stopped vs signaled,
|
||||
/// etc.
|
||||
class proc_status_t {
|
||||
@ -261,10 +264,10 @@ class process_t {
|
||||
/// Type of process.
|
||||
process_type_t type{process_type_t::external};
|
||||
|
||||
/// For internal block processes only, the node offset of the statement.
|
||||
/// For internal block processes only, the node of the statement.
|
||||
/// This is always either block, ifs, or switchs, never boolean or decorated.
|
||||
parsed_source_ref_t block_node_source{};
|
||||
tnode_t<grammar::statement> internal_block_node{};
|
||||
const ast::statement_t *internal_block_node{};
|
||||
|
||||
struct concrete_assignment {
|
||||
wcstring variable_name;
|
||||
|
@ -44,6 +44,7 @@
|
||||
#include <set>
|
||||
#include <stack>
|
||||
|
||||
#include "ast.h"
|
||||
#include "color.h"
|
||||
#include "common.h"
|
||||
#include "complete.h"
|
||||
@ -74,7 +75,6 @@
|
||||
#include "screen.h"
|
||||
#include "signal.h"
|
||||
#include "termsize.h"
|
||||
#include "tnode.h"
|
||||
#include "tokenizer.h"
|
||||
#include "wutil.h" // IWYU pragma: keep
|
||||
|
||||
@ -935,33 +935,29 @@ maybe_t<edit_t> reader_expand_abbreviation_in_command(const wcstring &cmdline, s
|
||||
const size_t subcmd_cursor_pos = cursor_pos - subcmd_offset;
|
||||
|
||||
// Parse this subcmd.
|
||||
parse_node_tree_t parse_tree;
|
||||
parse_tree_from_string(subcmd,
|
||||
parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens,
|
||||
&parse_tree, nullptr);
|
||||
using namespace ast;
|
||||
auto ast =
|
||||
ast_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens |
|
||||
parse_flag_leave_unterminated);
|
||||
|
||||
// Look for plain statements where the cursor is at the end of the command.
|
||||
using namespace grammar;
|
||||
tnode_t<tok_string> matching_cmd_node;
|
||||
for (const parse_node_t &node : parse_tree) {
|
||||
// Only interested in plain statements with source.
|
||||
if (node.type != symbol_plain_statement || !node.has_source()) continue;
|
||||
const ast::string_t *matching_cmd_node = nullptr;
|
||||
for (const node_t &n : ast) {
|
||||
const decorated_statement_t *stmt = n.try_as<decorated_statement_t>();
|
||||
if (!stmt) continue;
|
||||
|
||||
// Get the command node. Skip it if we can't or it has no source.
|
||||
tnode_t<plain_statement> statement(&parse_tree, &node);
|
||||
tnode_t<tok_string> cmd_node = statement.child<0>();
|
||||
// Skip if we have a decoration.
|
||||
if (stmt->opt_decoration) continue;
|
||||
|
||||
// Skip decorated statements.
|
||||
if (get_decoration(statement) != parse_statement_decoration_none) continue;
|
||||
|
||||
auto msource = cmd_node.source_range();
|
||||
// See if the command's source range contains our cursor, including at the end.
|
||||
auto msource = stmt->command.try_source_range();
|
||||
if (!msource) continue;
|
||||
|
||||
// Now see if its source range contains our cursor, including at the end.
|
||||
if (subcmd_cursor_pos >= msource->start &&
|
||||
subcmd_cursor_pos <= msource->start + msource->length) {
|
||||
// Success!
|
||||
matching_cmd_node = cmd_node;
|
||||
matching_cmd_node = &stmt->command;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -969,11 +965,12 @@ maybe_t<edit_t> reader_expand_abbreviation_in_command(const wcstring &cmdline, s
|
||||
// Now if we found a command node, expand it.
|
||||
maybe_t<edit_t> result{};
|
||||
if (matching_cmd_node) {
|
||||
const wcstring token = matching_cmd_node.get_source(subcmd);
|
||||
assert(!matching_cmd_node->unsourced && "Should not be unsourced");
|
||||
const wcstring token = matching_cmd_node->source(subcmd);
|
||||
if (auto abbreviation = expand_abbreviation(token, vars)) {
|
||||
// There was an abbreviation! Replace the token in the full command. Maintain the
|
||||
// relative position of the cursor.
|
||||
source_range_t r = *matching_cmd_node.source_range();
|
||||
source_range_t r = matching_cmd_node->source_range();
|
||||
result = edit_t(subcmd_offset + r.start, r.length, std::move(*abbreviation));
|
||||
}
|
||||
}
|
||||
|
152
src/tnode.cpp
152
src/tnode.cpp
@ -1,152 +0,0 @@
|
||||
#include "tnode.h"
|
||||
|
||||
/// Step through a node list and fetch its next entry.
///
/// Beginning at \p node_list, the children of the current list node are inspected: a child whose
/// type is \p entry_type is taken as the entry, and a child whose type equals the type of
/// \p node_list is taken as the remainder of the list. List nodes holding no entry are skipped
/// until either an entry turns up or the list runs out.
///
/// \param node_list the list node to start from
/// \param entry_type the node type of the items stored in the list
/// \param out_list_tail if not null, receives the remainder of the list (null when exhausted)
/// \return the entry that was found, or nullptr if there is none
const parse_node_t *parse_node_tree_t::next_node_in_node_list(
    const parse_node_t &node_list, parse_token_type_t entry_type,
    const parse_node_t **out_list_tail) const {
    const parse_token_type_t list_type = node_list.type;

    // Paranoia - a list whose items have the list's own type makes no sense.
    assert(list_type != entry_type);

    const parse_node_t *remaining = &node_list;
    const parse_node_t *found = nullptr;

    // Keep going while there is still list left and no item yet. Some list nodes carry no item at
    // all; e.g. job_list has a production for blank lines.
    while (remaining != nullptr && found == nullptr) {
        const parse_node_t *tail = nullptr;
        const node_offset_t count = remaining->child_count;
        for (node_offset_t idx = 0; idx < count; ++idx) {
            const parse_node_t *kid = this->get_child(*remaining, idx);
            if (kid->type == entry_type) {
                found = kid;  // the list entry
            } else if (kid->type == list_type) {
                tail = kid;  // the rest of the list
            }
        }
        // Advance, even when the tail is null.
        remaining = tail;
    }

    assert(remaining == nullptr || remaining->type == list_type);
    assert(found == nullptr || found->type == entry_type);
    if (out_list_tail != nullptr) {
        *out_list_tail = remaining;
    }
    return found;
}
|
||||
|
||||
/// \return the decoration of a plain statement: the tag of its decorated_statement parent if it
/// has one, otherwise parse_statement_decoration_none.
enum parse_statement_decoration_t get_decoration(tnode_t<grammar::plain_statement> stmt) {
    auto parent = stmt.try_get_parent<grammar::decorated_statement>();
    if (!parent) {
        return parse_statement_decoration_none;
    }
    return static_cast<parse_statement_decoration_t>(parent.tag());
}
|
||||
|
||||
/// \return the tag of a job decorator node, reinterpreted as a job decoration.
enum parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_decorator> stmt) {
    const parse_node_tag_t tag = stmt.tag();
    return static_cast<parse_job_decoration_t>(tag);
}
|
||||
|
||||
/// \return the tag of a job conjunction continuation node, reinterpreted as a job decoration.
enum parse_job_decoration_t bool_statement_type(
    tnode_t<grammar::job_conjunction_continuation> cont) {
    const parse_node_tag_t tag = cont.tag();
    return static_cast<parse_job_decoration_t>(tag);
}
|
||||
|
||||
/// Decode a redirection node.
///
/// \param redirection the redirection node; must not be missing
/// \param src the source text the node was parsed from
/// \param out_target if not null, receives the source of the redirection target (like "1" or a
///        file path), or an empty string if the target has no source
/// \return the parsed redirection if the redirection primitive (like "2>") has source, else none
maybe_t<pipe_or_redir_t> redirection_for_node(tnode_t<grammar::redirection> redirection,
                                              const wcstring &src, wcstring *out_target) {
    assert(redirection && "redirection is missing");
    const tnode_t<grammar::tok_redirection> primitive = redirection.child<0>();  // like 2>
    assert(primitive && "expected to have primitive");

    maybe_t<pipe_or_redir_t> parsed{};
    if (primitive.has_source()) {
        parsed = pipe_or_redir_t::from_string(primitive.get_source(src));
        assert(parsed.has_value() && "Failed to parse valid redirection");
        assert(!parsed->is_pipe && "Should not be a pipe");
    }

    if (out_target != nullptr) {
        const tnode_t<grammar::tok_string> target_node =
            redirection.child<1>();  // like 1 or file path
        if (target_node.has_source()) {
            *out_target = target_node.get_source(src);
        } else {
            *out_target = wcstring();
        }
    }
    return parsed;
}
|
||||
|
||||
std::vector<tnode_t<grammar::comment>> parse_node_tree_t::comment_nodes_for_node(
|
||||
const parse_node_t &parent) const {
|
||||
std::vector<tnode_t<grammar::comment>> result;
|
||||
if (parent.has_comments()) {
|
||||
// Walk all our nodes, looking for comment nodes that have the given node as a parent.
|
||||
for (size_t i = 0; i < this->size(); i++) {
|
||||
const parse_node_t &potential_comment = this->at(i);
|
||||
if (potential_comment.type == parse_special_type_comment &&
|
||||
this->get_parent(potential_comment) == &parent) {
|
||||
result.emplace_back(this, &potential_comment);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// \return the variable_assignment descendants of \p list, collecting no more than \p max.
variable_assignment_node_list_t get_variable_assignment_nodes(
    tnode_t<grammar::variable_assignments> list, size_t max) {
    variable_assignment_node_list_t assignments =
        list.descendants<grammar::variable_assignment>(max);
    return assignments;
}
|
||||
|
||||
/// \return the source text of the command (first child) of a plain statement, or none if that
/// child is missing or has no source.
maybe_t<wcstring> command_for_plain_statement(tnode_t<grammar::plain_statement> stmt,
                                              const wcstring &src) {
    const tnode_t<grammar::tok_string> command = stmt.child<0>();
    if (!command || !command.has_source()) {
        return none();
    }
    return command.get_source(src);
}
|
||||
|
||||
/// \return the argument descendants of an argument_list, collecting no more than \p max.
arguments_node_list_t get_argument_nodes(tnode_t<grammar::argument_list> list, size_t max) {
    arguments_node_list_t arguments = list.descendants<grammar::argument>(max);
    return arguments;
}
|
||||
|
||||
/// \return the argument descendants of an arguments_or_redirections_list, collecting no more
/// than \p max.
arguments_node_list_t get_argument_nodes(tnode_t<grammar::arguments_or_redirections_list> list,
                                         size_t max) {
    arguments_node_list_t arguments = list.descendants<grammar::argument>(max);
    return arguments;
}
|
||||
|
||||
/// \return whether the optional_background child (index 4) of a job is tagged parse_background.
bool job_node_is_background(tnode_t<grammar::job> job) {
    return job.child<4>().tag() == parse_background;
}
|
||||
|
||||
/// \return the job decoration that applies to a job conjunction, taken from the job_decorator
/// child of its parent.
parse_job_decoration_t get_decorator(tnode_t<grammar::job_conjunction> conj) {
    using namespace grammar;
    // A conjunction has two possible parents: job_list and andor_job_list.
    if (auto in_job_list = conj.try_get_parent<job_list>()) {
        return bool_statement_type(in_job_list.require_get_child<job_decorator, 0>());
    }
    if (auto in_andor_list = conj.try_get_parent<andor_job_list>()) {
        return bool_statement_type(in_andor_list.require_get_child<job_decorator, 0>());
    }
    // Neither parent matched: a missing decorator has tag 0 (none).
    return bool_statement_type(tnode_t<job_decorator>{});
}
|
||||
|
||||
/// Classify where a statement sits within a pipeline.
///
/// \return none for a missing statement or one that is not piped, subsequent if the statement
/// belongs to a job continuation, and first if it begins a job with a non-empty continuation.
pipeline_position_t get_pipeline_position(tnode_t<grammar::statement> st) {
    using namespace grammar;
    if (!st) return pipeline_position_t::none;

    // A statement inside a job continuation is definitely in a pipeline.
    const bool in_continuation = static_cast<bool>(st.try_get_parent<job_continuation>());
    if (in_continuation) return pipeline_position_t::subsequent;

    // Otherwise we may be the start of a job. We are in a pipeline only if that job's
    // continuation holds a statement.
    tnode_t<job_continuation> continuation = st.try_get_parent<job>().child<3>();
    const bool has_next_statement =
        static_cast<bool>(continuation.try_get_child<statement, 3>());
    return has_next_statement ? pipeline_position_t::first : pipeline_position_t::none;
}
|
278
src/tnode.h
278
src/tnode.h
@ -1,278 +0,0 @@
|
||||
// Type-safe access to fish parse trees.
|
||||
#ifndef FISH_TNODE_H
|
||||
#define FISH_TNODE_H
|
||||
|
||||
#include "parse_grammar.h"
|
||||
#include "parse_tree.h"
|
||||
|
||||
/// \return whether grammar element \p Parent permits a child of type \p Child at position
/// \p Index. The answer is delegated to Parent::type_possible.
template <typename Parent, typename Child, size_t Index>
constexpr bool child_type_possible_at_index() {
    return Parent::template type_possible<Child, Index>();
}
|
||||
|
||||
// Check if a child type is possible for a parent type at any index.
|
||||
// The number of cases here should match MAX_PRODUCTION_LENGTH.
|
||||
template <typename Parent, typename Child>
|
||||
constexpr bool child_type_possible() {
|
||||
return child_type_possible_at_index<Parent, Child, 0>() ||
|
||||
child_type_possible_at_index<Parent, Child, 1>() ||
|
||||
child_type_possible_at_index<Parent, Child, 2>() ||
|
||||
child_type_possible_at_index<Parent, Child, 3>() ||
|
||||
child_type_possible_at_index<Parent, Child, 4>() ||
|
||||
child_type_possible_at_index<Parent, Child, 5>();
|
||||
}
|
||||
|
||||
/// tnode_t ("typed node") is type-safe access to a parse_tree. A tnode_t holds both a pointer to a
|
||||
/// parse_node_tree_t and a pointer to a parse_node_t. (Note that the parse_node_tree_t is unowned;
|
||||
/// the caller must ensure that the tnode does not outlive the tree.)
|
||||
///
|
||||
/// tnode_t is a lightweight value-type class. It ought to be passed by value. A tnode_t may also be
|
||||
/// "missing", associated with a null parse_node_t pointer. operator bool() may be used to check if
|
||||
/// a tnode_t is missing.
|
||||
///
|
||||
/// A tnode_t is parametrized by a grammar element, and uses the fish grammar to statically
|
||||
/// type-check accesses to children and parents. Any particular tnode either corresponds to a
|
||||
/// sequence (a single child) or an alternation (multiple possible children). A sequence may have
|
||||
/// its children accessed directly via child(), which is templated on the index (and returns a
|
||||
/// tnode of the proper type). Alternations may be disambiguated via try_get_child(), which returns
|
||||
/// an empty child if the child has the wrong type, or require_get_child() which aborts if the child
|
||||
/// has the wrong type.
|
||||
template <typename Type>
|
||||
class tnode_t {
|
||||
/// The tree containing our node.
|
||||
const parse_node_tree_t *tree = nullptr;
|
||||
|
||||
/// The node in the tree
|
||||
const parse_node_t *nodeptr = nullptr;
|
||||
|
||||
// Helper to get a child type at a given index.
|
||||
template <class Element, uint32_t Index>
|
||||
using child_at = typename std::tuple_element<Index, typename Element::type_tuple>::type;
|
||||
|
||||
public:
|
||||
tnode_t() = default;
|
||||
|
||||
tnode_t(const parse_node_tree_t *t, const parse_node_t *n) : tree(t), nodeptr(n) {
|
||||
assert(t && "tree cannot be null in this constructor");
|
||||
assert((!n || n->type == Type::token) && "node has wrong type");
|
||||
}
|
||||
|
||||
// Try to create a tnode from the given tree and parse node.
|
||||
// Returns an empty node if the parse node is null, or has the wrong type.
|
||||
static tnode_t try_create(const parse_node_tree_t *tree, const parse_node_t *node) {
|
||||
assert(tree && "tree cannot be null");
|
||||
return tnode_t(tree, node && node->type == Type::token ? node : nullptr);
|
||||
}
|
||||
|
||||
/// Temporary conversion to parse_node_t to assist in migration.
|
||||
/* implicit */ operator const parse_node_t &() const {
|
||||
assert(nodeptr && "Empty tnode_t");
|
||||
return *nodeptr;
|
||||
}
|
||||
|
||||
/* implicit */ operator const parse_node_t *() const { return nodeptr; }
|
||||
|
||||
/// \return the underlying (type-erased) node.
|
||||
const parse_node_t *node() const { return nodeptr; }
|
||||
|
||||
/// Check whether we're populated.
|
||||
explicit operator bool() const { return nodeptr != nullptr; }
|
||||
|
||||
bool operator==(const tnode_t &rhs) const { return tree == rhs.tree && nodeptr == rhs.nodeptr; }
|
||||
|
||||
bool operator!=(const tnode_t &rhs) const { return !(*this == rhs); }
|
||||
|
||||
// Helper to return whether the given tree is the same as ours.
|
||||
bool matches_node_tree(const parse_node_tree_t &t) const { return &t == tree; }
|
||||
|
||||
const parse_node_tree_t *get_tree() const { return tree; }
|
||||
|
||||
bool has_source() const { return nodeptr && nodeptr->has_source(); }
|
||||
|
||||
// return the tag, or 0 if missing.
|
||||
parse_node_tag_t tag() const { return nodeptr ? nodeptr->tag : 0; }
|
||||
|
||||
// return the number of children, or 0 if missing.
|
||||
uint8_t child_count() const { return nodeptr ? nodeptr->child_count : 0; }
|
||||
|
||||
maybe_t<source_range_t> source_range() const {
|
||||
if (!nodeptr || nodeptr->source_start == NODE_OFFSET_INVALID) return none();
|
||||
return source_range_t{nodeptr->source_start, nodeptr->source_length};
|
||||
}
|
||||
|
||||
wcstring get_source(const wcstring &str) const {
|
||||
if (!nodeptr) {
|
||||
return L"";
|
||||
}
|
||||
return nodeptr->get_source(str);
|
||||
}
|
||||
|
||||
bool location_in_or_at_end_of_source_range(size_t loc) const {
|
||||
return nodeptr && nodeptr->location_in_or_at_end_of_source_range(loc);
|
||||
}
|
||||
|
||||
static tnode_t find_node_matching_source_location(const parse_node_tree_t *tree,
|
||||
size_t source_loc,
|
||||
const parse_node_t *parent) {
|
||||
assert(tree && "null tree");
|
||||
return tnode_t{tree,
|
||||
tree->find_node_matching_source_location(Type::token, source_loc, parent)};
|
||||
}
|
||||
|
||||
/// Type-safe access to a child at the given index.
|
||||
template <node_offset_t Index>
|
||||
tnode_t<child_at<Type, Index>> child() const {
|
||||
using child_type = child_at<Type, Index>;
|
||||
const parse_node_t *child = nullptr;
|
||||
if (nodeptr) child = tree->get_child(*nodeptr, Index, child_type::token);
|
||||
return tnode_t<child_type>{tree, child};
|
||||
}
|
||||
|
||||
/// Return a parse_node_t for a child.
|
||||
/// This is used to disambiguate alts.
|
||||
template <node_offset_t Index>
|
||||
const parse_node_t &get_child_node() const {
|
||||
assert(nodeptr && "receiver is missing in get_child_node");
|
||||
return *tree->get_child(*nodeptr, Index);
|
||||
}
|
||||
|
||||
/// If the child at the given index has the given type, return it; otherwise return an empty
|
||||
/// child. Note this will refuse to compile if the child type is not possible.
|
||||
/// This is used for e.g. alternations.
|
||||
template <class ChildType, node_offset_t Index>
|
||||
tnode_t<ChildType> try_get_child() const {
|
||||
static_assert(child_type_possible_at_index<Type, ChildType, Index>(),
|
||||
"Cannot contain a child of this type");
|
||||
const parse_node_t *child = nullptr;
|
||||
if (nodeptr) child = tree->get_child(*nodeptr, Index);
|
||||
if (child && child->type == ChildType::token) return {tree, child};
|
||||
return {tree, nullptr};
|
||||
}
|
||||
|
||||
/// assert that this is not empty and that the child at index Index has the given type, then
|
||||
/// return that child. Note this will refuse to compile if the child type is not possible.
|
||||
template <class ChildType, node_offset_t Index>
|
||||
tnode_t<ChildType> require_get_child() const {
|
||||
assert(nodeptr && "receiver is missing in require_get_child()");
|
||||
auto result = try_get_child<ChildType, Index>();
|
||||
assert(result && "require_get_child(): wrong child type");
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Find the first direct child of the given node of the given type. asserts on failure.
|
||||
template <class ChildType>
|
||||
tnode_t<ChildType> find_child() const {
|
||||
static_assert(child_type_possible<Type, ChildType>(), "Cannot have that type as a child");
|
||||
assert(nodeptr && "receiver is missing in find_child()");
|
||||
tnode_t<ChildType> result{tree, &tree->find_child(*nodeptr, ChildType::token)};
|
||||
assert(result && "cannot find child");
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Type-safe access to a node's parent.
|
||||
/// If the parent exists and has type ParentType, return it.
|
||||
/// Otherwise return a missing tnode.
|
||||
template <class ParentType>
|
||||
tnode_t<ParentType> try_get_parent() const {
|
||||
static_assert(child_type_possible<ParentType, Type>(), "Parent cannot have us as a child");
|
||||
if (!nodeptr) return {};
|
||||
return {tree, tree->get_parent(*nodeptr, ParentType::token)};
|
||||
}
|
||||
|
||||
/// Finds all descendants (up to max_count) under this node of the given type.
|
||||
template <typename DescendantType>
|
||||
std::vector<tnode_t<DescendantType>> descendants(size_t max_count = -1) const {
|
||||
if (!nodeptr) return {};
|
||||
std::vector<tnode_t<DescendantType>> result;
|
||||
std::vector<const parse_node_t *> stack{nodeptr};
|
||||
while (!stack.empty() && result.size() < max_count) {
|
||||
const parse_node_t *node = stack.back();
|
||||
if (node->type == DescendantType::token) result.emplace_back(tree, node);
|
||||
stack.pop_back();
|
||||
node_offset_t index = node->child_count;
|
||||
while (index--) {
|
||||
stack.push_back(tree->get_child(*node, index));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Given that we are a list type, \return the next node of some Item in some node list,
|
||||
/// adjusting 'this' to be the remainder of the list.
|
||||
/// Returns an empty item on failure.
|
||||
template <class ItemType>
|
||||
tnode_t<ItemType> next_in_list() {
|
||||
// We require that we can contain ourselves, and ItemType as well.
|
||||
static_assert(child_type_possible<Type, Type>(), "Is not a list");
|
||||
static_assert(child_type_possible<Type, ItemType>(), "Is not a list of that type");
|
||||
if (!nodeptr) return {tree, nullptr};
|
||||
const parse_node_t *next =
|
||||
tree->next_node_in_node_list(*nodeptr, ItemType::token, &nodeptr);
|
||||
return {tree, next};
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Type>
|
||||
tnode_t<Type> parse_node_tree_t::find_child(const parse_node_t &parent) const {
|
||||
return tnode_t<Type>(this, &this->find_child(parent, Type::token));
|
||||
}
|
||||
|
||||
/// Return the variable_assignment nodes under a variable_assignments node.
|
||||
/// Do not return more than max.
|
||||
using variable_assignment_node_list_t = std::vector<tnode_t<grammar::variable_assignment>>;
|
||||
variable_assignment_node_list_t get_variable_assignment_nodes(
|
||||
tnode_t<grammar::variable_assignments>, size_t max = -1);
|
||||
|
||||
/// Given a plain statement, get the command from the child node. Returns the command string on
|
||||
/// success, none on failure.
|
||||
maybe_t<wcstring> command_for_plain_statement(tnode_t<grammar::plain_statement> stmt,
|
||||
const wcstring &src);
|
||||
|
||||
/// Return the decoration for a plain statement.
|
||||
parse_statement_decoration_t get_decoration(tnode_t<grammar::plain_statement> stmt);
|
||||
|
||||
/// Return the type for a boolean statement.
|
||||
parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_decorator> stmt);
|
||||
|
||||
parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_conjunction_continuation> cont);
|
||||
|
||||
/// Given a redirection node, get the parsed redirection and target of the redirection (file path,
|
||||
/// or fd).
|
||||
maybe_t<pipe_or_redir_t> redirection_for_node(tnode_t<grammar::redirection> redirection,
|
||||
const wcstring &src, wcstring *out_target);
|
||||
|
||||
/// Return the arguments under an argument_list or arguments_or_redirections_list.
|
||||
/// Do not return more than max.
|
||||
using arguments_node_list_t = std::vector<tnode_t<grammar::argument>>;
|
||||
arguments_node_list_t get_argument_nodes(tnode_t<grammar::argument_list>, size_t max = -1);
|
||||
arguments_node_list_t get_argument_nodes(tnode_t<grammar::arguments_or_redirections_list>,
|
||||
size_t max = -1);
|
||||
|
||||
/// Return whether the given job is background because it has a & symbol.
|
||||
bool job_node_is_background(tnode_t<grammar::job>);
|
||||
|
||||
/// If the conjunction has a decorator (and/or), return it; otherwise return none. This only
|
||||
/// considers the leading conjunction, e.g. in `and true || false` only the 'true' conjunction will
|
||||
/// return 'and'.
|
||||
parse_job_decoration_t get_decorator(tnode_t<grammar::job_conjunction>);
|
||||
|
||||
/// Return whether the statement is part of a pipeline.
|
||||
/// This doesn't detect e.g. pipelines involving our parent's block statements.
|
||||
enum class pipeline_position_t {
    /// Not part of a pipeline.
    none,
    /// First command in a pipeline.
    first,
    /// Second or further command in a pipeline.
    subsequent
};
|
||||
pipeline_position_t get_pipeline_position(tnode_t<grammar::statement> st);
|
||||
|
||||
/// Check whether an argument_list is a root list.
|
||||
inline bool argument_list_is_root(tnode_t<grammar::argument_list> list) {
|
||||
return !list.try_get_parent<grammar::argument_list>();
|
||||
}
|
||||
|
||||
/// Report whether an arguments_or_redirections_list is a root list, i.e. it has no parent that
/// is itself an arguments_or_redirections_list.
inline bool argument_list_is_root(tnode_t<grammar::arguments_or_redirections_list> list) {
    const auto enclosing_list = list.try_get_parent<grammar::arguments_or_redirections_list>();
    return !enclosing_list;
}
|
||||
|
||||
#endif
|
@ -49,7 +49,7 @@ end' | $fish_indent
|
||||
#CHECK: c
|
||||
#CHECK: echo thing
|
||||
#CHECK: end
|
||||
|
||||
|
||||
echo 'echo foo |
|
||||
echo banana' | $fish_indent
|
||||
#CHECK: echo foo |
|
||||
@ -57,12 +57,11 @@ echo banana' | $fish_indent
|
||||
|
||||
echo 'echo foo \\
|
||||
;' | $fish_indent
|
||||
#CHECK: echo foo \
|
||||
#CHECK:
|
||||
#CHECK: echo foo
|
||||
|
||||
echo 'echo foo \\
|
||||
' | $fish_indent
|
||||
#CHECK: echo foo \
|
||||
#CHECK: echo foo
|
||||
|
||||
echo -n '
|
||||
begin
|
||||
@ -201,9 +200,9 @@ end; echo alpha "
|
||||
#CHECK: begin
|
||||
#CHECK: {{ }}echo hi
|
||||
#CHECK: else
|
||||
#CHECK:
|
||||
#CHECK: {{^}}echo bye
|
||||
#CHECK: end
|
||||
#CHECK: echo alpha "
|
||||
#CHECK: end; echo alpha "
|
||||
|
||||
# issue 1665
|
||||
echo -n '
|
||||
@ -285,7 +284,7 @@ echo bye
|
||||
#CHECK:
|
||||
#CHECK: echo hi |
|
||||
#CHECK:
|
||||
#CHECK: echo bye
|
||||
#CHECK: {{ }}echo bye
|
||||
|
||||
echo 'a;;;;;;' | $fish_indent
|
||||
#CHECK: a
|
||||
|
Loading…
x
Reference in New Issue
Block a user