Merge branch 'parser_cleanup_3'

This merges a sequence of changes which eliminates the "parse tree"
construct and replaces it with a new abstract syntax tree implementation.
This is simpler and easier to understand/use.
This commit is contained in:
ridiculousfish 2020-07-04 15:06:41 -07:00
commit 44944146e2
33 changed files with 4242 additions and 4072 deletions

View File

@@ -114,14 +114,14 @@ set(FISH_SRCS
src/fallback.cpp src/fish_version.cpp src/function.cpp src/highlight.cpp
src/history.cpp src/history_file.cpp src/input.cpp src/input_common.cpp src/intern.cpp
src/io.cpp src/iothread.cpp src/kill.cpp src/output.cpp src/pager.cpp
src/parse_execution.cpp src/parse_productions.cpp src/parse_tree.cpp
src/parse_execution.cpp src/parse_tree.cpp
src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp
src/postfork.cpp src/proc.cpp src/reader.cpp src/sanity.cpp src/screen.cpp
src/signal.cpp src/tinyexpr.cpp src/tnode.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp
src/signal.cpp src/tinyexpr.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp
src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp src/wutil.cpp
src/future_feature_flags.cpp src/redirection.cpp src/topic_monitor.cpp
src/flog.cpp src/trace.cpp src/timer.cpp src/null_terminated_array.cpp
src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp
src/operation_context.cpp src/fd_monitor.cpp src/termsize.cpp src/ast.cpp
)
# Header files are just globbed.

1206
src/ast.cpp Normal file

File diff suppressed because it is too large Load Diff

1018
src/ast.h Normal file

File diff suppressed because it is too large Load Diff

60
src/ast_node_types.inc Normal file
View File

@@ -0,0 +1,60 @@
// X-macro inventory of every AST node type.
// Define ELEM and optionally ELEMLIST before including this file.
// ELEM is for ordinary nodes.
// ELEMLIST(x, y) marks list nodes and the type they contain.
#ifndef ELEMLIST
// If the includer did not distinguish lists, treat a list node like an
// ordinary node (drop the contained-type argument).
#define ELEMLIST(x, y) ELEM(x)
#endif
// Lexical / leaf base types.
ELEM(keyword_base)
ELEM(token_base)
ELEM(maybe_newlines)
// Arguments, redirections, and variable assignments (and their lists).
ELEM(argument)
ELEMLIST(argument_list, argument)
ELEM(redirection)
ELEM(argument_or_redirection)
ELEMLIST(argument_or_redirection_list, argument_or_redirection)
ELEM(variable_assignment)
ELEMLIST(variable_assignment_list, variable_assignment)
// Jobs and the connective structure between them.
ELEM(job)
ELEM(job_conjunction)
// For historical reasons, a job list is a list of job *conjunctions*. This should be fixed.
ELEMLIST(job_list, job_conjunction)
ELEM(job_conjunction_continuation)
ELEMLIST(job_conjunction_continuation_list, job_conjunction_continuation)
ELEM(job_continuation)
ELEMLIST(job_continuation_list, job_continuation)
ELEM(andor_job)
ELEMLIST(andor_job_list, andor_job)
// Statements, including block constructs and their headers/clauses.
ELEM(statement)
ELEM(not_statement)
ELEM(block_statement)
ELEM(for_header)
ELEM(while_header)
ELEM(function_header)
ELEM(begin_header)
ELEM(if_statement)
ELEM(if_clause)
ELEM(elseif_clause)
ELEMLIST(elseif_clause_list, elseif_clause)
ELEM(else_clause)
ELEM(switch_statement)
ELEM(case_item)
ELEMLIST(case_item_list, case_item)
ELEM(decorated_statement)
ELEM(freestanding_argument_list)
// Clean up both macros so this file can be re-included with fresh definitions.
#undef ELEM
#undef ELEMLIST

View File

@@ -200,8 +200,7 @@ static int validate_function_name(int argc, const wchar_t *const *argv, wcstring
/// Define a function. Calls into `function.cpp` to perform the heavy lifting of defining a
/// function.
int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args,
const parsed_source_ref_t &source,
tnode_t<grammar::block_statement> func_node) {
const parsed_source_ref_t &source, const ast::block_statement_t &func_node) {
assert(source && "Missing source in builtin_function");
// The wgetopt function expects 'function' as the first argument. Make a new wcstring_list with
// that property. This is needed because this builtin has a different signature than the other
@@ -252,7 +251,7 @@ int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_lis
props->shadow_scope = opts.shadow_scope;
props->named_arguments = std::move(opts.named_arguments);
props->parsed_source = source;
props->func_node = func_node;
props->func_node = &func_node;
// Populate inherit_vars.
for (const wcstring &name : opts.inherit_vars) {

View File

@@ -8,7 +8,10 @@
class parser_t;
struct io_streams_t;
namespace ast {
struct block_statement_t;
}
int builtin_function(parser_t &parser, io_streams_t &streams, const wcstring_list_t &c_args,
const parsed_source_ref_t &source,
tnode_t<grammar::block_statement> func_node);
const parsed_source_ref_t &source, const ast::block_statement_t &func_node);
#endif

View File

@@ -45,7 +45,6 @@
#include "path.h"
#include "proc.h"
#include "reader.h"
#include "tnode.h"
#include "util.h"
#include "wcstringutil.h"
#include "wildcard.h"

View File

@@ -623,10 +623,10 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job,
if (p->type == process_type_t::block_node) {
const parsed_source_ref_t &source = p->block_node_source;
tnode_t<grammar::statement> node = p->internal_block_node;
const ast::statement_t *node = p->internal_block_node;
assert(source && node && "Process is missing node info");
return [=](parser_t &parser) {
return parser.eval_node(source, node, io_chain, job_group).status;
return parser.eval_node(source, *node, io_chain, job_group).status;
};
} else {
assert(p->type == process_type_t::function);
@@ -638,7 +638,7 @@ static proc_performer_t get_performer_for_process(process_t *p, job_t *job,
auto argv = move_to_sharedptr(p->get_argv_array().to_list());
return [=](parser_t &parser) {
// Pull out the job list from the function.
tnode_t<grammar::job_list> body = props->func_node.child<1>();
const ast::job_list_t &body = props->func_node->jobs;
const block_t *fb = function_prepare_environment(parser, *argv, *props);
auto res = parser.eval_node(props->parsed_source, body, io_chain, job_group);
function_restore_environment(parser, fb);

View File

@@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include <tuple>
#include <vector>
#include "ast.h"
#include "color.h"
#include "common.h"
#include "env.h"
@@ -43,16 +44,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include "operation_context.h"
#include "output.h"
#include "parse_constants.h"
#include "parse_util.h"
#include "print_help.h"
#include "tnode.h"
#include "wutil.h" // IWYU pragma: keep
// The number of spaces per indent isn't supposed to be configurable.
// See discussion at https://github.com/fish-shell/fish-shell/pull/6790
#define SPACES_PER_INDENT 4
// An indent_t represents an abstract indent depth. 2 means we are in a doubly-nested block, etc.
using indent_t = unsigned int;
static bool dump_parse_tree = false;
static int ret = 0;
@@ -83,232 +82,539 @@ static wcstring read_file(FILE *f) {
return result;
}
struct prettifier_t {
namespace {
/// From C++14.
template <bool B, typename T = void>
using enable_if_t = typename std::enable_if<B, T>::type;
/// \return the number of escaping backslashes before a character.
/// \p idx may be "one past the end."
size_t count_preceding_backslashes(const wcstring &text, size_t idx) {
assert(idx <= text.size() && "Out of bounds");
size_t backslashes = 0;
while (backslashes < idx && text.at(idx - backslashes - 1) == L'\\') {
backslashes++;
}
return backslashes;
}
/// \return whether a character at a given index is escaped.
/// A character is escaped if it has an odd number of backslashes.
bool char_is_escaped(const wcstring &text, size_t idx) {
return count_preceding_backslashes(text, idx) % 2 == 1;
}
using namespace ast;
struct pretty_printer_t {
// Note: this got somewhat more complicated after introducing the new AST, because that AST no
// longer encodes detailed lexical information (e.g. every newline). This feels more complex
// than necessary and would probably benefit from a more layered approach where we identify
// certain runs, weight line breaks, have a cost model, etc.
pretty_printer_t(const wcstring &src, bool do_indent)
: source(src),
indents(do_indent ? parse_util_compute_indents(source) : std::vector<int>(src.size(), 0)),
ast(ast_t::parse(src, parse_flags())),
do_indent(do_indent),
gaps(compute_gaps()),
preferred_semi_locations(compute_preferred_semi_locations()) {
assert(indents.size() == source.size() && "indents and source should be same length");
}
// Original source.
const wcstring &source;
// The indents of our string.
// This has the same length as 'source' and describes the indentation level.
const std::vector<int> indents;
// The parsed ast.
const ast_t ast;
// The prettifier output.
wcstring output;
// The indent of the source range which we are currently emitting.
int current_indent{0};
// Whether to indent, or just insert spaces.
const bool do_indent;
// Whether we are at the beginning of a new line.
bool has_new_line = true;
// Whether the next gap text should hide the first newline.
bool gap_text_mask_newline{false};
// Whether the last token was a semicolon.
bool last_was_semicolon = false;
// The "gaps": a sorted set of ranges between tokens.
// These contain whitespace, comments, semicolons, and other lexical elements which are not
// present in the ast.
const std::vector<source_range_t> gaps;
// Whether we need to append a continuation new line before continuing.
bool needs_continuation_newline = false;
// The sorted set of source offsets of nl_semi_t which should be set as semis, not newlines.
// This is computed ahead of time for convenience.
const std::vector<uint32_t> preferred_semi_locations;
// Additional indentation due to line continuation (escaped newline)
uint32_t line_continuation_indent = 0;
// Flags we support.
using gap_flags_t = uint32_t;
enum {
default_flags = 0,
prettifier_t(const wcstring &source, bool do_indent) : source(source), do_indent(do_indent) {}
// Whether to allow line splitting via escaped newlines.
// For example, in argument lists:
//
// echo a \
// b
//
// If this is not set, then split-lines will be joined.
allow_escaped_newlines = 1 << 0,
void prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx, indent_t node_indent,
parse_token_type_t parent_type);
// Whether to require a space before this token.
// This is used when emitting semis:
// echo a; echo b;
// No space required between 'a' and ';', or 'b' and ';'.
skip_space = 1 << 1,
};
void maybe_prepend_escaped_newline(const parse_node_t &node) {
if (node.has_preceding_escaped_newline()) {
output.append(L" \\");
append_newline(true);
// \return gap text flags for the gap text that comes *before* a given node type.
static gap_flags_t gap_text_flags_before_node(const node_t &node) {
gap_flags_t result = default_flags;
switch (node.type) {
// Allow escaped newlines in argument and redirection lists.
case type_t::argument:
case type_t::redirection:
result |= allow_escaped_newlines;
break;
case type_t::token_base:
// Allow escaped newlines before && and ||, and also pipes.
switch (node.as<token_base_t>()->type) {
case parse_token_type_andand:
case parse_token_type_oror:
case parse_token_type_pipe:
result |= allow_escaped_newlines;
break;
default:
break;
}
break;
default:
break;
}
return result;
}
// \return whether we are at the start of a new line.
bool at_line_start() const { return output.empty() || output.back() == L'\n'; }
// \return whether we have a space before the output.
// This ignores escaped spaces and escaped newlines.
bool has_preceding_space() const {
long idx = static_cast<long>(output.size()) - 1;
// Skip escaped newlines.
// This is historical. Example:
//
// cmd1 \
// | cmd2
//
// we want the pipe to "see" the space after cmd1.
// TODO: this is too tricky, we should factor this better.
while (idx >= 0 && output.at(idx) == L'\n') {
size_t backslashes = count_preceding_backslashes(source, idx);
if (backslashes % 2 == 0) {
// Not escaped.
return false;
}
idx -= (1 + backslashes);
}
return idx >= 0 && output.at(idx) == L' ' && !char_is_escaped(output, idx);
}
// Entry point. Prettify our source code and return it.
wcstring prettify() {
output = wcstring{};
node_visitor(*this).accept(ast.top());
// Trailing gap text.
emit_gap_text_before(source_range_t{(uint32_t)source.size(), 0}, default_flags);
// Replace all trailing newlines with just a single one.
while (!output.empty() && at_line_start()) {
output.pop_back();
}
emit_newline();
wcstring result = std::move(output);
return result;
}
// \return a substring of source.
wcstring substr(source_range_t r) const { return source.substr(r.start, r.length); }
// Return the gap ranges from our ast.
std::vector<source_range_t> compute_gaps() const {
auto range_compare = [](source_range_t r1, source_range_t r2) {
if (r1.start != r2.start) return r1.start < r2.start;
return r1.length < r2.length;
};
// Collect the token ranges into a list.
std::vector<source_range_t> tok_ranges;
for (const node_t &node : ast) {
if (node.category == category_t::leaf) {
auto r = node.source_range();
if (r.length > 0) tok_ranges.push_back(r);
}
}
// Place a zero length range at end to aid in our inverting.
tok_ranges.push_back(source_range_t{(uint32_t)source.size(), 0});
// Our tokens should be sorted.
assert(std::is_sorted(tok_ranges.begin(), tok_ranges.end(), range_compare));
// For each range, add a gap range between the previous range and this range.
std::vector<source_range_t> gaps;
uint32_t prev_end = 0;
for (source_range_t tok_range : tok_ranges) {
assert(tok_range.start >= prev_end &&
"Token range should not overlap or be out of order");
if (tok_range.start >= prev_end) {
gaps.push_back(source_range_t{prev_end, tok_range.start - prev_end});
}
prev_end = tok_range.start + tok_range.length;
}
return gaps;
}
// Return sorted list of semi-preferring semi_nl nodes.
std::vector<uint32_t> compute_preferred_semi_locations() const {
std::vector<uint32_t> result;
auto mark_as_semi = [&result](const optional_t<semi_nl_t> &n) {
if (n && n->has_source()) result.push_back(n->range.start);
};
// andor_job_lists get semis if they are short enough.
for (const auto &node : ast) {
// See if we have a condition and an andor_job_list.
const optional_t<semi_nl_t> *condition = nullptr;
const andor_job_list_t *andors = nullptr;
if (const auto *ifc = node.try_as<if_clause_t>()) {
condition = &ifc->condition.semi_nl;
andors = &ifc->andor_tail;
} else if (const auto *wc = node.try_as<while_header_t>()) {
condition = &wc->condition.semi_nl;
andors = &wc->andor_tail;
}
// This describes the heuristic of when to place and_or job lists on separate lines.
// That is, do we want:
// if true; and false
// or do we want:
// if true
// and false
// Lists with two or fewer get semis.
// Note the effective count is then three, because this list does not include the main
// condition.
if (andors && andors->count() > 0 && andors->count() <= 2) {
if (condition) mark_as_semi(*condition);
// Mark all but last of the andor list.
for (uint32_t i = 0; i + 1 < andors->count(); i++) {
mark_as_semi(andors->at(i)->job.semi_nl);
}
}
}
// `x ; and y` gets semis if it has them already, and they are on the same line.
for (const auto &node : ast) {
if (const auto *job_list = node.try_as<job_list_t>()) {
const semi_nl_t *prev_job_semi_nl = nullptr;
for (const job_conjunction_t &job : *job_list) {
// Set up prev_job_semi_nl for the next iteration to make control flow easier.
const semi_nl_t *prev = prev_job_semi_nl;
prev_job_semi_nl = job.semi_nl.contents.get();
// Is this an 'and' or 'or' job?
if (!job.decorator) continue;
// Now see if we want to mark 'prev' as allowing a semi.
// Did we have a previous semi_nl which was a newline?
if (!prev || substr(prev->range) != L";") continue;
// Is there a newline between them?
assert(prev->range.start <= job.decorator->range.start &&
"Ranges out of order");
auto start = source.begin() + prev->range.start;
auto end = source.begin() + job.decorator->range.end();
if (std::find(start, end, L'\n') == end) {
// We're going to allow the previous semi_nl to be a semi.
result.push_back(prev->range.start);
}
}
}
}
std::sort(result.begin(), result.end());
return result;
}
// Emit a space or indent as necessary, depending on the previous output.
void emit_space_or_indent(gap_flags_t flags = default_flags) {
if (at_line_start()) {
output.append(SPACES_PER_INDENT * current_indent, L' ');
} else if (!(flags & skip_space) && !has_preceding_space()) {
output.append(1, L' ');
}
}
void append_newline(bool is_continuation = false) {
output.push_back('\n');
has_new_line = true;
needs_continuation_newline = false;
line_continuation_indent = is_continuation ? 1 : 0;
// Emit "gap text:" newlines and comments from the original source.
// Gap text may be a few things:
//
// 1. Just a space is common. We will trim the spaces to be empty.
//
// Here the gap text is the comment, followed by the newline:
//
// echo abc # arg
// echo def
//
// 2. It may also be an escaped newline:
// Here the gap text is a space, backslash, newline, space.
//
// echo \
// hi
//
// 3. Lastly it may be an error, if there was an error token. Here the gap text is the pipe:
//
// begin | stuff
//
// We do not handle errors here - instead our caller does.
void emit_gap_text(const wcstring &gap_text, gap_flags_t flags) {
// Common case: if we are only spaces, do nothing.
if (gap_text.find_first_not_of(L' ') == wcstring::npos) return;
// Look to see if there is an escaped newline.
// Emit it if either we allow it, or it comes before the first comment.
// Note we do not have to be concerned with escaped backslashes or escaped #s. This is gap
// text - we already know it has no semantic significance.
size_t escaped_nl = gap_text.find(L"\\\n");
if (escaped_nl != wcstring::npos) {
size_t comment_idx = gap_text.find(L'#');
if ((flags & allow_escaped_newlines) ||
(comment_idx != wcstring::npos && escaped_nl < comment_idx)) {
// Emit a space before the escaped newline.
if (!at_line_start() && !has_preceding_space()) {
output.append(L" ");
}
output.append(L"\\\n");
}
}
// It seems somewhat ambiguous whether we always get a newline after a comment. Ensure we
// always emit one.
bool needs_nl = false;
tokenizer_t tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES);
while (maybe_t<tok_t> tok = tokenizer.next()) {
wcstring tok_text = tokenizer.text_of(*tok);
if (needs_nl) {
emit_newline();
needs_nl = false;
if (tok_text == L"\n") continue;
} else if (gap_text_mask_newline) {
// We only respect mask_newline the first time through the loop.
gap_text_mask_newline = false;
if (tok_text == L"\n") continue;
}
if (tok->type == token_type_t::comment) {
emit_space_or_indent();
output.append(tok_text);
needs_nl = true;
} else if (tok->type == token_type_t::end) {
// This may be either a newline or semicolon.
// Semicolons found here are not part of the ast and can simply be removed.
// Newlines are preserved unless mask_newline is set.
if (tok_text == L"\n") {
emit_newline();
}
} else {
fprintf(stderr,
"Gap text should only have comments and newlines - instead found token "
"type %d with text: %ls\n",
(int)tok->type, tok_text.c_str());
DIE("Gap text should only have comments and newlines");
}
}
if (needs_nl) emit_newline();
}
// Append whitespace as necessary. If we have a newline, append the appropriate indent.
// Otherwise, append a space.
void append_whitespace(indent_t node_indent) {
if (needs_continuation_newline) {
append_newline(true);
/// \return the gap text ending at a given index into the string, or empty if none.
source_range_t gap_text_to(uint32_t end) const {
auto where = std::lower_bound(
gaps.begin(), gaps.end(), end,
[](source_range_t r, uint32_t end) { return r.start + r.length < end; });
if (where == gaps.end() || where->start + where->length != end) {
// Not found.
return source_range_t{0, 0};
} else {
return *where;
}
if (!has_new_line) {
output.push_back(L' ');
} else if (do_indent) {
output.append((node_indent + line_continuation_indent) * SPACES_PER_INDENT, L' ');
}
/// \return whether a range \p r overlaps an error range from our ast.
bool range_contained_error(source_range_t r) const {
const auto &errs = ast.extras().errors;
auto range_is_before = [](source_range_t x, source_range_t y) {
return x.start + x.length <= y.start;
};
assert(std::is_sorted(errs.begin(), errs.end(), range_is_before) &&
"Error ranges should be sorted");
return std::binary_search(errs.begin(), errs.end(), r, range_is_before);
}
// Emit the gap text before a source range.
void emit_gap_text_before(source_range_t r, gap_flags_t flags) {
assert(r.start <= source.size() && "source out of bounds");
uint32_t start = r.start;
if (start < indents.size()) current_indent = indents.at(start);
// Find the gap text which ends at start.
source_range_t range = gap_text_to(start);
if (range.length > 0) {
// If this range contained an error, append the gap text without modification.
// For example in: echo foo "
// We don't want to mess with the quote.
if (range_contained_error(range)) {
output.append(substr(range));
} else {
emit_gap_text(substr(range), flags);
}
}
// Always clear gap_text_mask_newline after emitting even empty gap text.
gap_text_mask_newline = false;
}
/// Given a string \p input, remove unnecessary quotes, etc.
wcstring clean_text(const wcstring &input) {
// Unescape the string - this leaves special markers around if there are any
// expansions or anything. We specifically tell it to not compute backslash-escapes
// like \U or \x, because we want to leave them intact.
wcstring unescaped = input;
unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);
// Remove INTERNAL_SEPARATOR because that's a quote.
auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote), unescaped.end());
// If no non-"good" char is left, use the unescaped version.
// This can be extended to other characters, but giving the precise list is tough,
// can change over time (see "^", "%" and "?", in some cases "{}") and it just makes
// people feel more at ease.
auto goodchars = [](wchar_t ch) {
return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
};
if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) == unescaped.end() &&
!unescaped.empty()) {
return unescaped;
} else {
return input;
}
}
// Emit a range of original text. This indents as needed, and also inserts preceding gap text.
// If \p tolerate_line_splitting is set, then permit escaped newlines; otherwise collapse such
// lines.
void emit_text(source_range_t r, gap_flags_t flags) {
emit_gap_text_before(r, flags);
current_indent = indents.at(r.start);
if (r.length > 0) {
emit_space_or_indent(flags);
output.append(clean_text(substr(r)));
}
}
template <type_t Type>
void emit_node_text(const leaf_t<Type> &node) {
emit_text(node.range, gap_text_flags_before_node(node));
}
// Emit one newline.
void emit_newline() { output.push_back(L'\n'); }
// Emit a semicolon.
void emit_semi() { output.push_back(L';'); }
// For branch and list nodes, default is to visit their children.
template <typename Node>
enable_if_t<Node::Category == category_t::branch> visit(const Node &node) {
node_visitor(*this).accept_children_of(node);
}
template <typename Node>
enable_if_t<Node::Category == ast::category_t::list> visit(const Node &node) {
node_visitor(*this).accept_children_of(node);
}
// Leaf nodes we just visit their text.
void visit(const keyword_base_t &node) { emit_node_text(node); }
void visit(const token_base_t &node) { emit_node_text(node); }
void visit(const argument_t &node) { emit_node_text(node); }
void visit(const variable_assignment_t &node) { emit_node_text(node); }
void visit(const semi_nl_t &node) {
// These are semicolons or newlines which are part of the ast. That means it includes e.g.
// ones terminating a job or 'if' header, but not random semis in job lists. We respect
// preferred_semi_locations to decide whether or not these should stay as newlines or
// become semicolons.
// Check if we should prefer a semicolon.
bool prefer_semi = node.range.length > 0 &&
std::binary_search(preferred_semi_locations.begin(),
preferred_semi_locations.end(), node.range.start);
emit_gap_text_before(node.range, gap_text_flags_before_node(node));
// Don't emit anything if the gap text put us on a newline (because it had a comment).
if (!at_line_start()) {
prefer_semi ? emit_semi() : emit_newline();
// If it was a semi but we emitted a newline, swallow a subsequent newline.
if (!prefer_semi && substr(node.range) == L";") {
gap_text_mask_newline = true;
}
}
}
void visit(const redirection_t &node) {
// No space between a redirection operator and its target (#2899).
emit_text(node.oper.range, default_flags);
emit_text(node.target.range, skip_space);
}
void visit(const maybe_newlines_t &node) {
// Our newlines may have comments embedded in them, example:
// cmd |
// # something
// cmd2
// Treat it as gap text.
if (node.range.length > 0) {
auto flags = gap_text_flags_before_node(node);
current_indent = indents.at(node.range.start);
emit_gap_text_before(node.range, flags);
wcstring text = source.substr(node.range.start, node.range.length);
emit_gap_text(text, flags);
}
}
void visit(const begin_header_t &node) {
// 'begin' does not require a newline after it, but we insert one.
node_visitor(*this).accept_children_of(node);
if (!at_line_start()) {
emit_newline();
}
}
// The flags we use to parse.
static parse_tree_flags_t parse_flags() {
return parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_leave_unterminated | parse_flag_show_blank_lines;
}
};
// Dump a parse tree node in a form helpful to someone debugging the behavior of this program.
static void dump_node(indent_t node_indent, const parse_node_t &node, const wcstring &source) {
wchar_t nextc = L' ';
wchar_t prevc = L' ';
wcstring source_txt;
if (node.source_start != SOURCE_OFFSET_INVALID && node.source_length != SOURCE_OFFSET_INVALID) {
int nextc_idx = node.source_start + node.source_length;
if (static_cast<size_t>(nextc_idx) < source.size()) {
nextc = source[node.source_start + node.source_length];
}
if (node.source_start > 0) prevc = source[node.source_start - 1];
source_txt = source.substr(node.source_start, node.source_length);
}
wchar_t prevc_str[4] = {prevc, 0, 0, 0};
wchar_t nextc_str[4] = {nextc, 0, 0, 0};
if (prevc < L' ') {
prevc_str[0] = L'\\';
prevc_str[1] = L'c';
prevc_str[2] = prevc + '@';
}
if (nextc < L' ') {
nextc_str[0] = L'\\';
nextc_str[1] = L'c';
nextc_str[2] = nextc + '@';
}
std::fwprintf(stderr, L"{off %4u, len %4u, indent %2u, kw %ls, %ls} [%ls|%ls|%ls]\n",
node.source_start, node.source_length, node_indent,
keyword_description(node.keyword), token_type_description(node.type), prevc_str,
source_txt.c_str(), nextc_str);
}
void prettifier_t::prettify_node(const parse_node_tree_t &tree, node_offset_t node_idx,
indent_t node_indent, parse_token_type_t parent_type) {
// Use an explicit stack to avoid stack overflow.
struct pending_node_t {
node_offset_t index;
indent_t indent;
parse_token_type_t parent_type;
};
std::stack<pending_node_t> pending_node_stack;
pending_node_stack.push({node_idx, node_indent, parent_type});
while (!pending_node_stack.empty()) {
pending_node_t args = pending_node_stack.top();
pending_node_stack.pop();
auto node_idx = args.index;
auto node_indent = args.indent;
auto parent_type = args.parent_type;
const parse_node_t &node = tree.at(node_idx);
const parse_token_type_t node_type = node.type;
const parse_token_type_t prev_node_type =
node_idx > 0 ? tree.at(node_idx - 1).type : token_type_invalid;
// Increment the indent if we are either a root job_list, or root case_item_list, or in an
// if or while header (#1665).
const bool is_root_job_list =
node_type == symbol_job_list && parent_type != symbol_job_list;
const bool is_root_case_list =
node_type == symbol_case_item_list && parent_type != symbol_case_item_list;
const bool is_if_while_header =
(node_type == symbol_job_conjunction || node_type == symbol_andor_job_list) &&
(parent_type == symbol_if_clause || parent_type == symbol_while_header);
if (is_root_job_list || is_root_case_list || is_if_while_header) {
node_indent += 1;
}
if (dump_parse_tree) dump_node(node_indent, node, source);
// Prepend any escaped newline, but only for certain cases.
// We allow it to split arguments (including at the end - this is like trailing commas in
// lists, makes for better diffs), to separate pipelines (but it has to be *before* the
// pipe, so the pipe symbol is the first thing on the new line after the indent) and to
// separate &&/|| job lists (`and` and `or` are handled separately below, as they *allow*
// semicolons)
// TODO: Handle
// foo | \
// bar
// so it just removes the escape - pipes don't need it. This was changed in some fish
// version, figure out which it was and if it is worth supporting.
if (prev_node_type == symbol_arguments_or_redirections_list ||
prev_node_type == symbol_argument_list || node_type == parse_token_type_andand ||
node_type == parse_token_type_pipe || node_type == parse_token_type_end) {
maybe_prepend_escaped_newline(node);
}
// handle comments, which come before the text
if (node.has_comments()) {
auto comment_nodes = tree.comment_nodes_for_node(node);
for (const auto &comment : comment_nodes) {
maybe_prepend_escaped_newline(*comment.node());
append_whitespace(node_indent);
auto source_range = comment.source_range();
output.append(source, source_range->start, source_range->length);
needs_continuation_newline = true;
}
}
if (node_type == parse_token_type_end) {
// For historical reasons, semicolon also get "TOK_END".
// We need to distinguish between them, because otherwise `a;;;;` gets extra lines
// instead of the semicolons. Semicolons are just ignored, unless they are followed by a
// command. So `echo;` removes the semicolon, but `echo; echo` removes it and adds a
// newline.
last_was_semicolon = false;
if (node.get_source(source) == L"\n") {
append_newline();
} else if (!has_new_line) {
// The semicolon is only useful if we haven't just had a newline.
last_was_semicolon = true;
}
} else if ((node_type >= FIRST_PARSE_TOKEN_TYPE && node_type <= LAST_PARSE_TOKEN_TYPE) ||
node_type == parse_special_type_parse_error) {
if (last_was_semicolon) {
// We keep the semicolon for `; and` and `; or`,
// others we turn into newlines.
if (node.keyword != parse_keyword_t::kw_and &&
node.keyword != parse_keyword_t::kw_or) {
append_newline();
} else {
output.push_back(L';');
}
last_was_semicolon = false;
}
if (node.has_source()) {
// Some type representing a particular token.
if (prev_node_type != parse_token_type_redirection) {
append_whitespace(node_indent);
}
wcstring unescaped{source, node.source_start, node.source_length};
// Unescape the string - this leaves special markers around if there are any
// expansions or anything. We specifically tell it to not compute backslash-escapes
// like \U or \x, because we want to leave them intact.
unescape_string_in_place(&unescaped, UNESCAPE_SPECIAL | UNESCAPE_NO_BACKSLASHES);
// Remove INTERNAL_SEPARATOR because that's a quote.
auto quote = [](wchar_t ch) { return ch == INTERNAL_SEPARATOR; };
unescaped.erase(std::remove_if(unescaped.begin(), unescaped.end(), quote),
unescaped.end());
// If no non-"good" char is left, use the unescaped version.
// This can be extended to other characters, but giving the precise list is tough,
// can change over time (see "^", "%" and "?", in some cases "{}") and it just makes
// people feel more at ease.
auto goodchars = [](wchar_t ch) {
return fish_iswalnum(ch) || ch == L'_' || ch == L'-' || ch == L'/';
};
if (std::find_if_not(unescaped.begin(), unescaped.end(), goodchars) ==
unescaped.end() &&
!unescaped.empty()) {
output.append(unescaped);
} else {
output.append(source, node.source_start, node.source_length);
}
has_new_line = false;
}
}
// Put all children in stack in reversed order
// This way they will be processed in correct order.
for (node_offset_t idx = node.child_count; idx > 0; idx--) {
// Note: We pass our type to our child, which becomes its parent node type.
// Note: While node.child_start could be -1 (NODE_OFFSET_INVALID) the addition is safe
// because we won't execute this call in that case since node.child_count should be
// zero.
pending_node_stack.push({node.child_start + (idx - 1), node_indent, node_type});
}
}
}
} // namespace
static const char *highlight_role_to_string(highlight_role_t role) {
#define TEST_ROLE(x) \
@ -394,29 +700,17 @@ static std::string make_pygments_csv(const wcstring &src) {
// Entry point for prettification.
static wcstring prettify(const wcstring &src, bool do_indent) {
parse_node_tree_t parse_tree;
int parse_flags = (parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_leave_unterminated | parse_flag_show_blank_lines);
if (!parse_tree_from_string(src, parse_flags, &parse_tree, nullptr)) {
return src; // we return the original string on failure
}
if (dump_parse_tree) {
const wcstring dump = parse_dump_tree(parse_tree, src);
std::fwprintf(stderr, L"%ls\n", dump.c_str());
auto ast =
ast::ast_t::parse(src, parse_flag_leave_unterminated | parse_flag_include_comments |
parse_flag_show_extra_semis);
wcstring ast_dump = ast.dump(src);
std::fwprintf(stderr, L"%ls\n", ast_dump.c_str());
}
// We may have a forest of disconnected trees on a parse failure. We have to handle all nodes
// that have no parent, and all parse errors.
prettifier_t prettifier{src, do_indent};
for (node_offset_t i = 0; i < parse_tree.size(); i++) {
const parse_node_t &node = parse_tree.at(i);
if (node.parent == NODE_OFFSET_INVALID || node.type == parse_special_type_parse_error) {
// A root node.
prettifier.prettify_node(parse_tree, i, 0, symbol_job_list);
}
}
return std::move(prettifier.output);
pretty_printer_t printer{src, do_indent};
wcstring output = printer.prettify();
return output;
}
/// Given a string and list of colors of the same size, return the string with HTML span elements

View File

@ -40,6 +40,7 @@
#include <utility>
#include <vector>
#include "ast.h"
#include "autoload.h"
#include "builtin.h"
#include "color.h"
@ -75,7 +76,6 @@
#include "signal.h"
#include "termsize.h"
#include "timer.h"
#include "tnode.h"
#include "tokenizer.h"
#include "topic_monitor.h"
#include "utf8.h"
@ -978,15 +978,18 @@ static void test_debounce_timeout() {
}
static parser_test_error_bits_t detect_argument_errors(const wcstring &src) {
parse_node_tree_t tree;
if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL, symbol_argument_list)) {
using namespace ast;
auto ast = ast_t::parse_argument_list(src, parse_flag_none);
if (ast.errored()) {
return PARSER_TEST_ERROR;
}
assert(!tree.empty()); //!OCLINT(multiple unary operator)
tnode_t<grammar::argument_list> arg_list{&tree, &tree.at(0)};
auto first_arg = arg_list.next_in_list<grammar::argument>();
return parse_util_detect_errors_in_argument(first_arg, first_arg.get_source(src));
const ast::argument_t *first_arg =
ast.top()->as<freestanding_argument_list_t>()->arguments.at(0);
if (!first_arg) {
err(L"Failed to parse an argument");
return 0;
}
return parse_util_detect_errors_in_argument(*first_arg, first_arg->source(src));
}
/// Test the parser.
@ -1084,7 +1087,7 @@ static void test_parser() {
}
if (parse_util_detect_errors(L"echo (\nfoo\n bar") != PARSER_TEST_INCOMPLETE) {
err(L"unterminated multiline subhsell not reported properly");
err(L"unterminated multiline subshell not reported properly");
}
if (parse_util_detect_errors(L"begin ; true ; end | ") != PARSER_TEST_INCOMPLETE) {
@ -1268,75 +1271,121 @@ static void test_cancellation() {
parser.clear_cancel();
}
namespace indent_tests {
// A struct which is either text or a new indent.
struct segment_t {
// The indent to set
int indent{0};
const char *text{nullptr};
/* implicit */ segment_t(int indent) : indent(indent) {}
/* implicit */ segment_t(const char *text) : text(text) {}
};
using test_t = std::vector<segment_t>;
using test_list_t = std::vector<test_t>;
// Add a new test to a test list based on a series of ints and texts.
template <typename... Types>
void add_test(test_list_t *v, const Types &... types) {
segment_t segments[] = {types...};
v->emplace_back(std::begin(segments), std::end(segments));
}
} // namespace indent_tests
static void test_indents() {
say(L"Testing indents");
using namespace indent_tests;
// Here are the components of our source and the indents we expect those to be.
struct indent_component_t {
const wchar_t *txt;
int indent;
};
test_list_t tests;
add_test(&tests, //
0, "if", 1, " foo", //
0, "\nend");
const indent_component_t components1[] = {{L"if foo", 0}, {L"end", 0}, {NULL, -1}};
add_test(&tests, //
0, "if", 1, " foo", //
1, "\nfoo", //
0, "\nend");
const indent_component_t components2[] = {{L"if foo", 0},
{L"", 1}, // trailing newline!
{NULL, -1}};
add_test(&tests, //
0, "if", 1, " foo", //
1, "\nif", 2, " bar", //
1, "\nend", //
0, "\nend");
const indent_component_t components3[] = {{L"if foo", 0},
{L"foo", 1},
{L"end", 0}, // trailing newline!
{NULL, -1}};
add_test(&tests, //
0, "if", 1, " foo", //
1, "\nif", 2, " bar", //
1, "\n", // FIXME: this should be 2 but parse_util_compute_indents has a bug
1, "\nend\n");
const indent_component_t components4[] = {{L"if foo", 0}, {L"if bar", 1}, {L"end", 1},
{L"end", 0}, {L"", 0}, {NULL, -1}};
add_test(&tests, //
0, "if", 1, " foo", //
1, "\nif", 2, " bar", //
2, "\n");
const indent_component_t components5[] = {{L"if foo", 0}, {L"if bar", 1}, {L"", 2}, {NULL, -1}};
add_test(&tests, //
0, "begin", //
1, "\nfoo", //
1, "\n");
const indent_component_t components6[] = {{L"begin", 0}, {L"foo", 1}, {L"", 1}, {NULL, -1}};
add_test(&tests, //
0, "begin", //
1, "\n;", //
0, "end", //
0, "\nfoo", 0, "\n");
const indent_component_t components7[] = {{L"begin", 0}, {L";", 1}, {L"end", 0},
{L"foo", 0}, {L"", 0}, {NULL, -1}};
add_test(&tests, //
0, "begin", //
1, "\n;", //
0, "end", //
0, "\nfoo", 0, "\n");
const indent_component_t components8[] = {{L"if foo", 0}, {L"if bar", 1}, {L"baz", 2},
{L"end", 1}, {L"", 1}, {NULL, -1}};
add_test(&tests, //
0, "if", 1, " foo", //
1, "\nif", 2, " bar", //
2, "\nbaz", //
1, "\nend", 1, "\n");
const indent_component_t components9[] = {{L"switch foo", 0}, {L"", 1}, {NULL, -1}};
add_test(&tests, //
0, "switch foo", //
1, "\n" //
);
const indent_component_t components10[] = {
{L"switch foo", 0}, {L"case bar", 1}, {L"case baz", 1}, {L"quux", 2}, {L"", 2}, {NULL, -1}};
add_test(&tests, //
0, "switch foo", //
1, "\ncase bar", //
1, "\ncase baz", //
2, "\nquux", //
2, "\nquux" //
);
const indent_component_t components11[] = {{L"switch foo", 0},
{L"cas", 1}, // parse error indentation handling
{NULL, -1}};
add_test(&tests, //
0, "switch foo", //
1, "\ncas" // parse error indentation handling
);
const indent_component_t components12[] = {{L"while false", 0},
{L"# comment", 1}, // comment indentation handling
{L"command", 1}, // comment indentation handling
{L"# comment2", 1}, // comment indentation handling
{NULL, -1}};
add_test(&tests, //
0, "while", 1, " false", //
1, "\n# comment", // comment indentation handling
1, "\ncommand", //
1, "\n# comment 2" //
);
const indent_component_t *tests[] = {components1, components2, components3, components4,
components5, components6, components7, components8,
components9, components10, components11, components12};
for (size_t which = 0; which < sizeof tests / sizeof *tests; which++) {
const indent_component_t *components = tests[which];
// Count how many we have.
size_t component_count = 0;
while (components[component_count].txt != NULL) {
component_count++;
}
// Generate the expected indents.
int test_idx = 0;
for (const test_t &test : tests) {
// Construct the input text and expected indents.
wcstring text;
std::vector<int> expected_indents;
for (size_t i = 0; i < component_count; i++) {
if (i > 0) {
text.push_back(L'\n');
expected_indents.push_back(components[i].indent);
int current_indent = 0;
for (const segment_t &segment : test) {
if (!segment.text) {
current_indent = segment.indent;
} else {
wcstring tmp = str2wcstring(segment.text);
text.append(tmp);
expected_indents.insert(expected_indents.end(), tmp.size(), current_indent);
}
text.append(components[i].txt);
expected_indents.resize(text.size(), components[i].indent);
}
do_test(expected_indents.size() == text.size());
@ -1350,11 +1399,13 @@ static void test_indents() {
do_test(expected_indents.size() == indents.size());
for (size_t i = 0; i < text.size(); i++) {
if (expected_indents.at(i) != indents.at(i)) {
err(L"Wrong indent at index %lu in test #%lu (expected %d, actual %d):\n%ls\n", i,
which + 1, expected_indents.at(i), indents.at(i), text.c_str());
break; // don't keep showing errors for the rest of the line
err(L"Wrong indent at index %lu (char 0x%02x) in test #%lu (expected %d, actual "
L"%d):\n%ls\n",
i, text.at(i), test_idx, expected_indents.at(i), indents.at(i), text.c_str());
break; // don't keep showing errors for the rest of the test
}
}
test_idx++;
}
}
@ -4298,12 +4349,12 @@ static void test_new_parser_correctness() {
{L"true || false; and true", true},
{L"true || ||", false},
{L"|| true", false},
{L"true || \n\n false", true},
{L"true || \n\n false", false},
};
for (const auto &test : parser_tests) {
parse_node_tree_t parse_tree;
bool success = parse_tree_from_string(test.src, parse_flag_none, &parse_tree, NULL);
auto ast = ast::ast_t::parse(test.src);
bool success = !ast.errored();
if (success && !test.ok) {
err(L"\"%ls\" should NOT have parsed, but did", test.src);
} else if (!success && test.ok) {
@ -4332,7 +4383,7 @@ static inline bool string_for_permutation(const wcstring *fuzzes, size_t fuzz_co
}
static void test_new_parser_fuzzing() {
say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t));
say(L"Fuzzing parser");
const wcstring fuzzes[] = {
L"if", L"else", L"for", L"in", L"while", L"begin", L"function",
L"switch", L"case", L"end", L"and", L"or", L"not", L"command",
@ -4343,7 +4394,6 @@ static void test_new_parser_fuzzing() {
wcstring src;
src.reserve(128);
parse_node_tree_t node_tree;
parse_error_list_t errors;
double start = timef();
@ -4357,7 +4407,7 @@ static void test_new_parser_fuzzing() {
unsigned long permutation = 0;
while (string_for_permutation(fuzzes, sizeof fuzzes / sizeof *fuzzes, len, permutation++,
&src)) {
parse_tree_from_string(src, parse_flag_continue_after_error, &node_tree, &errors);
ast::ast_t::parse(src);
}
if (log_it) std::fwprintf(stderr, L"done (%lu)\n", permutation);
}
@ -4369,33 +4419,36 @@ static void test_new_parser_fuzzing() {
// true if successful.
static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *out_joined_args,
enum parse_statement_decoration_t *out_deco) {
using namespace ast;
out_cmd->clear();
out_joined_args->clear();
*out_deco = parse_statement_decoration_none;
parse_node_tree_t tree;
if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL)) {
return false;
}
auto ast = ast_t::parse(src);
if (ast.errored()) return false;
// Get the statement. Should only have one.
tnode_t<grammar::job_list> job_list{&tree, &tree.at(0)};
auto stmts = job_list.descendants<grammar::plain_statement>();
if (stmts.size() != 1) {
say(L"Unexpected number of statements (%lu) found in '%ls'", stmts.size(), src.c_str());
return false;
const decorated_statement_t *statement = nullptr;
for (const auto &n : ast) {
if (const auto *tmp = n.try_as<decorated_statement_t>()) {
if (statement) {
say(L"More than one decorated statement found in '%ls'", src.c_str());
return false;
}
statement = tmp;
}
}
tnode_t<grammar::plain_statement> stmt = stmts.at(0);
// Return its decoration and command.
*out_deco = get_decoration(stmt);
*out_cmd = *command_for_plain_statement(stmt, src);
*out_deco = statement->decoration();
*out_cmd = statement->command.source(src);
// Return arguments separated by spaces.
bool first = true;
for (auto arg_node : stmt.descendants<grammar::argument>()) {
for (const ast::argument_or_redirection_t &arg : statement->args_or_redirs) {
if (!arg.is_argument()) continue;
if (!first) out_joined_args->push_back(L' ');
out_joined_args->append(arg_node.get_source(src));
out_joined_args->append(arg.source(src));
first = false;
}
@ -4404,19 +4457,22 @@ static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *o
// Verify that 'function -h' and 'function --help' are plain statements but 'function --foo' is
// not (issue #1240).
template <typename Type>
template <ast::type_t Type>
static void check_function_help(const wchar_t *src) {
parse_node_tree_t tree;
if (!parse_tree_from_string(src, parse_flag_none, &tree, NULL)) {
using namespace ast;
auto ast = ast_t::parse(src);
if (ast.errored()) {
err(L"Failed to parse '%ls'", src);
}
tnode_t<grammar::job_list> node{&tree, &tree.at(0)};
auto node_list = node.descendants<Type>();
if (node_list.size() == 0) {
err(L"Failed to find node of type '%ls'", token_type_description(Type::token));
} else if (node_list.size() > 1) {
err(L"Found too many nodes of type '%ls'", token_type_description(Type::token));
int count = 0;
for (const node_t &node : ast) {
count += (node.type == Type);
}
if (count == 0) {
err(L"Failed to find node of type '%ls'", ast_type_to_string(Type));
} else if (count > 1) {
err(L"Found too many nodes of type '%ls'", ast_type_to_string(Type));
}
}
@ -4463,30 +4519,32 @@ static void test_new_parser_ll2() {
test.src.c_str(), (int)test.deco, (int)deco, (long)__LINE__);
}
check_function_help<grammar::plain_statement>(L"function -h");
check_function_help<grammar::plain_statement>(L"function --help");
check_function_help<grammar::function_header>(L"function --foo; end");
check_function_help<grammar::function_header>(L"function foo; end");
check_function_help<ast::type_t::decorated_statement>(L"function -h");
check_function_help<ast::type_t::decorated_statement>(L"function --help");
check_function_help<ast::type_t::function_header>(L"function --foo; end");
check_function_help<ast::type_t::function_header>(L"function foo; end");
}
static void test_new_parser_ad_hoc() {
using namespace ast;
// Very ad-hoc tests for issues encountered.
say(L"Testing new parser ad hoc tests");
// Ensure that 'case' terminates a job list.
const wcstring src = L"switch foo ; case bar; case baz; end";
parse_node_tree_t parse_tree;
bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, NULL);
if (!success) {
auto ast = ast_t::parse(src);
if (ast.errored()) {
err(L"Parsing failed");
}
// Expect three case_item_lists: one for each case, and a terminal one. The bug was that we'd
// Expect two case_item_lists. The bug was that we'd
// try to run a command 'case'.
tnode_t<grammar::job_list> root{&parse_tree, &parse_tree.at(0)};
auto node_list = root.descendants<grammar::case_item_list>();
if (node_list.size() != 3) {
err(L"Expected 3 case item nodes, found %lu", node_list.size());
int count = 0;
for (const auto &n : ast) {
count += (n.type == type_t::case_item);
}
if (count != 2) {
err(L"Expected 2 case item nodes, found %d", count);
}
}
@ -4507,7 +4565,9 @@ static void test_new_parser_errors() {
{L"if true ; end ; else", parse_error_unbalancing_else},
{L"case", parse_error_unbalancing_case},
{L"if true ; case ; end", parse_error_unbalancing_case},
{L"if true ; case ; end", parse_error_generic},
{L"true | and", parse_error_andor_in_pipeline},
};
for (const auto &test : tests) {
@ -4515,15 +4575,17 @@ static void test_new_parser_errors() {
parse_error_code_t expected_code = test.code;
parse_error_list_t errors;
parse_node_tree_t parse_tree;
bool success = parse_tree_from_string(src, parse_flag_none, &parse_tree, &errors);
if (success) {
auto ast = ast::ast_t::parse(src, parse_flag_none, &errors);
if (!ast.errored()) {
err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str());
}
if (errors.size() != 1) {
err(L"Source '%ls' was expected to produce 1 error, but instead produced %lu errors",
src.c_str(), errors.size());
for (const auto &err : errors) {
fprintf(stderr, "%ls\n", err.describe(src, false).c_str());
}
} else if (errors.at(0).code != expected_code) {
err(L"Source '%ls' was expected to produce error code %lu, but instead produced error "
L"code %lu",
@ -4862,6 +4924,12 @@ static void test_highlighting() {
{L")", highlight_role_t::error},
});
highlight_tests.push_back({
{L"echo", highlight_role_t::command},
{L"stuff", highlight_role_t::param},
{L"# comment", highlight_role_t::comment},
});
auto &vars = parser_t::principal_parser().vars();
// Verify variables and wildcards in commands using /bin/cat.
vars.set(L"VARIABLE_IN_COMMAND", ENV_LOCAL, {L"a"});

View File

@ -64,9 +64,7 @@ class category_list_t {
category_t exec_fork{L"exec-fork", L"Calls to fork()"};
category_t output_invalid{L"output-invalid", L"Trying to print invalid output"};
category_t parse_productions{L"parse-productions", L"Resolving tokens"};
category_t parse_productions_chatty{L"parse-productions-chatty",
L"Resolving tokens (chatty messages)"};
category_t ast_construction{L"ast-construction", L"Parsing fish AST"};
category_t proc_job_run{L"proc-job-run", L"Jobs getting started or continued"};

View File

@ -224,17 +224,14 @@ bool function_get_definition(const wcstring &name, wcstring &out_definition) {
const function_info_t *func = funcset->get_info(name);
if (!func || !func->props) return false;
// We want to preserve comments that the AST attaches to the header (#5285).
// Take everything from the end of the header to the end of the body.
// Take everything from the end of the header to the 'end' keyword.
const auto &props = func->props;
namespace g = grammar;
tnode_t<g::block_header> header = props->func_node.child<0>();
tnode_t<g::job_list> jobs = props->func_node.child<1>();
auto header_src = header.source_range();
auto jobs_src = jobs.source_range();
if (header_src && jobs_src) {
auto header_src = props->func_node->header->try_source_range();
auto end_kw_src = props->func_node->end.try_source_range();
if (header_src && end_kw_src) {
uint32_t body_start = header_src->start + header_src->length;
uint32_t body_end = jobs_src->start + jobs_src->length;
assert(body_start <= jobs_src->start && "job list must come after header");
uint32_t body_end = end_kw_src->start;
assert(body_start <= body_end && "end keyword should come after header");
out_definition = wcstring(props->parsed_source->src, body_start, body_end - body_start);
}
return true;
@ -313,7 +310,7 @@ int function_get_definition_lineno(const wcstring &name) {
// return one plus the number of newlines at offsets less than the start of our function's
// statement (which includes the header).
// TODO: merge with line_offset_of_character_at_offset?
auto source_range = func->props->func_node.source_range();
auto source_range = func->props->func_node->try_source_range();
assert(source_range && "Function has no source range");
uint32_t func_start = source_range->start;
const wcstring &source = func->props->parsed_source->src;

View File

@ -11,10 +11,13 @@
#include "env.h"
#include "event.h"
#include "parse_tree.h"
#include "tnode.h"
class parser_t;
namespace ast {
struct block_statement_t;
}
/// A function's constant properties. These do not change once initialized.
struct function_properties_t {
/// Parsed source containing the function.
@ -23,7 +26,7 @@ struct function_properties_t {
/// Node containing the function statement, pointing into parsed_source.
/// We store block_statement, not job_list, so that comments attached to the header are
/// preserved.
tnode_t<grammar::block_statement> func_node;
const ast::block_statement_t *func_node;
/// List of all named arguments for this function.
wcstring_list_t named_arguments;

View File

@ -16,6 +16,7 @@
#include <unordered_set>
#include <utility>
#include "ast.h"
#include "builtin.h"
#include "color.h"
#include "common.h"
@ -31,14 +32,11 @@
#include "parse_util.h"
#include "parser.h"
#include "path.h"
#include "tnode.h"
#include "tokenizer.h"
#include "wcstringutil.h"
#include "wildcard.h"
#include "wutil.h" // IWYU pragma: keep
namespace g = grammar;
#define CURSOR_POSITION_INVALID static_cast<size_t>(-1)
static const wchar_t *get_highlight_var_name(highlight_role_t role) {
@ -338,12 +336,11 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d
// Given a plain statement node in a parse tree, get the command and return it, expanded
// appropriately for commands. If we succeed, return true.
static bool plain_statement_get_expanded_command(const wcstring &src,
tnode_t<g::plain_statement> stmt,
const operation_context_t &ctx,
wcstring *out_cmd) {
static bool statement_get_expanded_command(const wcstring &src,
const ast::decorated_statement_t &stmt,
const operation_context_t &ctx, wcstring *out_cmd) {
// Get the command. Try expanding it. If we cannot, it's an error.
maybe_t<wcstring> cmd = command_for_plain_statement(stmt, src);
maybe_t<wcstring> cmd = stmt.command.source(src);
if (!cmd) return false;
expand_result_t err = expand_to_command_and_args(*cmd, ctx, out_cmd, nullptr);
return err == expand_result_t::ok;
@ -384,6 +381,9 @@ rgb_color_t highlight_get_color(const highlight_spec_t &highlight, bool is_backg
return result;
}
static bool command_is_valid(const wcstring &cmd, enum parse_statement_decoration_t decoration,
const wcstring &working_directory, const environment_t &vars);
static bool has_expand_reserved(const wcstring &str) {
bool result = false;
for (auto wc : str) {
@ -399,27 +399,22 @@ static bool has_expand_reserved(const wcstring &str) {
// command (as a string), if any. This is used to validate autosuggestions.
static bool autosuggest_parse_command(const wcstring &buff, const operation_context_t &ctx,
wcstring *out_expanded_command, wcstring *out_arg) {
// Parse the buffer.
parse_node_tree_t parse_tree;
parse_tree_from_string(buff,
parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens,
&parse_tree, nullptr);
auto ast = ast::ast_t::parse(
buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens);
// Find the first statement.
tnode_t<g::plain_statement> first_statement{};
for (const auto &node : parse_tree) {
if (node.type == symbol_plain_statement) {
first_statement = tnode_t<g::plain_statement>(&parse_tree, &node);
break;
}
const ast::decorated_statement_t *first_statement = nullptr;
if (const ast::job_conjunction_t *jc = ast.top()->as<ast::job_list_t>()->at(0)) {
first_statement = jc->job.statement.contents->try_as<ast::decorated_statement_t>();
}
if (first_statement &&
plain_statement_get_expanded_command(buff, first_statement, ctx, out_expanded_command)) {
// Find the first argument.
auto args_and_redirs = first_statement.child<1>();
if (auto arg = args_and_redirs.next_in_list<grammar::argument>()) {
*out_arg = arg.get_source(buff);
statement_get_expanded_command(buff, *first_statement, ctx, out_expanded_command)) {
// Check if the first argument or redirection is, in fact, an argument.
if (const auto *arg_or_redir = first_statement->args_or_redirs.at(0)) {
if (arg_or_redir && arg_or_redir->is_argument()) {
*out_arg = arg_or_redir->argument().source(buff);
}
}
return true;
}
@ -775,31 +770,56 @@ class highlighter_t {
const bool io_ok;
// Working directory.
const wcstring working_directory;
// The ast we produced.
ast::ast_t ast;
// The resulting colors.
using color_array_t = std::vector<highlight_spec_t>;
color_array_t color_array;
// The parse tree of the buff.
parse_node_tree_t parse_tree;
// Flags we use for AST parsing.
static constexpr parse_tree_flags_t ast_flags =
parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated |
parse_flag_show_extra_semis;
// Color a command.
void color_command(tnode_t<g::tok_string> node);
// Color an argument.
void color_argument(tnode_t<g::tok_string> node);
void color_command(const ast::string_t &node);
// Color a node as if it were an argument.
void color_as_argument(const ast::node_t &node);
// Color a redirection.
void color_redirection(tnode_t<g::redirection> node);
// Color a list of arguments. If cmd_is_cd is true, then the arguments are for 'cd'; detect
// invalid directories.
void color_arguments(const std::vector<tnode_t<g::argument>> &args, bool cmd_is_cd = false);
// Color the redirections of the given node.
void color_redirections(tnode_t<g::arguments_or_redirections_list> list);
void color_redirection(const ast::redirection_t &node);
// Color all the children of the command with the given type.
void color_children(const parse_node_t &parent, parse_token_type_t type,
highlight_spec_t color);
void color_children(const ast::node_t &parent, ast::type_t type, highlight_spec_t color);
// Colors the source range of a node with a given color.
void color_node(const parse_node_t &node, highlight_spec_t color);
void color_node(const ast::node_t &node, highlight_spec_t color);
// Colors a range with a given color.
void color_range(source_range_t range, highlight_spec_t color);
// return whether a plain statement is 'cd'.
bool is_cd(tnode_t<g::plain_statement> stmt) const;
bool is_cd(const ast::decorated_statement_t &stmt) const;
/// \return a substring of our buffer.
wcstring get_source(source_range_t r) const;
public:
// Visit the children of a node.
void visit_children(const ast::node_t &node) {
ast::node_visitor(*this).accept_children_of(&node);
}
// AST visitor implementations.
void visit(const ast::keyword_base_t &kw);
void visit(const ast::token_base_t &tok);
void visit(const ast::redirection_t &redir);
void visit(const ast::variable_assignment_t &varas);
void visit(const ast::semi_nl_t &semi_nl);
void visit(const ast::decorated_statement_t &stmt);
// Visit an argument, perhaps knowing that our command is cd.
void visit(const ast::argument_t &arg, bool cmd_is_cd = false);
// Default implementation is to just visit children.
void visit(const ast::node_t &node) { visit_children(node); }
// Constructor
highlighter_t(const wcstring &str, size_t pos, const operation_context_t &ctx, wcstring wd,
bool can_do_io)
@ -808,52 +828,44 @@ class highlighter_t {
ctx(ctx),
io_ok(can_do_io),
working_directory(std::move(wd)),
color_array(str.size()) {
// Parse the tree.
parse_tree_from_string(buff,
parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens,
&this->parse_tree, nullptr);
}
ast(ast::ast_t::parse(buff, ast_flags)) {}
// Perform highlighting, returning an array of colors.
color_array_t highlight();
};
void highlighter_t::color_node(const parse_node_t &node, highlight_spec_t color) {
// Can only color nodes with valid source ranges.
if (!node.has_source() || node.source_length == 0) return;
// Fill the color array with our color in the corresponding range.
size_t source_end = node.source_start + node.source_length;
assert(source_end >= node.source_start);
assert(source_end <= color_array.size());
std::fill(this->color_array.begin() + node.source_start, this->color_array.begin() + source_end,
color);
wcstring highlighter_t::get_source(source_range_t r) const {
assert(r.start + r.length >= r.start && "Overflow");
assert(r.start + r.length <= this->buff.size() && "Out of range");
return this->buff.substr(r.start, r.length);
}
void highlighter_t::color_command(tnode_t<g::tok_string> node) {
auto source_range = node.source_range();
if (!source_range) return;
void highlighter_t::color_node(const ast::node_t &node, highlight_spec_t color) {
color_range(node.source_range(), color);
}
const wcstring cmd_str = node.get_source(this->buff);
void highlighter_t::color_range(source_range_t range, highlight_spec_t color) {
assert(range.start + range.length <= this->color_array.size() && "Range out of bounds");
std::fill_n(this->color_array.begin() + range.start, range.length, color);
}
void highlighter_t::color_command(const ast::string_t &node) {
source_range_t source_range = node.source_range();
const wcstring cmd_str = get_source(source_range);
// Get an iterator to the colors associated with the argument.
const size_t arg_start = source_range->start;
const size_t arg_start = source_range.start;
const color_array_t::iterator colors = color_array.begin() + arg_start;
color_string_internal(cmd_str, highlight_role_t::command, colors);
}
// node does not necessarily have type symbol_argument here.
void highlighter_t::color_argument(tnode_t<g::tok_string> node) {
void highlighter_t::color_as_argument(const ast::node_t &node) {
auto source_range = node.source_range();
if (!source_range) return;
const wcstring arg_str = node.get_source(this->buff);
const wcstring arg_str = get_source(source_range);
// Get an iterator to the colors associated with the argument.
const size_t arg_start = source_range->start;
const size_t arg_start = source_range.start;
const color_array_t::iterator arg_colors = color_array.begin() + arg_start;
// Color this argument without concern for command substitutions.
@ -905,15 +917,13 @@ void highlighter_t::color_argument(tnode_t<g::tok_string> node) {
/// Indicates whether the source range of the given node forms a valid path in the given
/// working_directory.
static bool node_is_potential_path(const wcstring &src, const parse_node_t &node,
const operation_context_t &ctx,
const wcstring &working_directory) {
if (!node.has_source()) return false;
static bool range_is_potential_path(const wcstring &src, const source_range_t &range,
const operation_context_t &ctx,
const wcstring &working_directory) {
// Get the node source, unescape it, and then pass it to is_potential_path along with the
// working directory (as a one element list).
bool result = false;
wcstring token(src, node.source_start, node.source_length);
wcstring token = src.substr(range.start, range.length);
if (unescape_string_in_place(&token, UNESCAPE_SPECIAL)) {
// Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY.
// Put it back.
@ -925,172 +935,257 @@ static bool node_is_potential_path(const wcstring &src, const parse_node_t &node
return result;
}
bool highlighter_t::is_cd(tnode_t<g::plain_statement> stmt) const {
bool cmd_is_cd = false;
if (this->io_ok && stmt.has_source()) {
wcstring cmd_str;
if (plain_statement_get_expanded_command(this->buff, stmt, ctx, &cmd_str)) {
cmd_is_cd = (cmd_str == L"cd");
}
bool highlighter_t::is_cd(const ast::decorated_statement_t &stmt) const {
wcstring cmd_str;
if (this->io_ok && statement_get_expanded_command(this->buff, stmt, ctx, &cmd_str)) {
return cmd_str == L"cd";
}
return cmd_is_cd;
return false;
}
// Color all of the arguments of the given node list, which should be argument_list or
// argument_or_redirection_list.
void highlighter_t::color_arguments(const std::vector<tnode_t<g::argument>> &args, bool cmd_is_cd) {
// Find all the arguments of this list.
for (tnode_t<g::argument> arg : args) {
this->color_argument(arg.child<0>());
void highlighter_t::visit(const ast::keyword_base_t &kw) {
highlight_role_t role = highlight_role_t::normal;
switch (kw.kw) {
case parse_keyword_t::kw_begin:
case parse_keyword_t::kw_builtin:
case parse_keyword_t::kw_case:
case parse_keyword_t::kw_command:
case parse_keyword_t::kw_else:
case parse_keyword_t::kw_end:
case parse_keyword_t::kw_exec:
case parse_keyword_t::kw_for:
case parse_keyword_t::kw_function:
case parse_keyword_t::kw_if:
case parse_keyword_t::kw_in:
case parse_keyword_t::kw_switch:
case parse_keyword_t::kw_while:
role = highlight_role_t::command;
break;
if (cmd_is_cd) {
// Mark this as an error if it's not 'help' and not a valid cd path.
wcstring param = arg.get_source(this->buff);
if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) {
bool is_help = string_prefixes_string(param, L"--help") ||
string_prefixes_string(param, L"-h");
if (!is_help && this->io_ok &&
!is_potential_cd_path(param, working_directory, ctx, PATH_EXPAND_TILDE)) {
this->color_node(arg, highlight_role_t::error);
}
case parse_keyword_t::kw_and:
case parse_keyword_t::kw_or:
case parse_keyword_t::kw_not:
case parse_keyword_t::kw_exclam:
case parse_keyword_t::kw_time:
role = highlight_role_t::operat;
break;
case parse_keyword_t::none:
break;
}
color_node(kw, role);
}
void highlighter_t::visit(const ast::token_base_t &tok) {
maybe_t<highlight_role_t> role = highlight_role_t::normal;
switch (tok.type) {
case parse_token_type_end:
case parse_token_type_pipe:
case parse_token_type_background:
role = highlight_role_t::statement_terminator;
break;
case parse_token_type_andand:
case parse_token_type_oror:
role = highlight_role_t::operat;
break;
case parse_token_type_string:
// Assume all strings are params. This handles e.g. the variables a for header or
// function header. Other strings (like arguments to commands) need more complex
// handling, which occurs in their respective overrides of visit().
role = highlight_role_t::param;
default:
break;
}
if (role) color_node(tok, *role);
}
void highlighter_t::visit(const ast::semi_nl_t &semi_nl) {
color_node(semi_nl, highlight_role_t::statement_terminator);
}
void highlighter_t::visit(const ast::argument_t &arg, bool cmd_is_cd) {
color_as_argument(arg);
if (cmd_is_cd && io_ok) {
// Mark this as an error if it's not 'help' and not a valid cd path.
wcstring param = arg.source(this->buff);
if (expand_one(param, expand_flag::skip_cmdsubst, ctx)) {
bool is_help =
string_prefixes_string(param, L"--help") || string_prefixes_string(param, L"-h");
if (!is_help && this->io_ok &&
!is_potential_cd_path(param, working_directory, ctx, PATH_EXPAND_TILDE)) {
this->color_node(arg, highlight_role_t::error);
}
}
}
}
void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node) {
if (!redirection_node.has_source()) return;
void highlighter_t::visit(const ast::variable_assignment_t &varas) {
color_as_argument(varas);
// TODO: Color the '=' in the variable assignment as an operator, for fun.
// if (auto where = variable_assignment_equals_pos(varas.source(this->buff))) {
// this->color_array.at(*where) = highlight_role_t::operat;
// }
}
tnode_t<g::tok_redirection> redir_prim = redirection_node.child<0>(); // like 2>
tnode_t<g::tok_string> redir_target = redirection_node.child<1>(); // like &1 or file path
void highlighter_t::visit(const ast::decorated_statement_t &stmt) {
// Color any decoration.
if (stmt.opt_decoration) this->visit(*stmt.opt_decoration);
if (redir_prim) {
wcstring target;
const maybe_t<pipe_or_redir_t> redirect =
redirection_for_node(redirection_node, this->buff, &target);
// Color the command's source code.
// If we get no source back, there's nothing to color.
maybe_t<wcstring> cmd = stmt.command.try_source(this->buff);
if (!cmd.has_value()) return;
// We may get a missing redirection type if the redirection is invalid.
auto hl = (redirect && redirect->is_valid()) ? highlight_role_t::redirection
: highlight_role_t::error;
this->color_node(redir_prim, hl);
wcstring expanded_cmd;
bool is_valid_cmd = false;
if (!this->io_ok) {
// We cannot check if the command is invalid, so just assume it's valid.
is_valid_cmd = true;
} else if (variable_assignment_equals_pos(*cmd)) {
is_valid_cmd = true;
} else {
// Check to see if the command is valid.
// Try expanding it. If we cannot, it's an error.
bool expanded = statement_get_expanded_command(buff, stmt, ctx, &expanded_cmd);
if (expanded && !has_expand_reserved(expanded_cmd)) {
is_valid_cmd =
command_is_valid(expanded_cmd, stmt.decoration(), working_directory, ctx.vars);
}
}
// Check if the argument contains a command substitution. If so, highlight it as a param
// even though it's a command redirection, and don't try to do any other validation.
if (parse_util_locate_cmdsubst(target.c_str(), nullptr, nullptr, true) != 0) {
this->color_argument(redir_target);
// Color our statement.
if (is_valid_cmd) {
this->color_command(stmt.command);
} else {
this->color_node(stmt.command, highlight_role_t::error);
}
// Color arguments and redirections.
// Except if our command is 'cd' we have special logic for how arguments are colored.
bool is_cd = (expanded_cmd == L"cd");
for (const ast::argument_or_redirection_t &v : stmt.args_or_redirs) {
if (v.is_argument()) {
this->visit(v.argument(), is_cd);
} else {
// No command substitution, so we can highlight the target file or fd. For example,
// disallow redirections into a non-existent directory.
bool target_is_valid = true;
this->visit(v.redirection());
}
}
}
if (!redirect || !redirect->is_valid()) {
// not a valid redirection
target_is_valid = false;
} else if (!this->io_ok) {
// I/O is disallowed, so we don't have much hope of catching anything but gross
// errors. Assume it's valid.
target_is_valid = true;
} else if (!expand_one(target, expand_flag::skip_cmdsubst, ctx)) {
// Could not be expanded.
target_is_valid = false;
} else {
// Ok, we successfully expanded our target. Now verify that it works with this
// redirection. We will probably need it as a path (but not in the case of fd
// redirections). Note that the target is now unescaped.
const wcstring target_path =
path_apply_working_directory(target, this->working_directory);
switch (redirect->mode) {
case redirection_mode_t::fd: {
if (target == L"-") {
target_is_valid = true;
} else {
int fd = fish_wcstoi(target.c_str());
target_is_valid = !errno && fd >= 0;
}
break;
void highlighter_t::visit(const ast::redirection_t &redir) {
maybe_t<pipe_or_redir_t> oper =
pipe_or_redir_t::from_string(redir.oper.source(this->buff)); // like 2>
wcstring target = redir.target.source(this->buff); // like &1 or file path
assert(oper.has_value() &&
"Should have successfully parsed a pipe_or_redir_t since it was in our ast");
// Color the > part.
// It may have parsed successfully yet still be invalid (e.g. 9999999999999>&1)
// If so, color the whole thing invalid and stop.
if (!oper->is_valid()) {
this->color_node(redir, highlight_role_t::error);
return;
}
// Color the operator part like 2>.
this->color_node(redir.oper, highlight_role_t::redirection);
// Color the target part.
// Check if the argument contains a command substitution. If so, highlight it as a param
// even though it's a command redirection, and don't try to do any other validation.
if (parse_util_locate_cmdsubst(target.c_str(), nullptr, nullptr, true) != 0) {
this->color_as_argument(redir.target);
} else {
// No command substitution, so we can highlight the target file or fd. For example,
// disallow redirections into a non-existent directory.
bool target_is_valid = true;
if (!this->io_ok) {
// I/O is disallowed, so we don't have much hope of catching anything but gross
// errors. Assume it's valid.
target_is_valid = true;
} else if (!expand_one(target, expand_flag::skip_cmdsubst, ctx)) {
// Could not be expanded.
target_is_valid = false;
} else {
// Ok, we successfully expanded our target. Now verify that it works with this
// redirection. We will probably need it as a path (but not in the case of fd
// redirections). Note that the target is now unescaped.
const wcstring target_path =
path_apply_working_directory(target, this->working_directory);
switch (oper->mode) {
case redirection_mode_t::fd: {
if (target == L"-") {
target_is_valid = true;
} else {
int fd = fish_wcstoi(target.c_str());
target_is_valid = !errno && fd >= 0;
}
case redirection_mode_t::input: {
// Input redirections must have a readable non-directory.
struct stat buf = {};
target_is_valid = !waccess(target_path, R_OK) &&
!wstat(target_path, &buf) && !S_ISDIR(buf.st_mode);
break;
break;
}
case redirection_mode_t::input: {
// Input redirections must have a readable non-directory.
struct stat buf = {};
target_is_valid = !waccess(target_path, R_OK) && !wstat(target_path, &buf) &&
!S_ISDIR(buf.st_mode);
break;
}
case redirection_mode_t::overwrite:
case redirection_mode_t::append:
case redirection_mode_t::noclob: {
// Test whether the file exists, and whether it's writable (possibly after
// creating it). access() returns failure if the file does not exist.
bool file_exists = false, file_is_writable = false;
int err = 0;
struct stat buf = {};
if (wstat(target_path, &buf) < 0) {
err = errno;
}
case redirection_mode_t::overwrite:
case redirection_mode_t::append:
case redirection_mode_t::noclob: {
// Test whether the file exists, and whether it's writable (possibly after
// creating it). access() returns failure if the file does not exist.
bool file_exists = false, file_is_writable = false;
int err = 0;
struct stat buf = {};
if (wstat(target_path, &buf) < 0) {
err = errno;
}
if (string_suffixes_string(L"/", target)) {
// Redirections to things that are directories is definitely not
// allowed.
file_exists = false;
file_is_writable = false;
} else if (err == 0) {
// No err. We can write to it if it's not a directory and we have
// permission.
file_exists = true;
file_is_writable = !S_ISDIR(buf.st_mode) && !waccess(target_path, W_OK);
} else if (err == ENOENT) {
// File does not exist. Check if its parent directory is writable.
wcstring parent = wdirname(target_path);
if (string_suffixes_string(L"/", target)) {
// Redirections to things that are directories is definitely not
// allowed.
file_exists = false;
file_is_writable = false;
} else if (err == 0) {
// No err. We can write to it if it's not a directory and we have
// permission.
file_exists = true;
file_is_writable = !S_ISDIR(buf.st_mode) && !waccess(target_path, W_OK);
} else if (err == ENOENT) {
// File does not exist. Check if its parent directory is writable.
wcstring parent = wdirname(target_path);
// Ensure that the parent ends with the path separator. This will ensure
// that we get an error if the parent directory is not really a
// directory.
if (!string_suffixes_string(L"/", parent)) parent.push_back(L'/');
// Ensure that the parent ends with the path separator. This will ensure
// that we get an error if the parent directory is not really a
// directory.
if (!string_suffixes_string(L"/", parent)) parent.push_back(L'/');
// Now the file is considered writable if the parent directory is
// writable.
file_exists = false;
file_is_writable = (0 == waccess(parent, W_OK));
} else {
// Other errors we treat as not writable. This includes things like
// ENOTDIR.
file_exists = false;
file_is_writable = false;
}
// NOCLOB means that we must not overwrite files that exist.
target_is_valid =
file_is_writable &&
!(file_exists && redirect->mode == redirection_mode_t::noclob);
break;
// Now the file is considered writable if the parent directory is
// writable.
file_exists = false;
file_is_writable = (0 == waccess(parent, W_OK));
} else {
// Other errors we treat as not writable. This includes things like
// ENOTDIR.
file_exists = false;
file_is_writable = false;
}
// NOCLOB means that we must not overwrite files that exist.
target_is_valid =
file_is_writable &&
!(file_exists && oper->mode == redirection_mode_t::noclob);
break;
}
}
if (redir_target) {
auto hl = target_is_valid ? highlight_role_t::redirection : highlight_role_t::error;
this->color_node(redir_target, hl);
}
}
}
}
/// Color all of the redirections of the given command.
void highlighter_t::color_redirections(tnode_t<g::arguments_or_redirections_list> list) {
for (const auto &node : list.descendants<g::redirection>()) {
this->color_redirection(node);
}
}
/// Color all the children of the command with the given type.
void highlighter_t::color_children(const parse_node_t &parent, parse_token_type_t type,
highlight_spec_t color) {
for (node_offset_t idx = 0; idx < parent.child_count; idx++) {
const parse_node_t *child = this->parse_tree.get_child(parent, idx);
if (child != nullptr && child->type == type) {
this->color_node(*child, color);
}
this->color_node(redir.target,
target_is_valid ? highlight_role_t::redirection : highlight_role_t::error);
}
}
@ -1145,171 +1240,42 @@ highlighter_t::color_array_t highlighter_t::highlight() {
ASSERT_IS_BACKGROUND_THREAD();
}
const size_t length = buff.size();
assert(this->buff.size() == this->color_array.size());
if (length == 0) return color_array;
// Start out at zero.
this->color_array.resize(this->buff.size());
std::fill(this->color_array.begin(), this->color_array.end(), highlight_spec_t{});
// Walk the node tree.
for (const parse_node_t &node : parse_tree) {
if (ctx.check_cancel()) return std::move(color_array);
switch (node.type) {
// Color direct string descendants, e.g. 'for' and 'in'.
case symbol_while_header:
case symbol_begin_header:
case symbol_function_header:
case symbol_if_clause:
case symbol_else_clause:
case symbol_case_item:
case symbol_decorated_statement:
case symbol_if_statement: {
this->color_children(node, parse_token_type_string, highlight_role_t::command);
break;
}
case symbol_switch_statement: {
tnode_t<g::switch_statement> switchn(&parse_tree, &node);
auto literal_switch = switchn.child<0>();
auto switch_arg = switchn.child<1>();
this->color_node(literal_switch, highlight_role_t::command);
this->color_node(switch_arg, highlight_role_t::param);
break;
}
case symbol_for_header: {
tnode_t<g::for_header> fhead(&parse_tree, &node);
// Color the 'for' and 'in' as commands.
auto literal_for = fhead.child<0>();
auto literal_in = fhead.child<2>();
this->color_node(literal_for, highlight_role_t::command);
this->color_node(literal_in, highlight_role_t::command);
this->visit_children(*ast.top());
if (ctx.check_cancel()) return std::move(color_array);
// Color the variable name as a parameter.
this->color_argument(fhead.child<1>());
break;
}
case parse_token_type_andand:
case parse_token_type_oror:
this->color_node(node, highlight_role_t::operat);
break;
case symbol_not_statement:
this->color_children(node, parse_token_type_string, highlight_role_t::operat);
break;
case symbol_job_decorator:
this->color_node(node, highlight_role_t::operat);
break;
case symbol_variable_assignment: {
tnode_t<g::variable_assignment> variable_assignment = {&parse_tree, &node};
this->color_argument(variable_assignment.child<0>());
break;
}
case parse_token_type_pipe:
case parse_token_type_background:
case parse_token_type_end:
case symbol_optional_background: {
this->color_node(node, highlight_role_t::statement_terminator);
break;
}
case symbol_optional_time: {
this->color_node(node, highlight_role_t::operat);
break;
}
case symbol_plain_statement: {
tnode_t<g::plain_statement> stmt(&parse_tree, &node);
// Get the decoration from the parent.
enum parse_statement_decoration_t decoration = get_decoration(stmt);
// Color the command.
tnode_t<g::tok_string> cmd_node = stmt.child<0>();
maybe_t<wcstring> cmd = cmd_node.get_source(buff);
if (!cmd) {
break; // not much as we can do without a node that has source text
}
bool is_valid_cmd = false;
if (!this->io_ok) {
// We cannot check if the command is invalid, so just assume it's valid.
is_valid_cmd = true;
} else if (variable_assignment_equals_pos(*cmd)) {
is_valid_cmd = true;
} else {
wcstring expanded_cmd;
// Check to see if the command is valid.
// Try expanding it. If we cannot, it's an error.
bool expanded =
plain_statement_get_expanded_command(buff, stmt, ctx, &expanded_cmd);
if (expanded && !has_expand_reserved(expanded_cmd)) {
is_valid_cmd =
command_is_valid(expanded_cmd, decoration, working_directory, ctx.vars);
}
}
if (!is_valid_cmd) {
this->color_node(*cmd_node, highlight_role_t::error);
} else {
this->color_command(cmd_node);
}
break;
}
// Only work on root lists, so that we don't re-color child lists.
case symbol_arguments_or_redirections_list: {
tnode_t<g::arguments_or_redirections_list> list(&parse_tree, &node);
if (argument_list_is_root(list)) {
bool cmd_is_cd = is_cd(list.try_get_parent<g::plain_statement>());
this->color_arguments(list.descendants<g::argument>(), cmd_is_cd);
this->color_redirections(list);
}
break;
}
case symbol_argument_list: {
tnode_t<g::argument_list> list(&parse_tree, &node);
if (argument_list_is_root(list)) {
this->color_arguments(list.descendants<g::argument>());
}
break;
}
case symbol_end_command: {
this->color_node(node, highlight_role_t::command);
break;
}
case parse_special_type_parse_error:
case parse_special_type_tokenizer_error: {
this->color_node(node, highlight_role_t::error);
break;
}
case parse_special_type_comment: {
this->color_node(node, highlight_role_t::comment);
break;
}
default: {
break;
}
}
// Color every comment.
const auto &extras = ast.extras();
for (const source_range_t &r : extras.comments) {
this->color_range(r, highlight_role_t::comment);
}
if (!this->io_ok || this->cursor_pos > this->buff.size()) {
return std::move(color_array);
// Color every extra semi.
for (const source_range_t &r : extras.semis) {
this->color_range(r, highlight_role_t::statement_terminator);
}
// If the cursor is over an argument, and that argument is a valid path, underline it.
for (const auto &node : parse_tree) {
// Must be an argument with source.
if (node.type != symbol_argument || !node.has_source()) continue;
// Color every error range.
for (const source_range_t &r : extras.errors) {
this->color_range(r, highlight_role_t::error);
}
if (ctx.check_cancel()) return std::move(color_array);
// Underline every valid path.
if (node_is_potential_path(buff, node, ctx, working_directory)) {
// It is, underline it.
for (size_t i = node.source_start; i < node.source_start + node.source_length; i++) {
// Underline every valid path.
if (io_ok) {
for (const ast::node_t &node : ast) {
const ast::argument_t *arg = node.try_as<ast::argument_t>();
if (!arg || arg->unsourced) continue;
if (ctx.check_cancel()) break;
if (range_is_potential_path(buff, arg->range, ctx, working_directory)) {
// Don't color highlight_role_t::error because it looks dorky. For example,
// trying to cd into a non-directory would show an underline and also red.
if (this->color_array.at(i).foreground != highlight_role_t::error) {
this->color_array.at(i).valid_path = true;
for (size_t i = arg->range.start, end = arg->range.start + arg->range.length;
i < end; i++) {
if (this->color_array.at(i).foreground != highlight_role_t::error) {
this->color_array.at(i).valid_path = true;
}
}
}
}

View File

@ -29,6 +29,7 @@
#include <type_traits>
#include <unordered_set>
#include "ast.h"
#include "common.h"
#include "env.h"
#include "fallback.h" // IWYU pragma: keep
@ -44,7 +45,6 @@
#include "parser.h"
#include "path.h"
#include "reader.h"
#include "tnode.h"
#include "wcstringutil.h"
#include "wildcard.h" // IWYU pragma: keep
#include "wutil.h" // IWYU pragma: keep
@ -1096,8 +1096,7 @@ void history_impl_t::populate_from_config_path() {
static bool should_import_bash_history_line(const wcstring &line) {
if (line.empty()) return false;
parse_node_tree_t parse_tree;
if (!parse_tree_from_string(line, parse_flag_none, &parse_tree, nullptr)) return false;
if (ast::ast_t::parse(line).errored()) return false;
// In doing this test do not allow incomplete strings. Hence the "false" argument.
parse_error_list_t errors;
@ -1274,38 +1273,33 @@ void history_t::add_pending_with_file_detection(const wcstring &str,
// Find all arguments that look like they could be file paths.
bool needs_sync_write = false;
parse_node_tree_t tree;
parse_tree_from_string(str, parse_flag_none, &tree, nullptr);
using namespace ast;
auto ast = ast_t::parse(str);
path_list_t potential_paths;
for (const parse_node_t &node : tree) {
if (!node.has_source()) {
continue;
}
if (node.type == symbol_argument) {
wcstring potential_path = node.get_source(str);
for (const node_t &node : ast) {
if (const argument_t *arg = node.try_as<argument_t>()) {
wcstring potential_path = arg->source(str);
bool unescaped = unescape_string_in_place(&potential_path, UNESCAPE_DEFAULT);
if (unescaped && string_could_be_path(potential_path)) {
potential_paths.push_back(potential_path);
}
} else if (node.type == symbol_plain_statement) {
} else if (const decorated_statement_t *stmt = node.try_as<decorated_statement_t>()) {
// Hack hack hack - if the command is likely to trigger an exit, then don't do
// background file detection, because we won't be able to write it to our history file
// before we exit.
// Also skip it for 'echo'. This is because echo doesn't take file paths, but also
// because the history file test wants to find the commands in the history file
// immediately after running them, so it can't tolerate the asynchronous file detection.
if (get_decoration({&tree, &node}) == parse_statement_decoration_exec) {
if (stmt->decoration() == parse_statement_decoration_exec) {
needs_sync_write = true;
}
if (maybe_t<wcstring> command = command_for_plain_statement({&tree, &node}, str)) {
unescape_string_in_place(&*command, UNESCAPE_DEFAULT);
if (*command == L"exit" || *command == L"reboot" || *command == L"restart" ||
*command == L"echo") {
needs_sync_write = true;
}
wcstring command = stmt->command.source(str);
unescape_string_in_place(&command, UNESCAPE_DEFAULT);
if (command == L"exit" || command == L"reboot" || command == L"restart" ||
command == L"echo") {
needs_sync_write = true;
}
}
}

View File

@ -6,54 +6,27 @@
#include "common.h"
#define PARSE_ASSERT(a) assert(a)
#define PARSER_DIE() \
do { \
FLOG(error, L"Parser dying!"); \
exit_without_destructors(-1); \
} while (0)
// A range of source code.
struct source_range_t {
uint32_t start;
uint32_t length;
uint32_t end() const {
assert(start + length >= start && "Overflow");
return start + length;
}
};
// IMPORTANT: If the following enum table is modified you must also update token_enum_map below.
enum parse_token_type_t : uint8_t {
token_type_invalid = 1,
// Non-terminal tokens
symbol_job_list,
symbol_job_conjunction,
symbol_job_conjunction_continuation,
symbol_job_decorator,
symbol_job,
symbol_job_continuation,
symbol_statement,
symbol_block_statement,
symbol_block_header,
symbol_for_header,
symbol_while_header,
symbol_begin_header,
symbol_function_header,
symbol_if_statement,
symbol_if_clause,
symbol_else_clause,
symbol_else_continuation,
symbol_switch_statement,
symbol_case_item_list,
symbol_case_item,
symbol_not_statement,
symbol_decorated_statement,
symbol_plain_statement,
symbol_variable_assignment,
symbol_variable_assignments,
symbol_arguments_or_redirections_list,
symbol_andor_job_list,
symbol_argument_list,
// Freestanding argument lists are parsed from the argument list supplied to 'complete -a'.
// They are not generated by parse trees rooted in symbol_job_list.
symbol_freestanding_argument_list,
symbol_argument,
symbol_redirection,
symbol_optional_background,
symbol_optional_newlines,
symbol_optional_time,
symbol_end_command,
// Terminal types.
parse_token_type_string,
parse_token_type_pipe,
@ -68,13 +41,6 @@ enum parse_token_type_t : uint8_t {
parse_special_type_parse_error,
parse_special_type_tokenizer_error,
parse_special_type_comment,
LAST_TOKEN_TYPE = parse_special_type_comment,
FIRST_TERMINAL_TYPE = parse_token_type_string,
LAST_TERMINAL_TYPE = parse_token_type_terminate,
LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string,
LAST_PARSE_TOKEN_TYPE = parse_token_type_end
};
const enum_map<parse_token_type_t> token_enum_map[] = {
@ -89,9 +55,6 @@ const enum_map<parse_token_type_t> token_enum_map[] = {
{parse_token_type_andand, L"parse_token_type_andand"},
{parse_token_type_oror, L"parse_token_type_oror"},
{parse_token_type_terminate, L"parse_token_type_terminate"},
// Define all symbols
#define ELEM(sym) {symbol_##sym, L"symbol_" #sym},
#include "parse_grammar_elements.inc"
{token_type_invalid, L"token_type_invalid"},
{token_type_invalid, nullptr}};
#define token_enum_map_len (sizeof token_enum_map / sizeof *token_enum_map)
@ -147,7 +110,7 @@ const enum_map<parse_keyword_t> keyword_enum_map[] = {{parse_keyword_t::kw_excla
// Node tag values.
// Statement decorations, stored in node tag.
// Statement decorations.
enum parse_statement_decoration_t {
parse_statement_decoration_none,
parse_statement_decoration_command,
@ -155,19 +118,6 @@ enum parse_statement_decoration_t {
parse_statement_decoration_exec,
};
// Job decorations, stored in node tag.
enum parse_job_decoration_t {
parse_job_decoration_none,
parse_job_decoration_and,
parse_job_decoration_or,
};
// Whether a statement is backgrounded.
enum parse_optional_background_t { parse_no_background, parse_background };
// Whether a job is prefixed with "time".
enum parse_optional_time_t { parse_optional_time_no_time, parse_optional_time_time };
// Parse error code list.
enum parse_error_code_t {
parse_error_none,
@ -193,6 +143,26 @@ enum parse_error_code_t {
parse_error_andor_in_pipeline, // "and" or "or" after a pipe
};
enum {
parse_flag_none = 0,
/// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
/// disconnected trees. This is intended to be used by syntax highlighting.
parse_flag_continue_after_error = 1 << 0,
/// Include comment tokens.
parse_flag_include_comments = 1 << 1,
/// Indicate that the tokenizer should accept incomplete tokens */
parse_flag_accept_incomplete_tokens = 1 << 2,
/// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
/// tree where some nodes may have no productions.
parse_flag_leave_unterminated = 1 << 3,
/// Indicate that the parser should generate job_list entries for blank lines.
parse_flag_show_blank_lines = 1 << 4,
/// Indicate that extra semis should be generated.
parse_flag_show_extra_semis = 1 << 5,
};
typedef unsigned int parse_tree_flags_t;
enum { PARSER_TEST_ERROR = 1, PARSER_TEST_INCOMPLETE = 2 };
typedef unsigned int parser_test_error_bits_t;
@ -214,6 +184,9 @@ struct parse_error_t {
};
typedef std::vector<parse_error_t> parse_error_list_t;
wcstring token_type_user_presentable_description(parse_token_type_t type,
parse_keyword_t keyword = parse_keyword_t::none);
// Special source_start value that means unknown.
#define SOURCE_LOCATION_UNKNOWN (static_cast<size_t>(-1))
@ -221,6 +194,13 @@ typedef std::vector<parse_error_t> parse_error_list_t;
/// errors in a substring of a larger source buffer.
void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt);
// The location of a pipeline.
enum class pipeline_position_t {
none, // not part of a pipeline
first, // first command in a pipeline
subsequent // second or further command in a pipeline
};
/// Maximum number of function calls.
#define FISH_MAX_STACK_DEPTH 128

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,10 @@
// Provides the "linkage" between a parse_node_tree_t and actual execution structures (job_t, etc.).
// Provides the "linkage" between an ast and actual execution structures (job_t, etc.).
#ifndef FISH_PARSE_EXECUTION_H
#define FISH_PARSE_EXECUTION_H
#include <stddef.h>
#include "ast.h"
#include "common.h"
#include "io.h"
#include "parse_constants.h"
@ -38,7 +39,7 @@ class parse_execution_context_t {
const operation_context_t &ctx;
// The currently executing job node, used to indicate the line number.
tnode_t<grammar::job> executing_job_node{};
const ast::job_t *executing_job_node{};
// Cached line number information.
size_t cached_lineno_offset = 0;
@ -59,88 +60,91 @@ class parse_execution_context_t {
// Report an error, setting $status to \p status. Always returns
// 'end_execution_reason_t::error'.
end_execution_reason_t report_error(int status, const parse_node_t &node, const wchar_t *fmt,
end_execution_reason_t report_error(int status, const ast::node_t &node, const wchar_t *fmt,
...) const;
end_execution_reason_t report_errors(int status, const parse_error_list_t &error_list) const;
/// Command not found support.
end_execution_reason_t handle_command_not_found(const wcstring &cmd,
tnode_t<grammar::plain_statement> statement,
const ast::decorated_statement_t &statement,
int err_code);
// Utilities
wcstring get_source(const parse_node_t &node) const;
tnode_t<grammar::plain_statement> infinite_recursive_statement_in_job_list(
tnode_t<grammar::job_list> job_list, wcstring *out_func_name) const;
wcstring get_source(const ast::node_t &node) const;
const ast::decorated_statement_t *infinite_recursive_statement_in_job_list(
const ast::job_list_t &job_list, wcstring *out_func_name) const;
// Expand a command which may contain variables, producing an expand command and possibly
// arguments. Prints an error message on error.
end_execution_reason_t expand_command(tnode_t<grammar::plain_statement> statement,
end_execution_reason_t expand_command(const ast::decorated_statement_t &statement,
wcstring *out_cmd, wcstring_list_t *out_args) const;
/// Return whether we should skip a job with the given bool statement type.
bool should_skip(parse_job_decoration_t type) const;
/// Indicates whether a job is a simple block (one block, no redirections).
bool job_is_simple_block(tnode_t<grammar::job> job) const;
bool job_is_simple_block(const ast::job_t &job) const;
enum process_type_t process_type_for_command(tnode_t<grammar::plain_statement> statement,
enum process_type_t process_type_for_command(const ast::decorated_statement_t &statement,
const wcstring &cmd) const;
end_execution_reason_t apply_variable_assignments(
process_t *proc, tnode_t<grammar::variable_assignments> variable_assignments,
process_t *proc, const ast::variable_assignment_list_t &variable_assignments,
const block_t **block);
// These create process_t structures from statements.
end_execution_reason_t populate_job_process(
job_t *job, process_t *proc, tnode_t<grammar::statement> statement,
tnode_t<grammar::variable_assignments> variable_assignments);
job_t *job, process_t *proc, const ast::statement_t &statement,
const ast::variable_assignment_list_t &variable_assignments_list_t);
end_execution_reason_t populate_not_process(job_t *job, process_t *proc,
tnode_t<grammar::not_statement> not_statement);
const ast::not_statement_t &not_statement);
end_execution_reason_t populate_plain_process(job_t *job, process_t *proc,
tnode_t<grammar::plain_statement> statement);
const ast::decorated_statement_t &statement);
template <typename Type>
end_execution_reason_t populate_block_process(job_t *job, process_t *proc,
tnode_t<grammar::statement> statement,
tnode_t<Type> specific_statement);
const ast::statement_t &statement,
const Type &specific_statement);
// These encapsulate the actual logic of various (block) statements.
end_execution_reason_t run_block_statement(tnode_t<grammar::block_statement> statement,
end_execution_reason_t run_block_statement(const ast::block_statement_t &statement,
const block_t *associated_block);
end_execution_reason_t run_for_statement(tnode_t<grammar::for_header> header,
tnode_t<grammar::job_list> contents);
end_execution_reason_t run_if_statement(tnode_t<grammar::if_statement> statement,
end_execution_reason_t run_for_statement(const ast::for_header_t &header,
const ast::job_list_t &contents);
end_execution_reason_t run_if_statement(const ast::if_statement_t &statement,
const block_t *associated_block);
end_execution_reason_t run_switch_statement(tnode_t<grammar::switch_statement> statement);
end_execution_reason_t run_while_statement(tnode_t<grammar::while_header> header,
tnode_t<grammar::job_list> contents,
end_execution_reason_t run_switch_statement(const ast::switch_statement_t &statement);
end_execution_reason_t run_while_statement(const ast::while_header_t &header,
const ast::job_list_t &contents,
const block_t *associated_block);
end_execution_reason_t run_function_statement(tnode_t<grammar::block_statement> statement,
tnode_t<grammar::function_header> header);
end_execution_reason_t run_begin_statement(tnode_t<grammar::job_list> contents);
end_execution_reason_t run_function_statement(const ast::block_statement_t &statement,
const ast::function_header_t &header);
end_execution_reason_t run_begin_statement(const ast::job_list_t &contents);
enum globspec_t { failglob, nullglob };
using argument_node_list_t = std::vector<tnode_t<grammar::argument>>;
end_execution_reason_t expand_arguments_from_nodes(const argument_node_list_t &argument_nodes,
using ast_args_list_t = std::vector<const ast::argument_t *>;
static ast_args_list_t get_argument_nodes(const ast::argument_list_t &args);
static ast_args_list_t get_argument_nodes(const ast::argument_or_redirection_list_t &args);
end_execution_reason_t expand_arguments_from_nodes(const ast_args_list_t &argument_nodes,
wcstring_list_t *out_arguments,
globspec_t glob_behavior);
// Determines the list of redirections for a node.
end_execution_reason_t determine_redirections(
tnode_t<grammar::arguments_or_redirections_list> node,
redirection_spec_list_t *out_redirections);
end_execution_reason_t determine_redirections(const ast::argument_or_redirection_list_t &list,
redirection_spec_list_t *out_redirections);
end_execution_reason_t run_1_job(tnode_t<grammar::job> job, const block_t *associated_block);
end_execution_reason_t run_job_conjunction(tnode_t<grammar::job_conjunction> job_expr,
end_execution_reason_t run_1_job(const ast::job_t &job, const block_t *associated_block);
end_execution_reason_t test_and_run_1_job_conjunction(const ast::job_conjunction_t &jc,
const block_t *associated_block);
end_execution_reason_t run_job_conjunction(const ast::job_conjunction_t &job_expr,
const block_t *associated_block);
template <typename Type>
end_execution_reason_t run_job_list(tnode_t<Type> job_list_node,
end_execution_reason_t run_job_list(const ast::job_list_t &job_list_node,
const block_t *associated_block);
end_execution_reason_t populate_job_from_job_node(job_t *j, tnode_t<grammar::job> job_node,
end_execution_reason_t run_job_list(const ast::andor_job_list_t &job_list_node,
const block_t *associated_block);
end_execution_reason_t populate_job_from_job_node(job_t *j, const ast::job_t &job_node,
const block_t *associated_block);
// Returns the line number of the node. Not const since it touches cached_lineno_offset.
int line_offset_of_node(tnode_t<grammar::job> node);
int line_offset_of_node(const ast::job_t *node);
int line_offset_of_character_at_offset(size_t offset);
public:
@ -159,14 +163,14 @@ class parse_execution_context_t {
/// Returns the source string.
const wcstring &get_source() const { return pstree->src; }
/// Return the parse tree.
const parse_node_tree_t &tree() const { return pstree->tree; }
/// Return the parsed ast.
const ast::ast_t &ast() const { return *pstree->ast; }
/// Start executing at the given node. Returns 0 if there was no error, 1 if there was an
/// error.
end_execution_reason_t eval_node(tnode_t<grammar::statement> statement,
end_execution_reason_t eval_node(const ast::statement_t &statement,
const block_t *associated_block);
end_execution_reason_t eval_node(tnode_t<grammar::job_list> job_list,
end_execution_reason_t eval_node(const ast::job_list_t &job_list,
const block_t *associated_block);
};

View File

@ -1,401 +0,0 @@
// Programmatic representation of fish grammar
#ifndef FISH_PARSE_GRAMMAR_H
#define FISH_PARSE_GRAMMAR_H
#include <array>
#include <tuple>
#include <type_traits>
#include "parse_constants.h"
#include "tokenizer.h"
struct parse_token_t;
typedef uint8_t parse_node_tag_t;
using parse_node_tag_t = uint8_t;
struct parse_token_t;
// The fish grammar, expressed as C++ template metaprogramming.
// Each grammar symbol is a struct; productions are encoded as compile-time arrays of
// production_element_t, and each symbol carries a resolve() function (defined in
// parse_productions.cpp for the non-trivial ones) that picks a production from two
// lookahead tokens.
namespace grammar {

// A production element is one byte: token/symbol values directly, keywords offset by
// LAST_TOKEN_OR_SYMBOL + 1 (see keyword::element() below).
using production_element_t = uint8_t;

enum {
    // The maximum length of any seq production.
    MAX_PRODUCTION_LENGTH = 6
};

// Define primitive types.
// A primitive wraps a single terminal token type.
template <enum parse_token_type_t Token>
struct primitive {
    using type_tuple = std::tuple<>;
    static constexpr parse_token_type_t token = Token;
    static constexpr production_element_t element() { return Token; }
};

using tok_end = primitive<parse_token_type_end>;
using tok_string = primitive<parse_token_type_string>;
using tok_pipe = primitive<parse_token_type_pipe>;
using tok_background = primitive<parse_token_type_background>;
using tok_redirection = primitive<parse_token_type_redirection>;
using tok_andand = primitive<parse_token_type_andand>;
using tok_oror = primitive<parse_token_type_oror>;

// Define keyword types.
// Keywords are string tokens with a specific keyword value.
template <parse_keyword_t Keyword>
struct keyword {
    using type_tuple = std::tuple<>;
    static constexpr parse_token_type_t token = parse_token_type_string;
    static constexpr production_element_t element() {
        // Convert a parse_keyword_t enum to a production_element_t enum.
        return static_cast<uint32_t>(Keyword) + LAST_TOKEN_OR_SYMBOL + 1;
    }
};

// Define special types.
// Comments are not emitted as part of productions, but specially by the parser.
struct comment {
    using type_tuple = std::tuple<>;
    static constexpr parse_token_type_t token = parse_special_type_comment;
};

// Forward declare all the symbol types.
// (X-macro expansion: one `struct T;` per entry in the .inc file.)
#define ELEM(T) struct T;
#include "parse_grammar_elements.inc"

// A production is a sequence of production elements.
// +1 to hold the terminating token_type_invalid
template <size_t Count>
using production_t = std::array<const production_element_t, Count + 1>;

// This is an ugly hack to avoid ODR violations
// Given some type, return a pointer to its production.
template <typename T>
const production_element_t *production_for() {
    static constexpr auto prod = T::production;
    return prod.data();
}

// Get some production element.
template <typename T>
constexpr production_element_t element() {
    return T::element();
}

// Template goo.
// Helpers for answering, at compile time, "could type Desired appear at index Index in
// any of these productions?" — used by ALT_BODY below.
namespace detail {
template <typename T, typename Tuple>
struct tuple_contains;

template <typename T>
struct tuple_contains<T, std::tuple<>> : std::false_type {};

template <typename T, typename U, typename... Ts>
struct tuple_contains<T, std::tuple<U, Ts...>> : tuple_contains<T, std::tuple<Ts...>> {};

template <typename T, typename... Ts>
struct tuple_contains<T, std::tuple<T, Ts...>> : std::true_type {};

struct void_type {
    using type = void;
};

// Support for checking whether the index N is valid for T::type_tuple.
template <size_t N, typename T>
static constexpr bool index_valid() {
    return N < std::tuple_size<typename T::type_tuple>::value;
}

// Get the Nth type of T::type_tuple.
template <size_t N, typename T>
using tuple_element = std::tuple_element<N, typename T::type_tuple>;

// Get the Nth type of T::type_tuple, or void if N is out of bounds.
template <size_t N, typename T>
using tuple_element_or_void =
    typename std::conditional<index_valid<N, T>(), tuple_element<N, T>, void_type>::type::type;

// Make a tuple by mapping the Nth item of a list of 'seq's.
template <size_t N, typename... Ts>
struct tuple_nther {
    // A tuple of the Nth types of tuples (or voids).
    using type = std::tuple<tuple_element_or_void<N, Ts>...>;
};

// Given a list of Options, each one a seq, check to see if any of them contain type Desired at
// index Index.
template <typename Desired, size_t Index, typename... Options>
inline constexpr bool type_possible() {
    using nths = typename tuple_nther<Index, Options...>::type;
    return tuple_contains<Desired, nths>::value;
}
}  // namespace detail

// Partial specialization hack.
// Maps each grammar symbol type T to its symbol_T enum value via element<T>().
#define ELEM(T) \
    template <> \
    constexpr production_element_t element<T>() { \
        return symbol_##T; \
    }
#include "parse_grammar_elements.inc"

// Empty produces nothing.
struct empty {
    using type_tuple = std::tuple<>;
    static constexpr production_t<0> production = {{token_type_invalid}};
    static const production_element_t *resolve(const parse_token_t &, const parse_token_t &,
                                               parse_node_tag_t *) {
        return production_for<empty>();
    }
};

// Sequence represents a list of (at least two) productions.
template <class T0, class... Ts>
struct seq {
    static constexpr production_t<1 + sizeof...(Ts)> production = {
        {element<T0>(), element<Ts>()..., token_type_invalid}};

    static_assert(1 + sizeof...(Ts) <= MAX_PRODUCTION_LENGTH, "MAX_PRODUCTION_LENGTH too small");

    using type_tuple = std::tuple<T0, Ts...>;

    template <typename Desired, size_t Index>
    static constexpr bool type_possible() {
        using element_t = detail::tuple_element_or_void<Index, seq>;
        return std::is_same<Desired, element_t>::value;
    }

    static const production_element_t *resolve(const parse_token_t &, const parse_token_t &,
                                               parse_node_tag_t *) {
        return production_for<seq>();
    }
};

template <class... Args>
using produces_sequence = seq<Args...>;

// Ergonomic way to create a production for a single element.
template <class T>
using single = seq<T>;

template <class T>
using produces_single = single<T>;

// Alternative represents a choice.
// Symbols deriving from it list their candidate productions and define resolve() in
// parse_productions.cpp to pick one from the lookahead tokens.
struct alternative {};

// Following are the grammar productions.
#define BODY(T) static constexpr parse_token_type_t token = symbol_##T;

#define DEF(T) struct T : public

#define DEF_ALT(T) struct T : public alternative
#define ALT_BODY(T, ...) \
    BODY(T) \
    using type_tuple = std::tuple<>; \
    template <typename Desired, size_t Index> \
    static constexpr bool type_possible() { \
        return detail::type_possible<Desired, Index, __VA_ARGS__>(); \
    } \
    static const production_element_t *resolve(const parse_token_t &, const parse_token_t &, \
                                               parse_node_tag_t *)

// A job_list is a list of job_conjunctions, separated by semicolons or newlines
DEF_ALT(job_list) {
    using normal = seq<job_decorator, job_conjunction, job_list>;
    using empty_line = seq<tok_end, job_list>;
    using empty = grammar::empty;
    ALT_BODY(job_list, normal, empty_line, empty);
};

// Job decorators are 'and' and 'or'. These apply to the whole job.
DEF_ALT(job_decorator) {
    using ands = single<keyword<parse_keyword_t::kw_and>>;
    using ors = single<keyword<parse_keyword_t::kw_or>>;
    using empty = grammar::empty;
    ALT_BODY(job_decorator, ands, ors, empty);
};

// A job_conjunction is a job followed by a continuation.
DEF(job_conjunction) produces_sequence<job, job_conjunction_continuation>{BODY(job_conjunction)};

DEF_ALT(job_conjunction_continuation) {
    using andands = seq<tok_andand, optional_newlines, job_conjunction>;
    using orors = seq<tok_oror, optional_newlines, job_conjunction>;
    using empty = grammar::empty;
    ALT_BODY(job_conjunction_continuation, andands, orors, empty);
};

/// The time builtin.
DEF_ALT(optional_time) {
    using empty = grammar::empty;
    using time = single<keyword<parse_keyword_t::kw_time>>;
    ALT_BODY(optional_time, empty, time);
};

// A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases
// like if statements, where we require a command). To represent "non-empty", we require a
// statement, followed by a possibly empty job_continuation, and then optionally a background
// specifier '&'
DEF(job)
produces_sequence<optional_time, variable_assignments, statement, job_continuation,
                  optional_background>{BODY(job)};

DEF_ALT(job_continuation) {
    using piped =
        seq<tok_pipe, optional_newlines, variable_assignments, statement, job_continuation>;
    using empty = grammar::empty;
    ALT_BODY(job_continuation, piped, empty);
};

// A list of assignments like HOME=$PWD
DEF_ALT(variable_assignments) {
    using empty = grammar::empty;
    using var = seq<variable_assignment, variable_assignments>;
    ALT_BODY(variable_assignments, empty, var);
};

// A string token like VAR=value
DEF(variable_assignment) produces_single<tok_string>{BODY(variable_assignment)};

// A statement is a normal command, or an if / while / and etc
DEF_ALT(statement) {
    using nots = single<not_statement>;
    using block = single<block_statement>;
    using ifs = single<if_statement>;
    using switchs = single<switch_statement>;
    using decorated = single<decorated_statement>;
    ALT_BODY(statement, nots, block, ifs, switchs, decorated);
};

// A block is a conditional, loop, or begin/end
DEF(if_statement)
produces_sequence<if_clause, else_clause, end_command, arguments_or_redirections_list>{
    BODY(if_statement)};

DEF(if_clause)
produces_sequence<keyword<parse_keyword_t::kw_if>, job_conjunction, tok_end, andor_job_list,
                  job_list>{BODY(if_clause)};

DEF_ALT(else_clause) {
    using empty = grammar::empty;
    using else_cont = seq<keyword<parse_keyword_t::kw_else>, else_continuation>;
    ALT_BODY(else_clause, empty, else_cont);
};

DEF_ALT(else_continuation) {
    using else_if = seq<if_clause, else_clause>;
    using else_only = seq<tok_end, job_list>;
    ALT_BODY(else_continuation, else_if, else_only);
};

DEF(switch_statement)
produces_sequence<keyword<parse_keyword_t::kw_switch>, argument, tok_end, case_item_list,
                  end_command, arguments_or_redirections_list>{BODY(switch_statement)};

DEF_ALT(case_item_list) {
    using empty = grammar::empty;
    using case_items = seq<case_item, case_item_list>;
    using blank_line = seq<tok_end, case_item_list>;
    ALT_BODY(case_item_list, empty, case_items, blank_line);
};

DEF(case_item)
produces_sequence<keyword<parse_keyword_t::kw_case>, argument_list, tok_end, job_list>{
    BODY(case_item)};

DEF(block_statement)
produces_sequence<block_header, job_list, end_command, arguments_or_redirections_list>{
    BODY(block_statement)};

DEF_ALT(block_header) {
    using forh = single<for_header>;
    using whileh = single<while_header>;
    using funch = single<function_header>;
    using beginh = single<begin_header>;
    ALT_BODY(block_header, forh, whileh, funch, beginh);
};

DEF(for_header)
produces_sequence<keyword<parse_keyword_t::kw_for>, tok_string, keyword<parse_keyword_t::kw_in>,
                  argument_list, tok_end>{BODY(for_header)};

DEF(while_header)
produces_sequence<keyword<parse_keyword_t::kw_while>, job_conjunction, tok_end, andor_job_list>{
    BODY(while_header)};

DEF(begin_header) produces_single<keyword<parse_keyword_t::kw_begin>>{BODY(begin_header)};

// Functions take arguments, and require at least one (the name). No redirections allowed.
DEF(function_header)
produces_sequence<keyword<parse_keyword_t::kw_function>, argument, argument_list, tok_end>{
    BODY(function_header)};

DEF_ALT(not_statement) {
    using nots =
        seq<keyword<parse_keyword_t::kw_not>, variable_assignments, optional_time, statement>;
    using exclams =
        seq<keyword<parse_keyword_t::kw_exclam>, variable_assignments, optional_time, statement>;
    ALT_BODY(not_statement, nots, exclams);
};

// An andor_job_list is zero or more job lists, where each starts with an `and` or `or` boolean
// statement.
DEF_ALT(andor_job_list) {
    using empty = grammar::empty;
    using andor_job = seq<job_decorator, job_conjunction, andor_job_list>;
    using empty_line = seq<tok_end, andor_job_list>;
    ALT_BODY(andor_job_list, empty, andor_job, empty_line);
};

// A decorated_statement is a command with a list of arguments_or_redirections, possibly with
// "builtin" or "command" or "exec"
DEF_ALT(decorated_statement) {
    using plains = single<plain_statement>;
    using cmds = seq<keyword<parse_keyword_t::kw_command>, plain_statement>;
    using builtins = seq<keyword<parse_keyword_t::kw_builtin>, plain_statement>;
    using execs = seq<keyword<parse_keyword_t::kw_exec>, plain_statement>;
    ALT_BODY(decorated_statement, plains, cmds, builtins, execs);
};

DEF(plain_statement)
produces_sequence<tok_string, arguments_or_redirections_list>{BODY(plain_statement)};

DEF_ALT(argument_list) {
    using empty = grammar::empty;
    using arg = seq<argument, argument_list>;
    ALT_BODY(argument_list, empty, arg);
};

DEF_ALT(arguments_or_redirections_list) {
    using empty = grammar::empty;
    using arg = seq<argument, arguments_or_redirections_list>;
    using redir = seq<redirection, arguments_or_redirections_list>;
    ALT_BODY(arguments_or_redirections_list, empty, arg, redir);
};

DEF(argument) produces_single<tok_string>{BODY(argument)};
DEF(redirection) produces_sequence<tok_redirection, tok_string>{BODY(redirection)};

DEF_ALT(optional_background) {
    using empty = grammar::empty;
    using background = single<tok_background>;
    ALT_BODY(optional_background, empty, background);
};

DEF(end_command) produces_single<keyword<parse_keyword_t::kw_end>>{BODY(end_command)};

// Note optional_newlines only allows newline-style tok_end, not semicolons.
DEF_ALT(optional_newlines) {
    using empty = grammar::empty;
    using newlines = seq<tok_end, optional_newlines>;
    ALT_BODY(optional_newlines, empty, newlines);
};

// A freestanding_argument_list is equivalent to a normal argument list, except it may contain
// TOK_END (newlines, and even semicolons, for historical reasons)
DEF_ALT(freestanding_argument_list) {
    using empty = grammar::empty;
    using arg = seq<argument, freestanding_argument_list>;
    using semicolon = seq<tok_end, freestanding_argument_list>;
    ALT_BODY(freestanding_argument_list, empty, arg, semicolon);
};
}  // namespace grammar
#endif

View File

@ -1,37 +0,0 @@
// Define ELEM before including this file.
// This is an X-macro list of every grammar symbol. Each includer defines ELEM(T) to
// expand as it needs (forward declarations, element<T>() specializations, resolver
// dispatch cases, ...) and then includes this file.
ELEM(job_list)
ELEM(job)
ELEM(job_decorator)
ELEM(job_conjunction)
ELEM(job_conjunction_continuation)
ELEM(job_continuation)
ELEM(statement)
ELEM(if_statement)
ELEM(if_clause)
ELEM(else_clause)
ELEM(else_continuation)
ELEM(switch_statement)
ELEM(case_item_list)
ELEM(case_item)
ELEM(block_statement)
ELEM(block_header)
ELEM(for_header)
ELEM(while_header)
ELEM(begin_header)
ELEM(function_header)
ELEM(not_statement)
ELEM(andor_job_list)
ELEM(decorated_statement)
ELEM(variable_assignment)
ELEM(variable_assignments)
ELEM(plain_statement)
ELEM(argument_list)
ELEM(arguments_or_redirections_list)
ELEM(argument)
ELEM(redirection)
ELEM(optional_background)
ELEM(optional_newlines)
ELEM(optional_time)
ELEM(end_command)
ELEM(freestanding_argument_list)
// Undefine so the next includer can supply its own expansion.
#undef ELEM

View File

@ -1,466 +0,0 @@
#include "config.h" // IWYU pragma: keep
#include "parse_productions.h"
#include <stdio.h>
#include "common.h"
#include "flog.h"
#include "parse_constants.h"
#include "parse_grammar.h"
#include "parse_tree.h"
using namespace parse_productions;
using namespace grammar;
#define NO_PRODUCTION nullptr
// Herein are encoded the productions for our LL2 fish grammar.
//
// Each symbol (e.g. symbol_job_list) has a corresponding function (e.g. resolve_job_list). The
// function accepts two tokens, representing the first and second lookahead, and returns a
// production representing the rule, or NULL on error. There is also a tag value which is returned
// by reference; the tag is a sort of node annotation.
//
// Productions are generally a static const array, and we return a pointer to the array (yes,
// really).
#define RESOLVE(SYM) \
const production_element_t *SYM::resolve( \
const parse_token_t &token1, const parse_token_t &token2, parse_node_tag_t *out_tag)
/// Resolve the production for a job_list: a list of jobs separated by semicolons or newlines.
RESOLVE(job_list) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.type == parse_token_type_string) {
        // Certain keywords close the enclosing construct and therefore end this job list.
        if (token1.keyword == parse_keyword_t::kw_end ||
            token1.keyword == parse_keyword_t::kw_else ||
            token1.keyword == parse_keyword_t::kw_case) {
            return production_for<empty>();  // end this job list
        }
        return production_for<normal>();  // normal string
    }
    if (token1.type == parse_token_type_pipe || token1.type == parse_token_type_redirection ||
        token1.type == parse_token_type_background) {
        return production_for<normal>();
    }
    if (token1.type == parse_token_type_end) {
        return production_for<empty_line>();
    }
    if (token1.type == parse_token_type_terminate) {
        return production_for<empty>();  // no more commands, just transition to empty
    }
    return NO_PRODUCTION;
}
// A job decorator is 'and' or 'or', applying to the whole job.
RESOLVE(job_decorator) {
    // An and/or followed by --help is a command invocation, not a decoration.
    if (token2.is_help_argument) {
        *out_tag = parse_job_decoration_none;
        return production_for<empty>();
    }
    if (token1.keyword == parse_keyword_t::kw_and) {
        *out_tag = parse_job_decoration_and;
        return production_for<ands>();
    }
    if (token1.keyword == parse_keyword_t::kw_or) {
        *out_tag = parse_job_decoration_or;
        return production_for<ors>();
    }
    // Not a decorator at all.
    *out_tag = parse_job_decoration_none;
    return production_for<empty>();
}
/// Resolve the continuation of a job conjunction: '&&' or '||' chains another job
/// conjunction onto this one; anything else ends the conjunction.
/// Sets *out_tag to the decoration when a chain operator is present.
RESOLVE(job_conjunction_continuation) {
    UNUSED(token2);
    // Note: out_tag is genuinely used below; the earlier spurious UNUSED(out_tag)
    // annotation was removed.
    switch (token1.type) {
        case parse_token_type_andand:
            *out_tag = parse_job_decoration_and;
            return production_for<andands>();
        case parse_token_type_oror:
            *out_tag = parse_job_decoration_or;
            return production_for<orors>();
        default:
            return production_for<empty>();
    }
}
/// Resolve a job continuation: a pipe continues the job; anything else ends it.
RESOLVE(job_continuation) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.type == parse_token_type_pipe) {
        return production_for<piped>();  // pipe, so the job continues
    }
    return production_for<empty>();  // not a pipe, no job continuation
}
// A statement is a normal command, or an if / while / and etc.
// Resolution uses both lookahead tokens: token2 decides whether a block keyword is
// actually being invoked as a plain (decorated) command, e.g. 'if --help' or a naked 'if'.
RESOLVE(statement) {
    UNUSED(out_tag);
    // The only block-like builtin that takes any parameters is 'function' So go to decorated
    // statements if the subsequent token looks like '--'. The logic here is subtle:
    //
    // If we are 'begin', then we expect to be invoked with no arguments.
    // If we are 'function', then we are a non-block if we are invoked with -h or --help
    // If we are anything else, we require an argument, so do the same thing if the subsequent token
    // is a statement terminator.
    if (token1.type == parse_token_type_string) {
        // If we are a function, then look for help arguments. Otherwise, if the next token looks
        // like an option (starts with a dash), then parse it as a decorated statement.
        if (token1.keyword == parse_keyword_t::kw_function && token2.is_help_argument) {
            return production_for<decorated>();
        } else if (token1.keyword != parse_keyword_t::kw_function && token2.has_dash_prefix) {
            return production_for<decorated>();
        }
        // Likewise if the next token doesn't look like an argument at all. This corresponds to e.g.
        // a "naked if".
        bool naked_invocation_invokes_help = (token1.keyword != parse_keyword_t::kw_begin &&
                                              token1.keyword != parse_keyword_t::kw_end);
        if (naked_invocation_invokes_help &&
            (token2.type == parse_token_type_end || token2.type == parse_token_type_terminate)) {
            return production_for<decorated>();
        }
    }
    switch (token1.type) {
        case parse_token_type_string: {
            switch (token1.keyword) {
                case parse_keyword_t::kw_not:
                case parse_keyword_t::kw_exclam: {
                    return production_for<nots>();
                }
                case parse_keyword_t::kw_for:
                case parse_keyword_t::kw_while:
                case parse_keyword_t::kw_function:
                case parse_keyword_t::kw_begin: {
                    return production_for<block>();
                }
                case parse_keyword_t::kw_if: {
                    return production_for<ifs>();
                }
                case parse_keyword_t::kw_else: {
                    // 'else' cannot begin a statement; error.
                    return NO_PRODUCTION;
                }
                case parse_keyword_t::kw_switch: {
                    return production_for<switchs>();
                }
                case parse_keyword_t::kw_end: {
                    // 'end' cannot begin a statement; error.
                    return NO_PRODUCTION;
                }
                // All other keywords fall through to decorated statement.
                default: {
                    return production_for<decorated>();
                }
            }
        }
        case parse_token_type_pipe:
        case parse_token_type_redirection:
        case parse_token_type_background:
        case parse_token_type_terminate: {
            return NO_PRODUCTION;
        }
        default: {
            return NO_PRODUCTION;
        }
    }
}
/// Resolve an else clause: the 'else' keyword begins one; anything else means no clause.
RESOLVE(else_clause) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.keyword == parse_keyword_t::kw_else) {
        return production_for<else_cont>();
    }
    return production_for<empty>();
}
/// Resolve what follows 'else': 'if' makes it an else-if; anything else is a plain else.
RESOLVE(else_continuation) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.keyword == parse_keyword_t::kw_if) {
        return production_for<else_if>();
    }
    return production_for<else_only>();
}
/// Resolve a case item list: 'case' starts an item, a separator is a blank line, and
/// everything else terminates the list.
RESOLVE(case_item_list) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.keyword == parse_keyword_t::kw_case) {
        return production_for<case_items>();
    }
    if (token1.type == parse_token_type_end) {
        return production_for<blank_line>();
    }
    return production_for<empty>();
}
/// Resolve a negated statement: either the 'not' keyword or its '!' spelling.
RESOLVE(not_statement) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.keyword == parse_keyword_t::kw_not) {
        return production_for<nots>();
    }
    if (token1.keyword == parse_keyword_t::kw_exclam) {
        return production_for<exclams>();
    }
    return NO_PRODUCTION;
}
/// Resolve an andor_job_list: jobs that each begin with an 'and' or 'or' boolean statement.
RESOLVE(andor_job_list) {
    UNUSED(out_tag);
    if (token1.type == parse_token_type_end) {
        return production_for<empty_line>();
    }
    const bool is_andor = token1.keyword == parse_keyword_t::kw_and ||
                          token1.keyword == parse_keyword_t::kw_or;
    // The argument to and/or must be a non-help string; otherwise we have 'and --help' or a
    // naked 'and', which is not part of this list.
    if (is_andor && token2.type == parse_token_type_string && !token2.is_help_argument) {
        return production_for<andor_job>();
    }
    // All other cases end the list.
    return production_for<empty>();
}
/// Resolve an argument list: string tokens extend the list, anything else ends it.
RESOLVE(argument_list) {
    UNUSED(token2);
    UNUSED(out_tag);
    return token1.type == parse_token_type_string ? production_for<arg>()
                                                  : production_for<empty>();
}
/// Resolve a freestanding argument list: like an argument list, but separators
/// (newlines/semicolons) are also permitted within it.
RESOLVE(freestanding_argument_list) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.type == parse_token_type_string) {
        return production_for<arg>();
    }
    if (token1.type == parse_token_type_end) {
        return production_for<semicolon>();
    }
    return production_for<empty>();
}
/// Resolve which header opens this block: for, while, function, or begin.
RESOLVE(block_header) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.keyword == parse_keyword_t::kw_for) {
        return production_for<forh>();
    }
    if (token1.keyword == parse_keyword_t::kw_while) {
        return production_for<whileh>();
    }
    if (token1.keyword == parse_keyword_t::kw_function) {
        return production_for<funch>();
    }
    if (token1.keyword == parse_keyword_t::kw_begin) {
        return production_for<beginh>();
    }
    return NO_PRODUCTION;
}
/// Resolve a (possibly empty) list of leading variable assignments like FOO=bar.
RESOLVE(variable_assignments) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (!token1.may_be_variable_assignment) {
        return production_for<empty>();
    }
    // A variable assignment is always a string token.
    assert(token1.type == parse_token_type_string);
    return production_for<var>();
}
/// Resolve a decorated statement: a plain statement optionally preceded by 'command',
/// 'builtin' or 'exec'. Sets *out_tag to the decoration when one applies.
RESOLVE(decorated_statement) {
    // and/or are typically parsed in job_conjunction at the beginning of a job
    // However they may be reached here through e.g. true && and false.
    // Refuse to parse them as a command except for --help. See #6089.
    const bool is_andor = token1.keyword == parse_keyword_t::kw_and ||
                          token1.keyword == parse_keyword_t::kw_or;
    if (is_andor && !token2.is_help_argument) {
        return NO_PRODUCTION;
    }
    // If this is e.g. 'command --help' then the command is 'command' and not a decoration. If the
    // second token is not a string, then this is a naked 'command' and we should execute it as
    // undecorated.
    if (token2.type != parse_token_type_string || token2.has_dash_prefix) {
        return production_for<plains>();
    }
    if (token1.keyword == parse_keyword_t::kw_command) {
        *out_tag = parse_statement_decoration_command;
        return production_for<cmds>();
    }
    if (token1.keyword == parse_keyword_t::kw_builtin) {
        *out_tag = parse_statement_decoration_builtin;
        return production_for<builtins>();
    }
    if (token1.keyword == parse_keyword_t::kw_exec) {
        *out_tag = parse_statement_decoration_exec;
        return production_for<execs>();
    }
    *out_tag = parse_statement_decoration_none;
    return production_for<plains>();
}
/// Resolve a mixed list of arguments and redirections; anything else ends the list.
RESOLVE(arguments_or_redirections_list) {
    UNUSED(token2);
    UNUSED(out_tag);
    if (token1.type == parse_token_type_string) {
        return production_for<arg>();
    }
    if (token1.type == parse_token_type_redirection) {
        return production_for<redir>();
    }
    return production_for<empty>();
}
/// Resolve optional newlines: consume newline-style separators only (not semicolons).
RESOLVE(optional_newlines) {
    UNUSED(token2);
    UNUSED(out_tag);
    return token1.is_newline ? production_for<newlines>() : production_for<empty>();
}
/// Resolve an optional trailing '&'; records the background status in *out_tag.
RESOLVE(optional_background) {
    UNUSED(token2);
    if (token1.type == parse_token_type_background) {
        *out_tag = parse_background;
        return production_for<background>();
    }
    *out_tag = parse_no_background;
    return production_for<empty>();
}
/// Resolve an optional 'time' prefix; 'time --help' is a command, not a prefix.
RESOLVE(optional_time) {
    const bool has_time =
        token1.keyword == parse_keyword_t::kw_time && !token2.is_help_argument;
    *out_tag = has_time ? parse_optional_time_time : parse_optional_time_no_time;
    return has_time ? production_for<time>() : production_for<empty>();
}
/// Fetch the production for the given non-terminal \p node_type, given the two lookahead
/// tokens \p input1 and \p input2. \p out_tag receives a node-type-specific annotation.
/// \return the production, or nullptr if the tokens permit no rule (a parse error).
/// Passing a terminal or special token type here is a programmer error and aborts via
/// PARSER_DIE().
const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type,
                                                                    const parse_token_t &input1,
                                                                    const parse_token_t &input2,
                                                                    parse_node_tag_t *out_tag) {
    // this is **extremely** chatty
    FLOGF(parse_productions_chatty, L"Resolving production for %ls with input token <%ls>",
          token_type_description(node_type), input1.describe().c_str());

    // Fetch the function to resolve the list of productions.
    const production_element_t *(*resolver)(const parse_token_t &input1,  //!OCLINT(unused param)
                                            const parse_token_t &input2,  //!OCLINT(unused param)
                                            parse_node_tag_t *out_tag) =  //!OCLINT(unused param)
        nullptr;
    switch (node_type) {
    // Handle all of our grammar elements
#define ELEM(SYM)                \
    case (symbol_##SYM):         \
        resolver = SYM::resolve; \
        break;
#include "parse_grammar_elements.inc"

        // Everything else is an error.
        case parse_token_type_string:
        case parse_token_type_pipe:
        case parse_token_type_redirection:
        case parse_token_type_background:
        case parse_token_type_andand:
        case parse_token_type_oror:
        case parse_token_type_end:
        case parse_token_type_terminate: {
            FLOGF(error, L"Terminal token type %ls passed to %s", token_type_description(node_type),
                  __FUNCTION__);
            PARSER_DIE();
            break;
        }
        case parse_special_type_parse_error:
        case parse_special_type_tokenizer_error:
        case parse_special_type_comment: {
            // Note: no trailing newline; FLOGF messages are single lines like the
            // sibling diagnostics above and below.
            FLOGF(error, L"Special type %ls passed to %s", token_type_description(node_type),
                  __FUNCTION__);
            PARSER_DIE();
            break;
        }
        case token_type_invalid: {
            FLOGF(error, L"token_type_invalid passed to %s", __FUNCTION__);
            PARSER_DIE();
            break;
        }
    }
    PARSE_ASSERT(resolver != nullptr);

    const production_element_t *result = resolver(input1, input2, out_tag);
    if (result == nullptr) {
        FLOGF(parse_productions, L"Node type '%ls' has no production for input '%ls' (in %s)",
              token_type_description(node_type), input1.describe().c_str(), __FUNCTION__);
    }
    return result;
}

View File

@ -1,49 +0,0 @@
// Programmatic representation of fish code.
#ifndef FISH_PARSE_TREE_CONSTRUCTION_H
#define FISH_PARSE_TREE_CONSTRUCTION_H
#include <sys/types.h>
#include "parse_constants.h"
struct parse_token_t;
namespace parse_productions {

// A production is an array of unsigned char. Symbols are encoded directly as their symbol value.
// Keywords are encoded with an offset of LAST_TOKEN_OR_SYMBOL + 1. So essentially we glom together
// keywords and symbols.
// (Modernized from `typedef` to a `using` alias, matching the rest of the parser code.)
using production_element_t = uint8_t;

/// Resolve the type from a production element.
/// Values above LAST_TOKEN_OR_SYMBOL encode keywords, which are always string tokens.
inline parse_token_type_t production_element_type(production_element_t elem) {
    if (elem > LAST_TOKEN_OR_SYMBOL) {
        return parse_token_type_string;
    } else {
        return static_cast<parse_token_type_t>(elem);
    }
}

/// Resolve the keyword from a production element, or parse_keyword_t::none if the element
/// is a plain token or symbol.
inline parse_keyword_t production_element_keyword(production_element_t elem) {
    if (elem > LAST_TOKEN_OR_SYMBOL) {
        // First keyword is LAST_TOKEN_OR_SYMBOL + 1.
        return static_cast<parse_keyword_t>(elem - LAST_TOKEN_OR_SYMBOL - 1);
    } else {
        return parse_keyword_t::none;
    }
}

/// Check if an element is valid.
inline bool production_element_is_valid(production_element_t elem) {
    return elem != token_type_invalid;
}

/// Fetch a production. We are passed two input tokens. The first input token is guaranteed to not
/// be invalid; the second token may be invalid if there's no more tokens. We may also set flags.
const production_element_t *production_for_token(parse_token_type_t node_type,
                                                 const parse_token_t &input1,
                                                 const parse_token_t &input2, uint8_t *out_tag);

}  // namespace parse_productions
#endif

File diff suppressed because it is too large Load Diff

View File

@ -6,30 +6,19 @@
#include <stdint.h>
#include <sys/types.h>
#include <deque>
#include <memory>
#include <vector>
#include "common.h"
#include "maybe.h"
#include "parse_constants.h"
#include "parse_grammar.h"
#include "tokenizer.h"
class parse_node_tree_t;
typedef uint32_t node_offset_t;
#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))
typedef uint32_t source_offset_t;
constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
/// A range within a source string: a start offset plus a length.
struct source_range_t {
    uint32_t start;   // offset of the first character in the range
    uint32_t length;  // number of characters covered by the range
};
/// A struct representing the token type that we use internally.
struct parse_token_t {
enum parse_token_type_t type; // The type of the token as represented by the parser
@ -41,38 +30,35 @@ struct parse_token_t {
bool is_newline{false}; // Hackish: if TOK_END, whether the source is a newline.
bool preceding_escaped_nl{false}; // Whether there was an escaped newline preceding this token.
bool may_be_variable_assignment{false}; // Hackish: whether this token is a string like FOO=bar
tokenizer_error_t tok_error{tokenizer_error_t::none}; // If this is a tokenizer error, that error.
source_offset_t source_start{SOURCE_OFFSET_INVALID};
source_offset_t source_length{0};
/// \return the source range.
/// Note the start may be invalid.
source_range_t range() const {
return source_range_t{source_start, source_length};
}
/// \return whether we are a string with the dash prefix set.
bool is_dash_prefix_string() const {
return type == parse_token_type_string && has_dash_prefix;
}
wcstring describe() const;
wcstring user_presentable_description() const;
constexpr parse_token_t(parse_token_type_t type) : type(type) {}
};
/// Flags controlling parse-tree construction; combined as a bitmask in parse_tree_flags_t.
enum {
    parse_flag_none = 0,
    /// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
    /// disconnected trees. This is intended to be used by syntax highlighting.
    parse_flag_continue_after_error = 1 << 0,
    /// Include comment tokens.
    parse_flag_include_comments = 1 << 1,
    /// Indicate that the tokenizer should accept incomplete tokens.
    parse_flag_accept_incomplete_tokens = 1 << 2,
    /// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
    /// tree where some nodes may have no productions.
    parse_flag_leave_unterminated = 1 << 3,
    /// Indicate that the parser should generate job_list entries for blank lines.
    parse_flag_show_blank_lines = 1 << 4
};
/// A bitmask of the parse_flag_* values above.
typedef unsigned int parse_tree_flags_t;
wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src);
/// Return a new parse token, advancing the tokenizer.
parse_token_t next_parse_token(tokenizer_t *tok, maybe_t<tok_t> *out_token, wcstring *storage);
const wchar_t *token_type_description(parse_token_type_t type);
const wchar_t *keyword_description(parse_keyword_t type);
parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err);
// Node flags.
enum {
/// Flag indicating that the node has associated comment nodes.
@ -88,145 +74,29 @@ typedef uint8_t parse_node_flags_t;
/// Node-type specific tag value.
typedef uint8_t parse_node_tag_t;
/// Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields
/// is important.
class parse_node_t {
   public:
    // Start of this node's range in the source code; SOURCE_OFFSET_INVALID if unset.
    source_offset_t source_start{SOURCE_OFFSET_INVALID};
    // Length of our range in the source code.
    source_offset_t source_length{0};
    // Offset of our parent node within the tree; NODE_OFFSET_INVALID if unset.
    node_offset_t parent{NODE_OFFSET_INVALID};
    // Offset of our first child within the tree; children are stored contiguously
    // (see child_offset below).
    node_offset_t child_start{0};
    // Number of children.
    uint8_t child_count{0};
    // Type of the node.
    enum parse_token_type_t type;
    // Keyword associated with node.
    enum parse_keyword_t keyword { parse_keyword_t::none };
    // Node flags (parse_node_flag_* values), packed into 4 bits to keep the node small.
    parse_node_flags_t flags : 4;
    // This is used to store e.g. the statement decoration. Shares a byte with 'flags'.
    parse_node_tag_t tag : 4;
    // \return a textual description of this node, for debugging.
    wcstring describe() const;
    // Constructor. Only the type is required; everything else starts out empty/invalid.
    explicit parse_node_t(parse_token_type_t ty) : type(ty), flags(0), tag(0) {}

    // \return the tree offset of our child at index 'which'; asserts it is in range.
    node_offset_t child_offset(node_offset_t which) const {
        PARSE_ASSERT(which < child_count);
        return child_start + which;
    }

    /// Indicate if this node has a range of source code associated with it.
    bool has_source() const {
        // Should never have a nonempty range with an invalid offset.
        assert(this->source_start != SOURCE_OFFSET_INVALID || this->source_length == 0);
        return this->source_length > 0;
    }

    /// Indicate if the node has comment nodes.
    bool has_comments() const { return this->flags & parse_node_flag_has_comments; }

    /// Indicates if we have a preceding escaped newline.
    bool has_preceding_escaped_newline() const {
        return this->flags & parse_node_flag_preceding_escaped_nl;
    }

    /// \return our source range. Requires has_source().
    source_range_t source_range() const {
        assert(has_source());
        return {source_start, source_length};
    }

    /// Gets source for the node, or the empty string if it has no source.
    wcstring get_source(const wcstring &str) const {
        if (!has_source())
            return wcstring();
        else
            return wcstring(str, this->source_start, this->source_length);
    }

    /// Returns whether the given location is within the source range or at its end.
    bool location_in_or_at_end_of_source_range(size_t loc) const {
        return has_source() && source_start <= loc && loc - source_start <= source_length;
    }
};
template <typename Type>
class tnode_t;
/// The parse tree itself. A flat vector of nodes; nodes reference each other by offset
/// (see parse_node_t::parent / child_start).
class parse_node_tree_t : public std::vector<parse_node_t> {
   public:
    parse_node_tree_t() {}

    // Trees may be moved but not copied.
    parse_node_tree_t(parse_node_tree_t &&) = default;
    parse_node_tree_t &operator=(parse_node_tree_t &&) = default;
    parse_node_tree_t(const parse_node_tree_t &) = delete;             // no copying
    parse_node_tree_t &operator=(const parse_node_tree_t &) = delete;  // no copying

    // Get the node corresponding to a child of the given node, or NULL if there is no such child.
    // If expected_type is provided, assert that the node has that type.
    const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which,
                                  parse_token_type_t expected_type = token_type_invalid) const;

    // Find the first direct child of the given node of the given type. asserts on failure.
    const parse_node_t &find_child(const parse_node_t &parent, parse_token_type_t type) const;

    // Typed overload of find_child, returning a tnode wrapping the found child.
    template <typename Type>
    tnode_t<Type> find_child(const parse_node_t &parent) const;

    // Get the node corresponding to the parent of the given node, or NULL if there is no such
    // child. If expected_type is provided, only returns the parent if it is of that type. Note the
    // asymmetry: get_child asserts since the children are known, but get_parent does not, since the
    // parent may not be known.
    const parse_node_t *get_parent(const parse_node_t &node,
                                   parse_token_type_t expected_type = token_type_invalid) const;

    // Finds a node containing the given source location. If 'parent' is not NULL, it must be an
    // ancestor.
    const parse_node_t *find_node_matching_source_location(parse_token_type_t type,
                                                           size_t source_loc,
                                                           const parse_node_t *parent) const;

    // Utilities

    /// Given a node, return all of its comment nodes.
    std::vector<tnode_t<grammar::comment>> comment_nodes_for_node(const parse_node_t &parent) const;

   private:
    // tnode_t needs access to next_node_in_node_list for typed list iteration.
    template <typename Type>
    friend class tnode_t;

    /// Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return
    /// the next element of the given type in that list, and the tail (by reference). Returns NULL
    /// if we've exhausted the list.
    const parse_node_t *next_node_in_node_list(const parse_node_t &node_list,
                                               parse_token_type_t entry_type,
                                               const parse_node_t **list_tail) const;
};
/// The big entry point. Parse a string, attempting to produce a tree for the given goal type.
bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags,
parse_node_tree_t *output, parse_error_list_t *errors,
parse_token_type_t goal = symbol_job_list);
namespace ast {
class ast_t;
}
/// A type wrapping up a parse tree and the original source behind it.
struct parsed_source_t {
wcstring src;
parse_node_tree_t tree;
std::unique_ptr<ast::ast_t> ast;
parsed_source_t(wcstring s, parse_node_tree_t t) : src(std::move(s)), tree(std::move(t)) {}
parsed_source_t(wcstring s, ast::ast_t &&ast);
~parsed_source_t();
parsed_source_t(const parsed_source_t &) = delete;
void operator=(const parsed_source_t &) = delete;
parsed_source_t(parsed_source_t &&) = default;
parsed_source_t &operator=(parsed_source_t &&) = default;
parsed_source_t(parsed_source_t &&) = delete;
parsed_source_t &operator=(parsed_source_t &&) = delete;
};
/// Return a shared pointer to parsed_source_t, or null on failure.
/// If parse_flag_continue_after_error is not set, this will return null on any error.
using parsed_source_ref_t = std::shared_ptr<const parsed_source_t>;
parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags, parse_error_list_t *errors,
parse_token_type_t goal = symbol_job_list);
parsed_source_ref_t parse_source(wcstring src, parse_tree_flags_t flags,
parse_error_list_t *errors);
/// Error message for improper use of the exec builtin.
#define EXEC_ERR_MSG _(L"The '%ls' command can not be used in a pipeline")

View File

@ -14,6 +14,7 @@
#include <string>
#include <type_traits>
#include "ast.h"
#include "builtin.h"
#include "common.h"
#include "expand.h"
@ -22,7 +23,6 @@
#include "parse_constants.h"
#include "parse_util.h"
#include "parser.h"
#include "tnode.h"
#include "tokenizer.h"
#include "wcstringutil.h"
#include "wildcard.h"
@ -565,121 +565,16 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote,
return result;
}
/// We are given a parse tree, the index of a node within the tree, its indent, and a vector of
/// indents the same size as the original source string. Set the indent correspdonding to the node's
/// source range, if appropriate.
///
/// trailing_indent is the indent for nodes with unrealized source, i.e. if I type 'if false <ret>'
/// then we have an if node with an empty job list (without source) but we want the last line to be
/// indented anyways.
///
/// switch statements also indent.
///
/// max_visited_node_idx is the largest index we visited.
static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx,
int node_indent, parse_token_type_t parent_type,
std::vector<int> *indents, int *trailing_indent,
node_offset_t *max_visited_node_idx) {
// Guard against incomplete trees.
if (node_idx > tree.size()) return;
// Update max_visited_node_idx.
if (node_idx > *max_visited_node_idx) *max_visited_node_idx = node_idx;
// We could implement this by utilizing the fish grammar. But there's an easy trick instead:
// almost everything that wraps a job list should be indented by 1. So just find all of the job
// lists. One exception is switch, which wraps a case_item_list instead of a job_list. The other
// exception is job_list itself: a job_list is a job and a job_list, and we want that child list
// to be indented the same as the parent. So just find all job_lists whose parent is not a
// job_list, and increment their indent by 1. We also want to treat andor_job_list like
// job_lists.
const parse_node_t &node = tree.at(node_idx);
const parse_token_type_t node_type = node.type;
// Increment the indent if we are either a root job_list, or root case_item_list.
const bool is_root_job_list = node_type != parent_type && (node_type == symbol_job_list ||
node_type == symbol_andor_job_list);
const bool is_root_case_item_list =
node_type == symbol_case_item_list && parent_type != symbol_case_item_list;
if (is_root_job_list || is_root_case_item_list) {
node_indent += 1;
}
// If we have source, store the trailing indent unconditionally. If we do not have source, store
// the trailing indent only if ours is bigger; this prevents the trailing "run" of terminal job
// lists from affecting the trailing indent. For example, code like this:
//
// if foo
//
// will be parsed as this:
//
// job_list
// job
// if_statement
// job [if]
// job_list [empty]
// job_list [empty]
//
// There's two "terminal" job lists, and we want the innermost one.
//
// Note we are relying on the fact that nodes are in the same order as the source, i.e. an
// in-order traversal of the node tree also traverses the source from beginning to end.
if (node.has_source() || node_indent > *trailing_indent) {
*trailing_indent = node_indent;
}
// Store the indent into the indent array.
if (node.source_start != SOURCE_OFFSET_INVALID && node.source_start < indents->size()) {
if (node.has_source()) {
// A normal non-empty node. Store the indent unconditionally.
indents->at(node.source_start) = node_indent;
} else {
// An empty node. We have a source offset but no source length. This can come about when
// a node is legitimately empty:
//
// while true; end
//
// The job_list inside the while loop is empty. It still has a source offset (at the end
// of the while statement) but no source extent. We still need to capture that indent,
// because there may be comments inside:
//
// while true
// # loop forever
// end
//
// The 'loop forever' comment must be indented, by virtue of storing the indent.
//
// Now consider what happens if we remove the end:
//
// while true
// # loop forever
//
// Now both the job_list and end_command are unmaterialized. However, we want the indent
// to be of the job_list and not the end_command. Therefore, we only store the indent
// if it's bigger.
if (node_indent > indents->at(node.source_start)) {
indents->at(node.source_start) = node_indent;
}
}
}
// Recursive to all our children.
for (node_offset_t idx = 0; idx < node.child_count; idx++) {
// Note we pass our type to our child, which becomes its parent node type.
compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents,
trailing_indent, max_visited_node_idx);
}
}
std::vector<int> parse_util_compute_indents(const wcstring &src) {
// Make a vector the same size as the input string, which contains the indents. Initialize them
// to -1.
// to 0.
static wcstring ssss;
ssss = src;
const size_t src_size = src.size();
std::vector<int> indents(src_size, -1);
std::vector<int> indents(src_size, 0);
// Simple trick: if our source does not contain a newline, then all indents are 0.
if (src.find('\n') == wcstring::npos) {
std::fill(indents.begin(), indents.end(), 0);
return indents;
}
@ -687,78 +582,141 @@ std::vector<int> parse_util_compute_indents(const wcstring &src) {
// the last node we visited becomes the input indent of the next. I.e. in the case of 'switch
// foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it
// were a case item list.
parse_node_tree_t tree;
parse_tree_from_string(src,
parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens,
&tree, nullptr /* errors */);
using namespace ast;
auto ast =
ast_t::parse(src, parse_flag_continue_after_error | parse_flag_include_comments |
parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated);
// Start indenting at the first node. If we have a parse error, we'll have to start indenting
// from the top again.
node_offset_t start_node_idx = 0;
int last_trailing_indent = 0;
// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while
// visiting its children.
struct indent_visitor_t {
explicit indent_visitor_t(std::vector<int> &indents) : indents(indents) {}
while (start_node_idx < tree.size()) {
// The indent that we'll get for the last line.
int trailing_indent = 0;
void visit(const node_t &node) {
int inc = 0;
int dec = 0;
switch (node.type) {
case type_t::job_list:
case type_t::andor_job_list:
// Job lists are never unwound.
inc = 1;
dec = 1;
break;
// Biggest offset we visited.
node_offset_t max_visited_node_idx = 0;
// Increment indents for conditions in headers (#1665).
case type_t::job_conjunction:
if (node.parent->type == type_t::while_header ||
node.parent->type == type_t::if_clause) {
inc = 1;
dec = 1;
}
break;
// Invoke the recursive version. As a hack, pass job_list for the 'parent' token type, which
// will prevent the really-root job list from indenting.
compute_indents_recursive(tree, start_node_idx, last_trailing_indent, symbol_job_list,
&indents, &trailing_indent, &max_visited_node_idx);
// Increment indents for piped remainders.
case type_t::job_continuation_list:
if (node.as<job_continuation_list_t>()->count() > 0) {
inc = 1;
dec = 1;
}
break;
// We may have more to indent. The trailing indent becomes our current indent. Start at the
// node after the last we visited.
last_trailing_indent = trailing_indent;
start_node_idx = max_visited_node_idx + 1;
}
case type_t::case_item_list:
// Here's a hack. Consider:
// switch abc
// cas
//
// fish will see that 'cas' is not valid inside a switch statement because it is
// not "case". It will then unwind back to the top level job list, producing a
// parse tree like:
//
// job_list
// switch_job
// <err>
// normal_job
// cas
//
// And so we will think that the 'cas' job is at the same level as the switch.
// To address this, if we see that the switch statement was not closed, do not
// decrement the indent afterwards.
inc = 1;
dec = node.parent->as<switch_statement_t>()->end.unsourced ? 0 : 1;
break;
// Handle comments. Each comment node has a parent (which is whatever the top of the symbol
// stack was when the comment was encountered). So the source range of the comment has the same
// indent as its parent.
const size_t tree_size = tree.size();
for (node_offset_t i = 0; i < tree_size; i++) {
const parse_node_t &node = tree.at(i);
if (node.type == parse_special_type_comment && node.has_source() &&
node.parent < tree_size) {
const parse_node_t &parent = tree.at(node.parent);
if (parent.source_start != SOURCE_OFFSET_INVALID) {
indents.at(node.source_start) = indents.at(parent.source_start);
default:
break;
}
}
}
indent += inc;
// Now apply the indents. The indents array has -1 for places where the indent does not change,
// so start at each value and extend it along the run of -1s.
int last_indent = 0;
for (size_t i = 0; i < src_size; i++) {
int this_indent = indents.at(i);
if (this_indent < 0) {
indents.at(i) = last_indent;
// If we increased the indentation, apply it to the remainder of the string, even if the
// list is empty. For example (where _ represents the cursor):
//
// if foo
// _
//
// we want to indent the newline.
if (inc) {
std::fill(indents.begin() + last_leaf_end, indents.end(), indent);
last_indent = indent;
}
// If this is a leaf node, apply the current indentation.
if (node.category == category_t::leaf) {
auto range = node.source_range();
if (range.length > 0) {
// Fill to the end.
// Later nodes will come along and overwrite these.
std::fill(indents.begin() + range.start, indents.end(), indent);
last_leaf_end = range.start + range.length;
last_indent = indent;
}
}
node_visitor(*this).accept_children_of(&node);
indent -= dec;
}
// The one-past-the-last index of the most recently encountered leaf node.
// We use this to populate the indents even if there's no tokens in the range.
size_t last_leaf_end{0};
// The last indent which we assigned.
int last_indent{-1};
// List of indents, which we populate.
std::vector<int> &indents;
// Initialize our starting indent to -1, as our top-level node is a job list which
// will immediately increment it.
int indent{-1};
};
indent_visitor_t iv(indents);
node_visitor(iv).accept(ast.top());
// All newlines now get the *next* indent.
// For example, in this code:
// if true
// stuff
// the newline "belongs" to the if statement as it ends its job.
// But when rendered, it visually belongs to the job list.
// FIXME: if there's a middle newline, we will indent it wrongly.
// For example:
// if true
//
// end
// Here the middle newline should be indented by 1.
size_t idx = src_size;
int next_indent = iv.last_indent;
while (idx--) {
if (src.at(idx) == L'\n') {
indents.at(idx) = next_indent;
} else {
// New indent level.
last_indent = this_indent;
// Make all whitespace before a token have the new level. This avoid using the wrong
// indentation level if a new line starts with whitespace.
size_t prev_char_idx = i;
while (prev_char_idx--) {
if (!std::wcschr(L" \n\t\r", src.at(prev_char_idx))) break;
indents.at(prev_char_idx) = last_indent;
}
next_indent = indents.at(idx);
}
}
// Ensure trailing whitespace has the trailing indent. This makes sure a new line is correctly
// indented even if it is empty.
size_t suffix_idx = src_size;
while (suffix_idx--) {
if (!std::wcschr(L" \n\t\r", src.at(suffix_idx))) break;
indents.at(suffix_idx) = last_trailing_indent;
}
return indents;
}
@ -790,17 +748,13 @@ bool parse_util_argument_is_help(const wchar_t *s) {
return std::wcscmp(L"-h", s) == 0 || std::wcscmp(L"--help", s) == 0;
}
/// Check if the first argument under the given node is --help.
static bool first_argument_is_help(tnode_t<grammar::plain_statement> statement,
const wcstring &src) {
bool is_help = false;
auto arg_nodes = get_argument_nodes(statement.child<1>());
if (!arg_nodes.empty()) {
// Check the first argument only.
wcstring first_arg_src = arg_nodes.front().get_source(src);
is_help = parse_util_argument_is_help(first_arg_src.c_str());
// \return a pointer to the first argument node of an argument_or_redirection_list_t, or nullptr if
// there are no arguments.
const ast::argument_t *get_first_arg(const ast::argument_or_redirection_list_t &list) {
    // Arguments and redirections are interleaved in the list; skip over redirections.
    for (const ast::argument_or_redirection_t &v : list) {
        if (v.is_argument()) return &v.argument();
    }
    // No argument found. (A stray 'return is_help;' residue line referencing an undeclared
    // variable was removed here; the sole fall-through result is nullptr.)
    return nullptr;
}
/// Given a wide character immediately after a dollar sign, return the appropriate error message.
@ -956,11 +910,13 @@ static parser_test_error_bits_t detect_dollar_cmdsub_errors(size_t arg_src_offse
/// Test if this argument contains any errors. Detected errors include syntax errors in command
/// substitutions, improperly escaped characters and improper use of the variable expansion
/// operator.
parser_test_error_bits_t parse_util_detect_errors_in_argument(tnode_t<grammar::argument> node,
parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argument_t &arg,
const wcstring &arg_src,
parse_error_list_t *out_errors) {
assert(node.has_source() && "argument has no source");
auto source_start = node.source_range()->start;
maybe_t<source_range_t> source_range = arg.try_source_range();
if (!source_range.has_value()) return 0;
size_t source_start = source_range->start;
int err = 0;
wchar_t *paran_begin, *paran_end;
int do_loop = 1;
@ -1054,10 +1010,10 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(tnode_t<grammar::a
}
/// Given that the job given by node should be backgrounded, return true if we detect any errors.
static bool detect_errors_in_backgrounded_job(tnode_t<grammar::job> job,
static bool detect_errors_in_backgrounded_job(const ast::job_t &job,
parse_error_list_t *parse_errors) {
namespace g = grammar;
auto source_range = job.source_range();
using namespace ast;
auto source_range = job.try_source_range();
if (!source_range) return false;
bool errored = false;
@ -1066,54 +1022,77 @@ static bool detect_errors_in_backgrounded_job(tnode_t<grammar::job> job,
// foo & ; or bar
// if foo & ; end
// while foo & ; end
auto job_conj = job.try_get_parent<g::job_conjunction>();
if (job_conj.try_get_parent<g::if_clause>()) {
errored = append_syntax_error(parse_errors, source_range->start,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
} else if (job_conj.try_get_parent<g::while_header>()) {
errored = append_syntax_error(parse_errors, source_range->start,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
} else if (auto jlist = job_conj.try_get_parent<g::job_list>()) {
// This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
// Fetch the job list and then advance it by one.
auto first_jconj = jlist.next_in_list<g::job_conjunction>();
assert(first_jconj == job.try_get_parent<g::job_conjunction>() &&
"Expected first job to be the node we found");
(void)first_jconj;
const job_conjunction_t *job_conj = job.parent->try_as<job_conjunction_t>();
if (!job_conj) return false;
// Try getting the next job's decorator.
if (auto next_job_dec = jlist.next_in_list<g::job_decorator>()) {
// The next job is indeed a boolean statement.
parse_job_decoration_t bool_type = bool_statement_type(next_job_dec);
if (bool_type == parse_job_decoration_and) {
errored = append_syntax_error(parse_errors, next_job_dec.source_range()->start,
BOOL_AFTER_BACKGROUND_ERROR_MSG, L"and");
} else if (bool_type == parse_job_decoration_or) {
errored = append_syntax_error(parse_errors, next_job_dec.source_range()->start,
BOOL_AFTER_BACKGROUND_ERROR_MSG, L"or");
if (job_conj->parent->try_as<if_clause_t>()) {
errored = append_syntax_error(parse_errors, source_range->start,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
} else if (job_conj->parent->try_as<while_header_t>()) {
errored = append_syntax_error(parse_errors, source_range->start,
BACKGROUND_IN_CONDITIONAL_ERROR_MSG);
} else if (const ast::job_list_t *jlist = job_conj->parent->try_as<ast::job_list_t>()) {
// This isn't very complete, e.g. we don't catch 'foo & ; not and bar'.
// Find the index of ourselves in the job list.
size_t index;
for (index = 0; index < jlist->count(); index++) {
if (jlist->at(index) == job_conj) break;
}
assert(index < jlist->count() && "Should have found the job in the list");
// Try getting the next job and check its decorator.
if (const job_conjunction_t *next = jlist->at(index + 1)) {
if (const keyword_base_t *deco = next->decorator.contents.get()) {
assert(
(deco->kw == parse_keyword_t::kw_and || deco->kw == parse_keyword_t::kw_or) &&
"Unexpected decorator keyword");
const wchar_t *deco_name = (deco->kw == parse_keyword_t::kw_and ? L"and" : L"or");
errored = append_syntax_error(parse_errors, deco->source_range().start,
BOOL_AFTER_BACKGROUND_ERROR_MSG, deco_name);
}
}
}
return errored;
}
static bool detect_errors_in_plain_statement(const wcstring &buff_src,
const parse_node_tree_t &node_tree,
tnode_t<grammar::plain_statement> pst,
parse_error_list_t *parse_errors) {
using namespace grammar;
static bool detect_errors_in_decorated_statement(const wcstring &buff_src,
const ast::decorated_statement_t &dst,
parse_error_list_t *parse_errors) {
using namespace ast;
bool errored = false;
auto source_start = pst.source_range()->start;
auto source_start = dst.source_range().start;
const parse_statement_decoration_t decoration = dst.decoration();
// In a few places below, we want to know if we are in a pipeline.
tnode_t<statement> st = pst.try_get_parent<decorated_statement>().try_get_parent<statement>();
pipeline_position_t pipe_pos = get_pipeline_position(st);
bool is_in_pipeline = (pipe_pos != pipeline_position_t::none);
// Determine if the first argument is help.
bool first_arg_is_help = false;
if (const auto *arg = get_first_arg(dst.args_or_redirs)) {
wcstring arg_src = arg->source(buff_src);
first_arg_is_help = parse_util_argument_is_help(arg_src.c_str());
}
// We need to know the decoration.
const enum parse_statement_decoration_t decoration = get_decoration(pst);
// Get the statement we are part of.
const statement_t *st = dst.parent->as<statement_t>();
// Walk up to the job.
const ast::job_t *job = nullptr;
for (const node_t *cursor = st; job == nullptr; cursor = cursor->parent) {
assert(cursor && "Reached root without finding a job");
job = cursor->try_as<ast::job_t>();
}
assert(job && "Should have found the job");
// Check our pipeline position.
pipeline_position_t pipe_pos;
if (job->continuation.empty()) {
pipe_pos = pipeline_position_t::none;
} else if (&job->statement == st) {
pipe_pos = pipeline_position_t::first;
} else {
pipe_pos = pipeline_position_t::subsequent;
}
// Check that we don't try to pipe through exec.
bool is_in_pipeline = (pipe_pos != pipeline_position_t::none);
if (is_in_pipeline && decoration == parse_statement_decoration_exec) {
errored = append_syntax_error(parse_errors, source_start, EXEC_ERR_MSG, L"exec");
}
@ -1124,14 +1103,14 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
if (pipe_pos == pipeline_position_t::subsequent) {
// check if our command is 'and' or 'or'. This is very clumsy; we don't catch e.g. quoted
// commands.
wcstring command = pst.child<0>().get_source(buff_src);
wcstring command = dst.command.source(buff_src);
if (command == L"and" || command == L"or") {
errored =
append_syntax_error(parse_errors, source_start, EXEC_ERR_MSG, command.c_str());
}
}
if (maybe_t<wcstring> unexp_command = command_for_plain_statement(pst, buff_src)) {
if (maybe_t<wcstring> unexp_command = dst.command.try_source(buff_src)) {
wcstring command;
// Check that we can expand the command.
if (expand_to_command_and_args(*unexp_command, operation_context_t::empty(), &command,
@ -1148,40 +1127,40 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
// Check that we don't return from outside a function. But we allow it if it's
// 'return --help'.
if (!errored && command == L"return") {
if (!errored && command == L"return" && !first_arg_is_help) {
// See if we are in a function.
bool found_function = false;
for (const parse_node_t *ancestor = pst.node(); ancestor != nullptr;
ancestor = node_tree.get_parent(*ancestor)) {
auto fh = tnode_t<block_statement>::try_create(&node_tree, ancestor)
.child<0>()
.try_get_child<function_header, 0>();
if (fh) {
found_function = true;
break;
for (const node_t *cursor = &dst; cursor != nullptr; cursor = cursor->parent) {
if (const auto *bs = cursor->try_as<block_statement_t>()) {
if (bs->header->type == type_t::function_header) {
found_function = true;
break;
}
}
}
if (!found_function && !first_argument_is_help(pst, buff_src)) {
if (!found_function) {
errored = append_syntax_error(parse_errors, source_start, INVALID_RETURN_ERR_MSG);
}
}
// Check that we don't break or continue from outside a loop.
if (!errored && (command == L"break" || command == L"continue")) {
if (!errored && (command == L"break" || command == L"continue") && !first_arg_is_help) {
// Walk up until we hit a 'for' or 'while' loop. If we hit a function first,
// stop the search; we can't break an outer loop from inside a function.
// This is a little funny because we can't tell if it's a 'for' or 'while'
// loop from the ancestor alone; we need the header. That is, we hit a
// block_statement, and have to check its header.
bool found_loop = false;
for (const parse_node_t *ancestor = pst.node(); ancestor != nullptr;
ancestor = node_tree.get_parent(*ancestor)) {
tnode_t<block_header> bh =
tnode_t<block_statement>::try_create(&node_tree, ancestor).child<0>();
if (bh.try_get_child<while_header, 0>() || bh.try_get_child<for_header, 0>()) {
for (const node_t *ancestor = &dst; ancestor != nullptr; ancestor = ancestor->parent) {
const auto *block = ancestor->try_as<block_statement_t>();
if (!block) continue;
if (block->header->type == type_t::for_header ||
block->header->type == type_t::while_header) {
// This is a loop header, so we can break or continue.
found_loop = true;
break;
} else if (bh.try_get_child<function_header, 0>()) {
} else if (block->header->type == type_t::function_header) {
// This is a function header, so we cannot break or
// continue. We stop our search here.
found_loop = false;
@ -1189,7 +1168,7 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
}
}
if (!found_loop && !first_argument_is_help(pst, buff_src)) {
if (!found_loop) {
errored = append_syntax_error(
parse_errors, source_start,
(command == L"break" ? INVALID_BREAK_ERR_MSG : INVALID_CONTINUE_ERR_MSG));
@ -1208,12 +1187,21 @@ static bool detect_errors_in_plain_statement(const wcstring &buff_src,
return errored;
}
// Given we have a trailing argument_or_redirection_list, like `begin; end > /dev/null`, verify that
// there are no arguments in the list.
// \return true if an error was appended to \p out_errors, false if the list is clean.
static bool detect_errors_in_block_redirection_list(
    const ast::argument_or_redirection_list_t &args_or_redirs, parse_error_list_t *out_errors) {
    if (const auto *first_arg = get_first_arg(args_or_redirs)) {
        // Arguments after the block terminator are invalid (#986): report END_ARG_ERR_MSG,
        // not the background-in-conditional message, at the offending argument's position.
        return append_syntax_error(out_errors, first_arg->source_range().start, END_ARG_ERR_MSG);
    }
    return false;
}
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
parse_error_list_t *out_errors,
bool allow_incomplete,
parsed_source_ref_t *out_pstree) {
namespace g = grammar;
parse_node_tree_t node_tree;
parse_error_list_t parse_errors;
parser_test_error_bits_t res = 0;
@ -1233,12 +1221,15 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
// allow_incomplete is set.
bool has_unclosed_quote_or_subshell = false;
// Parse the input string into a parse tree. Some errors are detected here.
bool parsed = parse_tree_from_string(
buff_src, allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none, &node_tree,
&parse_errors);
const parse_tree_flags_t parse_flags =
allow_incomplete ? parse_flag_leave_unterminated : parse_flag_none;
// Parse the input string into an ast. Some errors are detected here.
using namespace ast;
auto ast = ast_t::parse(buff_src, parse_flags, &parse_errors);
if (allow_incomplete) {
// Issue #1238: If the only error was unterminated quote, then consider this to have parsed
// successfully.
size_t idx = parse_errors.size();
while (idx--) {
if (parse_errors.at(idx).code == parse_error_tokenizer_unterminated_quote ||
@ -1250,19 +1241,14 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
}
}
// Issue #1238: If the only error was unterminated quote, then consider this to have parsed
// successfully. A better fix would be to have parse_tree_from_string return this information
// directly (but it would be a shame to munge up its nice bool return).
if (parse_errors.empty() && has_unclosed_quote_or_subshell) {
parsed = true;
}
if (!parsed) {
errored = true;
}
// has_unclosed_quote_or_subshell may only be set if allow_incomplete is true.
assert(!has_unclosed_quote_or_subshell || allow_incomplete);
if (has_unclosed_quote_or_subshell) {
// We do not bother to validate the rest of the tree in this case.
return PARSER_TEST_INCOMPLETE;
}
errored = !parse_errors.empty();
// Expand all commands.
// Verify 'or' and 'and' not used inside pipelines.
@ -1271,21 +1257,17 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
// Verify no variable expansions.
if (!errored) {
for (const parse_node_t &node : node_tree) {
if (node.type == symbol_end_command && !node.has_source()) {
// An 'end' without source is an unclosed block.
has_unclosed_block = true;
} else if (node.type == symbol_statement && !node.has_source()) {
// Check for a statement without source in a pipeline, i.e. unterminated pipeline.
auto pipe_pos = get_pipeline_position({&node_tree, &node});
if (pipe_pos != pipeline_position_t::none) {
for (const node_t &node : ast) {
if (const job_continuation_t *jc = node.try_as<job_continuation_t>()) {
// Somewhat clumsy way of checking for a statement without source in a pipeline.
// See if our pipe has source but our statement does not.
if (!jc->pipe.unsourced && !jc->statement.try_source_range().has_value()) {
has_unclosed_pipe = true;
}
} else if (node.type == symbol_argument) {
tnode_t<g::argument> arg{&node_tree, &node};
const wcstring arg_src = node.get_source(buff_src);
res |= parse_util_detect_errors_in_argument(arg, arg_src, &parse_errors);
} else if (node.type == symbol_job) {
} else if (const argument_t *arg = node.try_as<argument_t>()) {
wcstring arg_src = arg->source(buff_src);
res |= parse_util_detect_errors_in_argument(*arg, arg_src, &parse_errors);
} else if (const ast::job_t *job = node.try_as<ast::job_t>()) {
// Disallow background in the following cases:
//
// foo & ; and bar
@ -1293,25 +1275,27 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
// if foo & ; end
// while foo & ; end
// If it's not a background job, nothing to do.
auto job = tnode_t<g::job>{&node_tree, &node};
if (job_node_is_background(job)) {
errored |= detect_errors_in_backgrounded_job(job, &parse_errors);
if (job->bg) {
errored |= detect_errors_in_backgrounded_job(*job, &parse_errors);
}
} else if (node.type == symbol_arguments_or_redirections_list) {
// verify no arguments to the end command of if, switch, begin (#986).
auto list = tnode_t<g::arguments_or_redirections_list>{&node_tree, &node};
if (list.try_get_parent<g::if_statement>() ||
list.try_get_parent<g::switch_statement>() ||
list.try_get_parent<g::block_statement>()) {
if (auto arg = list.next_in_list<g::argument>()) {
errored = append_syntax_error(&parse_errors, arg.source_range()->start,
END_ARG_ERR_MSG);
}
}
} else if (node.type == symbol_plain_statement) {
tnode_t<grammar::plain_statement> pst{&node_tree, &node};
} else if (const ast::decorated_statement_t *stmt =
node.try_as<decorated_statement_t>()) {
errored |= detect_errors_in_decorated_statement(buff_src, *stmt, &parse_errors);
} else if (const auto *block = node.try_as<block_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (block->end.unsourced) has_unclosed_block = true;
errored |=
detect_errors_in_plain_statement(buff_src, node_tree, pst, &parse_errors);
detect_errors_in_block_redirection_list(block->args_or_redirs, &parse_errors);
} else if (const auto *ifs = node.try_as<if_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (ifs->end.unsourced) has_unclosed_block = true;
errored |=
detect_errors_in_block_redirection_list(ifs->args_or_redirs, &parse_errors);
} else if (const auto *switchs = node.try_as<switch_statement_t>()) {
// If our 'end' had no source, we are unsourced.
if (switchs->end.unsourced) has_unclosed_block = true;
errored |=
detect_errors_in_block_redirection_list(switchs->args_or_redirs, &parse_errors);
}
}
}
@ -1325,8 +1309,9 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
*out_errors = std::move(parse_errors);
}
// \return the ast to our caller if requested.
if (out_pstree != nullptr) {
*out_pstree = std::make_shared<parsed_source_t>(buff_src, std::move(node_tree));
*out_pstree = std::make_shared<parsed_source_t>(buff_src, std::move(ast));
}
return res;
@ -1341,25 +1326,21 @@ maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_
false /* don't skip caret */);
};
// Parse the string as an argument list.
// Parse the string as a freestanding argument list.
using namespace ast;
parse_error_list_t errors;
parse_node_tree_t tree;
if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, &errors,
symbol_freestanding_argument_list)) {
// Failed to parse.
auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &errors);
if (!errors.empty()) {
return get_error_text(errors);
}
// Get the root argument list and extract arguments from it.
// Test each of these.
assert(!tree.empty() && "Should have parsed a tree");
tnode_t<grammar::freestanding_argument_list> arg_list(&tree, &tree.at(0));
while (auto arg = arg_list.next_in_list<grammar::argument>()) {
const wcstring arg_src = arg.get_source(arg_list_src);
for (const argument_t &arg : ast.top()->as<freestanding_argument_list_t>()->arguments) {
const wcstring arg_src = arg.source(arg_list_src);
if (parse_util_detect_errors_in_argument(arg, arg_src, &errors)) {
return get_error_text(errors);
}
}
return none();
}

View File

@ -10,6 +10,10 @@
#include "parse_tree.h"
#include "tokenizer.h"
namespace ast {
struct argument_t;
}
/// Find the beginning and end of the first subshell in the specified string.
///
/// \param in the string to search for subshells
@ -127,7 +131,6 @@ std::vector<int> parse_util_compute_indents(const wcstring &src);
/// incomplete (e.g. an unclosed quote), an error is not returned and the PARSER_TEST_INCOMPLETE bit
/// is set in the return value. If allow_incomplete is not set, then incomplete strings result in an
/// error. If out_pstree is not NULL, the resulting tree is returned by reference.
class parse_node_tree_t;
parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src,
parse_error_list_t *out_errors = nullptr,
bool allow_incomplete = true,
@ -141,10 +144,9 @@ maybe_t<wcstring> parse_util_detect_errors_in_argument_list(const wcstring &arg_
/// Test if this argument contains any errors. Detected errors include syntax errors in command
/// substitutions, improperly escaped characters and improper use of the variable expansion
/// operator. This does NOT currently detect unterminated quotes.
class parse_node_t;
parser_test_error_bits_t parse_util_detect_errors_in_argument(
tnode_t<grammar::argument> node, const wcstring &arg_src,
parse_error_list_t *out_errors = nullptr);
const ast::argument_t &arg, const wcstring &arg_src, parse_error_list_t *out_errors = nullptr);
/// Given a string containing a variable expansion error, append an appropriate error to the errors
/// list. The global_token_pos is the offset of the token in the larger source, and the dollar_pos

View File

@ -11,6 +11,7 @@
#include <memory>
#include <utility>
#include "ast.h"
#include "common.h"
#include "env.h"
#include "event.h"
@ -25,7 +26,6 @@
#include "proc.h"
#include "reader.h"
#include "sanity.h"
#include "tnode.h"
#include "wutil.h" // IWYU pragma: keep
class io_chain_t;
@ -328,19 +328,18 @@ completion_list_t parser_t::expand_argument_list(const wcstring &arg_list_src,
expand_flags_t eflags,
const operation_context_t &ctx) {
// Parse the string as an argument list.
parse_node_tree_t tree;
if (!parse_tree_from_string(arg_list_src, parse_flag_none, &tree, nullptr /* errors */,
symbol_freestanding_argument_list)) {
auto ast = ast::ast_t::parse_argument_list(arg_list_src);
if (ast.errored()) {
// Failed to parse. Here we expect to have reported any errors in test_args.
return {};
}
// Get the root argument list and extract arguments from it.
completion_list_t result;
assert(!tree.empty());
tnode_t<grammar::freestanding_argument_list> arg_list(&tree, &tree.at(0));
while (auto arg = arg_list.next_in_list<grammar::argument>()) {
const wcstring arg_src = arg.get_source(arg_list_src);
const ast::freestanding_argument_list_t *list =
ast.top()->as<ast::freestanding_argument_list_t>();
for (const ast::argument_t &arg : list->arguments) {
wcstring arg_src = arg.source(arg_list_src);
if (expand_string(arg_src, &result, eflags, ctx) == expand_result_t::error) {
break; // failed to expand a string
}
@ -656,10 +655,10 @@ eval_res_t parser_t::eval(const wcstring &cmd, const io_chain_t &io,
eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io,
const job_group_ref_t &job_group, enum block_type_t block_type) {
assert(block_type == block_type_t::top || block_type == block_type_t::subst);
if (!ps->tree.empty()) {
// Execute the first node.
tnode_t<grammar::job_list> start{&ps->tree, &ps->tree.front()};
return this->eval_node(ps, start, io, job_group, block_type);
const auto *job_list = ps->ast->top()->as<ast::job_list_t>();
if (!job_list->empty()) {
// Execute the top job list.
return this->eval_node(ps, *job_list, io, job_group, block_type);
} else {
auto status = proc_status_t::from_exit_code(get_last_status());
bool break_expand = false;
@ -669,11 +668,11 @@ eval_res_t parser_t::eval(const parsed_source_ref_t &ps, const io_chain_t &io,
}
template <typename T>
eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, tnode_t<T> node,
eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, const T &node,
const io_chain_t &block_io, const job_group_ref_t &job_group,
block_type_t block_type) {
static_assert(
std::is_same<T, grammar::statement>::value || std::is_same<T, grammar::job_list>::value,
std::is_same<T, ast::statement_t>::value || std::is_same<T, ast::job_list_t>::value,
"Unexpected node type");
// Handle cancellation requests. If our block stack is currently empty, then we already did
// successfully cancel (or there was nothing to cancel); clear the flag. If our block stack is
@ -725,9 +724,9 @@ eval_res_t parser_t::eval_node(const parsed_source_ref_t &ps, tnode_t<T> node,
}
// Explicit instantiations. TODO: use overloads instead?
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, tnode_t<grammar::statement>,
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast::statement_t &,
const io_chain_t &, const job_group_ref_t &, block_type_t);
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, tnode_t<grammar::job_list>,
template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast::job_list_t &,
const io_chain_t &, const job_group_ref_t &, block_type_t);
void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &errors,

View File

@ -300,9 +300,9 @@ class parser_t : public std::enable_shared_from_this<parser_t> {
block_type_t block_type = block_type_t::top);
/// Evaluates a node.
/// The node type must be grammar::statement or grammar::job_list.
/// The node type must be ast_t::statement_t or ast::job_list_t.
template <typename T>
eval_res_t eval_node(const parsed_source_ref_t &ps, tnode_t<T> node, const io_chain_t &block_io,
eval_res_t eval_node(const parsed_source_ref_t &ps, const T &node, const io_chain_t &block_io,
const job_group_ref_t &job_group,
block_type_t block_type = block_type_t::top);

View File

@ -21,7 +21,6 @@
#include "global_safety.h"
#include "io.h"
#include "parse_tree.h"
#include "tnode.h"
#include "topic_monitor.h"
/// Types of processes.
@ -44,6 +43,10 @@ enum class job_control_t {
none,
};
namespace ast {
struct statement_t;
}
/// A proc_status_t is a value type that encapsulates logic around exited vs stopped vs signaled,
/// etc.
class proc_status_t {
@ -261,10 +264,10 @@ class process_t {
/// Type of process.
process_type_t type{process_type_t::external};
/// For internal block processes only, the node offset of the statement.
/// For internal block processes only, the node of the statement.
/// This is always either block, ifs, or switchs, never boolean or decorated.
parsed_source_ref_t block_node_source{};
tnode_t<grammar::statement> internal_block_node{};
const ast::statement_t *internal_block_node{};
struct concrete_assignment {
wcstring variable_name;

View File

@ -44,6 +44,7 @@
#include <set>
#include <stack>
#include "ast.h"
#include "color.h"
#include "common.h"
#include "complete.h"
@ -74,7 +75,6 @@
#include "screen.h"
#include "signal.h"
#include "termsize.h"
#include "tnode.h"
#include "tokenizer.h"
#include "wutil.h" // IWYU pragma: keep
@ -935,33 +935,29 @@ maybe_t<edit_t> reader_expand_abbreviation_in_command(const wcstring &cmdline, s
const size_t subcmd_cursor_pos = cursor_pos - subcmd_offset;
// Parse this subcmd.
parse_node_tree_t parse_tree;
parse_tree_from_string(subcmd,
parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens,
&parse_tree, nullptr);
using namespace ast;
auto ast =
ast_t::parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens |
parse_flag_leave_unterminated);
// Look for plain statements where the cursor is at the end of the command.
using namespace grammar;
tnode_t<tok_string> matching_cmd_node;
for (const parse_node_t &node : parse_tree) {
// Only interested in plain statements with source.
if (node.type != symbol_plain_statement || !node.has_source()) continue;
const ast::string_t *matching_cmd_node = nullptr;
for (const node_t &n : ast) {
const decorated_statement_t *stmt = n.try_as<decorated_statement_t>();
if (!stmt) continue;
// Get the command node. Skip it if we can't or it has no source.
tnode_t<plain_statement> statement(&parse_tree, &node);
tnode_t<tok_string> cmd_node = statement.child<0>();
// Skip if we have a decoration.
if (stmt->opt_decoration) continue;
// Skip decorated statements.
if (get_decoration(statement) != parse_statement_decoration_none) continue;
auto msource = cmd_node.source_range();
// See if the command's source range range contains our cursor, including at the end.
auto msource = stmt->command.try_source_range();
if (!msource) continue;
// Now see if its source range contains our cursor, including at the end.
if (subcmd_cursor_pos >= msource->start &&
subcmd_cursor_pos <= msource->start + msource->length) {
// Success!
matching_cmd_node = cmd_node;
matching_cmd_node = &stmt->command;
break;
}
}
@ -969,11 +965,12 @@ maybe_t<edit_t> reader_expand_abbreviation_in_command(const wcstring &cmdline, s
// Now if we found a command node, expand it.
maybe_t<edit_t> result{};
if (matching_cmd_node) {
const wcstring token = matching_cmd_node.get_source(subcmd);
assert(!matching_cmd_node->unsourced && "Should not be unsourced");
const wcstring token = matching_cmd_node->source(subcmd);
if (auto abbreviation = expand_abbreviation(token, vars)) {
// There was an abbreviation! Replace the token in the full command. Maintain the
// relative position of the cursor.
source_range_t r = *matching_cmd_node.source_range();
source_range_t r = matching_cmd_node->source_range();
result = edit_t(subcmd_offset + r.start, r.length, std::move(*abbreviation));
}
}

View File

@ -1,152 +0,0 @@
#include "tnode.h"
const parse_node_t *parse_node_tree_t::next_node_in_node_list(
const parse_node_t &node_list, parse_token_type_t entry_type,
const parse_node_t **out_list_tail) const {
parse_token_type_t list_type = node_list.type;
// Paranoia - it doesn't make sense for a list type to contain itself.
assert(list_type != entry_type);
const parse_node_t *list_cursor = &node_list;
const parse_node_t *list_entry = nullptr;
// Loop while we don't have an item but do have a list. Note that some nodes may contain
// nothing; e.g. job_list contains blank lines as a production.
while (list_entry == nullptr && list_cursor != nullptr) {
const parse_node_t *next_cursor = nullptr;
// Walk through the children.
for (node_offset_t i = 0; i < list_cursor->child_count; i++) {
const parse_node_t *child = this->get_child(*list_cursor, i);
if (child->type == entry_type) {
// This is the list entry.
list_entry = child;
} else if (child->type == list_type) {
// This is the next in the list.
next_cursor = child;
}
}
// Go to the next entry, even if it's NULL.
list_cursor = next_cursor;
}
// Return what we got.
assert(list_cursor == nullptr || list_cursor->type == list_type);
assert(list_entry == nullptr || list_entry->type == entry_type);
if (out_list_tail != nullptr) *out_list_tail = list_cursor;
return list_entry;
}
enum parse_statement_decoration_t get_decoration(tnode_t<grammar::plain_statement> stmt) {
parse_statement_decoration_t decoration = parse_statement_decoration_none;
if (auto decorated_statement = stmt.try_get_parent<grammar::decorated_statement>()) {
decoration = static_cast<parse_statement_decoration_t>(decorated_statement.tag());
}
return decoration;
}
enum parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_decorator> stmt) {
return static_cast<parse_job_decoration_t>(stmt.tag());
}
enum parse_job_decoration_t bool_statement_type(
tnode_t<grammar::job_conjunction_continuation> cont) {
return static_cast<parse_job_decoration_t>(cont.tag());
}
maybe_t<pipe_or_redir_t> redirection_for_node(tnode_t<grammar::redirection> redirection,
const wcstring &src, wcstring *out_target) {
assert(redirection && "redirection is missing");
tnode_t<grammar::tok_redirection> prim = redirection.child<0>(); // like 2>
assert(prim && "expected to have primitive");
maybe_t<pipe_or_redir_t> result{};
if (prim.has_source()) {
result = pipe_or_redir_t::from_string(prim.get_source(src));
assert(result.has_value() && "Failed to parse valid redirection");
assert(!result->is_pipe && "Should not be a pipe");
}
if (out_target != nullptr) {
tnode_t<grammar::tok_string> target = redirection.child<1>(); // like 1 or file path
*out_target = target.has_source() ? target.get_source(src) : wcstring();
}
return result;
}
std::vector<tnode_t<grammar::comment>> parse_node_tree_t::comment_nodes_for_node(
const parse_node_t &parent) const {
std::vector<tnode_t<grammar::comment>> result;
if (parent.has_comments()) {
// Walk all our nodes, looking for comment nodes that have the given node as a parent.
for (size_t i = 0; i < this->size(); i++) {
const parse_node_t &potential_comment = this->at(i);
if (potential_comment.type == parse_special_type_comment &&
this->get_parent(potential_comment) == &parent) {
result.emplace_back(this, &potential_comment);
}
}
}
return result;
}
variable_assignment_node_list_t get_variable_assignment_nodes(
tnode_t<grammar::variable_assignments> list, size_t max) {
return list.descendants<grammar::variable_assignment>(max);
}
maybe_t<wcstring> command_for_plain_statement(tnode_t<grammar::plain_statement> stmt,
const wcstring &src) {
tnode_t<grammar::tok_string> cmd = stmt.child<0>();
if (cmd && cmd.has_source()) {
return cmd.get_source(src);
}
return none();
}
arguments_node_list_t get_argument_nodes(tnode_t<grammar::argument_list> list, size_t max) {
return list.descendants<grammar::argument>(max);
}
arguments_node_list_t get_argument_nodes(tnode_t<grammar::arguments_or_redirections_list> list,
size_t max) {
return list.descendants<grammar::argument>(max);
}
bool job_node_is_background(tnode_t<grammar::job> job) {
tnode_t<grammar::optional_background> bg = job.child<4>();
return bg.tag() == parse_background;
}
parse_job_decoration_t get_decorator(tnode_t<grammar::job_conjunction> conj) {
using namespace grammar;
tnode_t<job_decorator> dec;
// We have two possible parents: job_list and andor_job_list.
if (auto p = conj.try_get_parent<job_list>()) {
dec = p.require_get_child<job_decorator, 0>();
} else if (auto p = conj.try_get_parent<andor_job_list>()) {
dec = p.require_get_child<job_decorator, 0>();
}
// note this returns 0 (none) if dec is empty.
return bool_statement_type(dec);
}
pipeline_position_t get_pipeline_position(tnode_t<grammar::statement> st) {
using namespace grammar;
if (!st) {
return pipeline_position_t::none;
}
// If we're part of a job continuation, we're definitely in a pipeline.
if (st.try_get_parent<job_continuation>()) {
return pipeline_position_t::subsequent;
}
// Check if we're the beginning of a job, and if so, whether that job
// has a non-empty continuation.
tnode_t<job_continuation> jc = st.try_get_parent<job>().child<3>();
if (jc.try_get_child<statement, 3>()) {
return pipeline_position_t::first;
}
return pipeline_position_t::none;
}

View File

@ -1,278 +0,0 @@
// Type-safe access to fish parse trees.
#ifndef FISH_TNODE_H
#define FISH_TNODE_H
#include "parse_grammar.h"
#include "parse_tree.h"
// Check if a child type is possible for a parent type at a given index.
template <typename Parent, typename Child, size_t Index>
constexpr bool child_type_possible_at_index() {
return Parent::template type_possible<Child, Index>();
}
// Check if a child type is possible for a parent type at any index.
// The number of cases here should match MAX_PRODUCTION_LENGTH.
template <typename Parent, typename Child>
constexpr bool child_type_possible() {
return child_type_possible_at_index<Parent, Child, 0>() ||
child_type_possible_at_index<Parent, Child, 1>() ||
child_type_possible_at_index<Parent, Child, 2>() ||
child_type_possible_at_index<Parent, Child, 3>() ||
child_type_possible_at_index<Parent, Child, 4>() ||
child_type_possible_at_index<Parent, Child, 5>();
}
/// tnode_t ("typed node") is type-safe access to a parse_tree. A tnode_t holds both a pointer to a
/// parse_node_tree_t and a pointer to a parse_node_t. (Note that the parse_node_tree_t is unowned;
/// the caller must ensure that the tnode does not outlive the tree.
///
/// tnode_t is a lightweight value-type class. It ought to be passed by value. A tnode_t may also be
/// "missing", associated with a null parse_node_t pointer. operator bool() may be used to check if
/// a tnode_t is misisng.
///
/// A tnode_t is parametrized by a grammar element, and uses the fish grammar to statically
/// type-check accesses to children and parents. Any particular tnode either corresponds to a
/// sequence (a single child) or an alternation (multiple possible children). A sequence may have
/// its children accessed directly via child(), which is templated on the index (and returns a
/// tnode of the proper type). Alternations may be disambiguated via try_get_child(), which returns
/// an empty child if the child has the wrong type, or require_get_child() which aborts if the child
/// has the wrong type.
template <typename Type>
class tnode_t {
/// The tree containing our node.
const parse_node_tree_t *tree = nullptr;
/// The node in the tree
const parse_node_t *nodeptr = nullptr;
// Helper to get a child type at a given index.
template <class Element, uint32_t Index>
using child_at = typename std::tuple_element<Index, typename Element::type_tuple>::type;
public:
tnode_t() = default;
tnode_t(const parse_node_tree_t *t, const parse_node_t *n) : tree(t), nodeptr(n) {
assert(t && "tree cannot be null in this constructor");
assert((!n || n->type == Type::token) && "node has wrong type");
}
// Try to create a tnode from the given tree and parse node.
// Returns an empty node if the parse node is null, or has the wrong type.
static tnode_t try_create(const parse_node_tree_t *tree, const parse_node_t *node) {
assert(tree && "tree cannot be null");
return tnode_t(tree, node && node->type == Type::token ? node : nullptr);
}
/// Temporary conversion to parse_node_t to assist in migration.
/* implicit */ operator const parse_node_t &() const {
assert(nodeptr && "Empty tnode_t");
return *nodeptr;
}
/* implicit */ operator const parse_node_t *() const { return nodeptr; }
/// \return the underlying (type-erased) node.
const parse_node_t *node() const { return nodeptr; }
/// Check whether we're populated.
explicit operator bool() const { return nodeptr != nullptr; }
bool operator==(const tnode_t &rhs) const { return tree == rhs.tree && nodeptr == rhs.nodeptr; }
bool operator!=(const tnode_t &rhs) const { return !(*this == rhs); }
// Helper to return whether the given tree is the same as ours.
bool matches_node_tree(const parse_node_tree_t &t) const { return &t == tree; }
const parse_node_tree_t *get_tree() const { return tree; }
bool has_source() const { return nodeptr && nodeptr->has_source(); }
// return the tag, or 0 if missing.
parse_node_tag_t tag() const { return nodeptr ? nodeptr->tag : 0; }
// return the number of children, or 0 if missing.
uint8_t child_count() const { return nodeptr ? nodeptr->child_count : 0; }
maybe_t<source_range_t> source_range() const {
if (!nodeptr || nodeptr->source_start == NODE_OFFSET_INVALID) return none();
return source_range_t{nodeptr->source_start, nodeptr->source_length};
}
wcstring get_source(const wcstring &str) const {
if (!nodeptr) {
return L"";
}
return nodeptr->get_source(str);
}
bool location_in_or_at_end_of_source_range(size_t loc) const {
return nodeptr && nodeptr->location_in_or_at_end_of_source_range(loc);
}
static tnode_t find_node_matching_source_location(const parse_node_tree_t *tree,
size_t source_loc,
const parse_node_t *parent) {
assert(tree && "null tree");
return tnode_t{tree,
tree->find_node_matching_source_location(Type::token, source_loc, parent)};
}
/// Type-safe access to a child at the given index.
template <node_offset_t Index>
tnode_t<child_at<Type, Index>> child() const {
using child_type = child_at<Type, Index>;
const parse_node_t *child = nullptr;
if (nodeptr) child = tree->get_child(*nodeptr, Index, child_type::token);
return tnode_t<child_type>{tree, child};
}
/// Return a parse_node_t for a child.
/// This is used to disambiguate alts.
template <node_offset_t Index>
const parse_node_t &get_child_node() const {
assert(nodeptr && "receiver is missing in get_child_node");
return *tree->get_child(*nodeptr, Index);
}
/// If the child at the given index has the given type, return it; otherwise return an empty
/// child. Note this will refuse to compile if the child type is not possible.
/// This is used for e.g. alternations.
template <class ChildType, node_offset_t Index>
tnode_t<ChildType> try_get_child() const {
static_assert(child_type_possible_at_index<Type, ChildType, Index>(),
"Cannot contain a child of this type");
const parse_node_t *child = nullptr;
if (nodeptr) child = tree->get_child(*nodeptr, Index);
if (child && child->type == ChildType::token) return {tree, child};
return {tree, nullptr};
}
/// assert that this is not empty and that the child at index Index has the given type, then
/// return that child. Note this will refuse to compile if the child type is not possible.
template <class ChildType, node_offset_t Index>
tnode_t<ChildType> require_get_child() const {
assert(nodeptr && "receiver is missing in require_get_child()");
auto result = try_get_child<ChildType, Index>();
assert(result && "require_get_child(): wrong child type");
return result;
}
/// Find the first direct child of the given node of the given type. asserts on failure.
template <class ChildType>
tnode_t<ChildType> find_child() const {
static_assert(child_type_possible<Type, ChildType>(), "Cannot have that type as a child");
assert(nodeptr && "receiver is missing in find_child()");
tnode_t<ChildType> result{tree, &tree->find_child(*nodeptr, ChildType::token)};
assert(result && "cannot find child");
return result;
}
/// Type-safe access to a node's parent.
/// If the parent exists and has type ParentType, return it.
/// Otherwise return a missing tnode.
template <class ParentType>
tnode_t<ParentType> try_get_parent() const {
static_assert(child_type_possible<ParentType, Type>(), "Parent cannot have us as a child");
if (!nodeptr) return {};
return {tree, tree->get_parent(*nodeptr, ParentType::token)};
}
/// Finds all descendants (up to max_count) under this node of the given type.
template <typename DescendantType>
std::vector<tnode_t<DescendantType>> descendants(size_t max_count = -1) const {
if (!nodeptr) return {};
std::vector<tnode_t<DescendantType>> result;
std::vector<const parse_node_t *> stack{nodeptr};
while (!stack.empty() && result.size() < max_count) {
const parse_node_t *node = stack.back();
if (node->type == DescendantType::token) result.emplace_back(tree, node);
stack.pop_back();
node_offset_t index = node->child_count;
while (index--) {
stack.push_back(tree->get_child(*node, index));
}
}
return result;
}
/// Given that we are a list type, \return the next node of some Item in some node list,
/// adjusting 'this' to be the remainder of the list.
/// Returns an empty item on failure.
template <class ItemType>
tnode_t<ItemType> next_in_list() {
// We require that we can contain ourselves, and ItemType as well.
static_assert(child_type_possible<Type, Type>(), "Is not a list");
static_assert(child_type_possible<Type, ItemType>(), "Is not a list of that type");
if (!nodeptr) return {tree, nullptr};
const parse_node_t *next =
tree->next_node_in_node_list(*nodeptr, ItemType::token, &nodeptr);
return {tree, next};
}
};
template <typename Type>
tnode_t<Type> parse_node_tree_t::find_child(const parse_node_t &parent) const {
return tnode_t<Type>(this, &this->find_child(parent, Type::token));
}
/// Return the arguments under an arguments_list or arguments_or_redirection_list
/// Do not return more than max.
using variable_assignment_node_list_t = std::vector<tnode_t<grammar::variable_assignment>>;
variable_assignment_node_list_t get_variable_assignment_nodes(
tnode_t<grammar::variable_assignments>, size_t max = -1);
/// Given a plain statement, get the command from the child node. Returns the command string on
/// success, none on failure.
maybe_t<wcstring> command_for_plain_statement(tnode_t<grammar::plain_statement> stmt,
const wcstring &src);
/// Return the decoration for a plain statement.
parse_statement_decoration_t get_decoration(tnode_t<grammar::plain_statement> stmt);
/// Return the type for a boolean statement.
parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_decorator> stmt);
parse_job_decoration_t bool_statement_type(tnode_t<grammar::job_conjunction_continuation> cont);
/// Given a redirection node, get the parsed redirection and target of the redirection (file path,
/// or fd).
maybe_t<pipe_or_redir_t> redirection_for_node(tnode_t<grammar::redirection> redirection,
const wcstring &src, wcstring *out_target);
/// Return the arguments under an arguments_list or arguments_or_redirection_list
/// Do not return more than max.
using arguments_node_list_t = std::vector<tnode_t<grammar::argument>>;
arguments_node_list_t get_argument_nodes(tnode_t<grammar::argument_list>, size_t max = -1);
arguments_node_list_t get_argument_nodes(tnode_t<grammar::arguments_or_redirections_list>,
size_t max = -1);
/// Return whether the given job is background because it has a & symbol.
bool job_node_is_background(tnode_t<grammar::job>);
/// If the conjunction is has a decorator (and/or), return it; otherwise return none. This only
/// considers the leading conjunction, e.g. in `and true || false` only the 'true' conjunction will
/// return 'and'.
parse_job_decoration_t get_decorator(tnode_t<grammar::job_conjunction>);
/// Return whether the statement is part of a pipeline.
/// This doesn't detect e.g. pipelines involving our parent's block statements.
enum class pipeline_position_t {
none, // not part of a pipeline
first, // first command in a pipeline
subsequent // second or further command in a pipeline
};
pipeline_position_t get_pipeline_position(tnode_t<grammar::statement> st);
/// Check whether an argument_list is a root list.
inline bool argument_list_is_root(tnode_t<grammar::argument_list> list) {
return !list.try_get_parent<grammar::argument_list>();
}
inline bool argument_list_is_root(tnode_t<grammar::arguments_or_redirections_list> list) {
return !list.try_get_parent<grammar::arguments_or_redirections_list>();
}
#endif

View File

@ -49,7 +49,7 @@ end' | $fish_indent
#CHECK: c
#CHECK: echo thing
#CHECK: end
echo 'echo foo |
echo banana' | $fish_indent
#CHECK: echo foo |
@ -57,12 +57,11 @@ echo banana' | $fish_indent
echo 'echo foo \\
;' | $fish_indent
#CHECK: echo foo \
#CHECK:
#CHECK: echo foo
echo 'echo foo \\
' | $fish_indent
#CHECK: echo foo \
#CHECK: echo foo
echo -n '
begin
@ -201,9 +200,9 @@ end; echo alpha "
#CHECK: begin
#CHECK: {{ }}echo hi
#CHECK: else
#CHECK:
#CHECK: {{^}}echo bye
#CHECK: end
#CHECK: echo alpha "
#CHECK: end; echo alpha "
# issue 1665
echo -n '
@ -285,7 +284,7 @@ echo bye
#CHECK:
#CHECK: echo hi |
#CHECK:
#CHECK: echo bye
#CHECK: {{ }}echo bye
echo 'a;;;;;;' | $fish_indent
#CHECK: a