diff --git a/fish-rust/src/ffi.rs b/fish-rust/src/ffi.rs index 2e3ddfad4..25f0b3583 100644 --- a/fish-rust/src/ffi.rs +++ b/fish-rust/src/ffi.rs @@ -58,7 +58,6 @@ include_cpp! { generate!("get_flog_file_fd") generate!("log_extra_to_flog_file") - generate!("indent_visitor_t") generate!("fish_wcwidth") generate!("fish_wcswidth") diff --git a/fish-rust/src/parse_util.rs b/fish-rust/src/parse_util.rs index 197b3c5c2..69efdee25 100644 --- a/fish-rust/src/parse_util.rs +++ b/fish-rust/src/parse_util.rs @@ -1,5 +1,5 @@ //! Various mostly unrelated utility functions related to parsing, loading and evaluating fish code. -use crate::ast::{self, Ast, Keyword, Leaf, List, Node, NodeFfi, NodeVisitor}; +use crate::ast::{self, Ast, Keyword, Leaf, List, Node, NodeVisitor}; use crate::common::{ escape_string, unescape_string, valid_var_name, valid_var_name_char, EscapeFlags, EscapeStringStyle, UnescapeFlags, UnescapeStringStyle, @@ -9,17 +9,18 @@ use crate::expand::{ BRACE_SEP, INTERNAL_SEPARATOR, VARIABLE_EXPAND, VARIABLE_EXPAND_EMPTY, VARIABLE_EXPAND_SINGLE, }; use crate::ffi; -use crate::ffi::indent_visitor_t; use crate::ffi_tests::add_test; use crate::future_feature_flags::{feature_test, FeatureFlag}; use crate::operation_context::OperationContext; use crate::parse_constants::{ parse_error_offset_source_start, ParseError, ParseErrorCode, ParseErrorList, ParseKeyword, - ParserTestErrorBits, PipelinePosition, StatementDecoration, ERROR_BAD_VAR_CHAR1, - ERROR_BRACKETED_VARIABLE1, ERROR_BRACKETED_VARIABLE_QUOTED1, ERROR_NOT_ARGV_AT, - ERROR_NOT_ARGV_COUNT, ERROR_NOT_ARGV_STAR, ERROR_NOT_PID, ERROR_NOT_STATUS, ERROR_NO_VAR_NAME, - INVALID_BREAK_ERR_MSG, INVALID_CONTINUE_ERR_MSG, INVALID_PIPELINE_CMD_ERR_MSG, - PARSER_TEST_ERROR, PARSER_TEST_INCOMPLETE, PARSE_FLAG_LEAVE_UNTERMINATED, PARSE_FLAG_NONE, + ParseTokenType, ParserTestErrorBits, PipelinePosition, StatementDecoration, + ERROR_BAD_VAR_CHAR1, ERROR_BRACKETED_VARIABLE1, ERROR_BRACKETED_VARIABLE_QUOTED1, + ERROR_NOT_ARGV_AT, ERROR_NOT_ARGV_COUNT, ERROR_NOT_ARGV_STAR, ERROR_NOT_PID, ERROR_NOT_STATUS, + ERROR_NO_VAR_NAME, INVALID_BREAK_ERR_MSG, INVALID_CONTINUE_ERR_MSG, + INVALID_PIPELINE_CMD_ERR_MSG, PARSER_TEST_ERROR, PARSER_TEST_INCOMPLETE, + PARSE_FLAG_ACCEPT_INCOMPLETE_TOKENS, PARSE_FLAG_CONTINUE_AFTER_ERROR, + PARSE_FLAG_INCLUDE_COMMENTS, PARSE_FLAG_LEAVE_UNTERMINATED, PARSE_FLAG_NONE, UNKNOWN_BUILTIN_ERR_MSG, }; use crate::tokenizer::{ @@ -27,12 +28,12 @@ use crate::tokenizer::{ TOK_SHOW_COMMENTS, }; use crate::wchar::{wstr, WString, L}; -use crate::wchar_ffi::WCharToFFI; +use crate::wchar_ffi::{WCharFromFFI, WCharToFFI}; use crate::wcstringutil::truncate; use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE}; use crate::wutil::{wgettext, wgettext_fmt}; +use cxx::CxxWString; use std::ops; -use std::pin::Pin; use widestring_suffix::widestrs; /// Handles slices: the square brackets in an expression like $foo[5..4] @@ -723,48 +724,243 @@ pub fn parse_util_escape_string_with_quote( result } +/// Given a string, parse it as fish code and then return the indents. The return value has the same +/// size as the string. +pub fn parse_util_compute_indents(src: &wstr) -> Vec { + // Make a vector the same size as the input string, which contains the indents. Initialize them + // to 0. + let mut indents = vec![0; src.len()]; + + // Simple trick: if our source does not contain a newline, then all indents are 0. + if !src.chars().any(|c| c == '\n') { + return indents; + } + + // Parse the string. We pass continue_after_error to produce a forest; the trailing indent of + // the last node we visited becomes the input indent of the next. I.e. in the case of 'switch + // foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it + // were a case item list. + let ast = Ast::parse( + src, + PARSE_FLAG_CONTINUE_AFTER_ERROR + | PARSE_FLAG_INCLUDE_COMMENTS + | PARSE_FLAG_ACCEPT_INCOMPLETE_TOKENS + | PARSE_FLAG_LEAVE_UNTERMINATED, + None, + ); + { + let mut iv = IndentVisitor::new(src, &mut indents); + iv.visit(ast.top()); + iv.record_line_continuations_until(iv.indents.len()); + iv.indents[iv.last_leaf_end..].fill(iv.last_indent); + + // All newlines now get the *next* indent. + // For example, in this code: + // if true + // stuff + // the newline "belongs" to the if statement as it ends its job. + // But when rendered, it visually belongs to the job list. + + let mut idx = src.len(); + let mut next_indent = iv.last_indent; + let src = src.as_char_slice(); + while idx != 0 { + idx -= 1; + if src[idx] == '\n' { + let empty_middle_line = src.get(idx + 1) == Some(&'\n'); + if !empty_middle_line { + iv.indents[idx] = next_indent; + } + } else { + next_indent = iv.indents[idx]; + } + } + // Add an extra level of indentation to continuation lines. + for mut idx in iv.line_continuations { + loop { + indents[idx] = indents[idx].wrapping_add(1); + idx += 1; + if idx == src.len() || src[idx] == '\n' { + break; + } + } + } + } + + indents +} + +// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while +// visiting its children. struct IndentVisitor<'a> { - companion: Pin<&'a mut indent_visitor_t>, + // companion: Pin<&'a mut indent_visitor_t>, + // The one-past-the-last index of the most recently encountered leaf node. + // We use this to populate the indents even if there's no tokens in the range. + last_leaf_end: usize, + + // The last indent which we assigned. + last_indent: i32, + + // The source we are indenting. + src: &'a wstr, + + // List of indents, which we populate. + indents: &'a mut Vec, + + // Initialize our starting indent to -1, as our top-level node is a job list which + // will immediately increment it. + indent: i32, + + // List of locations of escaped newline characters. + line_continuations: Vec, +} +impl<'a> IndentVisitor<'a> { + fn new(src: &'a wstr, indents: &'a mut Vec) -> Self { + Self { + last_leaf_end: 0, + last_indent: -1, + src, + indents, + indent: -1, + line_continuations: vec![], + } + } + /// \return whether a maybe_newlines node contains at least one newline. + fn has_newline(&self, nls: &ast::MaybeNewlines) -> bool { + nls.source(self.src).chars().any(|c| c == '\n') + } + fn record_line_continuations_until(&mut self, offset: usize) { + let gap_text = &self.src[self.last_leaf_end..offset]; + let gap_text = gap_text.as_char_slice(); + let Some(escaped_nl) = gap_text.windows(2).position(|w| *w == ['\\', '\n']) else { + return; + }; + if gap_text[..escaped_nl].contains(&'#') { + return; + } + let mut newline = escaped_nl + 1; + // The gap text might contain multiple newlines if there are multiple lines that + // don't contain an AST node, for example, comment lines, or lines containing only + // the escaped newline. + loop { + self.line_continuations.push(self.last_leaf_end + newline); + match gap_text[newline + 1..].iter().position(|c| *c == '\n') { + Some(nextnl) => newline = newline + 1 + nextnl, + None => break, + } + } + } } impl<'a> NodeVisitor<'a> for IndentVisitor<'a> { // Default implementation is to just visit children. fn visit(&mut self, node: &'a dyn Node) { - let ffi_node = NodeFfi::new(node); - let dec = self - .companion - .as_mut() - .visit((&ffi_node as *const NodeFfi<'_>).cast()); + let mut inc = 0; + let mut dec = 0; + use ast::{Category, Type}; + match node.typ() { + Type::job_list | Type::andor_job_list => { + // Job lists are never unwound. + inc = 1; + dec = 1; + } + + // Increment indents for conditions in headers (#1665). + Type::job_conjunction => { + if [Type::while_header, Type::if_clause].contains(&node.parent().unwrap().typ()) { + inc = 1; + dec = 1; + } + } + + // Increment indents for job_continuation_t if it contains a newline. + // This is a bit of a hack - it indents cases like: + // cmd1 | + // ....cmd2 + // but avoids "double indenting" if there's no newline: + // cmd1 | while cmd2 + // ....cmd3 + // end + // See #7252. + Type::job_continuation => { + if self.has_newline(&node.as_job_continuation().unwrap().newlines) { + inc = 1; + dec = 1; + } + } + + // Likewise for && and ||. + Type::job_conjunction_continuation => { + if self.has_newline(&node.as_job_conjunction_continuation().unwrap().newlines) { + inc = 1; + dec = 1; + } + } + + Type::case_item_list => { + // Here's a hack. Consider: + // switch abc + // cas + // + // fish will see that 'cas' is not valid inside a switch statement because it is + // not "case". It will then unwind back to the top level job list, producing a + // parse tree like: + // + // job_list + // switch_job + // + // normal_job + // cas + // + // And so we will think that the 'cas' job is at the same level as the switch. + // To address this, if we see that the switch statement was not closed, do not + // decrement the indent afterwards. + inc = 1; + let switchs = node.parent().unwrap().as_switch_statement().unwrap(); + dec = if switchs.end.has_source() { 1 } else { 0 }; + } + Type::token_base => { + if node.parent().unwrap().typ() == Type::begin_header + && node.as_token().unwrap().token_type() == ParseTokenType::end + { + // The newline after "begin" is optional, so it is part of the header. + // The header is not in the indented block, so indent the newline here. + if node.source(self.src) == L!("\n") { + inc = 1; + dec = 1; + } + } + } + _ => (), + } + + let range = node.source_range(); + if range.length() > 0 && node.category() == Category::leaf { + self.record_line_continuations_until(range.start()); + self.indents[self.last_leaf_end..range.start()].fill(self.last_indent); + } + + self.indent += inc; + + // If we increased the indentation, apply it to the remainder of the string, even if the + // list is empty. For example (where _ represents the cursor): + // + // if foo + // _ + // + // we want to indent the newline. + if inc != 0 { + self.last_indent = self.indent; + } + + // If this is a leaf node, apply the current indentation. + if node.category() == Category::leaf && range.length() != 0 { + self.indents[range.start()..range.end()].fill(self.indent); + self.last_leaf_end = range.end(); + self.last_indent = self.indent; + } + node.accept(self, false); - self.companion.as_mut().did_visit(dec); - } -} - -#[cxx::bridge] -#[allow(clippy::needless_lifetimes)] // false positive -mod parse_util_ffi { - extern "C++" { - include!("ast.h"); - include!("parse_util.h"); - type indent_visitor_t = crate::ffi::indent_visitor_t; - type Ast = crate::ast::Ast; - type NodeFfi<'a> = crate::ast::NodeFfi<'a>; - } - extern "Rust" { - type IndentVisitor<'a>; - unsafe fn new_indent_visitor( - companion: Pin<&mut indent_visitor_t>, - ) -> Box>; - #[cxx_name = "visit"] - unsafe fn visit_ffi<'a>(self: &mut IndentVisitor<'a>, node: &'a NodeFfi<'a>); - } -} - -fn new_indent_visitor(companion: Pin<&mut indent_visitor_t>) -> Box> { - Box::new(IndentVisitor { companion }) -} -impl<'a> IndentVisitor<'a> { - fn visit_ffi(self: &mut IndentVisitor<'a>, node: &'a NodeFfi<'a>) { - self.visit(node.as_node()); + self.indent -= dec; } } @@ -1577,3 +1773,200 @@ add_test!("test_escape_quotes", || { validate!("foo\nba'r", Some('"'), false, "foo\"\\n\"ba'r"); validate!("foo\\\\bar", Some('"'), false, "foo\\\\\\\\bar"); }); + +add_test!("test_indents", || { + // A struct which is either text or a new indent. + struct Segment { + // The indent to set + indent: i32, + text: &'static str, + } + fn do_validate(segments: &[Segment]) { + // Compute the indents. + let mut expected_indents = vec![]; + let mut text = WString::new(); + for segment in segments { + text.push_str(segment.text); + for _ in segment.text.chars() { + expected_indents.push(segment.indent); + } + } + let indents = parse_util_compute_indents(&text); + assert_eq!(indents, expected_indents); + } + macro_rules! validate { + ( $( $(,)? $indent:literal, $text:literal )* ) => { + let segments = vec![ + $( + Segment{ indent: $indent, text: $text }, + )* + ]; + do_validate(&segments); + }; + } + + #[rustfmt::skip] + #[allow(clippy::redundant_closure_call)] + (|| { + validate!( + 0, "if", 1, " foo", + 0, "\nend" + ); + validate!( + 0, "if", 1, " foo", + 1, "\nfoo", + 0, "\nend" + ); + + validate!( + 0, "if", 1, " foo", + 1, "\nif", 2, " bar", + 1, "\nend", + 0, "\nend" + ); + + validate!( + 0, "if", 1, " foo", + 1, "\nif", 2, " bar", + 2, "\n", + 1, "\nend\n" + ); + + validate!( + 0, "if", 1, " foo", + 1, "\nif", 2, " bar", + 2, "\n" + ); + + validate!( + 0, "begin", + 1, "\nfoo", + 1, "\n" + ); + + validate!( + 0, "begin", + 1, "\n;", + 0, "end", + 0, "\nfoo", 0, "\n" + ); + + validate!( + 0, "begin", + 1, "\n;", + 0, "end", + 0, "\nfoo", 0, "\n" + ); + + validate!( + 0, "if", 1, " foo", + 1, "\nif", 2, " bar", + 2, "\nbaz", + 1, "\nend", 1, "\n" + ); + + validate!( + 0, "switch foo", + 1, "\n" + ); + + validate!( + 0, "switch foo", + 1, "\ncase bar", + 1, "\ncase baz", + 2, "\nquux", + 2, "\nquux" + ); + + validate!( + 0, + "switch foo", + 1, + "\ncas" // parse error indentation handling + ); + + validate!( + 0, "while", + 1, " false", + 1, "\n# comment", // comment indentation handling + 1, "\ncommand", + 1, "\n# comment 2" + ); + + validate!( + 0, "begin", + 1, "\n", // "begin" is special because this newline belongs to the block header + 1, "\n" + ); + + // Continuation lines. + validate!( + 0, "echo 'continuation line' \\", + 1, "\ncont", + 0, "\n" + ); + validate!( + 0, "echo 'empty continuation line' \\", + 1, "\n" + ); + validate!( + 0, "begin # continuation line in block", + 1, "\necho \\", + 2, "\ncont" + ); + validate!( + 0, "begin # empty continuation line in block", + 1, "\necho \\", + 2, "\n", + 0, "\nend" + ); + validate!( + 0, "echo 'multiple continuation lines' \\", + 1, "\nline1 \\", + 1, "\n# comment", + 1, "\n# more comment", + 1, "\nline2 \\", + 1, "\n" + ); + validate!( + 0, "echo # inline comment ending in \\", + 0, "\nline" + ); + validate!( + 0, "# line comment ending in \\", + 0, "\nline" + ); + validate!( + 0, "echo 'multiple empty continuation lines' \\", + 1, "\n\\", + 1, "\n", + 0, "\n" + ); + validate!( + 0, "echo 'multiple statements with continuation lines' \\", + 1, "\nline 1", + 0, "\necho \\", + 1, "\n" + ); + // This is an edge case, probably okay to change the behavior here. + validate!( + 0, "begin", + 1, " \\", + 2, "\necho 'continuation line in block header' \\", + 2, "\n", + 1, "\n", + 0, "\nend" + ); + })(); +}); + +#[cxx::bridge] +mod parse_util_ffi { + extern "Rust" { + fn parse_util_compute_indents_ffi(src: &CxxWString) -> Vec; + } +} + +fn parse_util_compute_indents_ffi(src: &CxxWString) -> Vec { + parse_util_compute_indents(&src.from_ffi()) +} diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 06dfda69f..4ee6c0acf 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -1190,208 +1190,6 @@ static void test_cancellation() { signal_clear_cancel(); } -namespace indent_tests { -// A struct which is either text or a new indent. -struct segment_t { - // The indent to set - int indent{0}; - const char *text{nullptr}; - - /* implicit */ segment_t(int indent) : indent(indent) {} - /* implicit */ segment_t(const char *text) : text(text) {} -}; - -using indent_test_t = std::vector; -using indent_test_list_t = std::vector; - -// Add a new test to a test list based on a series of ints and texts. -template -void add_test(indent_test_list_t *v, const Types &...types) { - segment_t segments[] = {types...}; - v->emplace_back(std::begin(segments), std::end(segments)); -} -} // namespace indent_tests - -static void test_indents() { - say(L"Testing indents"); - using namespace indent_tests; - - indent_test_list_t tests; - add_test(&tests, // - 0, "if", 1, " foo", // - 0, "\nend"); - - add_test(&tests, // - 0, "if", 1, " foo", // - 1, "\nfoo", // - 0, "\nend"); - - add_test(&tests, // - 0, "if", 1, " foo", // - 1, "\nif", 2, " bar", // - 1, "\nend", // - 0, "\nend"); - - add_test(&tests, // - 0, "if", 1, " foo", // - 1, "\nif", 2, " bar", // - 2, "\n", // - 1, "\nend\n"); - - add_test(&tests, // - 0, "if", 1, " foo", // - 1, "\nif", 2, " bar", // - 2, "\n"); - - add_test(&tests, // - 0, "begin", // - 1, "\nfoo", // - 1, "\n"); - - add_test(&tests, // - 0, "begin", // - 1, "\n;", // - 0, "end", // - 0, "\nfoo", 0, "\n"); - - add_test(&tests, // - 0, "begin", // - 1, "\n;", // - 0, "end", // - 0, "\nfoo", 0, "\n"); - - add_test(&tests, // - 0, "if", 1, " foo", // - 1, "\nif", 2, " bar", // - 2, "\nbaz", // - 1, "\nend", 1, "\n"); - - add_test(&tests, // - 0, "switch foo", // - 1, "\n" // - ); - - add_test(&tests, // - 0, "switch foo", // - 1, "\ncase bar", // - 1, "\ncase baz", // - 2, "\nquux", // - 2, "\nquux" // - ); - - add_test(&tests, // - 0, "switch foo", // - 1, "\ncas" // parse error indentation handling - ); - - add_test(&tests, // - 0, "while", 1, " false", // - 1, "\n# comment", // comment indentation handling - 1, "\ncommand", // - 1, "\n# comment 2" // - ); - - add_test(&tests, // - 0, "begin", // - 1, "\n", // "begin" is special because this newline belongs to the block header - 1, "\n" // - ); - - // Continuation lines. - add_test(&tests, // - 0, "echo 'continuation line' \\", // - 1, "\ncont", // - 0, "\n" // - ); - add_test(&tests, // - 0, "echo 'empty continuation line' \\", // - 1, "\n" // - ); - add_test(&tests, // - 0, "begin # continuation line in block", // - 1, "\necho \\", // - 2, "\ncont" // - ); - add_test(&tests, // - 0, "begin # empty continuation line in block", // - 1, "\necho \\", // - 2, "\n", // - 0, "\nend" // - ); - add_test(&tests, // - 0, "echo 'multiple continuation lines' \\", // - 1, "\nline1 \\", // - 1, "\n# comment", // - 1, "\n# more comment", // - 1, "\nline2 \\", // - 1, "\n" // - ); - add_test(&tests, // - 0, "echo # inline comment ending in \\", // - 0, "\nline" // - ); - add_test(&tests, // - 0, "# line comment ending in \\", // - 0, "\nline" // - ); - add_test(&tests, // - 0, "echo 'multiple empty continuation lines' \\", // - 1, "\n\\", // - 1, "\n", // - 0, "\n" // - ); - add_test(&tests, // - 0, "echo 'multiple statements with continuation lines' \\", // - 1, "\nline 1", // - 0, "\necho \\", // - 1, "\n" // - ); - // This is an edge case, probably okay to change the behavior here. - add_test(&tests, // - 0, "begin", 1, " \\", // - 2, "\necho 'continuation line in block header' \\", // - 2, "\n", // - 1, "\n", // - 0, "\nend" // - ); - - int test_idx = 0; - for (const indent_test_t &test : tests) { - // Construct the input text and expected indents. - wcstring text; - std::vector expected_indents; - int current_indent = 0; - for (const segment_t &segment : test) { - if (!segment.text) { - current_indent = segment.indent; - } else { - wcstring tmp = str2wcstring(segment.text); - text.append(tmp); - expected_indents.insert(expected_indents.end(), tmp.size(), current_indent); - } - } - do_test(expected_indents.size() == text.size()); - - // Compute the indents. - std::vector indents = parse_util_compute_indents(text); - - if (expected_indents.size() != indents.size()) { - err(L"Indent vector has wrong size! Expected %lu, actual %lu", expected_indents.size(), - indents.size()); - } - do_test(expected_indents.size() == indents.size()); - for (size_t i = 0; i < text.size(); i++) { - if (expected_indents.at(i) != indents.at(i)) { - err(L"Wrong indent at index %lu (char 0x%02x) in test #%lu (expected %d, actual " - L"%d):\n%ls\n", - i, text.at(i), test_idx, expected_indents.at(i), indents.at(i), text.c_str()); - break; // don't keep showing errors for the rest of the test - } - } - test_idx++; - } -} - static void test_const_strlen() { do_test(const_strlen("") == 0); do_test(const_strlen(L"") == 0); @@ -1465,7 +1263,7 @@ void test_dir_iter() { const wcstring selflinkname = L"selflink"; // link to self const wcstring fifoname = L"fifo"; const std::vector names = {dirname, regname, reglinkname, dirlinkname, - badlinkname, selflinkname, fifoname}; + badlinkname, selflinkname, fifoname}; const auto is_link_name = [&](const wcstring &name) -> bool { return contains({reglinkname, dirlinkname, badlinkname, selflinkname}, name); @@ -3988,7 +3786,7 @@ void history_tests_t::test_history() { say(L"Testing history"); const std::vector items = {L"Gamma", L"beta", L"BetA", L"Beta", L"alpha", - L"AlphA", L"Alpha", L"alph", L"ALPH", L"ZZZ"}; + L"AlphA", L"Alpha", L"alph", L"ALPH", L"ZZZ"}; const history_search_flags_t nocase = history_search_ignore_case; // Populate a history. @@ -6625,7 +6423,6 @@ static const test_t s_tests[]{ {TEST_GROUP("debounce"), test_debounce_timeout}, {TEST_GROUP("parser"), test_parser}, {TEST_GROUP("cancellation"), test_cancellation}, - {TEST_GROUP("indents"), test_indents}, {TEST_GROUP("utf8"), test_utf8}, {TEST_GROUP("escape_sequences"), test_escape_sequences}, {TEST_GROUP("lru"), test_lru}, diff --git a/src/parse_util.cpp b/src/parse_util.cpp index 5e2b8853f..dc758b09e 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -593,196 +593,9 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote, return result; } -indent_visitor_t::indent_visitor_t(const wcstring &src, std::vector &indents) - : src(src), indents(indents), visitor(new_indent_visitor(*this)) {} - -bool indent_visitor_t::has_newline(const ast::maybe_newlines_t &nls) const { - return nls.ptr()->source(src)->find(L'\n') != wcstring::npos; -} - -int indent_visitor_t::visit(const void *node_) { - auto &node = *static_cast(node_); - int inc = 0; - int dec = 0; - using namespace ast; - switch (node.typ()) { - case type_t::job_list: - case type_t::andor_job_list: - // Job lists are never unwound. - inc = 1; - dec = 1; - break; - - // Increment indents for conditions in headers (#1665). - case type_t::job_conjunction: - if (node.parent()->typ() == type_t::while_header || - node.parent()->typ() == type_t::if_clause) { - inc = 1; - dec = 1; - } - break; - - // Increment indents for job_continuation_t if it contains a newline. - // This is a bit of a hack - it indents cases like: - // cmd1 | - // ....cmd2 - // but avoids "double indenting" if there's no newline: - // cmd1 | while cmd2 - // ....cmd3 - // end - // See #7252. - case type_t::job_continuation: - if (has_newline(node.as_job_continuation().newlines())) { - inc = 1; - dec = 1; - } - break; - - // Likewise for && and ||. - case type_t::job_conjunction_continuation: - if (has_newline(node.as_job_conjunction_continuation().newlines())) { - inc = 1; - dec = 1; - } - break; - - case type_t::case_item_list: - // Here's a hack. Consider: - // switch abc - // cas - // - // fish will see that 'cas' is not valid inside a switch statement because it is - // not "case". It will then unwind back to the top level job list, producing a - // parse tree like: - // - // job_list - // switch_job - // - // normal_job - // cas - // - // And so we will think that the 'cas' job is at the same level as the switch. - // To address this, if we see that the switch statement was not closed, do not - // decrement the indent afterwards. - inc = 1; - dec = node.parent()->as_switch_statement().end().ptr()->has_source() ? 1 : 0; - break; - case type_t::token_base: { - if (node.parent()->typ() == type_t::begin_header && - node.token_type() == parse_token_type_t::end) { - // The newline after "begin" is optional, so it is part of the header. - // The header is not in the indented block, so indent the newline here. - if (*node.source(src) == L"\n") { - inc = 1; - dec = 1; - } - } - break; - } - default: - break; - } - - auto range = node.source_range(); - if (range.length > 0 && node.category() == category_t::leaf) { - record_line_continuations_until(range.start); - std::fill(indents.begin() + last_leaf_end, indents.begin() + range.start, last_indent); - } - - indent += inc; - - // If we increased the indentation, apply it to the remainder of the string, even if the - // list is empty. For example (where _ represents the cursor): - // - // if foo - // _ - // - // we want to indent the newline. - if (inc) { - last_indent = indent; - } - - // If this is a leaf node, apply the current indentation. - if (node.category() == category_t::leaf && range.length > 0) { - std::fill(indents.begin() + range.start, indents.begin() + range.end(), indent); - last_leaf_end = range.start + range.length; - last_indent = indent; - } - - return dec; -} - -void indent_visitor_t::did_visit(int dec) { indent -= dec; } - -void indent_visitor_t::record_line_continuations_until(size_t offset) { - wcstring gap_text = src.substr(last_leaf_end, offset - last_leaf_end); - size_t escaped_nl = gap_text.find(L"\\\n"); - if (escaped_nl == wcstring::npos) return; - auto line_end = gap_text.begin() + escaped_nl; - if (std::find(gap_text.begin(), line_end, L'#') != line_end) return; - auto end = src.begin() + offset; - auto newline = src.begin() + last_leaf_end + escaped_nl + 1; - // The gap text might contain multiple newlines if there are multiple lines that - // don't contain an AST node, for example, comment lines, or lines containing only - // the escaped newline. - do { - line_continuations.push_back(newline - src.begin()); - newline = std::find(newline + 1, end, L'\n'); - } while (newline != end); -} - std::vector parse_util_compute_indents(const wcstring &src) { - // Make a vector the same size as the input string, which contains the indents. Initialize them - // to 0. - const size_t src_size = src.size(); - std::vector indents(src_size, 0); - - // Simple trick: if our source does not contain a newline, then all indents are 0. - if (src.find('\n') == wcstring::npos) { - return indents; - } - - // Parse the string. We pass continue_after_error to produce a forest; the trailing indent of - // the last node we visited becomes the input indent of the next. I.e. in the case of 'switch - // foo ; cas', we get an invalid parse tree (since 'cas' is not valid) but we indent it as if it - // were a case item list. - using namespace ast; - auto ast = - ast_parse(src, parse_flag_continue_after_error | parse_flag_include_comments | - parse_flag_accept_incomplete_tokens | parse_flag_leave_unterminated); - - indent_visitor_t iv(src, indents); - iv.visitor->visit(*ast->top()); - iv.record_line_continuations_until(indents.size()); - std::fill(indents.begin() + iv.last_leaf_end, indents.end(), iv.last_indent); - - // All newlines now get the *next* indent. - // For example, in this code: - // if true - // stuff - // the newline "belongs" to the if statement as it ends its job. - // But when rendered, it visually belongs to the job list. - - size_t idx = src_size; - int next_indent = iv.last_indent; - while (idx--) { - if (src.at(idx) == L'\n') { - bool empty_middle_line = idx + 1 < src_size && src.at(idx + 1) == L'\n'; - if (!empty_middle_line) { - indents.at(idx) = next_indent; - } - } else { - next_indent = indents.at(idx); - } - } - // Add an extra level of indentation to continuation lines. - for (size_t idx : iv.line_continuations) { - do { - indents.at(idx)++; - } while (++idx < src_size && src.at(idx) != L'\n'); - } - - return indents; + auto indents = parse_util_compute_indents_ffi(src); + return {indents.begin(), indents.end()}; } /// Append a syntax error to the given error list. diff --git a/src/parse_util.h b/src/parse_util.h index b589e2278..27ff06a86 100644 --- a/src/parse_util.h +++ b/src/parse_util.h @@ -114,47 +114,6 @@ wchar_t parse_util_get_quote_type(const wcstring &cmd, size_t pos); wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote, bool no_tilde = false); -// Visit all of our nodes. When we get a job_list or case_item_list, increment indent while -// visiting its children. -struct IndentVisitor; -struct indent_visitor_t { - indent_visitor_t(const wcstring &src, std::vector &indents); - indent_visitor_t(const indent_visitor_t &) = delete; - indent_visitor_t &operator=(const indent_visitor_t &) = delete; - - int visit(const void *node); - void did_visit(int dec); - -#if INCLUDE_RUST_HEADERS - /// \return whether a maybe_newlines node contains at least one newline. - bool has_newline(const ast::maybe_newlines_t &nls) const; - - void record_line_continuations_until(size_t offset); - - // The one-past-the-last index of the most recently encountered leaf node. - // We use this to populate the indents even if there's no tokens in the range. - size_t last_leaf_end{0}; - - // The last indent which we assigned. - int last_indent{-1}; - - // The source we are indenting. - const wcstring &src; - - // List of indents, which we populate. - std::vector &indents; - - // Initialize our starting indent to -1, as our top-level node is a job list which - // will immediately increment it. - int indent{-1}; - - // List of locations of escaped newline characters. - std::vector line_continuations; - - rust::Box visitor; -#endif -}; - /// Given a string, parse it as fish code and then return the indents. The return value has the same /// size as the string. std::vector parse_util_compute_indents(const wcstring &src);