From 2f6ed6183358c301e71a88b76f6005102566db15 Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Sat, 27 Apr 2024 16:55:46 +0200 Subject: [PATCH] parse_util_cmdsubst_extent to return an exclusive range Given "1(23)4", this function returns an inclusive range, from the opening to the closing parenthesis. The subcommand is extracted by incrementing the range start and interpreting the result as an exclusive range. This is confusing, especially if we want to add multi-character quotes. Change it to always return the full range (including parentheses) and provide an easy way to access the command string. While at it, switch to returning an enum. This change is perhaps larger and more complex than necessary (sorry) because it is mainly made with multi-character quotes in mind. Let's see if that works out. --- src/expand.rs | 81 ++++++++++------------- src/highlight.rs | 62 ++++++----------- src/parse_constants.rs | 2 +- src/parse_util.rs | 147 +++++++++++++++++++++-------------------- src/reader.rs | 42 ++++++------ 5 files changed, 154 insertions(+), 180 deletions(-) diff --git a/src/expand.rs b/src/expand.rs index b5934c48f..8ebbd03ee 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -19,7 +19,9 @@ use crate::future_feature_flags::{feature_test, FeatureFlag}; use crate::history::{history_session_id, History}; use crate::operation_context::OperationContext; use crate::parse_constants::{ParseError, ParseErrorCode, ParseErrorList, SOURCE_LOCATION_UNKNOWN}; -use crate::parse_util::{parse_util_expand_variable_error, parse_util_locate_cmdsubst_range}; +use crate::parse_util::{ + parse_util_expand_variable_error, parse_util_locate_cmdsubst_range, MaybeParentheses, +}; use crate::path::path_apply_working_directory; use crate::util::wcsfilecmp_glob; use crate::wchar::prelude::*; @@ -922,42 +924,37 @@ pub fn expand_cmdsubst( out: &mut CompletionReceiver, errors: &mut Option<&mut ParseErrorList>, ) -> ExpandResult { - assert!(ctx.has_parser(), "Cannot expand without a parser"); let mut cursor = 0; - let mut paren_begin = 0; - let mut paren_end = 0; - let mut subcmd = L!(""); - let mut is_quoted = false; let mut has_dollar = false; - match parse_util_locate_cmdsubst_range( + let parens = match parse_util_locate_cmdsubst_range( &input, &mut cursor, - Some(&mut subcmd), - &mut paren_begin, - &mut paren_end, false, Some(&mut is_quoted), Some(&mut has_dollar), ) { - -1 => { + MaybeParentheses::Error => { append_syntax_error!(errors, SOURCE_LOCATION_UNKNOWN, "Mismatched parenthesis"); return ExpandResult::make_error(STATUS_EXPAND_ERROR.unwrap()); } - 0 => { + MaybeParentheses::None => { if !out.add(input) { return append_overflow_error(errors, None); } return ExpandResult::ok(); } - 1 => {} - _ => panic!(), - } + MaybeParentheses::CommandSubstitution(parens) => parens, + }; let mut sub_res = vec![]; let job_group = ctx.job_group.clone(); - let subshell_status = - exec_subshell_for_expand(subcmd, ctx.parser(), job_group.as_ref(), &mut sub_res); + let subshell_status = exec_subshell_for_expand( + &input[parens.command()], + ctx.parser(), + job_group.as_ref(), + &mut sub_res, + ); if subshell_status != 0 { // TODO: Ad-hoc switch, how can we enumerate the possible errors more safely? let err = match subshell_status { @@ -1003,12 +1000,12 @@ pub fn expand_cmdsubst( wgettext!("Unknown error while evaluating command substitution") } }; - append_cmdsub_error_formatted!(errors, paren_begin, paren_end, err.to_owned()); + append_cmdsub_error_formatted!(errors, parens.start(), parens.end() - 1, err.to_owned()); return ExpandResult::make_error(subshell_status); } // Expand slices like (cat /var/words)[1] - let mut tail_begin = paren_end + 1; + let mut tail_begin = parens.end(); if input.as_char_slice().get(tail_begin) == Some(&'[') { let mut slice_idx = vec![]; let slice_begin = tail_begin; @@ -1080,9 +1077,10 @@ pub fn expand_cmdsubst( // substitution output into the current expansion results. for tail_item in tail_expand { let mut whole_item = WString::new(); - whole_item - .reserve(paren_begin + 1 + sub_res_joined.len() + 1 + tail_item.completion.len()); - whole_item.push_utfstr(&input[..paren_begin - if has_dollar { 1 } else { 0 }]); + whole_item.reserve( + parens.start() + 1 + sub_res_joined.len() + 1 + tail_item.completion.len(), + ); + whole_item.push_utfstr(&input[..parens.start() - if has_dollar { 1 } else { 0 }]); whole_item.push(INTERNAL_SEPARATOR); whole_item.push_utfstr(&sub_res_joined); whole_item.push(INTERNAL_SEPARATOR); @@ -1099,8 +1097,9 @@ pub fn expand_cmdsubst( let sub_item2 = escape_string(&sub_item, EscapeStringStyle::Script(EscapeFlags::COMMA)); for tail_item in &*tail_expand { let mut whole_item = WString::new(); - whole_item.reserve(paren_begin + 1 + sub_item2.len() + 1 + tail_item.completion.len()); - whole_item.push_utfstr(&input[..paren_begin - if has_dollar { 1 } else { 0 }]); + whole_item + .reserve(parens.start() + 1 + sub_item2.len() + 1 + tail_item.completion.len()); + whole_item.push_utfstr(&input[..parens.start() - if has_dollar { 1 } else { 0 }]); whole_item.push(INTERNAL_SEPARATOR); whole_item.push_utfstr(&sub_item2); whole_item.push(INTERNAL_SEPARATOR); @@ -1320,33 +1319,23 @@ impl<'a, 'b, 'c> Expander<'a, 'b, 'c> { } if self.flags.contains(ExpandFlags::FAIL_ON_CMDSUBST) { let mut cursor = 0; - let mut start = 0; - let mut end = 0; - match parse_util_locate_cmdsubst_range( - &input, - &mut cursor, - None, - &mut start, - &mut end, - true, - None, - None, - ) { - 0 => { + match parse_util_locate_cmdsubst_range(&input, &mut cursor, true, None, None) { + MaybeParentheses::Error => { + return ExpandResult::make_error(STATUS_EXPAND_ERROR.unwrap()); + } + MaybeParentheses::None => { if !out.add(input) { return append_overflow_error(self.errors, None); } return ExpandResult::ok(); } - cmdsub => { - if cmdsub == 1 { - append_cmdsub_error!( - self.errors, - start, - end, - "command substitutions not allowed in command position. Try var=(your-cmd) $var ..." - ); - } + MaybeParentheses::CommandSubstitution(parens) => { + append_cmdsub_error!( + self.errors, + parens.start(), + parens.end()-1, + "command substitutions not allowed in command position. Try var=(your-cmd) $var ..." + ); return ExpandResult::make_error(STATUS_EXPAND_ERROR.unwrap()); } } diff --git a/src/highlight.rs b/src/highlight.rs index 69855633c..9f7609894 100644 --- a/src/highlight.rs +++ b/src/highlight.rs @@ -28,7 +28,9 @@ use crate::output::{parse_color, Outputter}; use crate::parse_constants::{ ParseKeyword, ParseTokenType, ParseTreeFlags, SourceRange, StatementDecoration, }; -use crate::parse_util::{parse_util_locate_cmdsubst_range, parse_util_slice_length}; +use crate::parse_util::{ + parse_util_locate_cmdsubst_range, parse_util_slice_length, MaybeParentheses, +}; use crate::path::{ path_apply_working_directory, path_as_implicit_cd, path_get_cdpath, path_get_path, paths_are_same_file, @@ -1023,42 +1025,27 @@ impl<'s> Highlighter<'s> { // Now do command substitutions. let mut cmdsub_cursor = 0; - let mut cmdsub_start = 0; - let mut cmdsub_end = 0; - let mut cmdsub_contents = L!(""); let mut is_quoted = false; - - while parse_util_locate_cmdsubst_range( + while let MaybeParentheses::CommandSubstitution(parens) = parse_util_locate_cmdsubst_range( arg_str, &mut cmdsub_cursor, - Some(&mut cmdsub_contents), - &mut cmdsub_start, - &mut cmdsub_end, /*accept_incomplete=*/ true, Some(&mut is_quoted), None, - ) > 0 - { - // The cmdsub_start is the open paren. cmdsub_end is either the close paren or the end of - // the string. cmdsub_contents extends from one past cmdsub_start to cmdsub_end. - assert!(cmdsub_end > cmdsub_start); - assert!(cmdsub_end - cmdsub_start - 1 == cmdsub_contents.len()); - - // Found a command substitution. Compute the position of the start and end of the cmdsub - // contents, within our overall src. - let arg_subcmd_start = arg_start + cmdsub_start; - let arg_subcmd_end = arg_start + cmdsub_end; - - // Highlight the parens. The open paren must exist; the closed paren may not if it was + ) { + // Highlight the parens. The open parens must exist; the closed paren may not if it was // incomplete. - assert!(cmdsub_start < arg_str.len()); - self.color_array[arg_subcmd_start] = HighlightSpec::with_fg(HighlightRole::operat); - if arg_subcmd_end < self.buff.len() { - self.color_array[arg_subcmd_end] = HighlightSpec::with_fg(HighlightRole::operat); - } + assert!(parens.start() < arg_str.len()); + self.color_array[arg_start..][parens.opening()] + .fill(HighlightSpec::with_fg(HighlightRole::operat)); + self.color_array[arg_start..][parens.closing()] + .fill(HighlightSpec::with_fg(HighlightRole::operat)); // Highlight it recursively. - let arg_cursor = self.cursor.map(|c| c.wrapping_sub(arg_subcmd_start)); + let arg_cursor = self + .cursor + .map(|c| c.wrapping_sub(arg_start + parens.start())); + let cmdsub_contents = &arg_str[parens.command()]; let mut cmdsub_highlighter = Highlighter::new( cmdsub_contents, arg_cursor, @@ -1070,7 +1057,7 @@ impl<'s> Highlighter<'s> { // Copy out the subcolors back into our array. assert!(subcolors.len() == cmdsub_contents.len()); - self.color_array[arg_subcmd_start + 1..arg_subcmd_end].copy_from_slice(&subcolors); + self.color_array[arg_start..][parens.command()].copy_from_slice(&subcolors); } } // Colors the source range of a node with a given color. @@ -1413,18 +1400,11 @@ impl<'s> Highlighter<'s> { /// \return whether a string contains a command substitution. fn has_cmdsub(src: &wstr) -> bool { let mut cursor = 0; - let mut start = 0; - let mut end = 0; - parse_util_locate_cmdsubst_range( - src, - &mut cursor, - None, - &mut start, - &mut end, - true, - None, - None, - ) != 0 + match parse_util_locate_cmdsubst_range(src, &mut cursor, true, None, None) { + MaybeParentheses::Error => return false, + MaybeParentheses::None => return false, + MaybeParentheses::CommandSubstitution(_) => return true, + } } fn contains_pending_variable(pending_variables: &[&wstr], haystack: &wstr) -> bool { diff --git a/src/parse_constants.rs b/src/parse_constants.rs index 50a4c0820..5deb72413 100644 --- a/src/parse_constants.rs +++ b/src/parse_constants.rs @@ -30,7 +30,7 @@ bitflags! { } bitflags! { - #[derive(Copy, Clone, Default, Eq, PartialEq)] + #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] pub struct ParserTestErrorBits: u8 { const ERROR = 1; const INCOMPLETE = 2; diff --git a/src/parse_util.rs b/src/parse_util.rs index 02df38d6c..bf69e0af9 100644 --- a/src/parse_util.rs +++ b/src/parse_util.rs @@ -27,6 +27,7 @@ use crate::wchar::prelude::*; use crate::wcstringutil::count_newlines; use crate::wcstringutil::truncate; use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE}; +use std::ops::Range; use std::{iter, ops}; /// Handles slices: the square brackets in an expression like $foo[5..4] @@ -80,6 +81,37 @@ pub fn parse_util_slice_length(input: &wstr) -> Option { None } +#[derive(Debug, Default, Eq, PartialEq)] +pub struct Parentheses { + range: Range, + num_closing: usize, +} + +impl Parentheses { + pub fn start(&self) -> usize { + self.range.start + } + pub fn end(&self) -> usize { + self.range.end + } + pub fn opening(&self) -> Range { + self.range.start..self.range.start + 1 + } + pub fn closing(&self) -> Range { + self.range.end - self.num_closing..self.range.end + } + pub fn command(&self) -> Range { + self.range.start + 1..self.range.end - self.num_closing + } +} + +#[derive(Eq, PartialEq, Debug)] +pub enum MaybeParentheses { + Error, + None, + CommandSubstitution(Parentheses), +} + /// Alternative API. Iterate over command substitutions. /// /// \param str the string to search for subshells @@ -94,50 +126,32 @@ pub fn parse_util_slice_length(input: &wstr) -> Option { /// \param out_has_dollar whether the command substitution has the optional leading $. /// \return -1 on syntax error, 0 if no subshells exist and 1 on success #[allow(clippy::too_many_arguments)] -pub fn parse_util_locate_cmdsubst_range<'a>( - s: &'a wstr, +pub fn parse_util_locate_cmdsubst_range( + s: &wstr, inout_cursor_offset: &mut usize, - mut out_contents: Option<&mut &'a wstr>, - out_start: &mut usize, - out_end: &mut usize, accept_incomplete: bool, inout_is_quoted: Option<&mut bool>, out_has_dollar: Option<&mut bool>, -) -> i32 { - // Clear the return values. - out_contents.as_mut().map(|s| **s = L!("")); - *out_start = 0; - *out_end = s.len(); - +) -> MaybeParentheses { // Nothing to do if the offset is at or past the end of the string. if *inout_cursor_offset >= s.len() { - return 0; + return MaybeParentheses::None; } // Defer to the wonky version. let ret = parse_util_locate_cmdsub( s, *inout_cursor_offset, - out_start, - out_end, accept_incomplete, inout_is_quoted, out_has_dollar, ); - if ret <= 0 { - return ret; + match &ret { + MaybeParentheses::Error | MaybeParentheses::None => (), + MaybeParentheses::CommandSubstitution(parens) => { + *inout_cursor_offset = parens.end(); + } } - - // Assign the substring to the out_contents. - let interior_begin = *out_start + 1; - out_contents - .as_mut() - .map(|contents| **contents = &s[interior_begin..*out_end]); - - // Update the inout_cursor_offset. Note this may cause it to exceed str.size(), though - // overflow is not likely. - *inout_cursor_offset = 1 + *out_end; - ret } @@ -152,51 +166,46 @@ pub fn parse_util_locate_cmdsubst_range<'a>( /// \param b the end of the searched string pub fn parse_util_cmdsubst_extent(buff: &wstr, cursor: usize) -> ops::Range { // The tightest command substitution found so far. - let mut ap = 0; - let mut bp = buff.len(); + let mut result = 0..buff.len(); let mut pos = 0; loop { - let mut begin = 0; - let mut end = 0; - if parse_util_locate_cmdsub(buff, pos, &mut begin, &mut end, true, None, None) <= 0 { + let parens = match parse_util_locate_cmdsub(buff, pos, true, None, None) { // No subshell found, all done. - break; - } + MaybeParentheses::Error | MaybeParentheses::None => break, + MaybeParentheses::CommandSubstitution(parens) => parens, + }; - if begin < cursor && end >= cursor { + let command = parens.command(); + if command.start <= cursor && command.end >= cursor { // This command substitution surrounds the cursor, so it's a tighter fit. - begin += 1; - ap = begin; - bp = end; + result = command; // pos is where to begin looking for the next one. But if we reached the end there's no // next one. - if begin >= end { + if result.start >= result.end { break; } - pos = begin + 1; - } else if begin >= cursor { + pos = result.start + 1; + } else if cursor < command.start { // This command substitution starts at or after the cursor. Since it was the first // command substitution in the string, we're done. break; } else { // This command substitution ends before the cursor. Skip it. - assert!(end < cursor); - pos = end + 1; + assert!(command.end < cursor); + pos = parens.end(); assert!(pos <= buff.len()); } } - ap..bp + result } fn parse_util_locate_cmdsub( input: &wstr, cursor: usize, - out_start: &mut usize, - out_end: &mut usize, allow_incomplete: bool, mut inout_is_quoted: Option<&mut bool>, mut out_has_dollar: Option<&mut bool>, -) -> i32 { +) -> MaybeParentheses { let input = input.as_char_slice(); let mut escaped = false; @@ -327,21 +336,25 @@ fn parse_util_locate_cmdsub( syntax_error |= paran_count > 0 && !allow_incomplete; if syntax_error { - return -1; + return MaybeParentheses::Error; } let Some(paran_begin) = paran_begin else { - return 0; + return MaybeParentheses::None; }; - *out_start = paran_begin; - *out_end = if paran_count != 0 { + let end = if paran_count != 0 { input.len() } else { - paran_end.unwrap() + paran_end.unwrap() + 1 }; - 1 + let parens = Parentheses { + range: paran_begin..end, + num_closing: if paran_count == 0 { 1 } else { 0 }, + }; + + MaybeParentheses::CommandSubstitution(parens) } /// Find the beginning and end of the process definition under the cursor @@ -1305,58 +1318,52 @@ pub fn parse_util_detect_errors_in_argument( let mut cursor = 0; let mut checked = 0; - let mut subst = L!(""); let mut do_loop = true; let mut is_quoted = false; while do_loop { - let mut paren_begin = 0; - let mut paren_end = 0; let mut has_dollar = false; match parse_util_locate_cmdsubst_range( arg_src, &mut cursor, - Some(&mut subst), - &mut paren_begin, - &mut paren_end, false, Some(&mut is_quoted), Some(&mut has_dollar), ) { - -1 => { + MaybeParentheses::Error => { err |= ParserTestErrorBits::ERROR; append_syntax_error!(out_errors, source_start, 1, "Mismatched parenthesis"); return Err(err); } - 0 => { + MaybeParentheses::None => { do_loop = false; } - 1 => { + MaybeParentheses::CommandSubstitution(parens) => { err |= check_subtoken( checked, - paren_begin - if has_dollar { 1 } else { 0 }, + parens.start() - if has_dollar { 1 } else { 0 }, out_errors, ); - assert!(paren_begin < paren_end, "Parens out of order?"); let mut subst_errors = ParseErrorList::new(); - if let Err(subst_err) = - parse_util_detect_errors(subst, Some(&mut subst_errors), false) - { + if let Err(subst_err) = parse_util_detect_errors( + &arg_src[parens.command()], + Some(&mut subst_errors), + false, + ) { err |= subst_err; } // Our command substitution produced error offsets relative to its source. Tweak the // offsets of the errors in the command substitution to account for both its offset // within the string, and the offset of the node. - let error_offset = paren_begin + 1 + source_start; + let error_offset = parens.start() + 1 + source_start; parse_error_offset_source_start(&mut subst_errors, error_offset); if let Some(ref mut out_errors) = out_errors { out_errors.extend(subst_errors); } - checked = paren_end + 1; + checked = parens.end(); } - _ => panic!("unexpected parse_util_locate_cmdsubst() return value"), } } diff --git a/src/reader.rs b/src/reader.rs index e3cfb6829..19144ec67 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -86,6 +86,7 @@ use crate::pager::{PageRendering, Pager, SelectionMotion}; use crate::parse_constants::SourceRange; use crate::parse_constants::{ParseTreeFlags, ParserTestErrorBits}; use crate::parse_tree::ParsedSource; +use crate::parse_util::MaybeParentheses; use crate::parse_util::SPACES_PER_INDENT; use crate::parse_util::{ parse_util_cmdsubst_extent, parse_util_compute_indents, parse_util_contains_wildcards, @@ -4529,29 +4530,26 @@ fn extract_tokens(s: &wstr) -> Vec { // If we have command subs, then we don't include this token; instead we recurse. let mut has_cmd_subs = false; - let mut cmdsub_contents = L!(""); let mut cmdsub_cursor = range.start(); - let mut cmdsub_start = 0; - let mut cmdsub_end = 0; - while parse_util_locate_cmdsubst_range( - s, - &mut cmdsub_cursor, - Some(&mut cmdsub_contents), - &mut cmdsub_start, - &mut cmdsub_end, - /*accept_incomplete=*/ true, - None, - None, - ) > 0 - { - if cmdsub_start >= range.end() { - break; - } - has_cmd_subs = true; - for mut t in extract_tokens(cmdsub_contents) { - // cmdsub_start is the open paren; the contents start one after it. - t.range.start += u32::try_from(cmdsub_start + 1).unwrap(); - result.push(t); + loop { + match parse_util_locate_cmdsubst_range( + s, + &mut cmdsub_cursor, + /*accept_incomplete=*/ true, + None, + None, + ) { + MaybeParentheses::Error | MaybeParentheses::None => break, + MaybeParentheses::CommandSubstitution(parens) => { + if parens.start() >= range.end() { + break; + } + has_cmd_subs = true; + for mut t in extract_tokens(&s[parens.command()]) { + t.range.start += u32::try_from(parens.command().start).unwrap(); + result.push(t); + } + } } }