parse_util_cmdsubst_extent to return an exclusive range

Given "1(23)4", this function returns an inclusive range, from the opening
to the closing parenthesis.  The subcommand is extracted by incrementing
the range start and interpreting the result as an exclusive range.

This is confusing, especially if we want to add multi-character quotes.
Change it to always return the full range (including parentheses) and provide
an easy way to access the command string.

While at it, switch to returning an enum.

This change is perhaps larger and more complex than necessary (sorry)
because it is mainly made with multi-character quotes in mind.  Let's see
if that works out.
This commit is contained in:
Johannes Altmanninger 2024-04-27 16:55:46 +02:00
parent 861feb7b46
commit 2f6ed61833
5 changed files with 154 additions and 180 deletions

View File

@ -19,7 +19,9 @@ use crate::future_feature_flags::{feature_test, FeatureFlag};
use crate::history::{history_session_id, History};
use crate::operation_context::OperationContext;
use crate::parse_constants::{ParseError, ParseErrorCode, ParseErrorList, SOURCE_LOCATION_UNKNOWN};
use crate::parse_util::{parse_util_expand_variable_error, parse_util_locate_cmdsubst_range};
use crate::parse_util::{
parse_util_expand_variable_error, parse_util_locate_cmdsubst_range, MaybeParentheses,
};
use crate::path::path_apply_working_directory;
use crate::util::wcsfilecmp_glob;
use crate::wchar::prelude::*;
@ -922,42 +924,37 @@ pub fn expand_cmdsubst(
out: &mut CompletionReceiver,
errors: &mut Option<&mut ParseErrorList>,
) -> ExpandResult {
assert!(ctx.has_parser(), "Cannot expand without a parser");
let mut cursor = 0;
let mut paren_begin = 0;
let mut paren_end = 0;
let mut subcmd = L!("");
let mut is_quoted = false;
let mut has_dollar = false;
match parse_util_locate_cmdsubst_range(
let parens = match parse_util_locate_cmdsubst_range(
&input,
&mut cursor,
Some(&mut subcmd),
&mut paren_begin,
&mut paren_end,
false,
Some(&mut is_quoted),
Some(&mut has_dollar),
) {
-1 => {
MaybeParentheses::Error => {
append_syntax_error!(errors, SOURCE_LOCATION_UNKNOWN, "Mismatched parenthesis");
return ExpandResult::make_error(STATUS_EXPAND_ERROR.unwrap());
}
0 => {
MaybeParentheses::None => {
if !out.add(input) {
return append_overflow_error(errors, None);
}
return ExpandResult::ok();
}
1 => {}
_ => panic!(),
}
MaybeParentheses::CommandSubstitution(parens) => parens,
};
let mut sub_res = vec![];
let job_group = ctx.job_group.clone();
let subshell_status =
exec_subshell_for_expand(subcmd, ctx.parser(), job_group.as_ref(), &mut sub_res);
let subshell_status = exec_subshell_for_expand(
&input[parens.command()],
ctx.parser(),
job_group.as_ref(),
&mut sub_res,
);
if subshell_status != 0 {
// TODO: Ad-hoc switch, how can we enumerate the possible errors more safely?
let err = match subshell_status {
@ -1003,12 +1000,12 @@ pub fn expand_cmdsubst(
wgettext!("Unknown error while evaluating command substitution")
}
};
append_cmdsub_error_formatted!(errors, paren_begin, paren_end, err.to_owned());
append_cmdsub_error_formatted!(errors, parens.start(), parens.end() - 1, err.to_owned());
return ExpandResult::make_error(subshell_status);
}
// Expand slices like (cat /var/words)[1]
let mut tail_begin = paren_end + 1;
let mut tail_begin = parens.end();
if input.as_char_slice().get(tail_begin) == Some(&'[') {
let mut slice_idx = vec![];
let slice_begin = tail_begin;
@ -1080,9 +1077,10 @@ pub fn expand_cmdsubst(
// substitution output into the current expansion results.
for tail_item in tail_expand {
let mut whole_item = WString::new();
whole_item
.reserve(paren_begin + 1 + sub_res_joined.len() + 1 + tail_item.completion.len());
whole_item.push_utfstr(&input[..paren_begin - if has_dollar { 1 } else { 0 }]);
whole_item.reserve(
parens.start() + 1 + sub_res_joined.len() + 1 + tail_item.completion.len(),
);
whole_item.push_utfstr(&input[..parens.start() - if has_dollar { 1 } else { 0 }]);
whole_item.push(INTERNAL_SEPARATOR);
whole_item.push_utfstr(&sub_res_joined);
whole_item.push(INTERNAL_SEPARATOR);
@ -1099,8 +1097,9 @@ pub fn expand_cmdsubst(
let sub_item2 = escape_string(&sub_item, EscapeStringStyle::Script(EscapeFlags::COMMA));
for tail_item in &*tail_expand {
let mut whole_item = WString::new();
whole_item.reserve(paren_begin + 1 + sub_item2.len() + 1 + tail_item.completion.len());
whole_item.push_utfstr(&input[..paren_begin - if has_dollar { 1 } else { 0 }]);
whole_item
.reserve(parens.start() + 1 + sub_item2.len() + 1 + tail_item.completion.len());
whole_item.push_utfstr(&input[..parens.start() - if has_dollar { 1 } else { 0 }]);
whole_item.push(INTERNAL_SEPARATOR);
whole_item.push_utfstr(&sub_item2);
whole_item.push(INTERNAL_SEPARATOR);
@ -1320,33 +1319,23 @@ impl<'a, 'b, 'c> Expander<'a, 'b, 'c> {
}
if self.flags.contains(ExpandFlags::FAIL_ON_CMDSUBST) {
let mut cursor = 0;
let mut start = 0;
let mut end = 0;
match parse_util_locate_cmdsubst_range(
&input,
&mut cursor,
None,
&mut start,
&mut end,
true,
None,
None,
) {
0 => {
match parse_util_locate_cmdsubst_range(&input, &mut cursor, true, None, None) {
MaybeParentheses::Error => {
return ExpandResult::make_error(STATUS_EXPAND_ERROR.unwrap());
}
MaybeParentheses::None => {
if !out.add(input) {
return append_overflow_error(self.errors, None);
}
return ExpandResult::ok();
}
cmdsub => {
if cmdsub == 1 {
MaybeParentheses::CommandSubstitution(parens) => {
append_cmdsub_error!(
self.errors,
start,
end,
parens.start(),
parens.end()-1,
"command substitutions not allowed in command position. Try var=(your-cmd) $var ..."
);
}
return ExpandResult::make_error(STATUS_EXPAND_ERROR.unwrap());
}
}

View File

@ -28,7 +28,9 @@ use crate::output::{parse_color, Outputter};
use crate::parse_constants::{
ParseKeyword, ParseTokenType, ParseTreeFlags, SourceRange, StatementDecoration,
};
use crate::parse_util::{parse_util_locate_cmdsubst_range, parse_util_slice_length};
use crate::parse_util::{
parse_util_locate_cmdsubst_range, parse_util_slice_length, MaybeParentheses,
};
use crate::path::{
path_apply_working_directory, path_as_implicit_cd, path_get_cdpath, path_get_path,
paths_are_same_file,
@ -1023,42 +1025,27 @@ impl<'s> Highlighter<'s> {
// Now do command substitutions.
let mut cmdsub_cursor = 0;
let mut cmdsub_start = 0;
let mut cmdsub_end = 0;
let mut cmdsub_contents = L!("");
let mut is_quoted = false;
while parse_util_locate_cmdsubst_range(
while let MaybeParentheses::CommandSubstitution(parens) = parse_util_locate_cmdsubst_range(
arg_str,
&mut cmdsub_cursor,
Some(&mut cmdsub_contents),
&mut cmdsub_start,
&mut cmdsub_end,
/*accept_incomplete=*/ true,
Some(&mut is_quoted),
None,
) > 0
{
// The cmdsub_start is the open paren. cmdsub_end is either the close paren or the end of
// the string. cmdsub_contents extends from one past cmdsub_start to cmdsub_end.
assert!(cmdsub_end > cmdsub_start);
assert!(cmdsub_end - cmdsub_start - 1 == cmdsub_contents.len());
// Found a command substitution. Compute the position of the start and end of the cmdsub
// contents, within our overall src.
let arg_subcmd_start = arg_start + cmdsub_start;
let arg_subcmd_end = arg_start + cmdsub_end;
// Highlight the parens. The open paren must exist; the closed paren may not if it was
) {
// Highlight the parens. The open parens must exist; the closed paren may not if it was
// incomplete.
assert!(cmdsub_start < arg_str.len());
self.color_array[arg_subcmd_start] = HighlightSpec::with_fg(HighlightRole::operat);
if arg_subcmd_end < self.buff.len() {
self.color_array[arg_subcmd_end] = HighlightSpec::with_fg(HighlightRole::operat);
}
assert!(parens.start() < arg_str.len());
self.color_array[arg_start..][parens.opening()]
.fill(HighlightSpec::with_fg(HighlightRole::operat));
self.color_array[arg_start..][parens.closing()]
.fill(HighlightSpec::with_fg(HighlightRole::operat));
// Highlight it recursively.
let arg_cursor = self.cursor.map(|c| c.wrapping_sub(arg_subcmd_start));
let arg_cursor = self
.cursor
.map(|c| c.wrapping_sub(arg_start + parens.start()));
let cmdsub_contents = &arg_str[parens.command()];
let mut cmdsub_highlighter = Highlighter::new(
cmdsub_contents,
arg_cursor,
@ -1070,7 +1057,7 @@ impl<'s> Highlighter<'s> {
// Copy out the subcolors back into our array.
assert!(subcolors.len() == cmdsub_contents.len());
self.color_array[arg_subcmd_start + 1..arg_subcmd_end].copy_from_slice(&subcolors);
self.color_array[arg_start..][parens.command()].copy_from_slice(&subcolors);
}
}
// Colors the source range of a node with a given color.
@ -1413,18 +1400,11 @@ impl<'s> Highlighter<'s> {
/// \return whether a string contains a command substitution.
fn has_cmdsub(src: &wstr) -> bool {
let mut cursor = 0;
let mut start = 0;
let mut end = 0;
parse_util_locate_cmdsubst_range(
src,
&mut cursor,
None,
&mut start,
&mut end,
true,
None,
None,
) != 0
match parse_util_locate_cmdsubst_range(src, &mut cursor, true, None, None) {
MaybeParentheses::Error => return false,
MaybeParentheses::None => return false,
MaybeParentheses::CommandSubstitution(_) => return true,
}
}
fn contains_pending_variable(pending_variables: &[&wstr], haystack: &wstr) -> bool {

View File

@ -30,7 +30,7 @@ bitflags! {
}
bitflags! {
#[derive(Copy, Clone, Default, Eq, PartialEq)]
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub struct ParserTestErrorBits: u8 {
const ERROR = 1;
const INCOMPLETE = 2;

View File

@ -27,6 +27,7 @@ use crate::wchar::prelude::*;
use crate::wcstringutil::count_newlines;
use crate::wcstringutil::truncate;
use crate::wildcard::{ANY_CHAR, ANY_STRING, ANY_STRING_RECURSIVE};
use std::ops::Range;
use std::{iter, ops};
/// Handles slices: the square brackets in an expression like $foo[5..4]
@ -80,6 +81,37 @@ pub fn parse_util_slice_length(input: &wstr) -> Option<usize> {
None
}
#[derive(Debug, Default, Eq, PartialEq)]
pub struct Parentheses {
range: Range<usize>,
num_closing: usize,
}
impl Parentheses {
pub fn start(&self) -> usize {
self.range.start
}
pub fn end(&self) -> usize {
self.range.end
}
pub fn opening(&self) -> Range<usize> {
self.range.start..self.range.start + 1
}
pub fn closing(&self) -> Range<usize> {
self.range.end - self.num_closing..self.range.end
}
pub fn command(&self) -> Range<usize> {
self.range.start + 1..self.range.end - self.num_closing
}
}
#[derive(Eq, PartialEq, Debug)]
pub enum MaybeParentheses {
Error,
None,
CommandSubstitution(Parentheses),
}
/// Alternative API. Iterate over command substitutions.
///
/// \param str the string to search for subshells
@ -94,50 +126,32 @@ pub fn parse_util_slice_length(input: &wstr) -> Option<usize> {
/// \param out_has_dollar whether the command substitution has the optional leading $.
/// \return -1 on syntax error, 0 if no subshells exist and 1 on success
#[allow(clippy::too_many_arguments)]
pub fn parse_util_locate_cmdsubst_range<'a>(
s: &'a wstr,
pub fn parse_util_locate_cmdsubst_range(
s: &wstr,
inout_cursor_offset: &mut usize,
mut out_contents: Option<&mut &'a wstr>,
out_start: &mut usize,
out_end: &mut usize,
accept_incomplete: bool,
inout_is_quoted: Option<&mut bool>,
out_has_dollar: Option<&mut bool>,
) -> i32 {
// Clear the return values.
out_contents.as_mut().map(|s| **s = L!(""));
*out_start = 0;
*out_end = s.len();
) -> MaybeParentheses {
// Nothing to do if the offset is at or past the end of the string.
if *inout_cursor_offset >= s.len() {
return 0;
return MaybeParentheses::None;
}
// Defer to the wonky version.
let ret = parse_util_locate_cmdsub(
s,
*inout_cursor_offset,
out_start,
out_end,
accept_incomplete,
inout_is_quoted,
out_has_dollar,
);
if ret <= 0 {
return ret;
match &ret {
MaybeParentheses::Error | MaybeParentheses::None => (),
MaybeParentheses::CommandSubstitution(parens) => {
*inout_cursor_offset = parens.end();
}
}
// Assign the substring to the out_contents.
let interior_begin = *out_start + 1;
out_contents
.as_mut()
.map(|contents| **contents = &s[interior_begin..*out_end]);
// Update the inout_cursor_offset. Note this may cause it to exceed str.size(), though
// overflow is not likely.
*inout_cursor_offset = 1 + *out_end;
ret
}
@ -152,51 +166,46 @@ pub fn parse_util_locate_cmdsubst_range<'a>(
/// \param b the end of the searched string
pub fn parse_util_cmdsubst_extent(buff: &wstr, cursor: usize) -> ops::Range<usize> {
// The tightest command substitution found so far.
let mut ap = 0;
let mut bp = buff.len();
let mut result = 0..buff.len();
let mut pos = 0;
loop {
let mut begin = 0;
let mut end = 0;
if parse_util_locate_cmdsub(buff, pos, &mut begin, &mut end, true, None, None) <= 0 {
let parens = match parse_util_locate_cmdsub(buff, pos, true, None, None) {
// No subshell found, all done.
break;
}
MaybeParentheses::Error | MaybeParentheses::None => break,
MaybeParentheses::CommandSubstitution(parens) => parens,
};
if begin < cursor && end >= cursor {
let command = parens.command();
if command.start <= cursor && command.end >= cursor {
// This command substitution surrounds the cursor, so it's a tighter fit.
begin += 1;
ap = begin;
bp = end;
result = command;
// pos is where to begin looking for the next one. But if we reached the end there's no
// next one.
if begin >= end {
if result.start >= result.end {
break;
}
pos = begin + 1;
} else if begin >= cursor {
pos = result.start + 1;
} else if cursor < command.start {
// This command substitution starts at or after the cursor. Since it was the first
// command substitution in the string, we're done.
break;
} else {
// This command substitution ends before the cursor. Skip it.
assert!(end < cursor);
pos = end + 1;
assert!(command.end < cursor);
pos = parens.end();
assert!(pos <= buff.len());
}
}
ap..bp
result
}
fn parse_util_locate_cmdsub(
input: &wstr,
cursor: usize,
out_start: &mut usize,
out_end: &mut usize,
allow_incomplete: bool,
mut inout_is_quoted: Option<&mut bool>,
mut out_has_dollar: Option<&mut bool>,
) -> i32 {
) -> MaybeParentheses {
let input = input.as_char_slice();
let mut escaped = false;
@ -327,21 +336,25 @@ fn parse_util_locate_cmdsub(
syntax_error |= paran_count > 0 && !allow_incomplete;
if syntax_error {
return -1;
return MaybeParentheses::Error;
}
let Some(paran_begin) = paran_begin else {
return 0;
return MaybeParentheses::None;
};
*out_start = paran_begin;
*out_end = if paran_count != 0 {
let end = if paran_count != 0 {
input.len()
} else {
paran_end.unwrap()
paran_end.unwrap() + 1
};
1
let parens = Parentheses {
range: paran_begin..end,
num_closing: if paran_count == 0 { 1 } else { 0 },
};
MaybeParentheses::CommandSubstitution(parens)
}
/// Find the beginning and end of the process definition under the cursor
@ -1305,58 +1318,52 @@ pub fn parse_util_detect_errors_in_argument(
let mut cursor = 0;
let mut checked = 0;
let mut subst = L!("");
let mut do_loop = true;
let mut is_quoted = false;
while do_loop {
let mut paren_begin = 0;
let mut paren_end = 0;
let mut has_dollar = false;
match parse_util_locate_cmdsubst_range(
arg_src,
&mut cursor,
Some(&mut subst),
&mut paren_begin,
&mut paren_end,
false,
Some(&mut is_quoted),
Some(&mut has_dollar),
) {
-1 => {
MaybeParentheses::Error => {
err |= ParserTestErrorBits::ERROR;
append_syntax_error!(out_errors, source_start, 1, "Mismatched parenthesis");
return Err(err);
}
0 => {
MaybeParentheses::None => {
do_loop = false;
}
1 => {
MaybeParentheses::CommandSubstitution(parens) => {
err |= check_subtoken(
checked,
paren_begin - if has_dollar { 1 } else { 0 },
parens.start() - if has_dollar { 1 } else { 0 },
out_errors,
);
assert!(paren_begin < paren_end, "Parens out of order?");
let mut subst_errors = ParseErrorList::new();
if let Err(subst_err) =
parse_util_detect_errors(subst, Some(&mut subst_errors), false)
{
if let Err(subst_err) = parse_util_detect_errors(
&arg_src[parens.command()],
Some(&mut subst_errors),
false,
) {
err |= subst_err;
}
// Our command substitution produced error offsets relative to its source. Tweak the
// offsets of the errors in the command substitution to account for both its offset
// within the string, and the offset of the node.
let error_offset = paren_begin + 1 + source_start;
let error_offset = parens.start() + 1 + source_start;
parse_error_offset_source_start(&mut subst_errors, error_offset);
if let Some(ref mut out_errors) = out_errors {
out_errors.extend(subst_errors);
}
checked = paren_end + 1;
checked = parens.end();
}
_ => panic!("unexpected parse_util_locate_cmdsubst() return value"),
}
}

View File

@ -86,6 +86,7 @@ use crate::pager::{PageRendering, Pager, SelectionMotion};
use crate::parse_constants::SourceRange;
use crate::parse_constants::{ParseTreeFlags, ParserTestErrorBits};
use crate::parse_tree::ParsedSource;
use crate::parse_util::MaybeParentheses;
use crate::parse_util::SPACES_PER_INDENT;
use crate::parse_util::{
parse_util_cmdsubst_extent, parse_util_compute_indents, parse_util_contains_wildcards,
@ -4529,31 +4530,28 @@ fn extract_tokens(s: &wstr) -> Vec<PositionedToken> {
// If we have command subs, then we don't include this token; instead we recurse.
let mut has_cmd_subs = false;
let mut cmdsub_contents = L!("");
let mut cmdsub_cursor = range.start();
let mut cmdsub_start = 0;
let mut cmdsub_end = 0;
while parse_util_locate_cmdsubst_range(
loop {
match parse_util_locate_cmdsubst_range(
s,
&mut cmdsub_cursor,
Some(&mut cmdsub_contents),
&mut cmdsub_start,
&mut cmdsub_end,
/*accept_incomplete=*/ true,
None,
None,
) > 0
{
if cmdsub_start >= range.end() {
) {
MaybeParentheses::Error | MaybeParentheses::None => break,
MaybeParentheses::CommandSubstitution(parens) => {
if parens.start() >= range.end() {
break;
}
has_cmd_subs = true;
for mut t in extract_tokens(cmdsub_contents) {
// cmdsub_start is the open paren; the contents start one after it.
t.range.start += u32::try_from(cmdsub_start + 1).unwrap();
for mut t in extract_tokens(&s[parens.command()]) {
t.range.start += u32::try_from(parens.command().start).unwrap();
result.push(t);
}
}
}
}
if !has_cmd_subs {
// Common case of no command substitutions in this leaf node.