//! Constants used in the programmatic representation of fish code. use crate::ffi::{fish_wcswidth, fish_wcwidth, wcharz_t}; use crate::tokenizer::variable_assignment_equals_pos; use crate::wchar::prelude::*; use crate::wchar_ffi::{AsWstr, WCharFromFFI, WCharToFFI}; use bitflags::bitflags; use cxx::{type_id, ExternType}; use cxx::{CxxWString, UniquePtr}; pub type SourceOffset = u32; pub const SOURCE_OFFSET_INVALID: usize = SourceOffset::MAX as _; pub const SOURCE_LOCATION_UNKNOWN: usize = usize::MAX; bitflags! { pub struct ParseTreeFlags: u8 { /// attempt to build a "parse tree" no matter what. this may result in a 'forest' of /// disconnected trees. this is intended to be used by syntax highlighting. const CONTINUE_AFTER_ERROR = 1 << 0; /// include comment tokens. const INCLUDE_COMMENTS = 1 << 1; /// indicate that the tokenizer should accept incomplete tokens */ const ACCEPT_INCOMPLETE_TOKENS = 1 << 2; /// indicate that the parser should not generate the terminate token, allowing an 'unfinished' /// tree where some nodes may have no productions. const LEAVE_UNTERMINATED = 1 << 3; /// indicate that the parser should generate job_list entries for blank lines. const SHOW_BLANK_LINES = 1 << 4; /// indicate that extra semis should be generated. const SHOW_EXTRA_SEMIS = 1 << 5; } } bitflags! { #[derive(Default)] pub struct ParserTestErrorBits: u8 { const ERROR = 1; const INCOMPLETE = 2; } } #[cxx::bridge] mod parse_constants_ffi { extern "C++" { include!("wutil.h"); type wcharz_t = super::wcharz_t; } /// A range of source code. #[derive(PartialEq, Eq, Clone, Copy, Debug)] pub struct SourceRange { start: u32, length: u32, } extern "Rust" { #[cxx_name = "end"] fn end_ffi(self: &SourceRange) -> u32; #[cxx_name = "contains_inclusive"] fn contains_inclusive_ffi(self: &SourceRange, loc: u32) -> bool; } #[derive(Clone, Copy, Debug)] pub enum ParseTokenType { invalid = 1, // Terminal types. 
string, pipe, redirection, background, andand, oror, end, // Special terminal type that means no more tokens forthcoming. terminate, // Very special terminal types that don't appear in the production list. error, tokenizer_error, comment, } #[repr(u8)] #[derive(Clone, Copy, Debug)] pub enum ParseKeyword { // 'none' is not a keyword, it is a sentinel indicating nothing. none, kw_and, kw_begin, kw_builtin, kw_case, kw_command, kw_else, kw_end, kw_exclam, kw_exec, kw_for, kw_function, kw_if, kw_in, kw_not, kw_or, kw_switch, kw_time, kw_while, } // Statement decorations like 'command' or 'exec'. pub enum StatementDecoration { none, command, builtin, exec, } // Parse error code list. pub enum ParseErrorCode { none, // Matching values from enum parser_error. syntax, cmdsubst, generic, // unclassified error types // Tokenizer errors. tokenizer_unterminated_quote, tokenizer_unterminated_subshell, tokenizer_unterminated_slice, tokenizer_unterminated_escape, tokenizer_other, unbalancing_end, // end outside of block unbalancing_else, // else outside of if unbalancing_case, // case outside of switch bare_variable_assignment, // a=b without command andor_in_pipeline, // "and" or "or" after a pipe } struct parse_error_t { text: UniquePtr, code: ParseErrorCode, source_start: usize, source_length: usize, } extern "Rust" { type ParseError; fn code(self: &ParseError) -> ParseErrorCode; fn source_start(self: &ParseError) -> usize; fn text(self: &ParseError) -> UniquePtr; #[cxx_name = "describe"] fn describe_ffi( self: &ParseError, src: &CxxWString, is_interactive: bool, ) -> UniquePtr; #[cxx_name = "describe_with_prefix"] fn describe_with_prefix_ffi( self: &ParseError, src: &CxxWString, prefix: &CxxWString, is_interactive: bool, skip_caret: bool, ) -> UniquePtr; fn describe_with_prefix( self: &parse_error_t, src: &CxxWString, prefix: &CxxWString, is_interactive: bool, skip_caret: bool, ) -> UniquePtr; type ParseErrorListFfi; fn new_parse_error_list() -> Box; #[cxx_name = 
"offset_source_start"] fn offset_source_start_ffi(self: &mut ParseErrorListFfi, amt: usize); fn size(self: &ParseErrorListFfi) -> usize; fn at(self: &ParseErrorListFfi, offset: usize) -> *const ParseError; fn empty(self: &ParseErrorListFfi) -> bool; fn push_back(self: &mut ParseErrorListFfi, error: &parse_error_t); fn append(self: &mut ParseErrorListFfi, other: *mut ParseErrorListFfi); fn erase(self: &mut ParseErrorListFfi, index: usize); fn clear(self: &mut ParseErrorListFfi); } // The location of a pipeline. pub enum PipelinePosition { none, // not part of a pipeline first, // first command in a pipeline subsequent, // second or further command in a pipeline } } pub use parse_constants_ffi::{ parse_error_t, ParseErrorCode, ParseKeyword, ParseTokenType, PipelinePosition, SourceRange, StatementDecoration, }; impl SourceRange { pub fn new(start: usize, length: usize) -> Self { SourceRange { start: start.try_into().unwrap(), length: length.try_into().unwrap(), } } pub fn start(&self) -> usize { self.start.try_into().unwrap() } pub fn length(&self) -> usize { self.length.try_into().unwrap() } pub fn end(&self) -> usize { self.start .checked_add(self.length) .expect("Overflow") .try_into() .unwrap() } pub fn combine(&self, other: Self) -> Self { let start = std::cmp::min(self.start, other.start); SourceRange { start, length: std::cmp::max(self.end(), other.end()) .checked_sub(start.try_into().unwrap()) .expect("Overflow") .try_into() .unwrap(), } } fn end_ffi(&self) -> u32 { self.start.checked_add(self.length).expect("Overflow") } // \return true if a location is in this range, including one-past-the-end. 
pub fn contains_inclusive(&self, loc: usize) -> bool { self.start() <= loc && loc - self.start() <= self.length() } fn contains_inclusive_ffi(&self, loc: u32) -> bool { self.start <= loc && loc - self.start <= self.length } } impl Default for ParseTokenType { fn default() -> Self { ParseTokenType::invalid } } impl From for &'static wstr { #[widestrs] fn from(token_type: ParseTokenType) -> Self { match token_type { ParseTokenType::comment => "ParseTokenType::comment"L, ParseTokenType::error => "ParseTokenType::error"L, ParseTokenType::tokenizer_error => "ParseTokenType::tokenizer_error"L, ParseTokenType::background => "ParseTokenType::background"L, ParseTokenType::end => "ParseTokenType::end"L, ParseTokenType::pipe => "ParseTokenType::pipe"L, ParseTokenType::redirection => "ParseTokenType::redirection"L, ParseTokenType::string => "ParseTokenType::string"L, ParseTokenType::andand => "ParseTokenType::andand"L, ParseTokenType::oror => "ParseTokenType::oror"L, ParseTokenType::terminate => "ParseTokenType::terminate"L, ParseTokenType::invalid => "ParseTokenType::invalid"L, _ => "unknown token type"L, } } } impl Default for ParseKeyword { fn default() -> Self { ParseKeyword::none } } impl From for &'static wstr { #[widestrs] fn from(keyword: ParseKeyword) -> Self { match keyword { ParseKeyword::kw_exclam => "!"L, ParseKeyword::kw_and => "and"L, ParseKeyword::kw_begin => "begin"L, ParseKeyword::kw_builtin => "builtin"L, ParseKeyword::kw_case => "case"L, ParseKeyword::kw_command => "command"L, ParseKeyword::kw_else => "else"L, ParseKeyword::kw_end => "end"L, ParseKeyword::kw_exec => "exec"L, ParseKeyword::kw_for => "for"L, ParseKeyword::kw_function => "function"L, ParseKeyword::kw_if => "if"L, ParseKeyword::kw_in => "in"L, ParseKeyword::kw_not => "not"L, ParseKeyword::kw_or => "or"L, ParseKeyword::kw_switch => "switch"L, ParseKeyword::kw_time => "time"L, ParseKeyword::kw_while => "while"L, _ => "unknown_keyword"L, } } } impl printf_compat::args::ToArg<'static> for 
ParseKeyword { fn to_arg(self) -> printf_compat::args::Arg<'static> { printf_compat::args::Arg::Str(self.into()) } } impl From<&wstr> for ParseKeyword { #[widestrs] fn from(s: &wstr) -> Self { match s { _ if s == "!"L => ParseKeyword::kw_exclam, _ if s == "and"L => ParseKeyword::kw_and, _ if s == "begin"L => ParseKeyword::kw_begin, _ if s == "builtin"L => ParseKeyword::kw_builtin, _ if s == "case"L => ParseKeyword::kw_case, _ if s == "command"L => ParseKeyword::kw_command, _ if s == "else"L => ParseKeyword::kw_else, _ if s == "end"L => ParseKeyword::kw_end, _ if s == "exec"L => ParseKeyword::kw_exec, _ if s == "for"L => ParseKeyword::kw_for, _ if s == "function"L => ParseKeyword::kw_function, _ if s == "if"L => ParseKeyword::kw_if, _ if s == "in"L => ParseKeyword::kw_in, _ if s == "not"L => ParseKeyword::kw_not, _ if s == "or"L => ParseKeyword::kw_or, _ if s == "switch"L => ParseKeyword::kw_switch, _ if s == "time"L => ParseKeyword::kw_time, _ if s == "while"L => ParseKeyword::kw_while, _ => ParseKeyword::none, } } } impl Default for ParseErrorCode { fn default() -> Self { ParseErrorCode::none } } #[derive(Clone, Default)] pub struct ParseError { /// Text of the error. pub text: WString, /// Code for the error. pub code: ParseErrorCode, /// Offset and length of the token in the source code that triggered this error. pub source_start: usize, pub source_length: usize, } impl ParseError { /// Return a string describing the error, suitable for presentation to the user. If /// is_interactive is true, the offending line with a caret is printed as well. pub fn describe(self: &ParseError, src: &wstr, is_interactive: bool) -> WString { self.describe_with_prefix(src, L!(""), is_interactive, false) } /// Return a string describing the error, suitable for presentation to the user, with the given /// prefix. If skip_caret is false, the offending line with a caret is printed as well. 
pub fn describe_with_prefix( self: &ParseError, src: &wstr, prefix: &wstr, is_interactive: bool, skip_caret: bool, ) -> WString { let mut result = prefix.to_owned(); // Some errors don't have their message passed in, so we construct them here. // This affects e.g. `eval "a=(foo)"` match self.code { ParseErrorCode::andor_in_pipeline => { let context = wstr::from_char_slice( &src.as_char_slice()[self.source_start..self.source_start + self.source_length], ); result += wstr::from_char_slice( wgettext_fmt!(INVALID_PIPELINE_CMD_ERR_MSG, context).as_char_slice(), ); } ParseErrorCode::bare_variable_assignment => { let context = wstr::from_char_slice( &src.as_char_slice()[self.source_start..self.source_start + self.source_length], ); let assignment_src = context; #[allow(clippy::explicit_auto_deref)] let equals_pos = variable_assignment_equals_pos(assignment_src).unwrap(); let variable = &assignment_src[..equals_pos]; let value = &assignment_src[equals_pos + 1..]; result += wstr::from_char_slice( wgettext_fmt!(ERROR_BAD_COMMAND_ASSIGN_ERR_MSG, variable, value) .as_char_slice(), ); } _ => { if skip_caret && self.text.is_empty() { return L!("").to_owned(); } result += wstr::from_char_slice(self.text.as_char_slice()); } } let mut start = self.source_start; let mut len = self.source_length; if start >= src.len() { // If we are past the source, we clamp it to the end. start = src.len() - 1; len = 0; } if start + len > src.len() { len = src.len() - self.source_start; } if skip_caret { return result; } // Locate the beginning of this line of source. let mut line_start = 0; // Look for a newline prior to source_start. If we don't find one, start at the beginning of // the string; otherwise start one past the newline. Note that source_start may itself point // at a newline; we want to find the newline before it. 
if start > 0 { let prefix = &src.as_char_slice()[..start]; let newline_left_of_start = prefix.iter().rev().position(|c| *c == '\n'); if let Some(left_of_start) = newline_left_of_start { line_start = start - left_of_start; } } // Look for the newline after the source range. If the source range itself includes a // newline, that's the one we want, so start just before the end of the range. let last_char_in_range = if len == 0 { start } else { start + len - 1 }; let line_end = src.as_char_slice()[last_char_in_range..] .iter() .position(|c| *c == '\n') .map(|pos| pos + last_char_in_range) .unwrap_or(src.len()); assert!(line_end >= line_start); assert!(start >= line_start); // Don't include the caret and line if we're interactive and this is the first line, because // then it's obvious. let interactive_skip_caret = is_interactive && start == 0; if interactive_skip_caret { return result; } // Append the line of text. if !result.is_empty() { result += "\n"; } result += wstr::from_char_slice(&src.as_char_slice()[line_start..line_end]); // Append the caret line. The input source may include tabs; for that reason we // construct a "caret line" that has tabs in corresponding positions. let mut caret_space_line = WString::new(); caret_space_line.reserve(start - line_start); for i in line_start..start { let wc = src.as_char_slice()[i]; if wc == '\t' { caret_space_line += "\t"; } else if wc == '\n' { // It's possible that the start points at a newline itself. In that case, // pretend it's a space. We only expect this to be at the end of the string. caret_space_line += " "; } else { let width = fish_wcwidth(wc.into()).0; if width > 0 { caret_space_line += " ".repeat(width as usize).as_str(); } } } result += "\n"; result += wstr::from_char_slice(caret_space_line.as_char_slice()); result += "^"; if len > 1 { // Add a squiggle under the error location. // We do it like this // ^~~^ // With a "^" under the start and end, and squiggles in-between. 
let width = fish_wcswidth(unsafe { src.as_ptr().add(start) }, len).0; if width >= 2 { // Subtract one for each of the carets - this is important in case // the starting char has a width of > 1. result += "~".repeat(width as usize - 2).as_str(); result += "^"; } } result } } impl From<&parse_error_t> for ParseError { fn from(error: &parse_error_t) -> Self { ParseError { text: error.text.from_ffi(), code: error.code, source_start: error.source_start, source_length: error.source_length, } } } impl parse_error_t { fn describe_with_prefix( self: &parse_error_t, src: &CxxWString, prefix: &CxxWString, is_interactive: bool, skip_caret: bool, ) -> UniquePtr { ParseError::from(self).describe_with_prefix_ffi(src, prefix, is_interactive, skip_caret) } } impl ParseError { fn code(&self) -> ParseErrorCode { self.code } fn source_start(&self) -> usize { self.source_start } fn text(&self) -> UniquePtr { self.text.to_ffi() } fn describe_ffi( self: &ParseError, src: &CxxWString, is_interactive: bool, ) -> UniquePtr { self.describe(src.as_wstr(), is_interactive).to_ffi() } fn describe_with_prefix_ffi( self: &ParseError, src: &CxxWString, prefix: &CxxWString, is_interactive: bool, skip_caret: bool, ) -> UniquePtr { self.describe_with_prefix(src.as_wstr(), prefix.as_wstr(), is_interactive, skip_caret) .to_ffi() } } #[widestrs] pub fn token_type_user_presentable_description( type_: ParseTokenType, keyword: ParseKeyword, ) -> WString { if keyword != ParseKeyword::none { return sprintf!("keyword: '%ls'"L, Into::<&'static wstr>::into(keyword)); } match type_ { ParseTokenType::string => "a string"L.to_owned(), ParseTokenType::pipe => "a pipe"L.to_owned(), ParseTokenType::redirection => "a redirection"L.to_owned(), ParseTokenType::background => "a '&'"L.to_owned(), ParseTokenType::andand => "'&&'"L.to_owned(), ParseTokenType::oror => "'||'"L.to_owned(), ParseTokenType::end => "end of the statement"L.to_owned(), ParseTokenType::terminate => "end of the input"L.to_owned(), 
ParseTokenType::error => "a parse error"L.to_owned(), ParseTokenType::tokenizer_error => "an incomplete token"L.to_owned(), ParseTokenType::comment => "a comment"L.to_owned(), _ => sprintf!("a %ls"L, Into::<&'static wstr>::into(type_)), } } pub type ParseErrorList = Vec; #[derive(Clone)] pub struct ParseErrorListFfi(pub ParseErrorList); unsafe impl ExternType for ParseErrorListFfi { type Id = type_id!("ParseErrorListFfi"); type Kind = cxx::kind::Opaque; } /// Helper function to offset error positions by the given amount. This is used when determining /// errors in a substring of a larger source buffer. pub fn parse_error_offset_source_start(errors: &mut ParseErrorList, amt: usize) { if amt > 0 { for ref mut error in errors.iter_mut() { // Preserve the special meaning of -1 as 'unknown'. if error.source_start != SOURCE_LOCATION_UNKNOWN { error.source_start += amt; } } } } fn new_parse_error_list() -> Box { Box::new(ParseErrorListFfi(Vec::new())) } impl ParseErrorListFfi { fn offset_source_start_ffi(&mut self, amt: usize) { parse_error_offset_source_start(&mut self.0, amt) } fn size(&self) -> usize { self.0.len() } fn at(&self, offset: usize) -> *const ParseError { &self.0[offset] } fn empty(&self) -> bool { self.0.is_empty() } fn push_back(&mut self, error: &parse_error_t) { self.0.push(error.into()) } fn append(&mut self, other: *mut ParseErrorListFfi) { self.0.append(&mut (unsafe { &*other }.0.clone())); } fn erase(&mut self, index: usize) { self.0.remove(index); } fn clear(&mut self) { self.0.clear() } } /// Maximum number of function calls. pub const FISH_MAX_STACK_DEPTH: usize = 128; /// Maximum number of nested string substitutions (in lieu of evals) /// Reduced under TSAN: our CI test creates 500 jobs and this is very slow with TSAN. 
#[cfg(feature = "FISH_TSAN_WORKAROUNDS")]
pub const FISH_MAX_EVAL_DEPTH: usize = 250;
#[cfg(not(feature = "FISH_TSAN_WORKAROUNDS"))]
pub const FISH_MAX_EVAL_DEPTH: usize = 500;

/// Error message on a function that calls itself immediately.
pub const INFINITE_FUNC_RECURSION_ERR_MSG: &str =
    "The function '%ls' calls itself immediately, which would result in an infinite loop.";

/// Error message on reaching maximum call stack depth.
pub const CALL_STACK_LIMIT_EXCEEDED_ERR_MSG: &str =
    "The call stack limit has been exceeded. Do you have an accidental infinite loop?";

/// Error message when encountering an unknown builtin name.
pub const UNKNOWN_BUILTIN_ERR_MSG: &str = "Unknown builtin '%ls'";

/// Error message when encountering a failed expansion, e.g. for the variable name in for loops.
pub const FAILED_EXPANSION_VARIABLE_NAME_ERR_MSG: &str = "Unable to expand variable name '%ls'";

/// Error message when encountering an illegal file descriptor.
pub const ILLEGAL_FD_ERR_MSG: &str = "Illegal file descriptor in redirection '%ls'";

/// Error message for wildcards with no matches.
pub const WILDCARD_ERR_MSG: &str = "No matches for wildcard '%ls'. See `help wildcards-globbing`.";

/// Error when using break outside of loop.
pub const INVALID_BREAK_ERR_MSG: &str = "'break' while not inside of loop";

/// Error when using continue outside of loop.
pub const INVALID_CONTINUE_ERR_MSG: &str = "'continue' while not inside of loop";

/// Error message when a command may not be in a pipeline.
pub const INVALID_PIPELINE_CMD_ERR_MSG: &str = "The '%ls' command can not be used in a pipeline";

// Error messages. The number is a reminder of how many format specifiers are contained.

/// Error for $^.
pub const ERROR_BAD_VAR_CHAR1: &str = "$%lc is not a valid variable in fish.";

/// Error for ${a}.
pub const ERROR_BRACKETED_VARIABLE1: &str =
    "Variables cannot be bracketed. In fish, please use {$%ls}.";

/// Error for "${a}".
pub const ERROR_BRACKETED_VARIABLE_QUOTED1: &str =
    "Variables cannot be bracketed. In fish, please use \"$%ls\".";

/// Error issued on $?.
pub const ERROR_NOT_STATUS: &str = "$? is not the exit status. In fish, please use $status.";

/// Error issued on $$.
pub const ERROR_NOT_PID: &str = "$$ is not the pid. In fish, please use $fish_pid.";

/// Error issued on $#.
pub const ERROR_NOT_ARGV_COUNT: &str = "$# is not supported. In fish, please use 'count $argv'.";

/// Error issued on $@.
pub const ERROR_NOT_ARGV_AT: &str = "$@ is not supported. In fish, please use $argv.";

/// Error issued on $*.
pub const ERROR_NOT_ARGV_STAR: &str = "$* is not supported. In fish, please use $argv.";

/// Error issued on $.
pub const ERROR_NO_VAR_NAME: &str = "Expected a variable name after this $.";

/// Error message for Posix-style assignment: foo=bar.
pub const ERROR_BAD_COMMAND_ASSIGN_ERR_MSG: &str =
    "Unsupported use of '='. In fish, please use 'set %ls %ls'.";

/// Error message for a command like `time foo &`.
pub const ERROR_TIME_BACKGROUND: &str =
    "'time' is not supported for background jobs. Consider using 'command time'.";

/// Error issued on { echo; echo }.
pub const ERROR_NO_BRACE_GROUPING: &str =
    "'{ ... }' is not supported for grouping commands. Please use 'begin; ...; end'";