From 88e0c2137a14220109eaa3bb66c87b583baaa86d Mon Sep 17 00:00:00 2001 From: Victor Song Date: Fri, 10 Mar 2023 21:47:41 -0500 Subject: [PATCH] Added constants for expansions --- fish-rust/src/expand.rs | 39 +++++++++++++++++++++++++++++++++++++++ fish-rust/src/lib.rs | 1 + fish-rust/src/path.rs | 8 ++++++-- fish-rust/src/wchar.rs | 28 +++++++++++++++++++++++++--- 4 files changed, 71 insertions(+), 5 deletions(-) create mode 100644 fish-rust/src/expand.rs diff --git a/fish-rust/src/expand.rs b/fish-rust/src/expand.rs new file mode 100644 index 000000000..1d8e136bf --- /dev/null +++ b/fish-rust/src/expand.rs @@ -0,0 +1,39 @@ +use crate::wchar::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END}; + +/// Private use area characters used in expansions +#[repr(u32)] +pub enum ExpandChars { + /// Character representing a home directory. + HomeDirectory = EXPAND_RESERVED_BASE as u32, + /// Character representing process expansion for %self. + ProcessExpandSelf, + /// Character representing variable expansion. + VariableExpand, + /// Character representing variable expansion into a single element. + VariableExpandSingle, + /// Character representing the start of a bracket expansion. + BraceBegin, + /// Character representing the end of a bracket expansion. + BraceEnd, + /// Character representing separation between two bracket elements. + BraceSep, + /// Character that takes the place of any whitespace within non-quoted text in braces + BraceSpace, + /// Separate subtokens in a token with this character. + InternalSeparator, + /// Character representing an empty variable expansion. Only used transitively while expanding + /// variables. 
+ VariableExpandEmpty, +} + +const _: () = assert!( + EXPAND_RESERVED_END as u32 > ExpandChars::VariableExpandEmpty as u32, + "Characters used in expansions must stay within private use area" +); + +impl From<ExpandChars> for char { + fn from(val: ExpandChars) -> Self { + // We know this is safe because we limit the range of this enum + unsafe { char::from_u32_unchecked(val as _) } + } +} diff --git a/fish-rust/src/lib.rs b/fish-rust/src/lib.rs index dd50f7fc5..f5a559aa9 100644 --- a/fish-rust/src/lib.rs +++ b/fish-rust/src/lib.rs @@ -46,6 +46,7 @@ mod builtins; mod env; mod re; +mod expand; mod path; // Don't use `#[cfg(test)]` here to make sure ffi tests are built and tested diff --git a/fish-rust/src/path.rs b/fish-rust/src/path.rs index 417be5272..d1fda9eb5 100644 --- a/fish-rust/src/path.rs +++ b/fish-rust/src/path.rs @@ -1,4 +1,7 @@ -use crate::wchar::{wstr, WExt, WString, L}; +use crate::{ + expand::ExpandChars::HomeDirectory, + wchar::{wstr, WExt, WString, L}, +}; /// If the given path looks like it's relative to the working directory, then prepend that working /// directory. This operates on unescaped paths only (so a ~ means a literal ~). @@ -9,7 +12,8 @@ pub fn path_apply_working_directory(path: &wstr, working_directory: &wstr) -> WS // We're going to make sure that if we want to prepend the wd, that the string has no leading // "/". - let prepend_wd = path.as_char_slice()[0] != '/' && path.as_char_slice()[0] != '\u{FDD0}'; + let prepend_wd = + path.as_char_slice()[0] != '/' && path.as_char_slice()[0] != HomeDirectory.into(); if !prepend_wd { // No need to prepend the wd, so just return the path we were given. diff --git a/fish-rust/src/wchar.rs b/fish-rust/src/wchar.rs index f32a05c94..a01db1782 100644 --- a/fish-rust/src/wchar.rs +++ b/fish-rust/src/wchar.rs @@ -30,6 +30,25 @@ pub use widestring_suffix::widestrs; /// Pull in our extensions. 
pub use crate::wchar_ext::{CharPrefixSuffix, WExt}; +// Use Unicode "non-characters" for internal characters as much as we can. This +// gives us 32 "characters" for internal use that we can guarantee should not +// appear in our input stream. See http://www.unicode.org/faq/private_use.html. +pub const RESERVED_CHAR_BASE: char = '\u{FDD0}'; +pub const RESERVED_CHAR_END: char = '\u{FDF0}'; +// Split the available non-character values into two ranges to ensure there are +// no conflicts among the places we use these special characters. +pub const EXPAND_RESERVED_BASE: char = RESERVED_CHAR_BASE; +pub const EXPAND_RESERVED_END: char = match char::from_u32(EXPAND_RESERVED_BASE as u32 + 16u32) { + Some(c) => c, + None => panic!("private use codepoint in expansion region should be valid char"), +}; +pub const WILDCARD_RESERVED_BASE: char = EXPAND_RESERVED_END; +pub const WILDCARD_RESERVED_END: char = match char::from_u32(WILDCARD_RESERVED_BASE as u32 + 16u32) +{ + Some(c) => c, + None => panic!("private use codepoint in wildcard region should be valid char"), +}; + // These are in the Unicode private-use range. We really shouldn't use this // range but have little choice in the matter given how our lexer/parser works. // We can't use non-characters for these two ranges because there are only 66 of @@ -42,8 +61,11 @@ pub use crate::wchar_ext::{CharPrefixSuffix, WExt}; // Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know // of at least one use of a codepoint in that range: the Apple symbol (0xF8FF) // on Mac OS X. See http://www.unicode.org/faq/private_use.html. -const ENCODE_DIRECT_BASE: u32 = 0xF600; -const ENCODE_DIRECT_END: u32 = ENCODE_DIRECT_BASE + 256; +const ENCODE_DIRECT_BASE: char = '\u{F600}'; +const ENCODE_DIRECT_END: char = match char::from_u32(ENCODE_DIRECT_BASE as u32 + 256) { + Some(c) => c, + None => panic!("private use codepoint in encode direct region should be valid char"), +}; /// Encode a literal byte in a UTF-32 character. 
This is required for e.g. the echo builtin, whose /// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g. @@ -53,6 +75,6 @@ const ENCODE_DIRECT_END: u32 = ENCODE_DIRECT_BASE + 256; /// /// See https://github.com/fish-shell/fish-shell/issues/1894. pub fn wchar_literal_byte(byte: u8) -> char { - char::from_u32(ENCODE_DIRECT_BASE + u32::from(byte)) + char::from_u32(u32::from(ENCODE_DIRECT_BASE) + u32::from(byte)) .expect("private-use codepoint should be valid char") }