fish-shell/fish-rust/src/abbrs.rs
ridiculousfish b8189da011 Use the rust-pcre2 crate for regex
This adds support for our (forked) rust-pcre2 crate.
2023-03-25 17:01:50 -07:00

473 lines
14 KiB
Rust

#![allow(clippy::extra_unused_lifetimes, clippy::needless_lifetimes)]
use std::{
collections::HashSet,
sync::{Arc, Mutex, MutexGuard},
};
use crate::wchar::{wstr, WString};
use crate::{
wchar::L,
wchar_ffi::{WCharFromFFI, WCharToFFI},
};
use cxx::CxxWString;
use once_cell::sync::Lazy;
use crate::abbrs::abbrs_ffi::abbrs_replacer_t;
use crate::parse_constants::SourceRange;
use pcre2::utf32::Regex;
use self::abbrs_ffi::{abbreviation_t, abbrs_position_t, abbrs_replacement_t};
// cxx bridge describing the abbreviation types and functions shared between Rust and C++.
#[cxx::bridge]
mod abbrs_ffi {
// Declarations that live on the C++ side.
extern "C++" {
include!("re.h");
include!("parse_constants.h");
// Binds the bridge's SourceRange to the existing Rust definition in parse_constants.
type SourceRange = crate::parse_constants::SourceRange;
}
// FFI counterpart of `Position`; converted with `From<abbrs_position_t> for Position`.
enum abbrs_position_t {
command,
anywhere,
}
// FFI counterpart of `Replacer`.
struct abbrs_replacer_t {
// Replacement text, or a function name when `is_function` is set.
replacement: UniquePtr<CxxWString>,
is_function: bool,
// Only meaningful when `has_cursor_marker` is true; otherwise a default (empty) string.
set_cursor_marker: UniquePtr<CxxWString>,
has_cursor_marker: bool,
}
// FFI counterpart of `Replacement`.
struct abbrs_replacement_t {
range: SourceRange,
text: UniquePtr<CxxWString>,
// Only meaningful when `has_cursor` is true; otherwise 0.
cursor: usize,
has_cursor: bool,
}
// Flattened view of an `Abbreviation`, as produced by `abbrs_list`.
struct abbreviation_t {
key: UniquePtr<CxxWString>,
replacement: UniquePtr<CxxWString>,
is_regex: bool,
}
// Rust items exposed to C++. `cxx_name` gives the name seen from C++.
extern "Rust" {
// Opaque handle wrapping a lock guard on the global abbreviation set.
type GlobalAbbrs<'a>;
#[cxx_name = "abbrs_list"]
fn abbrs_list_ffi() -> Vec<abbreviation_t>;
#[cxx_name = "abbrs_match"]
fn abbrs_match_ffi(token: &CxxWString, position: abbrs_position_t)
-> Vec<abbrs_replacer_t>;
#[cxx_name = "abbrs_has_match"]
fn abbrs_has_match_ffi(token: &CxxWString, position: abbrs_position_t) -> bool;
#[cxx_name = "abbrs_replacement_from"]
fn abbrs_replacement_from_ffi(
range: SourceRange,
text: &CxxWString,
set_cursor_marker: &CxxWString,
has_cursor_marker: bool,
) -> abbrs_replacement_t;
// Locks the global set; the lock is held until the returned box is dropped.
#[cxx_name = "abbrs_get_set"]
unsafe fn abbrs_get_set_ffi<'a>() -> Box<GlobalAbbrs<'a>>;
// Adds (or replaces, by name) an abbreviation in the locked global set.
unsafe fn add<'a>(
self: &mut GlobalAbbrs<'_>,
name: &CxxWString,
key: &CxxWString,
replacement: &CxxWString,
position: abbrs_position_t,
from_universal: bool,
);
// Removes the abbreviation with the given name from the locked global set, if present.
unsafe fn erase<'a>(self: &mut GlobalAbbrs<'_>, name: &CxxWString);
}
}
static abbrs: Lazy<Arc<Mutex<AbbreviationSet>>> =
Lazy::new(|| Arc::new(Mutex::new(Default::default())));
/// Run `cb` with shared access to the global abbreviation set and return its result.
///
/// The global mutex stays locked for the whole duration of `cb`, so `cb` must not try to
/// lock the global set again.
///
/// # Panics
/// Panics if the global mutex is poisoned.
pub fn with_abbrs<R>(cb: impl FnOnce(&AbbreviationSet) -> R) -> R {
    let guard = abbrs.lock().unwrap();
    let set: &AbbreviationSet = &guard;
    cb(set)
}
/// Run `cb` with exclusive access to the global abbreviation set and return its result.
///
/// The global mutex stays locked for the whole duration of `cb`, so `cb` must not try to
/// lock the global set again.
///
/// # Panics
/// Panics if the global mutex is poisoned.
pub fn with_abbrs_mut<R>(cb: impl FnOnce(&mut AbbreviationSet) -> R) -> R {
    let mut guard = abbrs.lock().unwrap();
    let set: &mut AbbreviationSet = &mut guard;
    cb(set)
}
/// Controls where in the command line abbreviations may expand.
///
/// Equality is total for this field-less enum, so `Eq` is derived alongside `PartialEq`;
/// this lets `Position` be used wherever an `Eq` bound is required.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Position {
    /// Expand only when the token is in command position.
    Command,
    /// Expand in any token.
    Anywhere,
}
impl From<abbrs_position_t> for Position {
    /// Convert the FFI position value into the native [`Position`].
    ///
    /// # Panics
    /// Panics if `value` is neither `command` nor `anywhere`; the catch-all arm covers any
    /// other value the FFI type may carry.
    fn from(value: abbrs_position_t) -> Self {
        match value {
            abbrs_position_t::command => Self::Command,
            abbrs_position_t::anywhere => Self::Anywhere,
            _ => panic!("invalid abbrs_position_t"),
        }
    }
}
/// A single abbreviation: a named rule that replaces a matching token with other text.
#[derive(Debug)]
pub struct Abbreviation {
/// Abbreviation name. This is unique within the abbreviation set.
/// This is used as the token to match unless we have a regex.
pub name: WString,
/// The key (recognized token) - either a literal or a regex pattern.
pub key: WString,
/// If set, use this regex to recognize tokens.
/// If unset, the key is to be interpreted literally.
/// Note that the fish interface enforces that regexes match the entire token;
/// we accomplish this by surrounding the regex in ^ and $.
pub regex: Option<Regex>,
/// Replacement string.
pub replacement: WString,
/// If set, the replacement is a function name.
pub replacement_is_function: bool,
/// Expansion position.
pub position: Position,
/// If set, then move the cursor to the first instance of this string in the expansion.
pub set_cursor_marker: Option<WString>,
/// Mark if we came from a universal variable.
pub from_universal: bool,
}
impl Abbreviation {
    /// Build a literal abbreviation: no regex, no cursor marker, and a replacement that is
    /// plain text rather than a function name.
    ///
    /// `from_universal` records whether the abbreviation is derived from a universal variable.
    pub fn new(
        name: WString,
        key: WString,
        replacement: WString,
        position: Position,
        from_universal: bool,
    ) -> Self {
        Self {
            name,
            key,
            replacement,
            position,
            from_universal,
            regex: None,
            replacement_is_function: false,
            set_cursor_marker: None,
        }
    }

    /// Returns true if this abbreviation recognizes tokens with a regex.
    pub fn is_regex(&self) -> bool {
        self.regex.is_some()
    }

    /// Returns true if this abbreviation applies to `token` found at `position`.
    ///
    /// The position is checked first; after that the token is tested against the regex if
    /// there is one, and otherwise compared with the key for equality.
    ///
    /// # Panics
    /// Panics if the regex engine reports an error while matching.
    pub fn matches(&self, token: &wstr, position: Position) -> bool {
        self.matches_position(position)
            && match self.regex.as_ref() {
                None => self.key == token,
                Some(re) => re
                    .is_match(token.as_char_slice())
                    .expect("regex match should not error"),
            }
    }

    /// Returns true if this abbreviation may expand at `position`.
    /// An abbreviation declared for `Anywhere` expands at every position.
    fn matches_position(&self, position: Position) -> bool {
        match self.position {
            Position::Anywhere => true,
            own => own == position,
        }
    }
}
/// The result of an abbreviation expansion.
/// One of these is produced for every abbreviation that matches a token.
pub struct Replacer {
/// The string to use to replace the incoming token, either literal or as a function name.
replacement: WString,
/// If true, treat 'replacement' as the name of a function.
is_function: bool,
/// If set, the cursor should be moved to the first instance of this string in the expansion.
set_cursor_marker: Option<WString>,
}
impl From<Replacer> for abbrs_replacer_t {
fn from(value: Replacer) -> Self {
let has_cursor_marker = value.set_cursor_marker.is_some();
Self {
replacement: value.replacement.to_ffi(),
is_function: value.is_function,
set_cursor_marker: value.set_cursor_marker.unwrap_or_default().to_ffi(),
has_cursor_marker,
}
}
}
/// A concrete edit to the command line produced by applying a replacer to a matched token.
struct Replacement {
/// The original range of the token in the command line.
range: SourceRange,
/// The string to replace with.
text: WString,
/// The new cursor location, or none to use the default.
/// `Replacement::from` computes this as the offset of the cursor marker within the
/// replacement text plus the start of `range`.
cursor: Option<usize>,
}
impl Replacement {
    /// Construct a replacement from a replacer.
    ///
    /// The \p range is the range of the text matched by the replacer in the command line.
    /// The text is passed in separately as it may be the output of the replacer's function.
    ///
    /// If \p set_cursor_marker is given and occurs in \p text, its first occurrence is removed
    /// from the text and the cursor is set to that occurrence's offset plus the start of
    /// \p range. If the marker does not occur, the text is left untouched and no cursor is set.
    ///
    /// An empty marker is treated as if no marker had been given: there is nothing to search
    /// for, and `slice::windows` panics when asked for zero-sized windows.
    fn from(range: SourceRange, mut text: WString, set_cursor_marker: Option<WString>) -> Self {
        let mut cursor = None;
        // Discard empty markers up front so that `windows` is never called with size 0.
        if let Some(marker) = set_cursor_marker.filter(|marker| !marker.is_empty()) {
            let marker_len = marker.len();
            let matched = text
                .as_char_slice()
                .windows(marker_len)
                .position(|window| window == marker.as_char_slice());
            if let Some(start) = matched {
                // Strip the marker itself; the cursor lands where the marker used to begin.
                text.replace_range(start..(start + marker_len), L!(""));
                cursor = Some(start + range.start as usize);
            }
        }
        Self {
            range,
            text,
            cursor,
        }
    }
}
/// An ordered collection of abbreviations with unique names.
#[derive(Default)]
pub struct AbbreviationSet {
/// List of abbreviations, in definition order.
abbrs: Vec<Abbreviation>,
/// Set of used abbreviation names.
/// This is to avoid a linear scan when adding new abbreviations.
used_names: HashSet<WString>,
}
impl AbbreviationSet {
    /// Collect a replacer for every abbreviation that matches `token` at `position`.
    ///
    /// The result is in priority order: abbreviations defined later come first, which is why
    /// the list is walked from the back.
    pub fn r#match(&self, token: &wstr, position: Position) -> Vec<Replacer> {
        self.abbrs
            .iter()
            .rev()
            .filter(|abbr| abbr.matches(token, position))
            .map(|abbr| Replacer {
                replacement: abbr.replacement.clone(),
                is_function: abbr.replacement_is_function,
                set_cursor_marker: abbr.set_cursor_marker.clone(),
            })
            .collect()
    }

    /// Returns true if at least one abbreviation matches `token` at `position`.
    pub fn has_match(&self, token: &wstr, position: Position) -> bool {
        self.abbrs
            .iter()
            .any(|candidate| candidate.matches(token, position))
    }

    /// Add an abbreviation, replacing any existing abbreviation of the same name.
    /// The new abbreviation always ends up last in definition order.
    ///
    /// # Panics
    /// Panics if the abbreviation's name is empty.
    pub fn add(&mut self, abbr: Abbreviation) {
        assert!(!abbr.name.is_empty(), "Invalid name");
        let name_was_taken = !self.used_names.insert(abbr.name.clone());
        if name_was_taken {
            // The name set says an entry exists; locate it with a linear scan and drop it.
            let stale = self
                .abbrs
                .iter()
                .position(|existing| existing.name == abbr.name)
                .expect("Abbreviation not found though its name was present");
            self.abbrs.remove(stale);
        }
        self.abbrs.push(abbr);
    }

    /// Rename the abbreviation called `old_name` to `new_name`, keeping its place in the list.
    ///
    /// # Panics
    /// Panics if `old_name` is not in use or `new_name` is already in use; callers should
    /// check both with `has_name()` beforehand.
    pub fn rename(&mut self, old_name: &wstr, new_name: &wstr) {
        // Both set operations run before the assertion, exactly one after the other.
        let erased = self.used_names.remove(old_name);
        let inserted = self.used_names.insert(new_name.to_owned());
        assert!(
            erased && inserted,
            "Old name not found or new name already present"
        );
        let target = self.abbrs.iter_mut().find(|abbr| abbr.name == old_name);
        if let Some(abbr) = target {
            abbr.name = new_name.to_owned();
        }
    }

    /// Erase the abbreviation called `name`.
    /// Returns true if it was erased, false if no abbreviation has that name.
    ///
    /// # Panics
    /// Panics if the name is recorded as used but no abbreviation in the list carries it.
    pub fn erase(&mut self, name: &wstr) -> bool {
        if !self.used_names.remove(name) {
            return false;
        }
        // Search from the back of the list.
        let found = self.abbrs.iter().rposition(|abbr| abbr.name == name);
        match found {
            Some(index) => {
                self.abbrs.remove(index);
                true
            }
            None => panic!("Unable to find named abbreviation"),
        }
    }

    /// Returns true if an abbreviation with the given name exists.
    pub fn has_name(&self, name: &wstr) -> bool {
        self.used_names.contains(name)
    }

    /// Returns the abbreviations as a slice, in definition order.
    pub fn list(&self) -> &[Abbreviation] {
        self.abbrs.as_slice()
    }
}
/// \return the list of replacers for an input token, in priority order, using the global set.
/// The \p position is given to describe where the token was found.
fn abbrs_match_ffi(token: &CxxWString, position: abbrs_position_t) -> Vec<abbrs_replacer_t> {
with_abbrs(|set| set.r#match(&token.from_ffi(), position.into()))
.into_iter()
.map(|r| r.into())
.collect()
}
/// FFI entry point: returns true if the global abbreviation set has at least one
/// abbreviation matching `token` at `position`.
fn abbrs_has_match_ffi(token: &CxxWString, position: abbrs_position_t) -> bool {
    with_abbrs(|set| {
        let wide_token = token.from_ffi();
        set.has_match(&wide_token, position.into())
    })
}
/// FFI entry point: snapshot the global abbreviation set as a list of FFI records,
/// in definition order. Each record carries the key, the replacement and the regex flag.
fn abbrs_list_ffi() -> Vec<abbreviation_t> {
    with_abbrs(|set| {
        set.list()
            .iter()
            .map(|abbr| abbreviation_t {
                key: abbr.key.to_ffi(),
                replacement: abbr.replacement.to_ffi(),
                is_regex: abbr.is_regex(),
            })
            .collect::<Vec<_>>()
    })
}
/// FFI entry point: lock the global abbreviation set and hand the guard to the caller.
/// The set remains locked until the returned box is dropped.
///
/// # Panics
/// Panics if the global mutex is poisoned.
fn abbrs_get_set_ffi<'a>() -> Box<GlobalAbbrs<'a>> {
    Box::new(GlobalAbbrs {
        g: abbrs.lock().unwrap(),
    })
}
fn abbrs_replacement_from_ffi(
range: SourceRange,
text: &CxxWString,
set_cursor_marker: &CxxWString,
has_cursor_marker: bool,
) -> abbrs_replacement_t {
let cursor_marker = if has_cursor_marker {
Some(set_cursor_marker.from_ffi())
} else {
None
};
let replacement = Replacement::from(range, text.from_ffi(), cursor_marker);
abbrs_replacement_t {
range,
text: replacement.text.to_ffi(),
cursor: replacement.cursor.unwrap_or_default(),
has_cursor: replacement.cursor.is_some(),
}
}
/// Handle exposed over FFI that owns a lock guard on an abbreviation set.
/// `abbrs_get_set_ffi` builds it from the global set's mutex.
pub struct GlobalAbbrs<'a> {
// The lock is released when this guard is dropped along with the handle.
g: MutexGuard<'a, AbbreviationSet>,
}
impl<'a> GlobalAbbrs<'a> {
fn add(
&mut self,
name: &CxxWString,
key: &CxxWString,
replacement: &CxxWString,
position: abbrs_position_t,
from_universal: bool,
) {
self.g.add(Abbreviation::new(
name.from_ffi(),
key.from_ffi(),
replacement.from_ffi(),
position.into(),
from_universal,
));
}
fn erase(&mut self, name: &CxxWString) {
self.g.erase(&name.from_ffi());
}
}
use crate::ffi_tests::add_test;
// Exercises add / has_name / rename / erase on the global abbreviation set.
// Note that the entries "foo", "gx" and "yin" are still in the global set when the test ends.
add_test!("rename_abbrs", || {
use crate::wchar::wstr;
use crate::{
abbrs::{Abbreviation, Position},
wchar::L,
};
with_abbrs_mut(|abbrs_g| {
// Helper: add a literal abbreviation whose key is the same as its name.
let mut add = |name: &wstr, repl: &wstr, position: Position| {
abbrs_g.add(Abbreviation {
name: name.into(),
key: name.into(),
regex: None,
replacement: repl.into(),
replacement_is_function: false,
position,
set_cursor_marker: None,
from_universal: false,
})
};
add(L!("gc"), L!("git checkout"), Position::Command);
add(L!("foo"), L!("bar"), Position::Command);
add(L!("gx"), L!("git checkout"), Position::Command);
add(L!("yin"), L!("yang"), Position::Anywhere);
// Before the rename only the old name is known.
assert!(!abbrs_g.has_name(L!("gcc")));
assert!(abbrs_g.has_name(L!("gc")));
abbrs_g.rename(L!("gc"), L!("gcc"));
// After the rename only the new name is known.
assert!(abbrs_g.has_name(L!("gcc")));
assert!(!abbrs_g.has_name(L!("gc")));
// Erasing succeeds exactly once, and only under the new name.
assert!(!abbrs_g.erase(L!("gc")));
assert!(abbrs_g.erase(L!("gcc")));
assert!(!abbrs_g.erase(L!("gcc")));
})
});