Add option to speed up string match/replace with --max-matches

I've often needed a way to get the last bit of performance out of unwieldy
completions that involve a lot of string processing (apt completions come to
mind, and I ran into it just now while parsing man pages for kldload
completions).

Since we are often looking for just one exact string in the haystack, an
easy optimization here is to introduce a way for `string match` or `string
replace` to exit early after a specific number of matches (typically one) has
been found.

Depending on the size of the input, this can be a huge boon. For example,
parsing the description from FreeBSD kernel module man pages with

    zcat /usr/share/man/man4/zfs.4.gz | string match -m1 '.Nd *'

runs 35% faster with -m1 than without, while processing all files under
/usr/share/man/man4/*.4.gz in a loop (so a mix of files ranging from very short
to moderately long) runs about 10% faster overall with -m1.
This commit is contained in:
Mahmoud Al-Qudsi 2024-06-24 16:21:32 -05:00
parent 204663f1b8
commit 93f8385448
2 changed files with 54 additions and 6 deletions

View File

@ -1,6 +1,7 @@
use fish_printf::sprintf;
use pcre2::utf32::{Captures, Regex, RegexBuilder};
use std::collections::HashMap;
use std::num::NonZeroUsize;
use super::*;
use crate::env::{EnvMode, EnvVar, EnvVarFlags};
@ -19,6 +20,7 @@ pub struct Match<'args> {
regex: bool,
index: bool,
pattern: &'args wstr,
max_matches: Option<NonZeroUsize>,
}
impl<'args> StringSubCommand<'args> for Match<'args> {
@ -31,10 +33,11 @@ impl<'args> StringSubCommand<'args> for Match<'args> {
wopt(L!("quiet"), NoArgument, 'q'),
wopt(L!("regex"), NoArgument, 'r'),
wopt(L!("index"), NoArgument, 'n'),
wopt(L!("max-matches"), RequiredArgument, 'm'),
];
const SHORT_OPTIONS: &'static wstr = L!(":aegivqrn");
const SHORT_OPTIONS: &'static wstr = L!(":aegivqrnm:");
fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> {
fn parse_opt(&mut self, _n: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> {
match c {
'a' => self.all = true,
'e' => self.entire = true,
@ -44,6 +47,22 @@ impl<'args> StringSubCommand<'args> for Match<'args> {
'q' => self.quiet = true,
'r' => self.regex = true,
'n' => self.index = true,
'm' => {
self.max_matches = {
let arg = arg.expect("Option -m requires a non-zero argument");
let max = fish_wcstoul(arg)
.ok()
.and_then(|v| NonZeroUsize::new(v as usize))
.ok_or_else(|| {
StringError::InvalidArgs(wgettext_fmt!(
"%ls: Invalid max matches value '%ls'\n",
_n,
arg
))
})?;
Some(max)
}
}
_ => return Err(StringError::UnknownOption),
}
return Ok(());
@ -113,13 +132,15 @@ impl<'args> StringSubCommand<'args> for Match<'args> {
if let Err(e) = matcher.report_matches(arg.as_ref(), streams) {
FLOG!(error, "pcre2_match unexpected error:", e.error_message())
}
if self.quiet && matcher.match_count() > 0 {
let match_count = matcher.match_count();
if self.quiet && match_count > 0
|| self.max_matches.is_some_and(|m| m.get() == match_count)
{
break;
}
}
let match_count = matcher.match_count();
if let StringMatcher::Regex(RegexMatcher {
first_match_captures,
..

View File

@ -1,3 +1,5 @@
use std::num::NonZeroUsize;
use pcre2::utf32::{Regex, RegexBuilder};
use super::*;
@ -12,6 +14,7 @@ pub struct Replace<'args> {
regex: bool,
pattern: &'args wstr,
replacement: &'args wstr,
max_matches: Option<NonZeroUsize>,
}
impl<'args> StringSubCommand<'args> for Replace<'args> {
@ -21,16 +24,33 @@ impl<'args> StringSubCommand<'args> for Replace<'args> {
wopt(L!("ignore-case"), NoArgument, 'i'),
wopt(L!("quiet"), NoArgument, 'q'),
wopt(L!("regex"), NoArgument, 'r'),
wopt(L!("max-matches"), RequiredArgument, 'm'),
];
const SHORT_OPTIONS: &'static wstr = L!(":afiqr");
const SHORT_OPTIONS: &'static wstr = L!(":afiqrm:");
fn parse_opt(&mut self, _n: &wstr, c: char, _arg: Option<&wstr>) -> Result<(), StringError> {
fn parse_opt(&mut self, _n: &wstr, c: char, arg: Option<&wstr>) -> Result<(), StringError> {
match c {
'a' => self.all = true,
'f' => self.filter = true,
'i' => self.ignore_case = true,
'q' => self.quiet = true,
'r' => self.regex = true,
'm' => {
self.max_matches = {
let arg = arg.expect("Option -m requires a non-zero argument");
let max = fish_wcstoul(arg)
.ok()
.and_then(|v| NonZeroUsize::new(v as usize))
.ok_or_else(|| {
StringError::InvalidArgs(wgettext_fmt!(
"%ls: Invalid max matches value '%ls'\n",
_n,
arg
))
})?;
Some(max)
}
}
_ => return Err(StringError::UnknownOption),
}
return Ok(());
@ -103,6 +123,12 @@ impl<'args> StringSubCommand<'args> for Replace<'args> {
if self.quiet && replace_count > 0 {
return STATUS_CMD_OK;
}
if self
.max_matches
.is_some_and(|max| max.get() == replace_count)
{
return STATUS_CMD_OK;
}
}
if replace_count > 0 {
@ -189,6 +215,7 @@ impl<'args, 'opts> StringReplacer<'args, 'opts> {
opts,
},
};
Ok(r)
}