mirror of
https://github.com/fish-shell/fish-shell.git
synced 2025-02-12 23:39:45 +08:00
Further optimize unescape_yaml_fish_2_0()
This hot function dominates the flamegraphs for the completions thread, and any optimizations are worthwhile. A variety of different approaches were tested and benchmarked against real-world fish-history file inputs and this is the one that won out across all rustc target-cpu variations tried. Benchmarks and code at https://github.com/mqudsi/fish-yaml-unescape-benchmark
This commit is contained in:
parent
9ab1ec2a9e
commit
bcb1e2ed85
|
@ -250,43 +250,64 @@ fn escape_yaml_fish_2_0(s: &mut Vec<u8>) {
|
||||||
replace_all(s, b"\n", b"\\n"); // replace newline with backslash + literal n
|
replace_all(s, b"\n", b"\\n"); // replace newline with backslash + literal n
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This function is called frequently, so it ought to be fast.
|
#[inline(always)]
|
||||||
fn unescape_yaml_fish_2_0(s: &mut Vec<u8>) {
|
/// Unescapes the fish-specific yaml variant, if it requires it.
|
||||||
let mut cursor = 0;
|
fn maybe_unescape_yaml_fish_2_0(s: &[u8]) -> Cow<[u8]> {
|
||||||
while cursor < s.len() {
|
// This is faster than s.contains(b'\\') and can be auto-vectorized to SIMD. See benchmark note
|
||||||
// Look for a backslash.
|
// on unescape_yaml_fish_2_0().
|
||||||
let Some(backslash) = s[cursor..].iter().position(|&c| c == b'\\') else {
|
if !s
|
||||||
// No more backslashes
|
.into_iter()
|
||||||
break;
|
.copied()
|
||||||
};
|
.fold(false, |acc, b| acc | (b == b'\\'))
|
||||||
|
{
|
||||||
// Add back the start offset
|
return s.into();
|
||||||
let backslash = backslash + cursor;
|
|
||||||
|
|
||||||
// Backslash found. Maybe we'll do something about it.
|
|
||||||
let Some(escaped_char) = s.get(backslash + 1) else {
|
|
||||||
// Backslash was final character
|
|
||||||
break;
|
|
||||||
};
|
|
||||||
|
|
||||||
match escaped_char {
|
|
||||||
b'\\' => {
|
|
||||||
// Two backslashes in a row. Delete the second one.
|
|
||||||
s.remove(backslash + 1);
|
|
||||||
}
|
|
||||||
b'n' => {
|
|
||||||
// Backslash + n. Replace with a newline.
|
|
||||||
s.splice(backslash..(backslash + 2), [b'\n']);
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
// Unknown backslash escape, keep as-is
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// The character at index backslash has now been made whole; start at the next
|
|
||||||
// character.
|
|
||||||
cursor = backslash + 1;
|
|
||||||
}
|
}
|
||||||
|
unescape_yaml_fish_2_0(s).into()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unescapes the fish-specific yaml variant. Use [`maybe_unescape_yaml_fish_2_0()`] if you're not
|
||||||
|
/// positive the input contains an escape.
|
||||||
|
|
||||||
|
// This function is called on every input event and shows up heavily in all flamegraphs.
|
||||||
|
// Various approaches were benchmarked against real-world fish-history files on lines with escapes,
|
||||||
|
// and this implementation (chunk_loop_box) won out. Make changes with care!
|
||||||
|
//
|
||||||
|
// Benchmarks and code: https://github.com/mqudsi/fish-yaml-unescape-benchmark
|
||||||
|
fn unescape_yaml_fish_2_0(s: &[u8]) -> Vec<u8> {
|
||||||
|
// Safety: we never read from this box; we only write to it then read what we've written.
|
||||||
|
// Rationale: This function is in a very hot loop and this benchmarks around 8% faster on
|
||||||
|
// real-world escaped yaml samples from the fish history file.
|
||||||
|
//
|
||||||
|
// This is a very long way around of writing `Box::new_uninit_slice(s.len())`, which
|
||||||
|
// requires the unstablized nightly-only feature new_unit (#63291). It optimizes away.
|
||||||
|
let mut result: Box<[u8]> = std::iter::repeat_with(std::mem::MaybeUninit::uninit)
|
||||||
|
.take(s.len())
|
||||||
|
.map(|b| unsafe { b.assume_init() })
|
||||||
|
.collect();
|
||||||
|
let mut chars = s.iter().copied();
|
||||||
|
let mut src_idx = 0;
|
||||||
|
let mut dst_idx = 0;
|
||||||
|
loop {
|
||||||
|
// While inspecting the asm reveals the compiler does not elide the bounds check from
|
||||||
|
// the writes to `result`, benchmarking shows that using `result.get_unchecked_mut()`
|
||||||
|
// everywhere does not result in a statistically significant improvement to the
|
||||||
|
// performance of this function.
|
||||||
|
let to_copy = chars.by_ref().take_while(|b| *b != b'\\').count();
|
||||||
|
result[dst_idx..][..to_copy].copy_from_slice(&s[src_idx..][..to_copy]);
|
||||||
|
dst_idx += to_copy;
|
||||||
|
|
||||||
|
match chars.next() {
|
||||||
|
Some(b'\\') => result[dst_idx] = b'\\',
|
||||||
|
Some(b'n') => result[dst_idx] = b'\n',
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
src_idx += to_copy + 2;
|
||||||
|
dst_idx += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut result = Vec::from(result);
|
||||||
|
result.truncate(dst_idx);
|
||||||
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read one line, stripping off any newline, returning the number of bytes consumed.
|
/// Read one line, stripping off any newline, returning the number of bytes consumed.
|
||||||
|
@ -320,23 +341,11 @@ fn extract_prefix_and_unescape_yaml(line: &[u8]) -> Option<(Cow<[u8]>, Cow<[u8]>
|
||||||
let value = split.next()?;
|
let value = split.next()?;
|
||||||
debug_assert!(split.next().is_none());
|
debug_assert!(split.next().is_none());
|
||||||
|
|
||||||
let key: Cow<[u8]> = if key.iter().copied().any(|b| b == b'\\') {
|
let key = maybe_unescape_yaml_fish_2_0(key);
|
||||||
let mut key = key.to_owned();
|
|
||||||
unescape_yaml_fish_2_0(&mut key);
|
|
||||||
key.into()
|
|
||||||
} else {
|
|
||||||
key.into()
|
|
||||||
};
|
|
||||||
|
|
||||||
// Skip a space after the : if necessary.
|
// Skip a space after the : if necessary.
|
||||||
let value = trim_start(value);
|
let value = trim_start(value);
|
||||||
let value: Cow<[u8]> = if value.iter().copied().any(|b| b == b'\\') {
|
let value = maybe_unescape_yaml_fish_2_0(value);
|
||||||
let mut value = value.to_owned();
|
|
||||||
unescape_yaml_fish_2_0(&mut value);
|
|
||||||
value.into()
|
|
||||||
} else {
|
|
||||||
value.into()
|
|
||||||
};
|
|
||||||
|
|
||||||
Some((key, value))
|
Some((key, value))
|
||||||
}
|
}
|
||||||
|
@ -399,8 +408,7 @@ fn decode_item_fish_2_0(mut data: &[u8]) -> Option<HistoryItem> {
|
||||||
// We're going to consume this line.
|
// We're going to consume this line.
|
||||||
data = &data[advance..];
|
data = &data[advance..];
|
||||||
|
|
||||||
let mut line = line.to_owned();
|
let line = maybe_unescape_yaml_fish_2_0(&line);
|
||||||
unescape_yaml_fish_2_0(&mut line);
|
|
||||||
paths.push(str2wcstring(&line));
|
paths.push(str2wcstring(&line));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user