From bcb1e2ed85f16c402bf5f688d872486dfa816b36 Mon Sep 17 00:00:00 2001 From: Mahmoud Al-Qudsi Date: Fri, 17 May 2024 16:06:08 -0500 Subject: [PATCH] Further optimize unescape_yaml_fish_2_0() This hot function dominates the flamegraphs for the completions thread, and any optimizations are worthwhile. A variety of different approaches were tested and benchmarked against real-world fish-history file inputs and this is the one that won out across all rustc target-cpu variations tried. Benchmarks and code at https://github.com/mqudsi/fish-yaml-unescape-benchmark --- src/history/file.rs | 112 ++++++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 52 deletions(-) diff --git a/src/history/file.rs b/src/history/file.rs index ea226f417..8b1c4c621 100644 --- a/src/history/file.rs +++ b/src/history/file.rs @@ -250,43 +250,64 @@ fn escape_yaml_fish_2_0(s: &mut Vec) { replace_all(s, b"\n", b"\\n"); // replace newline with backslash + literal n } -/// This function is called frequently, so it ought to be fast. -fn unescape_yaml_fish_2_0(s: &mut Vec) { - let mut cursor = 0; - while cursor < s.len() { - // Look for a backslash. - let Some(backslash) = s[cursor..].iter().position(|&c| c == b'\\') else { - // No more backslashes - break; - }; - - // Add back the start offset - let backslash = backslash + cursor; - - // Backslash found. Maybe we'll do something about it. - let Some(escaped_char) = s.get(backslash + 1) else { - // Backslash was final character - break; - }; - - match escaped_char { - b'\\' => { - // Two backslashes in a row. Delete the second one. - s.remove(backslash + 1); - } - b'n' => { - // Backslash + n. Replace with a newline. - s.splice(backslash..(backslash + 2), [b'\n']); - } - _ => { - // Unknown backslash escape, keep as-is - } - }; - - // The character at index backslash has now been made whole; start at the next - // character. - cursor = backslash + 1; +#[inline(always)] +/// Unescapes the fish-specific yaml variant, if it requires it. +fn maybe_unescape_yaml_fish_2_0(s: &[u8]) -> Cow<[u8]> { + // This is faster than s.contains(b'\\') and can be auto-vectorized to SIMD. See benchmark note + // on unescape_yaml_fish_2_0(). + if !s + .into_iter() + .copied() + .fold(false, |acc, b| acc | (b == b'\\')) + { + return s.into(); } + unescape_yaml_fish_2_0(s).into() +} + +/// Unescapes the fish-specific yaml variant. Use [`maybe_unescape_yaml_fish_2_0()`] if you're not +/// positive the input contains an escape. + +// This function is called on every input event and shows up heavily in all flamegraphs. +// Various approaches were benchmarked against real-world fish-history files on lines with escapes, +// and this implementation (chunk_loop_box) won out. Make changes with care! +// +// Benchmarks and code: https://github.com/mqudsi/fish-yaml-unescape-benchmark +fn unescape_yaml_fish_2_0(s: &[u8]) -> Vec { + // Safety: we never read from this box; we only write to it then read what we've written. + // Rationale: This function is in a very hot loop and this benchmarks around 8% faster on + // real-world escaped yaml samples from the fish history file. + // + // This is a very long way around of writing `Box::new_uninit_slice(s.len())`, which + // requires the unstablized nightly-only feature new_unit (#63291). It optimizes away. + let mut result: Box<[u8]> = std::iter::repeat_with(std::mem::MaybeUninit::uninit) + .take(s.len()) + .map(|b| unsafe { b.assume_init() }) + .collect(); + let mut chars = s.iter().copied(); + let mut src_idx = 0; + let mut dst_idx = 0; + loop { + // While inspecting the asm reveals the compiler does not elide the bounds check from + // the writes to `result`, benchmarking shows that using `result.get_unchecked_mut()` + // everywhere does not result in a statistically significant improvement to the + // performance of this function. + let to_copy = chars.by_ref().take_while(|b| *b != b'\\').count(); + result[dst_idx..][..to_copy].copy_from_slice(&s[src_idx..][..to_copy]); + dst_idx += to_copy; + + match chars.next() { + Some(b'\\') => result[dst_idx] = b'\\', + Some(b'n') => result[dst_idx] = b'\n', + _ => break, + } + src_idx += to_copy + 2; + dst_idx += 1; + } + + let mut result = Vec::from(result); + result.truncate(dst_idx); + result } /// Read one line, stripping off any newline, returning the number of bytes consumed. @@ -320,23 +341,11 @@ fn extract_prefix_and_unescape_yaml(line: &[u8]) -> Option<(Cow<[u8]>, Cow<[u8]> let value = split.next()?; debug_assert!(split.next().is_none()); - let key: Cow<[u8]> = if key.iter().copied().any(|b| b == b'\\') { - let mut key = key.to_owned(); - unescape_yaml_fish_2_0(&mut key); - key.into() - } else { - key.into() - }; + let key = maybe_unescape_yaml_fish_2_0(key); // Skip a space after the : if necessary. let value = trim_start(value); - let value: Cow<[u8]> = if value.iter().copied().any(|b| b == b'\\') { - let mut value = value.to_owned(); - unescape_yaml_fish_2_0(&mut value); - value.into() - } else { - value.into() - }; + let value = maybe_unescape_yaml_fish_2_0(value); Some((key, value)) } @@ -399,8 +408,7 @@ fn decode_item_fish_2_0(mut data: &[u8]) -> Option { // We're going to consume this line. data = &data[advance..]; - let mut line = line.to_owned(); - unescape_yaml_fish_2_0(&mut line); + let line = maybe_unescape_yaml_fish_2_0(&line); paths.push(str2wcstring(&line)); } }