Improve codegen of line_offset_of_character_at_offset

This function is a hotspot, but it has inefficient codegen: 1. For whatever reason, the chars() iterator of wstr is slower than that of a slice. Use the slice. 2. Unnecessary overflow checks were preventing vectorization. Switch to a more optimized implementation. This improves aliases benchmark time by about 9%.
2024-11-23 06:41:42 +08:00 · 2024-01-13 17:06:35 -08:00 · 2024-01-13 17:06:35 -08:00 · 4ea222cd34
commit 4ea222cd34
parent e8ebeedfca
2 changed files with 28 additions and 9 deletions
--- a/src/parse_execution.rs
+++ b/src/parse_execution.rs
@ -48,6 +48,7 @@ use crate::tokenizer::{variable_assignment_equals_pos, PipeOrRedir};
 use crate::trace::{trace_if_enabled, trace_if_enabled_with_args};
 use crate::wchar::{wstr, WString, L};
 use crate::wchar_ext::WExt;
+use crate::wcstringutil::count_newlines;
 use crate::wildcard::wildcard_match;
 use crate::wutil::{wgettext, wgettext_maybe_fmt};
 use libc::{c_int, ENOTDIR, EXIT_SUCCESS, STDERR_FILENO, STDOUT_FILENO};
@ -1946,19 +1947,13 @@ impl<'a> ParseExecutionContext {
        let mut cached_lineno = self.cached_lineno.borrow_mut();
        if offset > cached_lineno.offset {
            // Add one for every newline we find in the range [cached_lineno.offset, offset).
+            // The codegen is substantially better when using a char slice than the char iterator.
            let offset = std::cmp::min(offset, src.len());
-            let i = src[cached_lineno.offset..offset]
-                .chars()
-                .filter(|c| *c == '\n')
-                .count();
-            cached_lineno.count += i;
+            cached_lineno.count += count_newlines(&src[cached_lineno.offset..offset]);
            cached_lineno.offset = offset;
        } else if offset < cached_lineno.offset {
            // Subtract one for every newline we find in the range [offset, cached_range.start).
-            cached_lineno.count -= src[offset..cached_lineno.offset]
-                .chars()
-                .filter(|c| *c == '\n')
-                .count();
+            cached_lineno.count -= count_newlines(&src[offset..cached_lineno.offset]);
            cached_lineno.offset = offset;
        }
        cached_lineno.count
--- a/src/wcstringutil.rs
+++ b/src/wcstringutil.rs
@ -8,6 +8,20 @@ use crate::libc::MB_CUR_MAX;
 use crate::wchar::{decode_byte_from_char, prelude::*};
 use crate::wutil::encoding::{wcrtomb, zero_mbstate, AT_LEAST_MB_LEN_MAX};

+/// Return the number of newlines in a string.
+pub fn count_newlines(s: &wstr) -> usize {
+    // This is a performance-sensitive function.
+    // The native filter().count() produces sub-optimal codegen because of overflow checks,
+    // which we currently enable in release mode. Implement it more efficiently.
+    let mut count: usize = 0;
+    for c in s.as_char_slice() {
+        if *c == '\n' {
+            count = count.wrapping_add(1);
+        }
+    }
+    count
+}
+
 /// Test if a string prefixes another without regard to case. Returns true if a is a prefix of b.
 pub fn string_prefixes_string_case_insensitive(proposed_prefix: &wstr, value: &wstr) -> bool {
    let prefix_size = proposed_prefix.len();
@ -643,3 +657,13 @@ fn test_line_iterator() {
        ]
    );
 }
+
+#[test]
+fn test_count_newlines() {
+    assert_eq!(count_newlines(L!("")), 0);
+    assert_eq!(count_newlines(L!("foo")), 0);
+    assert_eq!(count_newlines(L!("foo\nbar")), 1);
+    assert_eq!(count_newlines(L!("foo\nbar\nbaz")), 2);
+    assert_eq!(count_newlines(L!("\n")), 1);
+    assert_eq!(count_newlines(L!("\n\n")), 2);
+}