Improve codegen of line_offset_of_character_at_offset

This function is a hotspot, but it has inefficient codegen:

1. For whatever reason, the chars() iterator of wstr is slower
   than that of a slice. Use the slice.

2. Unnecessary overflow checks were preventing vectorization.

Switch to a more optimized implementation.

This improves aliases benchmark time by about 9%.
This commit is contained in:
ridiculousfish 2024-01-13 17:06:35 -08:00
parent e8ebeedfca
commit 4ea222cd34
2 changed files with 28 additions and 9 deletions

View File

@ -48,6 +48,7 @@ use crate::tokenizer::{variable_assignment_equals_pos, PipeOrRedir};
use crate::trace::{trace_if_enabled, trace_if_enabled_with_args};
use crate::wchar::{wstr, WString, L};
use crate::wchar_ext::WExt;
use crate::wcstringutil::count_newlines;
use crate::wildcard::wildcard_match;
use crate::wutil::{wgettext, wgettext_maybe_fmt};
use libc::{c_int, ENOTDIR, EXIT_SUCCESS, STDERR_FILENO, STDOUT_FILENO};
@ -1946,19 +1947,13 @@ impl<'a> ParseExecutionContext {
let mut cached_lineno = self.cached_lineno.borrow_mut();
if offset > cached_lineno.offset {
// Add one for every newline we find in the range [cached_lineno.offset, offset).
// The codegen is substantially better when using a char slice than the char iterator.
let offset = std::cmp::min(offset, src.len());
let i = src[cached_lineno.offset..offset]
.chars()
.filter(|c| *c == '\n')
.count();
cached_lineno.count += i;
cached_lineno.count += count_newlines(&src[cached_lineno.offset..offset]);
cached_lineno.offset = offset;
} else if offset < cached_lineno.offset {
// Subtract one for every newline we find in the range [offset, cached_range.start).
cached_lineno.count -= src[offset..cached_lineno.offset]
.chars()
.filter(|c| *c == '\n')
.count();
cached_lineno.count -= count_newlines(&src[offset..cached_lineno.offset]);
cached_lineno.offset = offset;
}
cached_lineno.count

View File

@ -8,6 +8,20 @@ use crate::libc::MB_CUR_MAX;
use crate::wchar::{decode_byte_from_char, prelude::*};
use crate::wutil::encoding::{wcrtomb, zero_mbstate, AT_LEAST_MB_LEN_MAX};
/// Return the number of newlines in a string.
pub fn count_newlines(s: &wstr) -> usize {
// This is a performance-sensitive function.
// The native filter().count() produces sub-optimal codegen because of overflow checks,
// which we currently enable in release mode. Implement it more efficiently.
let mut count: usize = 0;
for c in s.as_char_slice() {
if *c == '\n' {
count = count.wrapping_add(1);
}
}
count
}
/// Test if a string prefixes another without regard to case. Returns true if a is a prefix of b.
pub fn string_prefixes_string_case_insensitive(proposed_prefix: &wstr, value: &wstr) -> bool {
let prefix_size = proposed_prefix.len();
@ -643,3 +657,13 @@ fn test_line_iterator() {
]
);
}
#[test]
fn test_count_newlines() {
assert_eq!(count_newlines(L!("")), 0);
assert_eq!(count_newlines(L!("foo")), 0);
assert_eq!(count_newlines(L!("foo\nbar")), 1);
assert_eq!(count_newlines(L!("foo\nbar\nbaz")), 2);
assert_eq!(count_newlines(L!("\n")), 1);
assert_eq!(count_newlines(L!("\n\n")), 2);
}