/// Rounds the pointer \p in up to the next address that is a multiple of \p alignment.
///
/// \param in         pointer to round up; it is only inspected, never dereferenced.
/// \param alignment  required alignment in bytes. The mask arithmetic below is only correct when
///                   this is a power of two.
/// \return \p in itself when it is already suitably aligned, otherwise the first aligned address
///         above it. The result may lie past the end of the buffer \p in points into, so callers
///         must bounds-check before dereferencing.
///
/// Not declared constexpr: the pointer <-> integer casts can never be evaluated at compile time.
template <typename T>
inline __attribute__((always_inline)) const T *aligned_start(const T *in, uint8_t alignment) {
    const uintptr_t mask = static_cast<uintptr_t>(alignment) - 1;
    const uintptr_t addr = reinterpret_cast<uintptr_t>(in);
    return reinterpret_cast<const T *>((addr + mask) & ~mask);
}

/// Returns true if none of the \p len bytes starting at \p in has its high bit set, i.e. the
/// buffer is pure 7-bit ASCII. An empty buffer is ASCII. Embedded NUL bytes are allowed.
///
/// This is the fast-path test for str2wcs ("Add str2wcs optimization for ascii-only inputs"): per
/// the originating commit, skipping the multibyte conversion (__gconv_transform_utf8_internal) for
/// ASCII-only input made `history` 2x to 3x faster on large, mostly-ASCII history files under
/// glibc 2.31 on AMD64, and in the worst case the scan leaves the string in the CPU cache
/// (assuming realistic input sizes).
///
/// The scan deliberately has no early exit: it ORs everything together and tests the high bits
/// once at the end, which keeps the loops branch-free.
///
/// \param in   start of the buffer; may be null only when \p len is 0.
/// \param len  number of bytes to examine. Taken as size_t so that the caller's size_t length is
///             not narrowed (a length above INT_MAX used to turn negative and skip the scan).
inline __attribute__((always_inline)) bool is_ascii(const char *in, size_t len) {
    // may_alias makes it legal to read the char buffer through a 64-bit lvalue.
    typedef uint64_t __attribute__((__may_alias__)) word_t;
    constexpr size_t word_size = sizeof(word_t);
    constexpr uint64_t high_bits = 0x8080808080808080ULL;

    // Number of leading bytes before the first 8-byte boundary, clamped to the buffer length so
    // that short inputs are handled entirely by the byte loops.
    const uintptr_t start = reinterpret_cast<uintptr_t>(in);
    const uintptr_t first_word =
        reinterpret_cast<uintptr_t>(aligned_start(in, static_cast<uint8_t>(word_size)));
    const size_t head_len = std::min(len, static_cast<size_t>(first_word - start));

    // Unaligned head, one byte at a time.
    unsigned char byte_bits = 0;
    size_t pos = 0;
    for (; pos < head_len; ++pos) {
        byte_bits |= static_cast<unsigned char>(in[pos]);
    }

    // Aligned body, eight bytes at a time. Only whole words that fit inside the buffer are
    // loaded, so nothing past in + len is ever read.
    uint64_t word_bits = 0;
    for (; len - pos >= word_size; pos += word_size) {
        word_bits |= *reinterpret_cast<const word_t *>(in + pos);
    }

    // Remaining tail (fewer than eight bytes).
    for (; pos < len; ++pos) {
        byte_bits |= static_cast<unsigned char>(in[pos]);
    }

    return (byte_bits & 0x80) == 0 && (word_bits & high_bits) == 0;
}

/// Converts the narrow character string \c in into its wide equivalent, and return it.
/// /// The string may contain embedded nulls. @@ -251,7 +278,7 @@ static wcstring str2wcs_internal(const char *in, const size_t in_len) { result.reserve(in_len); size_t in_pos = 0; - if (MB_CUR_MAX == 1) { + if (MB_CUR_MAX == 1 || is_ascii(in, in_len)) { // Single-byte locale, all values are legal. while (in_pos < in_len) { result.push_back(static_cast(in[in_pos]));