From 8dc3982408df2b45d610e3d12d5e4a671ad06cbc Mon Sep 17 00:00:00 2001 From: Fabian Homborg Date: Wed, 13 Oct 2021 21:09:40 +0200 Subject: [PATCH] Always use LC_NUMERIC=C internally (#8204) In most cases, like math, we want C-semantics for floating point numbers. In particular "." needs to be the decimal separator. Instead, we pay the price in printf, which is currently the sole place to output in locale-specific numbers and attempt to read them and C-style ones. --- cmake/ConfigureChecks.cmake | 2 ++ config_cmake.h.in | 3 +++ src/builtin_math.cpp | 11 ----------- src/builtin_printf.cpp | 20 ++++++++++++++++++++ src/env_dispatch.cpp | 6 ++++++ src/wutil.cpp | 25 +++++++++++++++++++++---- src/wutil.h | 3 +++ 7 files changed, 55 insertions(+), 15 deletions(-) diff --git a/cmake/ConfigureChecks.cmake b/cmake/ConfigureChecks.cmake index af7edd37b..9c75ae668 100644 --- a/cmake/ConfigureChecks.cmake +++ b/cmake/ConfigureChecks.cmake @@ -146,6 +146,8 @@ endif() list(APPEND WCSTOD_L_INCLUDES "wchar.h") check_cxx_symbol_exists(wcstod_l "${WCSTOD_L_INCLUDES}" HAVE_WCSTOD_L) +check_cxx_symbol_exists(uselocale "locale.h;xlocale.h" HAVE_USELOCALE) + cmake_push_check_state() set(CMAKE_EXTRA_INCLUDE_FILES termios.h sys/ioctl.h) check_type_size("struct winsize" STRUCT_WINSIZE LANGUAGE CXX) diff --git a/config_cmake.h.in b/config_cmake.h.in index 46a7499f4..73571fc04 100644 --- a/config_cmake.h.in +++ b/config_cmake.h.in @@ -156,6 +156,9 @@ /* Define if xlocale.h is required for locale_t or wide character support */ #cmakedefine HAVE_XLOCALE_H 1 +/* Define if uselocale is available */ +#cmakedefine HAVE_USELOCALE 1 + /* Enable large inode numbers on Mac OS X 10.5. */ #ifndef _DARWIN_USE_64_BIT_INODE # define _DARWIN_USE_64_BIT_INODE 1 diff --git a/src/builtin_math.cpp b/src/builtin_math.cpp index 9c43bc6a7..275639ec8 100644 --- a/src/builtin_math.cpp +++ b/src/builtin_math.cpp @@ -228,15 +228,6 @@ static int evaluate_expression(const wchar_t *cmd, const parser_t &parser, io_st int retval = STATUS_CMD_OK; te_error_t error; - // Switch locale while computing stuff. - // This means that the "." is always the radix character, - // so numbers work the same across locales. - // - // TODO: Technically this is only needed for *output* currently, - // because we already use wcstod_l while computing, - // but we can't have math print numbers that it won't then also read. - char *saved_locale = strdup(setlocale(LC_NUMERIC, nullptr)); - setlocale(LC_NUMERIC, "C"); double v = te_interp(expression.c_str(), &error); if (error.position == 0) { @@ -266,8 +257,6 @@ static int evaluate_expression(const wchar_t *cmd, const parser_t &parser, io_st streams.err.append_format(L"%*ls%ls\n", error.position - 1, L" ", L"^"); retval = STATUS_CMD_ERROR; } - setlocale(LC_NUMERIC, saved_locale); - free(saved_locale); return retval; } diff --git a/src/builtin_printf.cpp b/src/builtin_printf.cpp index 57434225d..51b9566c8 100644 --- a/src/builtin_printf.cpp +++ b/src/builtin_printf.cpp @@ -751,6 +751,19 @@ maybe_t builtin_printf(parser_t &parser, io_streams_t &streams, const wchar return STATUS_INVALID_ARGS; } +#if defined(HAVE_USELOCALE) + // We use a locale-dependent LC_NUMERIC here, + // unlike the rest of fish (which uses LC_NUMERIC=C). + // Because we do output as well as wcstod (which would have wcstod_l), + // we need to set the locale here. + locale_t prev_locale = uselocale(fish_numeric_locale()); +#else + // NetBSD does not have uselocale, + // so the best we can do is setlocale. + auto prev_locale = setlocale(LC_NUMERIC, nullptr); + setlocale(LC_NUMERIC, ""); +#endif + builtin_printf_state_t state(streams); int args_used; const wchar_t *format = argv[0]; @@ -762,5 +775,12 @@ maybe_t builtin_printf(parser_t &parser, io_streams_t &streams, const wchar argc -= args_used; argv += args_used; } while (args_used > 0 && argc > 0 && !state.early_exit); + +#if defined(HAVE_USELOCALE) + uselocale(prev_locale); +#else + setlocale(LC_NUMERIC, prev_locale); +#endif + return state.exit_code; } diff --git a/src/env_dispatch.cpp b/src/env_dispatch.cpp index 404f939bd..08bc5d6a0 100644 --- a/src/env_dispatch.cpp +++ b/src/env_dispatch.cpp @@ -599,6 +599,12 @@ static void init_locale(const environment_t &vars) { FLOGF(env_locale, L"Failed to fix locale"); } } + // We *always* use a C-locale for numbers, + // because we always want "." except for in printf. + setlocale(LC_NUMERIC, "C"); + + // See that we regenerate our special locale for numbers. + fish_invalidate_numeric_locale(); fish_setlocale(); FLOGF(env_locale, L"init_locale() setlocale(): '%s'", locale); diff --git a/src/wutil.cpp b/src/wutil.cpp index 0611742b8..300da2ecb 100644 --- a/src/wutil.cpp +++ b/src/wutil.cpp @@ -539,6 +539,23 @@ locale_t fish_c_locale() { return loc; } +static bool fish_numeric_locale_is_valid = false; + +void fish_invalidate_numeric_locale() { + fish_numeric_locale_is_valid = false; +} + +locale_t fish_numeric_locale() { + // The current locale, except LC_NUMERIC isn't forced to C. + static locale_t loc = 0; + if (!fish_numeric_locale_is_valid) { + if (loc != 0) freelocale(loc); + auto cur = duplocale(LC_GLOBAL_LOCALE); + loc = newlocale(LC_NUMERIC_MASK, "", cur); + fish_numeric_locale_is_valid = true; + } + return loc; +} /// Like fish_wcstol(), but fails on a value outside the range of an int. /// /// This is needed because BSD and GNU implementations differ in several ways that make it really @@ -676,14 +693,14 @@ unsigned long long fish_wcstoull(const wchar_t *str, const wchar_t **endptr, int /// Like wcstod(), but wcstod() is enormously expensive on some platforms so this tries to have a /// fast path. double fish_wcstod(const wchar_t *str, wchar_t **endptr) { + // We can ignore the locale because we use LC_NUMERIC=C! // The "fast path." If we're all ASCII and we fit inline, use strtod(). char narrow[128]; size_t len = std::wcslen(str); size_t len_plus_0 = 1 + len; - auto is_digit = [](wchar_t c) { return '0' <= c && c <= '9'; }; - if (len_plus_0 <= sizeof narrow && std::all_of(str, str + len, is_digit)) { + auto is_ascii = [](wchar_t c) { return 0 <= c && c <= 127; }; + if (len_plus_0 <= sizeof narrow && std::all_of(str, str + len, is_ascii)) { // Fast path. Copy the string into a local buffer and run strtod() on it. - // We can ignore the locale-taking version because we are limited to ASCII digits. std::copy(str, str + len_plus_0, narrow); char *narrow_endptr = nullptr; double ret = strtod(narrow, endptr ? &narrow_endptr : nullptr); @@ -693,7 +710,7 @@ double fish_wcstod(const wchar_t *str, wchar_t **endptr) { } return ret; } - return wcstod_l(str, endptr, fish_c_locale()); + return std::wcstod(str, endptr); } file_id_t file_id_t::from_stat(const struct stat &buf) { diff --git a/src/wutil.h b/src/wutil.h index 21f54d860..07be9b8ef 100644 --- a/src/wutil.h +++ b/src/wutil.h @@ -124,6 +124,9 @@ int fish_wcswidth(const wcstring &str); // returns an immortal locale_t corresponding to the C locale. locale_t fish_c_locale(); +void fish_invalidate_numeric_locale(); +locale_t fish_numeric_locale(); + int fish_wcstoi(const wchar_t *str, const wchar_t **endptr = nullptr, int base = 10); long fish_wcstol(const wchar_t *str, const wchar_t **endptr = nullptr, int base = 10); long long fish_wcstoll(const wchar_t *str, const wchar_t **endptr = nullptr, int base = 10);