mirror of
https://github.com/fish-shell/fish-shell.git
synced 2024-11-22 15:37:59 +08:00
Allow underscores as separators in the math
builtin (#8611)
* Implement fish_wcstod_underscores * Add fish_wcstod_underscores unit tests * Switch to using fish_wcstod_underscores in tinyexpr * Add tests for math builtin underscore separator functionality * Add documentation for underscore separators for math builtin * Add a changelog entry for underscore numeric separators
This commit is contained in:
parent
9ada7d9aad
commit
59e50f77bc
|
@ -69,6 +69,7 @@ Scripting improvements
|
|||
- ``$fish_user_paths`` is now automatically deduplicated to fix a common user error of appending to it in config.fish when it is universal (:issue:`8117`). :ref:`fish_add_path <cmd-fish_add_path>` remains the recommended way to add to $PATH.
|
||||
- ``return`` can now be used outside functions. In scripts, it does the same thing as ``exit``. In interactive mode,it sets ``$status`` without exiting (:issue:`8148`).
|
||||
- An oversight prevented all syntax checks from running on commands given to ``fish -c`` (:issue:`8171`). This includes checks such as ``exec`` not being allowed in a pipeline, and ``$$`` not being a valid variable. Generally, another error was generated anyway.
|
||||
- ``math`` now allows you to use underscores as visual separators when writing numbers - for example, ``math 1_000 + 2_000`` returns ``3000`` (:issue:`8496`).
|
||||
- ``fish_indent`` now correctly reformats tokens that end with a backslash followed by a newline (:issue:`8197`).
|
||||
- ``commandline`` gained an ``--is-valid`` option to check if the command line is syntactically valid and complete. This allows basic implementation of transient prompts (:issue:`8142`).
|
||||
- ``commandline`` gained a ``--paging-full-mode`` option to check if the pager is showing all the possible lines (no "7 more rows" message) (:issue:`8485`).
|
||||
|
|
|
@ -64,6 +64,8 @@ Syntax
|
|||
For numbers, ``.`` is always the radix character regardless of locale - ``2.5``, not ``2,5``.
|
||||
Scientific notation (``10e5``) and hexadecimal (``0xFF``) are also available.
|
||||
|
||||
``math`` allows you to use underscores as visual separators for digit grouping. For example, you can write ``1_000_000``, ``0x_89_AB_CD_EF``, and ``1.234_567_e89``.
|
||||
|
||||
Operators
|
||||
---------
|
||||
|
||||
|
|
|
@ -3000,6 +3000,49 @@ static void test_wcstod() {
|
|||
tod_test(L"nope", "nope");
|
||||
}
|
||||
|
||||
static void test_fish_wcstod_underscores() {
|
||||
say(L"Testing fish_wcstod_underscores");
|
||||
|
||||
auto test_case = [](const wchar_t *s, size_t expected_num_consumed) {
|
||||
wchar_t *endptr = nullptr;
|
||||
fish_wcstod_underscores(s, &endptr);
|
||||
size_t num_consumed = (size_t)(endptr - (wchar_t *)s);
|
||||
do_test(expected_num_consumed == num_consumed);
|
||||
};
|
||||
|
||||
test_case(L"123", 3);
|
||||
test_case(L"1_2.3_4.5_6", 7);
|
||||
test_case(L"1_2", 3);
|
||||
test_case(L"1_._2", 5);
|
||||
test_case(L"1__2", 4);
|
||||
test_case(L" 1__2 3__4 ", 5);
|
||||
test_case(L"1_2 3_4", 3);
|
||||
test_case(L" 1", 2);
|
||||
test_case(L" 1_", 3);
|
||||
test_case(L" 1__", 4);
|
||||
test_case(L" 1___", 5);
|
||||
test_case(L" 1___ 2___", 5);
|
||||
test_case(L" _1", 3);
|
||||
test_case(L"1 ", 1);
|
||||
test_case(L"infinity_", 8);
|
||||
test_case(L" -INFINITY", 10);
|
||||
test_case(L"_infinity", 0);
|
||||
test_case(L"nan(0)", 6);
|
||||
test_case(L"nan(0)_", 6);
|
||||
test_case(L"_nan(0)", 0);
|
||||
// We don't strip the underscores in this commented-out test case, and the behavior is
|
||||
// implementation-defined, so we don't actually know how many characters will get consumed. On
|
||||
// macOS the strtod man page only says what happens with an alphanumeric string passed to nan(),
|
||||
// but the strtod consumes all of the characters even if there are underscores.
|
||||
// test_case(L"nan(0_1_2)", 3);
|
||||
test_case(L" _ 1", 0);
|
||||
test_case(L"0x_dead_beef", 12);
|
||||
test_case(L"None", 0);
|
||||
test_case(L" None", 0);
|
||||
test_case(L"Also none", 0);
|
||||
test_case(L" Also none", 0);
|
||||
}
|
||||
|
||||
static void test_dup2s() {
|
||||
using std::make_shared;
|
||||
io_chain_t chain;
|
||||
|
@ -6801,6 +6844,7 @@ static const test_t s_tests[]{
|
|||
{TEST_GROUP("abbreviations"), test_abbreviations},
|
||||
{TEST_GROUP("builtins/test"), test_test},
|
||||
{TEST_GROUP("wcstod"), test_wcstod},
|
||||
{TEST_GROUP("fish_wcstod_underscores"), test_fish_wcstod_underscores},
|
||||
{TEST_GROUP("dup2s"), test_dup2s},
|
||||
{TEST_GROUP("dup2s"), test_dup2s_fd_for_target_fd},
|
||||
{TEST_GROUP("path"), test_path},
|
||||
|
|
|
@ -272,7 +272,7 @@ static void next_token(state *s) {
|
|||
|
||||
/* Try reading a number. */
|
||||
if ((s->next[0] >= '0' && s->next[0] <= '9') || s->next[0] == '.') {
|
||||
s->value = fish_wcstod(s->next, const_cast<wchar_t **>(&s->next));
|
||||
s->value = fish_wcstod_underscores(s->next, const_cast<wchar_t **>(&s->next));
|
||||
s->type = TOK_NUMBER;
|
||||
} else {
|
||||
/* Look for a function call. */
|
||||
|
|
|
@ -18,9 +18,11 @@
|
|||
#include <unistd.h>
|
||||
#include <wctype.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cstring>
|
||||
#include <cwchar>
|
||||
#include <iterator>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
|
@ -711,6 +713,63 @@ double fish_wcstod(const wchar_t *str, wchar_t **endptr) {
|
|||
return std::wcstod(str, endptr);
|
||||
}
|
||||
|
||||
/// Like wcstod(), but allows underscore separators. Leading, trailing, and multiple underscores are
|
||||
/// allowed, as are underscores next to decimal (.), exponent (E/e/P/p), and hexadecimal (X/x)
|
||||
/// delimiters. This consumes trailing underscores -- endptr will point past the last underscore
|
||||
/// which is legal to include in a parse (according to the above rules). Free-floating leading
|
||||
/// underscores ("_ 3") are not allowed and will result in a no-parse. Underscores are not allowed
|
||||
/// before or inside of "infinity" or "nan" input. Trailing underscores after "infinity" or "nan"
|
||||
/// are not consumed.
|
||||
double fish_wcstod_underscores(const wchar_t *str, wchar_t **endptr) {
|
||||
const wchar_t *orig = str;
|
||||
while (iswspace(*str)) str++; // Skip leading whitespace.
|
||||
size_t leading_whitespace = size_t(str - orig);
|
||||
auto is_sign = [](wchar_t c) { return c == L'+' || c == L'-'; };
|
||||
auto is_inf_or_nan_char = [](wchar_t c) {
|
||||
return c == L'i' || c == L'I' || c == L'n' || c == L'N';
|
||||
};
|
||||
// We don't do any underscore-stripping for infinity/NaN.
|
||||
if (is_inf_or_nan_char(*str) || (is_sign(*str) && is_inf_or_nan_char(*(str + 1)))) {
|
||||
return fish_wcstod(orig, endptr);
|
||||
}
|
||||
// We build a string to pass to the system wcstod, pruned of underscores. We will take all
|
||||
// leading alphanumeric characters that can appear in a strtod numeric literal, dots (.), and
|
||||
// signs (+/-). In order to be more clever, for example to stop earlier in the case of strings
|
||||
// like "123xxxxx", we would need to do a full parse, because sometimes 'a' is a hex digit and
|
||||
// sometimes it is the end of the parse, sometimes a dot '.' is a decimal delimiter and
|
||||
// sometimes it is the end of the valid parse, as in "1_2.3_4.5_6", etc.
|
||||
wcstring pruned;
|
||||
// We keep track of the positions *in the pruned string* where there used to be underscores. We
|
||||
// will pass the pruned version of the input string to the system wcstod, which in turn will
|
||||
// tell us how many characters it consumed. Then we will set our own endptr based on (1) the
|
||||
// number of characters consumed from the pruned string, and (2) how many underscores came
|
||||
// before the last consumed character. The alternative to doing it this way (for example, "only
|
||||
// deleting the correct underscores") would require actually parsing the input string, so that
|
||||
// we can know when to stop grabbing characters and dropping underscores, as in "1_2.3_4.5_6".
|
||||
std::vector<size_t> underscores;
|
||||
// If we wanted to future-proof against a strtod from the future that, say, allows octal
|
||||
// literals using 0o, etc., we could just use iswalnum, instead of iswxdigit and P/p/X/x checks.
|
||||
while (iswxdigit(*str) || *str == L'P' || *str == L'p' || *str == L'X' || *str == L'x' ||
|
||||
is_sign(*str) || *str == L'.' || *str == L'_') {
|
||||
if (*str == L'_') underscores.push_back(pruned.length());
|
||||
else pruned.push_back(*str);
|
||||
str++;
|
||||
}
|
||||
const wchar_t *pruned_begin = pruned.c_str();
|
||||
const wchar_t *pruned_end = nullptr;
|
||||
double result = fish_wcstod(pruned_begin, (wchar_t **)(&pruned_end));
|
||||
if (pruned_end == pruned_begin) {
|
||||
if (endptr) *endptr = (wchar_t *)orig;
|
||||
return result;
|
||||
}
|
||||
auto consumed_underscores_end = std::upper_bound(underscores.begin(), underscores.end(),
|
||||
size_t(pruned_end - pruned_begin));
|
||||
size_t num_underscores_consumed = std::distance(underscores.begin(), consumed_underscores_end);
|
||||
if (endptr) *endptr = (wchar_t *)(orig + leading_whitespace + (pruned_end - pruned_begin)
|
||||
+ num_underscores_consumed);
|
||||
return result;
|
||||
}
|
||||
|
||||
file_id_t file_id_t::from_stat(const struct stat &buf) {
|
||||
file_id_t result = {};
|
||||
result.device = buf.st_dev;
|
||||
|
|
|
@ -133,6 +133,7 @@ long long fish_wcstoll(const wchar_t *str, const wchar_t **endptr = nullptr, int
|
|||
unsigned long long fish_wcstoull(const wchar_t *str, const wchar_t **endptr = nullptr,
|
||||
int base = 10);
|
||||
double fish_wcstod(const wchar_t *str, wchar_t **endptr);
|
||||
double fish_wcstod_underscores(const wchar_t *str, wchar_t **endptr);
|
||||
|
||||
/// Class for representing a file's inode. We use this to detect and avoid symlink loops, among
|
||||
/// other things. While an inode / dev pair is sufficient to distinguish co-existing files, Linux
|
||||
|
|
|
@ -261,3 +261,24 @@ math pow 2 x cos'(-pi)', 2
|
|||
math 'ncr(0/0, 1)'
|
||||
# CHECKERR: math: Error: Result is infinite
|
||||
# CHECKERR: 'ncr(0/0, 1)'
|
||||
|
||||
math 0_1
|
||||
# CHECK: 1
|
||||
math 0x0_A
|
||||
# CHECK: 10
|
||||
math 1_000 + 2_000
|
||||
# CHECK: 3000
|
||||
math 1_0_0_0
|
||||
# CHECK: 1000
|
||||
math 0_0.5_0 + 0_1.0_0
|
||||
# CHECK: 1.5
|
||||
math 2e0_0_2
|
||||
# CHECK: 200
|
||||
math -0_0.5_0_0E0_0_3
|
||||
# CHECK: -500
|
||||
math 20e-0_1
|
||||
# CHECK: 2
|
||||
math 0x0_2.0_0_0P0_2
|
||||
# CHECK: 8
|
||||
math -0x8p-0_3
|
||||
# CHECK: -1
|
||||
|
|
Loading…
Reference in New Issue
Block a user