From 52dcfe11af028c4b36aaedc75de57041c69d3e9c Mon Sep 17 00:00:00 2001 From: Fabian Boehm Date: Thu, 29 Sep 2022 19:27:18 +0200 Subject: [PATCH] Make \x the same as \X Up to now, in normal locales \x was essentially the same as \X, except that it errored if given a value > 0x7f. That's kind of annoying and useless. A subtle change is that `\xHH` now represents the character (if any) encoded by the byte value "HH", so even for values <= 0x7f if that's not the same as the ASCII value we would diverge. I do not believe anyone has ever run fish on a system where that distinction matters. It isn't a thing for UTF-8, it isn't a thing for ASCII, it isn't a thing for UTF-16, it isn't a thing for any extended ASCII scheme - ISO8859-X, it isn't a thing for SHIFT-JIS. I am reasonably certain we are making that same assumption in other places. Fixes #1352 --- doc_src/language.rst | 7 +++---- src/common.cpp | 9 +++------ src/fish_tests.cpp | 12 ------------ src/highlight.cpp | 5 +---- tests/checks/locale.fish | 5 +++++ 5 files changed, 12 insertions(+), 26 deletions(-) diff --git a/doc_src/language.rst b/doc_src/language.rst index 20f9a48c9..719e9f679 100644 --- a/doc_src/language.rst +++ b/doc_src/language.rst @@ -107,11 +107,10 @@ Some characters cannot be written directly on the command line. For these charac - ``\r`` represents the carriage return character. - ``\t`` represents the tab character. - ``\v`` represents the vertical tab character. -- ``\xHH``, where ``HH`` is a hexadecimal number, represents the ASCII character with the specified value. For example, ``\x9`` is the tab character. -- ``\XHH``, where ``HH`` is a hexadecimal number, represents a byte of data with the specified value. If you are using a multibyte encoding, this can be used to enter invalid strings. Only use this if you know what you are doing. -- ``\ooo``, where ``ooo`` is an octal number, represents the ASCII character with the specified value. For example, ``\011`` is the tab character. 
+- ``\xHH`` or ``\XHH``, where ``HH`` is a hexadecimal number, represents a byte of data with the specified value. For example, ``\x9`` is the tab character. If you are using a multibyte encoding, this can be used to enter invalid strings. Typically fish is run with the ASCII or UTF-8 encoding, so anything up to ``\X7f`` is an ASCII character. +- ``\ooo``, where ``ooo`` is an octal number, represents the ASCII character with the specified value. For example, ``\011`` is the tab character. The highest allowed value is ``\177``. - ``\uXXXX``, where ``XXXX`` is a hexadecimal number, represents the 16-bit Unicode character with the specified value. For example, ``\u9`` is the tab character. -- ``\UXXXXXXXX``, where ``XXXXXXXX`` is a hexadecimal number, represents the 32-bit Unicode character with the specified value. For example, ``\U9`` is the tab character. +- ``\UXXXXXXXX``, where ``XXXXXXXX`` is a hexadecimal number, represents the 32-bit Unicode character with the specified value. For example, ``\U9`` is the tab character. The highest allowed value is ``\U10FFFF``. - ``\cX``, where ``X`` is a letter of the alphabet, represents the control sequence generated by pressing the control key and the specified letter. For example, ``\ci`` is the tab character Some characters have special meaning to the shell. For example, an apostrophe ``'`` disables expansion (see :ref:`Quotes`). To tell the shell to treat these characters literally, escape them with a backslash. 
For example, the command:: diff --git a/src/common.cpp b/src/common.cpp index 50b63edaf..c9f7d40c6 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1198,11 +1198,7 @@ maybe_t read_unquoted_escape(const wchar_t *input, wcstring *result, boo if (0x10FFFF < max_val) max_val = static_cast(0x10FFFF); break; } - case L'x': { - chars = 2; - max_val = ASCII_MAX; - break; - } + case L'x': case L'X': { byte_literal = true; max_val = BYTE_MAX; @@ -1239,7 +1235,8 @@ maybe_t read_unquoted_escape(const wchar_t *input, wcstring *result, boo // that are valid on their own, which is true for UTF-8) byte_buff.push_back(static_cast(res)); result_char_or_none = none(); - if (input[in_pos] == L'\\' && input[in_pos + 1] == L'X') { + if (input[in_pos] == L'\\' + && (input[in_pos + 1] == L'X' || input[in_pos + 1] == L'x')) { in_pos++; continue; } diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index bb194127a..c5c8335b9 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -1213,18 +1213,6 @@ static void test_parser() { err(L"Bad command substitution not reported as error"); } - if (!(detect_argument_errors(L"foo\\xFF9") & PARSER_TEST_ERROR)) { - err(L"Bad escape not reported as error"); - } - - if (!(detect_argument_errors(L"foo(echo \\xFF9)") & PARSER_TEST_ERROR)) { - err(L"Bad escape in command substitution not reported as error"); - } - - if (!(detect_argument_errors(L"foo(echo (echo (echo \\xFF9)))") & PARSER_TEST_ERROR)) { - err(L"Bad escape in nested command substitution not reported as error"); - } - if (!detect_errors(L"false & ; and cat")) { err(L"'and' command after background not reported as error"); } diff --git a/src/highlight.cpp b/src/highlight.cpp index d55a17768..89218f84d 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -604,10 +604,7 @@ static void color_string_internal(const wcstring &buffstr, highlight_spec_t base in_pos++; break; } - case L'x': { - in_pos++; - break; - } + case L'x': case L'X': { max_val = BYTE_MAX; in_pos++; diff --git 
a/tests/checks/locale.fish b/tests/checks/locale.fish index d6f88d089..5f8cf6290 100644 --- a/tests/checks/locale.fish +++ b/tests/checks/locale.fish @@ -97,3 +97,8 @@ math 5 \X2b 5 math 7 \x2b 7 #CHECK: 14 + +echo \xc3\xb6 +# CHECK: ö +echo \Xc3\Xb6 +# CHECK: ö