Report errors on invalid replacements in string replace

If the replacement in `string replace` is invalid, prior to this fix we would enter into an infinite loop trying to parse it. Instead report errors correctly. Fixes #3381
2025-03-15 23:22:53 +08:00 · 2018-09-27 22:28:39 -04:00 · 2018-09-27 22:28:39 -04:00 · a1728d61af
commit a1728d61af
parent ae54b66799
4 changed files with 64 additions and 65 deletions
--- a/src/builtin_string.cpp
+++ b/src/builtin_string.cpp
@ -133,7 +133,7 @@ class arg_iterator_t {
        }
    }
 };
-}
+}  // namespace

 // This is used by the string subcommands to communicate with the option parser which flags are
 // valid and get the result of parsing the command for flags.
@ -411,32 +411,23 @@ static wcstring construct_short_opts(options_t *opts) {  //!OCLINT(high npath co

 // Note that several long flags share the same short flag. That is okay. The caller is expected
 // to indicate that a max of one of the long flags sharing a short flag is valid.
-static const struct woption long_options[] = {{L"all", no_argument, NULL, 'a'},
-                                              {L"chars", required_argument, NULL, 'c'},
-                                              {L"count", required_argument, NULL, 'n'},
-                                              {L"entire", no_argument, NULL, 'e'},
-                                              {L"filter", no_argument, NULL, 'f'},
-                                              {L"ignore-case", no_argument, NULL, 'i'},
-                                              {L"index", no_argument, NULL, 'n'},
-                                              {L"invert", no_argument, NULL, 'v'},
-                                              {L"left", no_argument, NULL, 'l'},
-                                              {L"length", required_argument, NULL, 'l'},
-                                              {L"max", required_argument, NULL, 'm'},
-                                              {L"no-empty", no_argument, NULL, 'n'},
-                                              {L"no-newline", no_argument, NULL, 'N'},
-                                              {L"no-quoted", no_argument, NULL, 'n'},
-                                              {L"quiet", no_argument, NULL, 'q'},
-                                              {L"regex", no_argument, NULL, 'r'},
-                                              {L"right", no_argument, NULL, 'r'},
-                                              {L"start", required_argument, NULL, 's'},
-                                              {L"style", required_argument, NULL, 1},
-                                              {NULL, 0, NULL, 0}};
+static const struct woption long_options[] = {
+    {L"all", no_argument, NULL, 'a'},         {L"chars", required_argument, NULL, 'c'},
+    {L"count", required_argument, NULL, 'n'}, {L"entire", no_argument, NULL, 'e'},
+    {L"filter", no_argument, NULL, 'f'},      {L"ignore-case", no_argument, NULL, 'i'},
+    {L"index", no_argument, NULL, 'n'},       {L"invert", no_argument, NULL, 'v'},
+    {L"left", no_argument, NULL, 'l'},        {L"length", required_argument, NULL, 'l'},
+    {L"max", required_argument, NULL, 'm'},   {L"no-empty", no_argument, NULL, 'n'},
+    {L"no-newline", no_argument, NULL, 'N'},  {L"no-quoted", no_argument, NULL, 'n'},
+    {L"quiet", no_argument, NULL, 'q'},       {L"regex", no_argument, NULL, 'r'},
+    {L"right", no_argument, NULL, 'r'},       {L"start", required_argument, NULL, 's'},
+    {L"style", required_argument, NULL, 1},   {NULL, 0, NULL, 0}};

 static std::unordered_map<char, decltype(*handle_flag_N)> flag_to_function = {
    {'N', handle_flag_N}, {'a', handle_flag_a}, {'c', handle_flag_c}, {'e', handle_flag_e},
    {'f', handle_flag_f}, {'i', handle_flag_i}, {'l', handle_flag_l}, {'m', handle_flag_m},
    {'n', handle_flag_n}, {'q', handle_flag_q}, {'r', handle_flag_r}, {'s', handle_flag_s},
-    {'v', handle_flag_v}, {1, handle_flag_1} };
+    {'v', handle_flag_v}, {1, handle_flag_1}};

 /// Parse the arguments for flags recognized by a specific string subcommand.
 static int parse_opts(options_t *opts, int *optind, int n_req_args, int argc, wchar_t **argv,
@ -689,8 +680,9 @@ struct compiled_regex_t {
        int err_code = 0;
        PCRE2_SIZE err_offset = 0;

-        code = pcre2_compile(PCRE2_SPTR(pattern.c_str()), pattern.length(),
-                             options | (ignore_case ? PCRE2_CASELESS : 0), &err_code, &err_offset, 0);
+        code =
+            pcre2_compile(PCRE2_SPTR(pattern.c_str()), pattern.length(),
+                          options | (ignore_case ? PCRE2_CASELESS : 0), &err_code, &err_offset, 0);
        if (code == 0) {
            string_error(streams, _(L"%ls: Regular expression compile error: %ls\n"), argv0,
                         pcre2_strerror(err_code).c_str());
@ -789,7 +781,7 @@ class pcre2_matcher_t : public string_matcher_t {
        // See pcre2demo.c for an explanation of this logic.
        PCRE2_SIZE arglen = arg.length();
        int rc = report_match(
-                              arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, 0, 0, regex.match, 0));
+            arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, 0, 0, regex.match, 0));
        if (rc < 0) {  // pcre2 match error.
            return false;
        } else if (rc == 0) {  // no match
@ -815,8 +807,8 @@ class pcre2_matcher_t : public string_matcher_t {
                options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
            }

-            rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, offset, options,
-                                               regex.match, 0));
+            rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, offset,
+                                               options, regex.match, 0));
            if (rc < 0) {
                return false;
            }
@ -955,7 +947,8 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
        const wchar_t *cur = arg.c_str();
        const wchar_t *end = cur + arg.size();
        while (cur < end) {
-            if ((opts.all || !replacement_occurred) && cmp_func(cur, pattern.c_str(), patlen) == 0) {
+            if ((opts.all || !replacement_occurred) &&
+                cmp_func(cur, pattern.c_str(), patlen) == 0) {
                result += replacement;
                cur += patlen;
                replacement_occurred = true;
@ -978,7 +971,7 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
 /// A return value of true means all is well (even if no replacements were performed), false
 /// indicates an unrecoverable error.
 bool regex_replacer_t::replace_matches(const wcstring &arg) {
-    if (!regex.code) return false;  // pcre2_compile() failed
+    if (!regex.code) return false;   // pcre2_compile() failed
    if (!replacement) return false;  // replacement was an invalid string

    uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED |
@ -1076,9 +1069,11 @@ static int string_split_maybe0(parser_t &parser, io_streams_t &streams, int argc
    arg_iterator_t aiter(argv, optind, streams, is_split0);
    while (const wcstring *arg = aiter.nextstr()) {
        if (opts.right) {
-            split_about(arg->rbegin(), arg->rend(), sep.rbegin(), sep.rend(), &splits, opts.max, opts.no_empty);
+            split_about(arg->rbegin(), arg->rend(), sep.rbegin(), sep.rend(), &splits, opts.max,
+                        opts.no_empty);
        } else {
-            split_about(arg->begin(), arg->end(), sep.begin(), sep.end(), &splits, opts.max, opts.no_empty);
+            split_about(arg->begin(), arg->end(), sep.begin(), sep.end(), &splits, opts.max,
+                        opts.no_empty);
        }
        arg_count++;
    }
@ -1247,7 +1242,8 @@ static int string_trim(parser_t &parser, io_streams_t &streams, int argc, wchar_
 }

 // A helper function for lower and upper.
-static int string_transform(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv, std::wint_t (*func)(std::wint_t)) {
+static int string_transform(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv,
+                            std::wint_t (*func)(std::wint_t)) {
    options_t opts;
    opts.quiet_valid = true;
    int optind;
--- a/src/common.cpp
+++ b/src/common.cpp
@ -180,8 +180,7 @@ show_stackframe(const wchar_t msg_level, int frame_count, int skip_levels) {

 #else   // HAVE_BACKTRACE_SYMBOLS

-void __attribute__((noinline))
-show_stackframe(const wchar_t msg_level, int, int) {
+void __attribute__((noinline)) show_stackframe(const wchar_t msg_level, int, int) {
    debug_shared(msg_level, L"Sorry, but your system does not support backtraces");
 }
 #endif  // HAVE_BACKTRACE_SYMBOLS
@ -331,8 +330,8 @@ char *wcs2str(const wchar_t *in) {
    // Here we probably allocate a buffer probably much larger than necessary.
    char *out = (char *)malloc(MAX_UTF8_BYTES * wcslen(in) + 1);
    assert(out);
-    //Instead of returning the return value of wcs2str_internal, return `out` directly.
-    //This eliminates false warnings in coverity about resource leaks.
+    // Instead of returning the return value of wcs2str_internal, return `out` directly.
+    // This eliminates false warnings in coverity about resource leaks.
    wcs2str_internal(in, out);
    return out;
 }
@ -521,18 +520,16 @@ void fish_setlocale() {
    if (can_be_encoded(L'\u2026')) {
        ellipsis_char = L'\u2026';
        ellipsis_str = L"\u2026";
-    }
-    else {
-        ellipsis_char = L'$'; // "horizontal ellipsis"
+    } else {
+        ellipsis_char = L'$';  // "horizontal ellipsis"
        ellipsis_str = L"...";
    }
    if (is_windows_subsystem_for_linux()) {
-        //neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though
-        //they can be *encoded* just fine. Use alternative glyphs.
+        // neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though
+        // they can be *encoded* just fine. Use alternative glyphs.
        omitted_newline_char = can_be_encoded(L'\u00b6') ? L'\u00b6' : L'~';   // "pilcrow"
        obfuscation_read_char = can_be_encoded(L'\u2022') ? L'\u2022' : L'*';  // "bullet"
-    }
-    else {
+    } else {
        omitted_newline_char = can_be_encoded(L'\u23CE') ? L'\u23CE' : L'~';   // "return"
        obfuscation_read_char = can_be_encoded(L'\u25CF') ? L'\u25CF' : L'#';  // "black circle"
    }
@ -590,7 +587,6 @@ bool should_suppress_stderr_for_tests() {
    return program_name && !wcscmp(program_name, TESTS_PROGRAM_NAME);
 }

-
 static void debug_shared(const wchar_t level, const wcstring &msg) {
    pid_t current_pid = getpid();

@ -805,7 +801,7 @@ wcstring reformat_for_screen(const wcstring &msg) {

 /// Escape a string in a fashion suitable for using as a URL. Store the result in out_str.
 static void escape_string_url(const wcstring &in, wcstring &out) {
-    for (auto& c1 : in) {
+    for (auto &c1 : in) {
        // This silliness is so we get the correct result whether chars are signed or unsigned.
        unsigned int c2 = (unsigned int)c1 & 0xFF;
        if (!(c2 & 0x80) &&
@ -1027,7 +1023,8 @@ static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring
                case L';':
                case L'"':
                case L'~': {
-                    bool char_is_normal = (c == L'~' && no_tilde) || (c == L'^' && no_caret) || (c == L'?' && no_qmark);
+                    bool char_is_normal = (c == L'~' && no_tilde) || (c == L'^' && no_caret) ||
+                                          (c == L'?' && no_qmark);
                    if (!char_is_normal) {
                        need_escape = 1;
                        if (escape_all) out += L'\\';
@ -1346,7 +1343,12 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
    int brace_count = 0;

    bool errored = false;
-    enum { mode_unquoted, mode_single_quotes, mode_double_quotes, mode_braces } mode = mode_unquoted;
+    enum {
+        mode_unquoted,
+        mode_single_quotes,
+        mode_double_quotes,
+        mode_braces
+    } mode = mode_unquoted;

    for (size_t input_position = 0; input_position < input_len && !errored; input_position++) {
        const wchar_t c = input[input_position];
@ -1418,7 +1420,8 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
                        // We can't parse them properly, but it shouldn't hurt,
                        // so we don't assert here.
                        // See #4954.
-                        // assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we shouldn't be able to get here");
+                        // assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we
+                        // shouldn't be able to get here");
                        brace_count--;
                        brace_text_start = brace_text_start && brace_count > 0;
                        to_append_or_none = BRACE_END;
@ -1748,10 +1751,11 @@ bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &valu
           value.compare(value.size() - suffix_size, suffix_size, proposed_suffix) == 0;
 }

-bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, const wcstring &value) {
+bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix,
+                                             const wcstring &value) {
    size_t suffix_size = proposed_suffix.size();
-    return suffix_size <= value.size() &&
-           wcsncasecmp(value.c_str() + (value.size() - suffix_size), proposed_suffix.c_str(), suffix_size) == 0;
+    return suffix_size <= value.size() && wcsncasecmp(value.c_str() + (value.size() - suffix_size),
+                                                      proposed_suffix.c_str(), suffix_size) == 0;
 }

 /// Returns true if seq, represented as a subsequence, is contained within string.
@ -2061,9 +2065,7 @@ void setup_fork_guards() {
    initial_pid = getpid();
 }

-void save_term_foreground_process_group() {
-    initial_fg_process_group = tcgetpgrp(STDIN_FILENO);
-}
+void save_term_foreground_process_group() { initial_fg_process_group = tcgetpgrp(STDIN_FILENO); }

 void restore_term_foreground_process_group() {
    if (initial_fg_process_group == -1) return;
--- a/src/common.h
+++ b/src/common.h
@ -22,9 +22,9 @@
 #include <mutex>
 #include <sstream>
 #include <string>
+#include <tuple>
 #include <type_traits>
 #include <unordered_map>
-#include <tuple>
 #include <vector>

 #include "fallback.h"  // IWYU pragma: keep
@ -187,7 +187,8 @@ extern struct termios shell_modes;
 /// The character to use where the text has been truncated. Is an ellipsis on unicode system and a $
 /// on other systems.
 extern wchar_t ellipsis_char;
-/// The character or string to use where text has been truncated (ellipsis if possible, otherwise ...)
+/// The character or string to use where text has been truncated (ellipsis if possible, otherwise
+/// ...)
 extern const wchar_t *ellipsis_str;

 /// Character representing an omitted newline at the end of text.
@ -328,7 +329,8 @@ bool string_prefixes_string(const wchar_t *proposed_prefix, const wchar_t *value
 /// Test if a string is a suffix of another.
 bool string_suffixes_string(const wcstring &proposed_suffix, const wcstring &value);
 bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &value);
-bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, const wcstring &value);
+bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix,
+                                             const wcstring &value);

 /// Test if a string prefixes another without regard to case. Returns true if a is a prefix of b.
 bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix,
@ -534,7 +536,7 @@ class null_terminated_array_t {
    void operator=(null_terminated_array_t rhs);
    null_terminated_array_t(const null_terminated_array_t &);

-    typedef std::vector<std::basic_string<CharType_t> > string_list_t;
+    typedef std::vector<std::basic_string<CharType_t>> string_list_t;

    size_t size() const {
        size_t len = 0;
@ -882,7 +884,6 @@ struct enum_map {
    const wchar_t *const str;
 };

-
 /// Given a string return the matching enum. Return the sentinal enum if no match is made. The map
 /// must be sorted by the `str` member. A binary search is twice as fast as a linear search with 16
 /// elements in the map.
@ -916,13 +917,13 @@ static const wchar_t *enum_to_str(T enum_val, const enum_map<T> map[]) {
    return NULL;
 };

-template<typename... Args>
+template <typename... Args>
 using tuple_list = std::vector<std::tuple<Args...>>;

-//Given a container mapping one X to many Y, return a list of {X,Y}
-template<typename X, typename Y>
+// Given a container mapping one X to many Y, return a list of {X,Y}
+template <typename X, typename Y>
 inline tuple_list<X, Y> flatten(const std::unordered_map<X, std::vector<Y>> &list) {
-    tuple_list<X, Y> results(list.size() * 1.5); //just a guess as to the initial size
+    tuple_list<X, Y> results(list.size() * 1.5);  // just a guess as to the initial size
    for (auto &kv : list) {
        for (auto &v : kv.second) {
            results.emplace_back(std::make_tuple(kv.first, v));
@ -1004,7 +1005,7 @@ struct hash<const wcstring> {
        return hasher((wcstring)w);
    }
 };
-}
+}  // namespace std
 #endif

 #endif