From 54a844b08edb3d5ba62a28d2fad3d8ab756c91e6 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 27 Nov 2021 12:46:15 -0800 Subject: [PATCH] Clean up wildcard_has wildcard_has was a "conservative" function which would sometimes falsely report wildcards. Make it exact and add some tests. --- src/common.cpp | 36 ++++++++++-------------------------- src/common.h | 4 ++++ src/complete.cpp | 2 +- src/expand.cpp | 2 +- src/fish_tests.cpp | 24 ++++++++++++++++++++++++ src/wildcard.cpp | 42 ++++++++++++++++++------------------------ src/wildcard.h | 12 +++++++++--- 7 files changed, 67 insertions(+), 55 deletions(-) diff --git a/src/common.cpp b/src/common.cpp index cbf381dda..0d7af786d 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1301,7 +1301,7 @@ maybe_t read_unquoted_escape(const wchar_t *input, wcstring *result, boo } /// Returns the unescaped version of input_str into output_str (by reference). Returns true if -/// successful. If false, the contents of output_str are undefined (!). +/// successful. If false, the contents of output_str are unchanged. static bool unescape_string_internal(const wchar_t *const input, const size_t input_len, wcstring *output_str, unescape_flags_t flags) { // Set up result string, which we'll swap with the output on success. @@ -1579,12 +1579,12 @@ bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special) { return success; } -bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special, - escape_string_style_t style) { +bool unescape_string(const wchar_t *input, size_t len, wcstring *output, + unescape_flags_t escape_special, escape_string_style_t style) { bool success = false; switch (style) { case STRING_STYLE_SCRIPT: { - success = unescape_string_internal(input, std::wcslen(input), output, escape_special); + success = unescape_string_internal(input, len, output, escape_special); break; } case STRING_STYLE_URL: { @@ -1605,30 +1605,14 @@ bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t es return success; } +bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special, + escape_string_style_t style) { + return unescape_string(input, std::wcslen(input), output, escape_special, style); +} + bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special, escape_string_style_t style) { - bool success = false; - switch (style) { - case STRING_STYLE_SCRIPT: { - success = unescape_string_internal(input.c_str(), input.size(), output, escape_special); - break; - } - case STRING_STYLE_URL: { - success = unescape_string_url(input.c_str(), output); - break; - } - case STRING_STYLE_VAR: { - success = unescape_string_var(input.c_str(), output); - break; - } - case STRING_STYLE_REGEX: { - // unescaping PCRE2 is not needed/supported, the PCRE2 engine is responsible for that - success = false; - break; - } - } - if (!success) output->clear(); - return success; + return unescape_string(input.c_str(), input.size(), output, escape_special, style); } [[gnu::noinline]] void bugreport() { diff --git a/src/common.h b/src/common.h index 91feaa2d7..5b46e3636 100644 --- a/src/common.h +++ b/src/common.h @@ -504,6 +504,10 @@ bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special); bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special, escape_string_style_t style = STRING_STYLE_SCRIPT); +bool unescape_string(const wchar_t *input, size_t len, wcstring *output, + unescape_flags_t escape_special, + escape_string_style_t style = STRING_STYLE_SCRIPT); + bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special, escape_string_style_t style = STRING_STYLE_SCRIPT); diff --git a/src/complete.cpp b/src/complete.cpp index ae9681b61..9bda3c61e 100644 --- a/src/complete.cpp +++ b/src/complete.cpp @@ -594,7 +594,7 @@ void completer_t::complete_cmd_desc(const wcstring &str) { // least two characters if we don't know the location of the whatis-database. if (cmd.length() < 2) return; - if (wildcard_has(cmd, false)) { + if (wildcard_has(cmd)) { return; } diff --git a/src/expand.cpp b/src/expand.cpp index 0bbaf6905..ca5392431 100644 --- a/src/expand.cpp +++ b/src/expand.cpp @@ -1041,7 +1041,7 @@ expand_result_t expander_t::stage_wildcards(wcstring path_to_expand, completion_ expand_result_t result = expand_result_t::ok; remove_internal_separator(&path_to_expand, flags & expand_flag::skip_wildcards); - const bool has_wildcard = wildcard_has(path_to_expand, true /* internal, i.e. ANY_STRING */); + const bool has_wildcard = wildcard_has_internal(path_to_expand); // e.g. ANY_STRING const bool for_completions = flags & expand_flag::for_completions; const bool skip_wildcards = flags & expand_flag::skip_wildcards; diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 2cd7c5051..6d63eba94 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -3147,6 +3147,29 @@ static std::shared_ptr make_test_func_props() { return ret; } +static void test_wildcards() { + say(L"Testing wildcards"); + do_test(!wildcard_has(L"")); + do_test(wildcard_has(L"*")); + do_test(!wildcard_has(L"\\*")); + do_test(!wildcard_has(L"\"*\"")); + + wcstring wc = L"foo*bar"; + do_test(wildcard_has(wc) && !wildcard_has_internal(wc)); + unescape_string_in_place(&wc, UNESCAPE_SPECIAL); + do_test(!wildcard_has(wc) && wildcard_has_internal(wc)); + + auto &feat = mutable_fish_features(); + auto saved = feat.test(features_t::flag_t::qmark_noglob); + feat.set(features_t::flag_t::qmark_noglob, false); + do_test(wildcard_has(L"?")); + do_test(!wildcard_has(L"\\?")); + feat.set(features_t::flag_t::qmark_noglob, true); + do_test(!wildcard_has(L"?")); + do_test(!wildcard_has(L"\\?")); + feat.set(features_t::flag_t::qmark_noglob, saved); +} + static void test_complete() { say(L"Testing complete"); @@ -6765,6 +6788,7 @@ static const test_t s_tests[]{ {TEST_GROUP("word_motion"), test_word_motion}, {TEST_GROUP("is_potential_path"), test_is_potential_path}, {TEST_GROUP("colors"), test_colors}, + {TEST_GROUP("wildcard"), test_wildcards}, {TEST_GROUP("complete"), test_complete}, {TEST_GROUP("autoload"), test_autoload}, {TEST_GROUP("input"), test_input}, diff --git a/src/wildcard.cpp b/src/wildcard.cpp index 899f95ab9..f468d0e35 100644 --- a/src/wildcard.cpp +++ b/src/wildcard.cpp @@ -38,34 +38,29 @@ static size_t wildcard_find(const wchar_t *wc) { return wcstring::npos; } -/// Implementation of wildcard_has. Needs to take the length to handle embedded nulls (issue #1631). -static bool wildcard_has_impl(const wchar_t *str, size_t len, bool internal) { - assert(str != nullptr); - bool qmark_is_wild = !feature_test(features_t::qmark_noglob); - const wchar_t *end = str + len; - if (internal) { - for (; str < end; str++) { - if ((*str == ANY_CHAR) || (*str == ANY_STRING) || (*str == ANY_STRING_RECURSIVE)) - return true; - } - } else { - wchar_t prev = 0; - for (; str < end; str++) { - if (((*str == L'*') || (*str == L'?' && qmark_is_wild)) && (prev != L'\\')) return true; - prev = *str; +bool wildcard_has_internal(const wchar_t *s, size_t len) { + for (size_t i = 0; i < len; i++) { + wchar_t c = s[i]; + if (c == ANY_CHAR || c == ANY_STRING || c == ANY_STRING_RECURSIVE) { + return true; } } - return false; } -bool wildcard_has(const wchar_t *str, bool internal) { +// Note we want to handle embedded nulls (issue #1631). +bool wildcard_has(const wchar_t *str, size_t len) { assert(str != nullptr); - return wildcard_has_impl(str, std::wcslen(str), internal); -} - -bool wildcard_has(const wcstring &str, bool internal) { - return wildcard_has_impl(str.data(), str.size(), internal); + const wchar_t *end = str + len; + bool qmark_is_wild = !feature_test(features_t::qmark_noglob); + // Fast check for * or ?; if none there is no wildcard. + // Note some strings contain * but no wildcards, e.g. if they are quoted. + if (std::find(str, end, L'*') == end && (!qmark_is_wild || std::find(str, end, L'?') == end)) { + return false; + } + wcstring unescaped; + unescape_string(str, len, &unescaped, UNESCAPE_SPECIAL); + return wildcard_has_internal(unescaped); } /// Check whether the string str matches the wildcard string wc. @@ -876,8 +871,7 @@ void wildcard_expander_t::expand(const wcstring &base_dir, const wchar_t *wc, const bool is_last_segment = (next_slash == nullptr); const size_t wc_segment_len = next_slash ? next_slash - wc : std::wcslen(wc); const wcstring wc_segment = wcstring(wc, wc_segment_len); - const bool segment_has_wildcards = - wildcard_has(wc_segment, true /* internal, i.e. look for ANY_CHAR instead of ? */); + const bool segment_has_wildcards = wildcard_has_internal(wc_segment); // e.g. ANY_STRING. const wchar_t *const wc_remainder = next_slash ? next_slash + 1 : nullptr; if (wc_segment.empty()) { diff --git a/src/wildcard.h b/src/wildcard.h index 3829f39ed..4e4babc8b 100644 --- a/src/wildcard.h +++ b/src/wildcard.h @@ -86,9 +86,15 @@ wildcard_result_t wildcard_expand_string(const wcstring &wc, const wcstring &wor bool wildcard_match(const wcstring &str, const wcstring &wc, bool leading_dots_fail_to_match = false); -/// Check if the specified string contains wildcards. -bool wildcard_has(const wcstring &, bool internal); -bool wildcard_has(const wchar_t *, bool internal); +// Check if the string has any unescaped wildcards (e.g. ANY_STRING). +bool wildcard_has_internal(const wchar_t *s, size_t len); +inline bool wildcard_has_internal(const wcstring &s) { + return wildcard_has_internal(s.c_str(), s.size()); +} + +/// Check if the specified string contains wildcards (e.g. *). +bool wildcard_has(const wchar_t *s, size_t len); +inline bool wildcard_has(const wcstring &s) { return wildcard_has(s.c_str(), s.size()); } /// Test wildcard completion. wildcard_result_t wildcard_complete(const wcstring &str, const wchar_t *wc,