Abbreviations to support matching via regex

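This adds the --regex option to abbreviations, allowing an abbreviation to match any token that fits a regular-expression pattern (anchored to the entire token) instead of only one literal token.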
2025-03-15 23:22:53 +08:00 · 2022-07-17 13:27:35 -07:00 · 2022-07-17 13:27:35 -07:00 · d15855d3e3
commit d15855d3e3
parent 470153c0df
7 changed files with 118 additions and 18 deletions
--- a/src/abbrs.cpp
+++ b/src/abbrs.cpp
@ -6,13 +6,22 @@
 #include "global_safety.h"
 #include "wcstringutil.h"

-abbreviation_t::abbreviation_t(wcstring name, wcstring replacement, abbrs_position_t position,
-                               bool from_universal)
+abbreviation_t::abbreviation_t(wcstring name, wcstring key, wcstring replacement,
+                               abbrs_position_t position, bool from_universal)
    : name(std::move(name)),
+      key(std::move(key)),
      replacement(std::move(replacement)),
      position(position),
      from_universal(from_universal) {}

+bool abbreviation_t::matches(const wcstring &token) const {  // True if `token` is recognized by this abbreviation's regex or key.
+    if (this->is_regex()) {  // A regex is set, so the key is a pattern rather than a literal.
+        return this->regex->match(token).has_value();  // Any match reported by the compiled regex counts.
+    } else {
+        return this->key == token;  // Literal abbreviation: the token must equal the key exactly.
+    }
+}
+
 acquired_lock<abbrs_set_t> abbrs_get_set() {
    static owning_lock<abbrs_set_t> abbrs;
    return abbrs.acquire();
@ -28,7 +37,7 @@ maybe_t<wcstring> abbrs_set_t::expand(const wcstring &token, abbrs_position_t po
        }

        // Expand only if the name matches.
-        if (token != abbr.name) {
+        if (!abbr.matches(token)) {
            continue;
        }

@ -89,8 +98,9 @@ void abbrs_set_t::import_from_uvars(const std::unordered_map<wcstring, env_var_t
            wcstring escaped_name = kv.first.substr(prefix_len);
            wcstring name;
            if (unescape_string(escaped_name, &name, unescape_flags_t{}, STRING_STYLE_VAR)) {
+                wcstring key = name;
                wcstring replacement = join_strings(kv.second.as_list(), L' ');
-                this->add(abbreviation_t{std::move(name), std::move(replacement),
+                this->add(abbreviation_t{std::move(name), std::move(key), std::move(replacement),
                                         abbrs_position_t::command, from_universal});
            }
        }
--- a/src/abbrs.h
+++ b/src/abbrs.h
@ -8,6 +8,7 @@

 #include "common.h"
 #include "maybe.h"
+#include "re.h"

 class env_var_t;

@ -22,19 +23,33 @@ struct abbreviation_t {
    // This is used as the token to match unless we have a regex.
    wcstring name{};

+    /// The key (recognized token) - either a literal or a regex pattern.
+    wcstring key{};
+
+    /// If set, use this regex to recognize tokens.
+    /// If unset, the key is to be interpreted literally.
+    /// Note that the fish interface enforces that regexes match the entire token;
+    /// we accomplish this by surrounding the regex in ^ and $.
+    maybe_t<re::regex_t> regex{};
+
    // Replacement string.
    wcstring replacement{};

-    // Expansion position.
+    /// Expansion position.
    abbrs_position_t position{abbrs_position_t::command};

-    // Mark if we came from a universal variable.
+    /// Mark if we came from a universal variable.
    bool from_universal{};

    // \return true if this is a regex abbreviation.
-    bool is_regex() const { return false; }
+    bool is_regex() const { return this->regex.has_value(); }

-    explicit abbreviation_t(wcstring name, wcstring replacement,
+    // \return true if we match a token.
+    bool matches(const wcstring &token) const;
+
+    // Construct from a name, a key which matches a token, a replacement token, a position, and
+    // whether we are derived from a universal variable.
+    explicit abbreviation_t(wcstring name, wcstring key, wcstring replacement,
                            abbrs_position_t position = abbrs_position_t::command,
                            bool from_universal = false);

--- a/src/builtins/abbr.cpp
+++ b/src/builtins/abbr.cpp
@ -22,6 +22,7 @@
 #include "../common.h"
 #include "../env.h"
 #include "../io.h"
+#include "../re.h"
 #include "../wcstringutil.h"
 #include "../wgetopt.h"
 #include "../wutil.h"
@ -37,6 +38,7 @@ struct abbr_options_t {
    bool list{};
    bool erase{};
    bool query{};
+    maybe_t<wcstring> regex_pattern;
    maybe_t<abbrs_position_t> position{};

    wcstring_list_t args;
@ -55,7 +57,6 @@ struct abbr_options_t {
                                      join_strings(cmds, L", ").c_str());
            return false;
        }
-
        // If run with no options, treat it like --add if we have arguments,
        // or --show if we do not have any arguments.
        if (cmds.empty()) {
@ -67,6 +68,10 @@ struct abbr_options_t {
            streams.err.append_format(_(L"%ls: --position option requires --add\n"), CMD);
            return false;
        }
+        if (!add && regex_pattern.has_value()) {
+            streams.err.append_format(_(L"%ls: --regex option requires --add\n"), CMD);
+            return false;
+        }
        return true;
    }
 };
@ -78,7 +83,16 @@ static int abbr_show(const abbr_options_t &, io_streams_t &streams) {
        wcstring name = escape_string(abbr.name);
        wcstring value = escape_string(abbr.replacement);
        const wchar_t *scope = (abbr.from_universal ? L"-U " : L"");
-        streams.out.append_format(L"abbr -a %ls-- %ls %ls\n", scope, name.c_str(), value.c_str());
+        // Literal abbreviations share both name and key.
+        // Regex abbreviations have a pattern separate from the name.
+        if (!abbr.is_regex()) {
+            streams.out.append_format(L"abbr -a %ls-- %ls %ls\n", scope, name.c_str(),
+                                      value.c_str());
+        } else {
+            wcstring pattern = escape_string(abbr.key);
+            streams.out.append_format(L"abbr -a %ls-- %ls --regex %ls %ls\n", scope, name.c_str(),
+                                      pattern.c_str(), value.c_str());
+        }
    }
    return STATUS_CMD_OK;
 }
@ -167,15 +181,39 @@ static int abbr_add(const abbr_options_t &opts, io_streams_t &streams) {
            name.c_str());
        return STATUS_INVALID_ARGS;
    }
+
+    maybe_t<re::regex_t> regex;
+    wcstring key;
+    if (!opts.regex_pattern.has_value()) {
+        // The name plays double-duty as the token to replace.
+        key = name;
+    } else {
+        key = *opts.regex_pattern;
+        re::re_error_t error{};
+        // Compile the regex as given; if that succeeds then wrap it in our ^$ so it matches the
+        // entire token.
+        if (!re::regex_t::try_compile(*opts.regex_pattern, re::flags_t{}, &error)) {
+            streams.err.append_format(_(L"%ls: Regular expression compile error: %ls\n"), CMD,
+                                      error.message().c_str());
+            streams.err.append_format(L"%ls: %ls\n", CMD, opts.regex_pattern->c_str());
+            streams.err.append_format(L"%ls: %*ls\n", CMD, static_cast<int>(error.offset), L"^");
+            return STATUS_INVALID_ARGS;
+        }
+        wcstring anchored = re::make_anchored(*opts.regex_pattern);
+        regex = re::regex_t::try_compile(anchored, re::flags_t{}, &error);
+        assert(regex.has_value() && "Anchored compilation should have succeeded");
+    }
+
    wcstring replacement;
    for (auto iter = opts.args.begin() + 1; iter != opts.args.end(); ++iter) {
        if (!replacement.empty()) replacement.push_back(L' ');
        replacement.append(*iter);
    }
    abbrs_position_t position = opts.position ? *opts.position : abbrs_position_t::command;
-    abbreviation_t abbr{name, std::move(replacement), position};

    // Note historically we have allowed overwriting existing abbreviations.
+    abbreviation_t abbr{std::move(name), std::move(key), std::move(replacement), position};
+    abbr.regex = std::move(regex);
    abbrs_get_set()->add(std::move(abbr));
    return STATUS_CMD_OK;
 }
@ -212,6 +250,7 @@ maybe_t<int> builtin_abbr(parser_t &parser, io_streams_t &streams, const wchar_t
    static const wchar_t *const short_options = L"-arseqgUh";
    static const struct woption long_options[] = {{L"add", no_argument, 'a'},
                                                  {L"position", required_argument, 'p'},
+                                                  {L"regex", required_argument, REGEX_SHORT},
                                                  {L"rename", no_argument, 'r'},
                                                  {L"erase", no_argument, 'e'},
                                                  {L"query", no_argument, 'q'},
@ -260,6 +299,16 @@ maybe_t<int> builtin_abbr(parser_t &parser, io_streams_t &streams, const wchar_t
                }
                break;
            }
+            case REGEX_SHORT: {
+                if (opts.regex_pattern.has_value()) {
+                    streams.err.append_format(_(L"%ls: Cannot specify multiple regex patterns\n"),
+                                              CMD);
+                    return STATUS_INVALID_ARGS;
+                }
+                opts.regex_pattern = w.woptarg;
+                break;
+            }
+
            case 'r':
                opts.rename = true;
                break;
--- a/src/complete.cpp
+++ b/src/complete.cpp
@ -677,8 +677,8 @@ void completer_t::complete_abbr(const wcstring &cmd) {
        auto abbrs = abbrs_get_set();
        for (const auto &abbr : abbrs->list()) {
            if (!abbr.is_regex()) {
-                possible_comp.emplace_back(abbr.name);
-                descs[abbr.name] = abbr.replacement;
+                possible_comp.emplace_back(abbr.key);
+                descs[abbr.key] = abbr.replacement;
            }
        }
    }
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@ -2468,11 +2468,15 @@ static void test_ifind_fuzzy() {
 static void test_abbreviations() {
    say(L"Testing abbreviations");
    {
+        auto literal_abbr = [](const wchar_t *name, const wchar_t *repl,
+                               abbrs_position_t pos = abbrs_position_t::command) {
+            return abbreviation_t(name, name /* key */, repl, pos);
+        };
        auto abbrs = abbrs_get_set();
-        abbrs->add(abbreviation_t(L"gc", L"git checkout"));
-        abbrs->add(abbreviation_t(L"foo", L"bar"));
-        abbrs->add(abbreviation_t(L"gx", L"git checkout"));
-        abbrs->add(abbreviation_t(L"yin", L"yang", abbrs_position_t::anywhere));
+        abbrs->add(literal_abbr(L"gc", L"git checkout"));
+        abbrs->add(literal_abbr(L"foo", L"bar"));
+        abbrs->add(literal_abbr(L"gx", L"git checkout"));
+        abbrs->add(literal_abbr(L"yin", L"yang", abbrs_position_t::anywhere));
    }

    auto cmd = abbrs_position_t::command;
@ -3519,7 +3523,7 @@ static void test_complete() {

    // Test abbreviations.
    function_add(L"testabbrsonetwothreefour", func_props);
-    abbrs_get_set()->add(abbreviation_t(L"testabbrsonetwothreezero", L"expansion"));
+    abbrs_get_set()->add(abbreviation_t(L"somename", L"testabbrsonetwothreezero", L"expansion"));
    completions = complete(L"testabbrsonetwothree", {}, parser->context());
    do_test(completions.size() == 2);
    do_test(completions.at(0).completion == L"four");
--- a/tests/checks/abbr.fish
+++ b/tests/checks/abbr.fish
@ -132,3 +132,14 @@ abbr --query banana --position anywhere
 echo $status
 # CHECKERR: abbr: --position option requires --add
 # CHECK: 2
+
+# Erase all abbreviations
+abbr --erase (abbr --list)
+abbr --show
+# Should be no output
+
+abbr --add nonregex_name foo
+abbr --add regex_name --regex 'A[0-9]B' bar
+abbr --show
+# CHECK: abbr -a -- nonregex_name foo
+# CHECK: abbr -a -- regex_name --regex 'A[0-9]B' bar
--- a/tests/pexpects/abbrs.py
+++ b/tests/pexpects/abbrs.py
@ -59,3 +59,14 @@ expect_str(r"<beta2 >")

 send(r"echo alpha ?")
 expect_str(r"<echo beta2 >")
+
+# Support regex.
+sendline(r"abbr alpha --regex 'A[0-9]+Z' beta3")
+send(r"A123Z ?")
+expect_str(r"<beta3 >")
+send(r"AZ ?")
+expect_str(r"<AZ >")
+send(r"QA123Z ?")
+expect_str(r"<QA123Z >")
+send(r"A0000000000000000000009Z ?")
+expect_str(r"<beta3 >")