Abbreviations to support matching via regex

This adds the --regex option to abbreviations, allowing them to match a
pattern of tokens.
This commit is contained in:
ridiculousfish 2022-07-17 13:27:35 -07:00
parent 470153c0df
commit d15855d3e3
7 changed files with 118 additions and 18 deletions

View File

@ -6,13 +6,22 @@
#include "global_safety.h"
#include "wcstringutil.h"
abbreviation_t::abbreviation_t(wcstring name, wcstring replacement, abbrs_position_t position,
bool from_universal)
abbreviation_t::abbreviation_t(wcstring name, wcstring key, wcstring replacement,
abbrs_position_t position, bool from_universal)
: name(std::move(name)),
key(std::move(key)),
replacement(std::move(replacement)),
position(position),
from_universal(from_universal) {}
bool abbreviation_t::matches(const wcstring &token) const {
if (this->is_regex()) {
return this->regex->match(token).has_value();
} else {
return this->key == token;
}
}
acquired_lock<abbrs_set_t> abbrs_get_set() {
static owning_lock<abbrs_set_t> abbrs;
return abbrs.acquire();
@ -28,7 +37,7 @@ maybe_t<wcstring> abbrs_set_t::expand(const wcstring &token, abbrs_position_t po
}
// Expand only if the name matches.
if (token != abbr.name) {
if (!abbr.matches(token)) {
continue;
}
@ -89,8 +98,9 @@ void abbrs_set_t::import_from_uvars(const std::unordered_map<wcstring, env_var_t
wcstring escaped_name = kv.first.substr(prefix_len);
wcstring name;
if (unescape_string(escaped_name, &name, unescape_flags_t{}, STRING_STYLE_VAR)) {
wcstring key = name;
wcstring replacement = join_strings(kv.second.as_list(), L' ');
this->add(abbreviation_t{std::move(name), std::move(replacement),
this->add(abbreviation_t{std::move(name), std::move(key), std::move(replacement),
abbrs_position_t::command, from_universal});
}
}

View File

@ -8,6 +8,7 @@
#include "common.h"
#include "maybe.h"
#include "re.h"
class env_var_t;
@ -22,19 +23,33 @@ struct abbreviation_t {
// This is used as the token to match unless we have a regex.
wcstring name{};
/// The key (recognized token) - either a literal or a regex pattern.
wcstring key{};
/// If set, use this regex to recognize tokens.
/// If unset, the key is to be interpreted literally.
/// Note that the fish interface enforces that regexes match the entire token;
/// we accomplish this by surrounding the regex in ^ and $.
maybe_t<re::regex_t> regex{};
// Replacement string.
wcstring replacement{};
// Expansion position.
/// Expansion position.
abbrs_position_t position{abbrs_position_t::command};
// Mark if we came from a universal variable.
/// Mark if we came from a universal variable.
bool from_universal{};
// \return true if this is a regex abbreviation.
bool is_regex() const { return false; }
bool is_regex() const { return this->regex.has_value(); }
explicit abbreviation_t(wcstring name, wcstring replacement,
// \return true if we match a token.
bool matches(const wcstring &token) const;
// Construct from a name, a key which matches a token, a replacement token, a position, and
// whether we are derived from a universal variable.
explicit abbreviation_t(wcstring name, wcstring key, wcstring replacement,
abbrs_position_t position = abbrs_position_t::command,
bool from_universal = false);

View File

@ -22,6 +22,7 @@
#include "../common.h"
#include "../env.h"
#include "../io.h"
#include "../re.h"
#include "../wcstringutil.h"
#include "../wgetopt.h"
#include "../wutil.h"
@ -37,6 +38,7 @@ struct abbr_options_t {
bool list{};
bool erase{};
bool query{};
maybe_t<wcstring> regex_pattern;
maybe_t<abbrs_position_t> position{};
wcstring_list_t args;
@ -55,7 +57,6 @@ struct abbr_options_t {
join_strings(cmds, L", ").c_str());
return false;
}
// If run with no options, treat it like --add if we have arguments,
// or --show if we do not have any arguments.
if (cmds.empty()) {
@ -67,6 +68,10 @@ struct abbr_options_t {
streams.err.append_format(_(L"%ls: --position option requires --add\n"), CMD);
return false;
}
if (!add && regex_pattern.has_value()) {
streams.err.append_format(_(L"%ls: --regex option requires --add\n"), CMD);
return false;
}
return true;
}
};
@ -78,7 +83,16 @@ static int abbr_show(const abbr_options_t &, io_streams_t &streams) {
wcstring name = escape_string(abbr.name);
wcstring value = escape_string(abbr.replacement);
const wchar_t *scope = (abbr.from_universal ? L"-U " : L"");
streams.out.append_format(L"abbr -a %ls-- %ls %ls\n", scope, name.c_str(), value.c_str());
// Literal abbreviations share both name and key.
// Regex abbreviations have a pattern separate from the name.
if (!abbr.is_regex()) {
streams.out.append_format(L"abbr -a %ls-- %ls %ls\n", scope, name.c_str(),
value.c_str());
} else {
wcstring pattern = escape_string(abbr.key);
streams.out.append_format(L"abbr -a %ls-- %ls --regex %ls %ls\n", scope, name.c_str(),
pattern.c_str(), value.c_str());
}
}
return STATUS_CMD_OK;
}
@ -167,15 +181,39 @@ static int abbr_add(const abbr_options_t &opts, io_streams_t &streams) {
name.c_str());
return STATUS_INVALID_ARGS;
}
maybe_t<re::regex_t> regex;
wcstring key;
if (!opts.regex_pattern.has_value()) {
// The name plays double-duty as the token to replace.
key = name;
} else {
key = *opts.regex_pattern;
re::re_error_t error{};
// Compile the regex as given; if that succeeds then wrap it in our ^$ so it matches the
// entire token.
if (!re::regex_t::try_compile(*opts.regex_pattern, re::flags_t{}, &error)) {
streams.err.append_format(_(L"%ls: Regular expression compile error: %ls\n"), CMD,
error.message().c_str());
streams.err.append_format(L"%ls: %ls\n", CMD, opts.regex_pattern->c_str());
streams.err.append_format(L"%ls: %*ls\n", CMD, static_cast<int>(error.offset), L"^");
return STATUS_INVALID_ARGS;
}
wcstring anchored = re::make_anchored(*opts.regex_pattern);
regex = re::regex_t::try_compile(anchored, re::flags_t{}, &error);
assert(regex.has_value() && "Anchored compilation should have succeeded");
}
wcstring replacement;
for (auto iter = opts.args.begin() + 1; iter != opts.args.end(); ++iter) {
if (!replacement.empty()) replacement.push_back(L' ');
replacement.append(*iter);
}
abbrs_position_t position = opts.position ? *opts.position : abbrs_position_t::command;
abbreviation_t abbr{name, std::move(replacement), position};
// Note historically we have allowed overwriting existing abbreviations.
abbreviation_t abbr{std::move(name), std::move(key), std::move(replacement), position};
abbr.regex = std::move(regex);
abbrs_get_set()->add(std::move(abbr));
return STATUS_CMD_OK;
}
@ -212,6 +250,7 @@ maybe_t<int> builtin_abbr(parser_t &parser, io_streams_t &streams, const wchar_t
static const wchar_t *const short_options = L"-arseqgUh";
static const struct woption long_options[] = {{L"add", no_argument, 'a'},
{L"position", required_argument, 'p'},
{L"regex", required_argument, REGEX_SHORT},
{L"rename", no_argument, 'r'},
{L"erase", no_argument, 'e'},
{L"query", no_argument, 'q'},
@ -260,6 +299,16 @@ maybe_t<int> builtin_abbr(parser_t &parser, io_streams_t &streams, const wchar_t
}
break;
}
case REGEX_SHORT: {
if (opts.regex_pattern.has_value()) {
streams.err.append_format(_(L"%ls: Cannot specify multiple regex patterns\n"),
CMD);
return STATUS_INVALID_ARGS;
}
opts.regex_pattern = w.woptarg;
break;
}
case 'r':
opts.rename = true;
break;

View File

@ -677,8 +677,8 @@ void completer_t::complete_abbr(const wcstring &cmd) {
auto abbrs = abbrs_get_set();
for (const auto &abbr : abbrs->list()) {
if (!abbr.is_regex()) {
possible_comp.emplace_back(abbr.name);
descs[abbr.name] = abbr.replacement;
possible_comp.emplace_back(abbr.key);
descs[abbr.key] = abbr.replacement;
}
}
}

View File

@ -2468,11 +2468,15 @@ static void test_ifind_fuzzy() {
static void test_abbreviations() {
say(L"Testing abbreviations");
{
auto literal_abbr = [](const wchar_t *name, const wchar_t *repl,
abbrs_position_t pos = abbrs_position_t::command) {
return abbreviation_t(name, name /* key */, repl, pos);
};
auto abbrs = abbrs_get_set();
abbrs->add(abbreviation_t(L"gc", L"git checkout"));
abbrs->add(abbreviation_t(L"foo", L"bar"));
abbrs->add(abbreviation_t(L"gx", L"git checkout"));
abbrs->add(abbreviation_t(L"yin", L"yang", abbrs_position_t::anywhere));
abbrs->add(literal_abbr(L"gc", L"git checkout"));
abbrs->add(literal_abbr(L"foo", L"bar"));
abbrs->add(literal_abbr(L"gx", L"git checkout"));
abbrs->add(literal_abbr(L"yin", L"yang", abbrs_position_t::anywhere));
}
auto cmd = abbrs_position_t::command;
@ -3519,7 +3523,7 @@ static void test_complete() {
// Test abbreviations.
function_add(L"testabbrsonetwothreefour", func_props);
abbrs_get_set()->add(abbreviation_t(L"testabbrsonetwothreezero", L"expansion"));
abbrs_get_set()->add(abbreviation_t(L"somename", L"testabbrsonetwothreezero", L"expansion"));
completions = complete(L"testabbrsonetwothree", {}, parser->context());
do_test(completions.size() == 2);
do_test(completions.at(0).completion == L"four");

View File

@ -132,3 +132,14 @@ abbr --query banana --position anywhere
echo $status
# CHECKERR: abbr: --position option requires --add
# CHECK: 2
# Erase all abbreviations
abbr --erase (abbr --list)
abbr --show
# Should be no output
abbr --add nonregex_name foo
abbr --add regex_name --regex 'A[0-9]B' bar
abbr --show
# CHECK: abbr -a -- nonregex_name foo
# CHECK: abbr -a -- regex_name --regex 'A[0-9]B' bar

View File

@ -59,3 +59,14 @@ expect_str(r"<beta2 >")
send(r"echo alpha ?")
expect_str(r"<echo beta2 >")
# Support regex.
sendline(r"abbr alpha --regex 'A[0-9]+Z' beta3")
send(r"A123Z ?")
expect_str(r"<beta3 >")
send(r"AZ ?")
expect_str(r"<AZ >")
send(r"QA123Z ?")
expect_str(r"<QA123Z >")
send(r"A0000000000000000000009Z ?")
expect_str(r"<beta3 >")