string: Add "--groups-only" to match

This adds a simple way of picking bits from a string that might be a
bit nicer than having to resort to a full `replace`.

Fixes #6056
This commit is contained in:
Fabian Homborg 2019-06-11 16:05:24 +02:00
parent 801d7e3e11
commit f3f6e4a982
3 changed files with 59 additions and 4 deletions

View File

@ -8,9 +8,9 @@ Synopsis
::
string match [(-a | --all)] [(-e | --entire)] [(-i | --ignore-case)] [(-r | --regex)] [(-n | --index)] [(-q | --quiet)] [(-v | --invert)] PATTERN [STRING...]
string match [(-a | --all)] [(-e | --entire)] [(-i | --ignore-case)] [(-g | --groups-only)] [(-r | --regex)] [(-n | --index)] [(-q | --quiet)] [(-v | --invert)] PATTERN [STRING...]
.. END SYNOPSIS
. END SYNOPSIS
Description
-----------
@ -23,6 +23,8 @@ If you specify the ``-e`` or ``--entire`` then each matching string is printed i
Matching can be made case-insensitive with ``--ignore-case`` or ``-i``.
If ``--groups-only`` or ``-g`` is given, only the capturing groups will be reported - meaning the full match will be skipped. This is incompatible with ``--entire`` and ``--invert``, and requires ``--regex``. It is useful as a simple cutting tool instead of ``string replace``, so you can simply choose "this part" of a string.
If ``--index`` or ``-n`` is given, each match is reported as a 1-based start position and a length. By default, PATTERN is interpreted as a glob pattern matched against each entire STRING argument. A glob pattern is only considered a valid match if it matches the entire STRING.
If ``--regex`` or ``-r`` is given, PATTERN is interpreted as a Perl-compatible regular expression, which does not have to match the entire STRING. For a regular expression containing capturing groups, multiple items will be reported for each match, one for the entire match and one for each capturing group. With this, only the matching part of the STRING will be reported, unless ``--entire`` is given.

View File

@ -142,6 +142,7 @@ struct options_t { //!OCLINT(too many fields)
bool count_valid = false;
bool entire_valid = false;
bool filter_valid = false;
bool groups_only_valid = false;
bool ignore_case_valid = false;
bool index_valid = false;
bool invert_valid = false;
@ -165,6 +166,7 @@ struct options_t { //!OCLINT(too many fields)
bool all = false;
bool entire = false;
bool filter = false;
bool groups_only = false;
bool ignore_case = false;
bool index = false;
bool invert_match = false;
@ -340,6 +342,16 @@ static int handle_flag_f(const wchar_t **argv, parser_t &parser, io_streams_t &s
return STATUS_INVALID_ARGS;
}
// Option handler for `-g` / `--groups-only`.
// Records the flag in `opts` when `opts->groups_only_valid` is set; otherwise reports the
// option that was just parsed as unknown and returns STATUS_INVALID_ARGS.
static int handle_flag_g(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
                         const wgetopter_t &w, options_t *opts) {
    // Reject first: the flag is only meaningful where it has been marked valid.
    if (!opts->groups_only_valid) {
        // argv[w.woptind - 1] is the argument the option parser just consumed.
        string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
        return STATUS_INVALID_ARGS;
    }

    opts->groups_only = true;
    return STATUS_CMD_OK;
}
static int handle_flag_i(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
const wgetopter_t &w, options_t *opts) {
if (opts->ignore_case_valid) {
@ -496,6 +508,7 @@ static wcstring construct_short_opts(options_t *opts) { //!OCLINT(high npath co
if (opts->count_valid) short_opts.append(L"n:");
if (opts->entire_valid) short_opts.append(L"e");
if (opts->filter_valid) short_opts.append(L"f");
if (opts->groups_only_valid) short_opts.append(L"g");
if (opts->ignore_case_valid) short_opts.append(L"i");
if (opts->index_valid) short_opts.append(L"n");
if (opts->invert_valid) short_opts.append(L"v");
@ -526,6 +539,7 @@ static const struct woption long_options[] = {{L"all", no_argument, nullptr, 'a'
{L"entire", no_argument, nullptr, 'e'},
{L"end", required_argument, nullptr, 'e'},
{L"filter", no_argument, nullptr, 'f'},
{L"groups-only", no_argument, nullptr, 'g'},
{L"ignore-case", no_argument, nullptr, 'i'},
{L"index", no_argument, nullptr, 'n'},
{L"invert", no_argument, nullptr, 'v'},
@ -548,7 +562,7 @@ static const struct woption long_options[] = {{L"all", no_argument, nullptr, 'a'
static const std::unordered_map<char, decltype(*handle_flag_N)> flag_to_function = {
{'N', handle_flag_N}, {'a', handle_flag_a}, {'c', handle_flag_c}, {'e', handle_flag_e},
{'f', handle_flag_f}, {'i', handle_flag_i}, {'l', handle_flag_l}, {'m', handle_flag_m},
{'f', handle_flag_f}, {'g', handle_flag_g}, {'i', handle_flag_i}, {'l', handle_flag_l}, {'m', handle_flag_m},
{'n', handle_flag_n}, {'q', handle_flag_q}, {'r', handle_flag_r}, {'s', handle_flag_s},
{'v', handle_flag_v}, {'w', handle_flag_w}, {1, handle_flag_1}};
@ -954,7 +968,8 @@ class pcre2_matcher_t final : public string_matcher_t {
}
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match);
for (int j = (opts.entire ? 1 : 0); j < pcre2_rc; j++) {
// If we have groups-only, we skip the first match, which is the full one.
for (int j = (opts.entire || opts.groups_only ? 1 : 0); j < pcre2_rc; j++) {
PCRE2_SIZE begin = ovector[2 * j];
PCRE2_SIZE end = ovector[2 * j + 1];
@ -1133,6 +1148,7 @@ static int string_match(parser_t &parser, io_streams_t &streams, int argc, const
options_t opts;
opts.all_valid = true;
opts.entire_valid = true;
opts.groups_only_valid = true;
opts.ignore_case_valid = true;
opts.invert_valid = true;
opts.quiet_valid = true;
@ -1149,6 +1165,18 @@ static int string_match(parser_t &parser, io_streams_t &streams, int argc, const
return STATUS_INVALID_ARGS;
}
if (opts.invert_match && opts.groups_only) {
streams.err.append_format(BUILTIN_ERR_COMBO2, cmd,
_(L"--invert and --groups-only are mutually exclusive"));
return STATUS_INVALID_ARGS;
}
if (opts.entire && opts.groups_only) {
streams.err.append_format(BUILTIN_ERR_COMBO2, cmd,
_(L"--entire and --groups-only are mutually exclusive"));
return STATUS_INVALID_ARGS;
}
std::unique_ptr<string_matcher_t> matcher;
if (opts.regex) {
matcher = make_unique<pcre2_matcher_t>(cmd, pattern, opts, streams, parser);

View File

@ -730,3 +730,28 @@ string escape \x7F
string pad -w 8 he \eh
# CHECK: he
# CHECK: {{\x1bh}}
string match -rg '(.*)fish' catfish
# CHECK: cat
string match -rg '(.*)fish' shellfish
# CHECK: shell
# An empty match
string match -rg '(.*)fish' fish
# No match at all
string match -rg '(.*)fish' banana
# Make sure a required (.+) group does not match the empty string: no output, status 1
string match -r --groups-only '(.+)fish' fish
echo $status
# CHECK: 1
# Multiple groups
string match -r --groups-only '(.+)fish(.*)' catfishcolor
# CHECK: cat
# CHECK: color
# Examples specifically called out in #6056.
echo "foo bar baz" | string match -rg 'foo (bar) baz'
# CHECK: bar
echo "foo1x foo2x foo3x" | string match -arg 'foo(\d)x'
# CHECK: 1
# CHECK: 2
# CHECK: 3