From f3f6e4a98227aa115c9cfb30f6a8d6df1a187546 Mon Sep 17 00:00:00 2001 From: Fabian Homborg Date: Tue, 11 Jun 2019 16:05:24 +0200 Subject: [PATCH] string: Add "--groups-only" to match This adds a simple way of picking bits from a string that might be a bit nicer than having to resort to a full `replace`. Fixes #6056 --- doc_src/cmds/string-match.rst | 6 ++++-- src/builtin_string.cpp | 32 ++++++++++++++++++++++++++++++-- tests/checks/string.fish | 25 +++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/doc_src/cmds/string-match.rst b/doc_src/cmds/string-match.rst index 1f05f4a6a..3a3eaa0e3 100644 --- a/doc_src/cmds/string-match.rst +++ b/doc_src/cmds/string-match.rst @@ -8,9 +8,9 @@ Synopsis :: - string match [(-a | --all)] [(-e | --entire)] [(-i | --ignore-case)] [(-r | --regex)] [(-n | --index)] [(-q | --quiet)] [(-v | --invert)] PATTERN [STRING...] + string match [(-a | --all)] [(-e | --entire)] [(-i | --ignore-case)] [(-g | --groups-only)] [(-r | --regex)] [(-n | --index)] [(-q | --quiet)] [(-v | --invert)] PATTERN [STRING...] -.. END SYNOPSIS +.. END SYNOPSIS Description ----------- @@ -23,6 +23,8 @@ If you specify the ``-e`` or ``--entire`` then each matching string is printed i Matching can be made case-insensitive with ``--ignore-case`` or ``-i``. +If ``--groups-only`` or ``-g`` is given, only the capturing groups will be reported - meaning the full match will be skipped. This is incompatible with ``--entire`` and ``--invert``, and requires ``--regex``. It is useful as a simple cutting tool instead of ``string replace``, so you can simply choose "this part" of a string. + If ``--index`` or ``-n`` is given, each match is reported as a 1-based start position and a length. By default, PATTERN is interpreted as a glob pattern matched against each entire STRING argument. A glob pattern is only considered a valid match if it matches the entire STRING. 
If ``--regex`` or ``-r`` is given, PATTERN is interpreted as a Perl-compatible regular expression, which does not have to match the entire STRING. For a regular expression containing capturing groups, multiple items will be reported for each match, one for the entire match and one for each capturing group. With this, only the matching part of the STRING will be reported, unless ``--entire`` is given. diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp index 018f4b316..ead8e9ce6 100644 --- a/src/builtin_string.cpp +++ b/src/builtin_string.cpp @@ -142,6 +142,7 @@ struct options_t { //!OCLINT(too many fields) bool count_valid = false; bool entire_valid = false; bool filter_valid = false; + bool groups_only_valid = false; bool ignore_case_valid = false; bool index_valid = false; bool invert_valid = false; @@ -165,6 +166,7 @@ struct options_t { //!OCLINT(too many fields) bool all = false; bool entire = false; bool filter = false; + bool groups_only = false; bool ignore_case = false; bool index = false; bool invert_match = false; @@ -340,6 +342,16 @@ static int handle_flag_f(const wchar_t **argv, parser_t &parser, io_streams_t &s return STATUS_INVALID_ARGS; } +static int handle_flag_g(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + if (opts->groups_only_valid) { + opts->groups_only = true; + return STATUS_CMD_OK; + } + string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); + return STATUS_INVALID_ARGS; +} + static int handle_flag_i(const wchar_t **argv, parser_t &parser, io_streams_t &streams, const wgetopter_t &w, options_t *opts) { if (opts->ignore_case_valid) { @@ -496,6 +508,7 @@ static wcstring construct_short_opts(options_t *opts) { //!OCLINT(high npath co if (opts->count_valid) short_opts.append(L"n:"); if (opts->entire_valid) short_opts.append(L"e"); if (opts->filter_valid) short_opts.append(L"f"); + if (opts->groups_only_valid) short_opts.append(L"g"); if 
(opts->ignore_case_valid) short_opts.append(L"i"); if (opts->index_valid) short_opts.append(L"n"); if (opts->invert_valid) short_opts.append(L"v"); @@ -526,6 +539,7 @@ static const struct woption long_options[] = {{L"all", no_argument, nullptr, 'a' {L"entire", no_argument, nullptr, 'e'}, {L"end", required_argument, nullptr, 'e'}, {L"filter", no_argument, nullptr, 'f'}, + {L"groups-only", no_argument, nullptr, 'g'}, {L"ignore-case", no_argument, nullptr, 'i'}, {L"index", no_argument, nullptr, 'n'}, {L"invert", no_argument, nullptr, 'v'}, @@ -548,7 +562,7 @@ static const struct woption long_options[] = {{L"all", no_argument, nullptr, 'a' static const std::unordered_map flag_to_function = { {'N', handle_flag_N}, {'a', handle_flag_a}, {'c', handle_flag_c}, {'e', handle_flag_e}, - {'f', handle_flag_f}, {'i', handle_flag_i}, {'l', handle_flag_l}, {'m', handle_flag_m}, + {'f', handle_flag_f}, {'g', handle_flag_g}, {'i', handle_flag_i}, {'l', handle_flag_l}, {'m', handle_flag_m}, {'n', handle_flag_n}, {'q', handle_flag_q}, {'r', handle_flag_r}, {'s', handle_flag_s}, {'v', handle_flag_v}, {'w', handle_flag_w}, {1, handle_flag_1}}; @@ -954,7 +968,8 @@ class pcre2_matcher_t final : public string_matcher_t { } PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match); - for (int j = (opts.entire ? 1 : 0); j < pcre2_rc; j++) { + // If we have groups-only, we skip the first match, which is the full one. + for (int j = (opts.entire || opts.groups_only ? 
1 : 0); j < pcre2_rc; j++) { PCRE2_SIZE begin = ovector[2 * j]; PCRE2_SIZE end = ovector[2 * j + 1]; @@ -1133,6 +1148,7 @@ static int string_match(parser_t &parser, io_streams_t &streams, int argc, const options_t opts; opts.all_valid = true; opts.entire_valid = true; + opts.groups_only_valid = true; opts.ignore_case_valid = true; opts.invert_valid = true; opts.quiet_valid = true; @@ -1149,6 +1165,18 @@ static int string_match(parser_t &parser, io_streams_t &streams, int argc, const return STATUS_INVALID_ARGS; } + if (opts.invert_match && opts.groups_only) { + streams.err.append_format(BUILTIN_ERR_COMBO2, cmd, + _(L"--invert and --groups-only are mutually exclusive")); + return STATUS_INVALID_ARGS; + } + + if (opts.entire && opts.groups_only) { + streams.err.append_format(BUILTIN_ERR_COMBO2, cmd, + _(L"--entire and --groups-only are mutually exclusive")); + return STATUS_INVALID_ARGS; + } + std::unique_ptr matcher; if (opts.regex) { matcher = make_unique(cmd, pattern, opts, streams, parser); diff --git a/tests/checks/string.fish b/tests/checks/string.fish index ce1ef7f1d..8f64da1a1 100644 --- a/tests/checks/string.fish +++ b/tests/checks/string.fish @@ -730,3 +730,28 @@ string escape \x7F string pad -w 8 he \eh # CHECK: he # CHECK: {{\x1bh}} + +string match -rg '(.*)fish' catfish +# CHECK: cat +string match -rg '(.*)fish' shellfish +# CHECK: shell +# An empty match +string match -rg '(.*)fish' fish +# No match at all +string match -rg '(.*)fish' banana +# Make sure it doesn't start matching something +string match -r --groups-only '(.+)fish' fish +echo $status +# CHECK: 1 +# Multiple groups +string match -r --groups-only '(.+)fish(.*)' catfishcolor +# CHECK: cat +# CHECK: color + +# Examples specifically called out in #6056. +echo "foo bar baz" | string match -rg 'foo (bar) baz' +# CHECK: bar +echo "foo1x foo2x foo3x" | string match -arg 'foo(\d)x' +# CHECK: 1 +# CHECK: 2 +# CHECK: 3