2016-04-19 19:49:15 -07:00
|
|
|
// Implementation of the string builtin.
|
2020-03-21 01:49:44 +09:00
|
|
|
#include "config.h" // IWYU pragma: keep
|
2015-09-12 13:36:22 -07:00
|
|
|
|
2015-09-12 12:59:40 -07:00
|
|
|
#define PCRE2_CODE_UNIT_WIDTH WCHAR_T_BITS
|
|
|
|
#ifdef _WIN32
|
|
|
|
#define PCRE2_STATIC
|
|
|
|
#endif
|
2016-04-19 19:49:15 -07:00
|
|
|
#include <algorithm>
|
2019-11-18 17:11:16 -08:00
|
|
|
#include <cerrno>
|
|
|
|
#include <climits>
|
|
|
|
#include <cstdarg>
|
|
|
|
#include <cstddef>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <cstdlib>
|
2019-10-13 15:50:48 -07:00
|
|
|
#include <cwchar>
|
2017-06-11 11:49:59 -07:00
|
|
|
#include <cwctype>
|
2020-11-28 01:00:27 -06:00
|
|
|
#include <functional>
|
2016-04-19 19:49:15 -07:00
|
|
|
#include <iterator>
|
2017-02-10 18:47:02 -08:00
|
|
|
#include <memory>
|
2016-04-20 23:00:54 -07:00
|
|
|
#include <string>
|
2017-08-19 11:55:06 -05:00
|
|
|
#include <unordered_map>
|
2017-06-11 11:49:59 -07:00
|
|
|
#include <utility>
|
2016-04-20 23:00:54 -07:00
|
|
|
#include <vector>
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2015-09-12 13:36:22 -07:00
|
|
|
#include "builtin.h"
|
|
|
|
#include "common.h"
|
2020-11-06 17:24:44 -06:00
|
|
|
#include "env.h"
|
2016-04-19 19:49:15 -07:00
|
|
|
#include "fallback.h" // IWYU pragma: keep
|
2019-01-12 20:20:35 +01:00
|
|
|
#include "future_feature_flags.h"
|
2016-04-19 19:49:15 -07:00
|
|
|
#include "io.h"
|
2015-09-12 13:36:22 -07:00
|
|
|
#include "parse_util.h"
|
2020-11-06 17:24:44 -06:00
|
|
|
#include "parser.h"
|
2016-04-19 19:49:15 -07:00
|
|
|
#include "pcre2.h"
|
2021-07-01 19:36:21 +02:00
|
|
|
#include "screen.h"
|
2017-07-27 15:05:35 +02:00
|
|
|
#include "wcstringutil.h"
|
2015-09-12 13:36:22 -07:00
|
|
|
#include "wgetopt.h"
|
2015-09-12 12:59:40 -07:00
|
|
|
#include "wildcard.h"
|
2016-04-20 23:00:54 -07:00
|
|
|
#include "wutil.h" // IWYU pragma: keep
|
|
|
|
|
2017-12-18 17:26:33 +01:00
|
|
|
// How many bytes we read() at once.
|
|
|
|
// Bash uses 128 here, so we do too (see READ_CHUNK_SIZE).
|
|
|
|
// This should be about the size of a line.
|
|
|
|
#define STRING_CHUNK_SIZE 128
|
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
/// Append an error message to the error stream, prefixed with the literal "string ".
/// \param streams the I/O streams; only streams.err is written to.
/// \param fmt the wide format string, handed to append_formatv together with the variadic
///        arguments that follow it.
static void string_error(io_streams_t &streams, const wchar_t *fmt, ...) {
    // Every message produced here starts with the same prefix.
    streams.err.append(L"string ");

    va_list args;
    va_start(args, fmt);
    streams.err.append_formatv(fmt, args);
    va_end(args);
}
|
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
/// Report an option that the given subcommand does not accept.
/// Emits the BUILTIN_ERR_UNKNOWN message through string_error() and then asks
/// builtin_print_error_trailer() to append its trailer for the "string" builtin.
/// \param subcmd the subcommand name inserted into the message.
/// \param opt the offending option text inserted into the message.
static void string_unknown_option(parser_t &parser, io_streams_t &streams, const wchar_t *subcmd,
                                  const wchar_t *opt) {
    string_error(streams, BUILTIN_ERR_UNKNOWN, subcmd, opt);

    auto &err_stream = streams.err;
    builtin_print_error_trailer(parser, err_stream, L"string");
}
|
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
// We read from stdin if we are the second or later process in a pipeline.
|
|
|
|
static bool string_args_from_stdin(const io_streams_t &streams) {
|
2015-09-25 14:17:53 -07:00
|
|
|
return streams.stdin_is_directly_redirected;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
|
|
|
|
2017-12-21 12:42:57 -08:00
|
|
|
/// Fetch the next argument from a null-terminated argv array.
/// \param argidx in/out index of the next argument; advanced by one only when an argument is
///        returned.
/// \param argv the argument array, or nullptr if there is none.
/// \return the argument at *argidx, or nullptr if argv is null or the terminator was reached.
static const wchar_t *string_get_arg_argv(int *argidx, const wchar_t *const *argv) {
    if (!argv) return nullptr;

    const wchar_t *current = argv[*argidx];
    if (!current) return nullptr;  // hit the terminating null entry; leave the index alone

    ++*argidx;
    return current;
}
|
|
|
|
|
|
|
|
// A helper type for extracting arguments from either argv or stdin.
|
|
|
|
namespace {
|
|
|
|
class arg_iterator_t {
|
|
|
|
// The list of arguments passed to the string builtin.
|
|
|
|
const wchar_t *const *argv_;
|
|
|
|
// If using argv, index of the next argument to return.
|
|
|
|
int argidx_;
|
|
|
|
// If not using argv, a string to store bytes that have been read but not yet returned.
|
|
|
|
std::string buffer_;
|
2019-02-26 19:50:53 +01:00
|
|
|
// If set, when reading from a stream, split on newlines.
|
|
|
|
const bool split_;
|
2017-12-21 12:42:57 -08:00
|
|
|
// Backing storage for the next() string.
|
|
|
|
wcstring storage_;
|
|
|
|
const io_streams_t &streams_;
|
|
|
|
|
2018-05-29 21:11:50 -07:00
|
|
|
/// Reads the next argument from stdin, returning true if an argument was produced and false if
|
|
|
|
/// not. On true, the string is stored in storage_.
|
|
|
|
bool get_arg_stdin() {
|
2017-12-21 12:42:57 -08:00
|
|
|
assert(string_args_from_stdin(streams_) && "should not be reading from stdin");
|
2021-02-10 17:19:08 -08:00
|
|
|
assert(streams_.stdin_fd >= 0 && "should have a valid fd");
|
2019-02-26 19:50:53 +01:00
|
|
|
// Read in chunks from fd until buffer has a line (or the end if split_ is unset).
|
2017-12-21 12:42:57 -08:00
|
|
|
size_t pos;
|
2019-02-26 19:50:53 +01:00
|
|
|
while (!split_ || (pos = buffer_.find('\n')) == std::string::npos) {
|
2017-12-21 12:42:57 -08:00
|
|
|
char buf[STRING_CHUNK_SIZE];
|
|
|
|
long n = read_blocked(streams_.stdin_fd, buf, STRING_CHUNK_SIZE);
|
|
|
|
if (n == 0) {
|
|
|
|
// If we still have buffer contents, flush them,
|
2018-05-29 21:11:50 -07:00
|
|
|
// in case there was no trailing sep.
|
|
|
|
if (buffer_.empty()) return false;
|
2017-12-21 12:42:57 -08:00
|
|
|
storage_ = str2wcstring(buffer_);
|
|
|
|
buffer_.clear();
|
2018-05-29 21:11:50 -07:00
|
|
|
return true;
|
2017-12-21 12:42:57 -08:00
|
|
|
}
|
|
|
|
if (n == -1) {
|
|
|
|
// Some error happened. We can't do anything about it,
|
|
|
|
// so ignore it.
|
|
|
|
// (read_blocked already retries for EAGAIN and EINTR)
|
|
|
|
storage_ = str2wcstring(buffer_);
|
|
|
|
buffer_.clear();
|
2018-05-29 21:11:50 -07:00
|
|
|
return false;
|
2017-12-21 12:42:57 -08:00
|
|
|
}
|
|
|
|
buffer_.append(buf, n);
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2016-04-19 19:49:15 -07:00
|
|
|
|
2018-05-29 21:11:50 -07:00
|
|
|
// Split the buffer on the sep and return the first part.
|
2017-12-21 12:42:57 -08:00
|
|
|
storage_ = str2wcstring(buffer_, pos);
|
|
|
|
buffer_.erase(0, pos + 1);
|
2018-05-29 21:11:50 -07:00
|
|
|
return true;
|
2017-12-21 12:42:57 -08:00
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2017-12-21 12:42:57 -08:00
|
|
|
public:
|
2018-05-29 21:11:50 -07:00
|
|
|
arg_iterator_t(const wchar_t *const *argv, int argidx, const io_streams_t &streams,
|
2019-02-26 19:50:53 +01:00
|
|
|
bool split = true)
|
|
|
|
: argv_(argv), argidx_(argidx), split_(split), streams_(streams) {}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2018-01-02 16:00:38 +01:00
|
|
|
const wcstring *nextstr() {
|
|
|
|
if (string_args_from_stdin(streams_)) {
|
2019-11-18 18:34:50 -08:00
|
|
|
return get_arg_stdin() ? &storage_ : nullptr;
|
2018-01-02 16:00:38 +01:00
|
|
|
}
|
|
|
|
if (auto arg = string_get_arg_argv(&argidx_, argv_)) {
|
|
|
|
storage_ = arg;
|
|
|
|
return &storage_;
|
|
|
|
} else {
|
2019-11-18 18:34:50 -08:00
|
|
|
return nullptr;
|
2018-01-02 16:00:38 +01:00
|
|
|
}
|
|
|
|
}
|
2017-12-21 12:42:57 -08:00
|
|
|
};
|
2018-09-27 22:28:39 -04:00
|
|
|
} // namespace
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2017-06-11 11:49:59 -07:00
|
|
|
// This is used by the string subcommands to communicate with the option parser which flags are
|
|
|
|
// valid and get the result of parsing the command for flags.
|
2021-01-11 15:23:52 -08:00
|
|
|
struct options_t { //!OCLINT(too many fields)
|
2017-06-11 11:49:59 -07:00
|
|
|
bool all_valid = false;
|
2020-09-27 19:12:42 +02:00
|
|
|
bool char_to_pad_valid = false;
|
|
|
|
bool chars_to_trim_valid = false;
|
2017-06-11 11:49:59 -07:00
|
|
|
bool count_valid = false;
|
|
|
|
bool entire_valid = false;
|
|
|
|
bool filter_valid = false;
|
2019-06-11 16:05:24 +02:00
|
|
|
bool groups_only_valid = false;
|
2017-06-11 11:49:59 -07:00
|
|
|
bool ignore_case_valid = false;
|
|
|
|
bool index_valid = false;
|
|
|
|
bool invert_valid = false;
|
|
|
|
bool left_valid = false;
|
|
|
|
bool length_valid = false;
|
|
|
|
bool max_valid = false;
|
|
|
|
bool no_newline_valid = false;
|
|
|
|
bool no_quoted_valid = false;
|
|
|
|
bool quiet_valid = false;
|
|
|
|
bool regex_valid = false;
|
|
|
|
bool right_valid = false;
|
|
|
|
bool start_valid = false;
|
2020-03-22 14:53:09 +00:00
|
|
|
bool end_valid = false;
|
2017-06-20 21:55:16 -07:00
|
|
|
bool style_valid = false;
|
2018-03-29 08:12:08 -05:00
|
|
|
bool no_empty_valid = false;
|
2019-06-16 16:40:14 -07:00
|
|
|
bool no_trim_newlines_valid = false;
|
2020-03-21 01:31:23 +09:00
|
|
|
bool fields_valid = false;
|
2020-04-18 15:25:08 +09:00
|
|
|
bool allow_empty_valid = false;
|
2021-07-01 19:36:56 +02:00
|
|
|
bool visible_valid = false;
|
2020-09-27 19:12:42 +02:00
|
|
|
bool width_valid = false;
|
2017-06-11 11:49:59 -07:00
|
|
|
|
|
|
|
bool all = false;
|
|
|
|
bool entire = false;
|
|
|
|
bool filter = false;
|
2019-06-11 16:05:24 +02:00
|
|
|
bool groups_only = false;
|
2017-06-11 11:49:59 -07:00
|
|
|
bool ignore_case = false;
|
|
|
|
bool index = false;
|
|
|
|
bool invert_match = false;
|
|
|
|
bool left = false;
|
|
|
|
bool no_newline = false;
|
|
|
|
bool no_quoted = false;
|
|
|
|
bool quiet = false;
|
|
|
|
bool regex = false;
|
|
|
|
bool right = false;
|
2018-03-29 08:12:08 -05:00
|
|
|
bool no_empty = false;
|
2019-06-16 16:40:14 -07:00
|
|
|
bool no_trim_newlines = false;
|
2020-04-18 15:25:08 +09:00
|
|
|
bool allow_empty = false;
|
2021-07-01 19:36:56 +02:00
|
|
|
bool visible = false;
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2017-06-11 11:49:59 -07:00
|
|
|
long count = 0;
|
|
|
|
long length = 0;
|
|
|
|
long max = 0;
|
|
|
|
long start = 0;
|
2020-03-22 14:53:09 +00:00
|
|
|
long end = 0;
|
2021-03-09 18:32:15 +01:00
|
|
|
ssize_t width = 0;
|
2020-09-27 19:12:42 +02:00
|
|
|
|
2020-09-27 21:51:20 +02:00
|
|
|
wchar_t char_to_pad = L' ';
|
2020-06-17 23:11:03 -05:00
|
|
|
|
2020-03-21 01:31:23 +09:00
|
|
|
std::vector<int> fields;
|
|
|
|
|
2020-03-23 00:06:04 +00:00
|
|
|
const wchar_t *chars_to_trim = L" \f\n\r\t\v";
|
2019-11-18 18:34:50 -08:00
|
|
|
const wchar_t *arg1 = nullptr;
|
|
|
|
const wchar_t *arg2 = nullptr;
|
2017-06-20 21:55:16 -07:00
|
|
|
|
|
|
|
escape_string_style_t escape_style = STRING_STYLE_SCRIPT;
|
2020-04-02 16:58:05 -07:00
|
|
|
};
|
2017-06-11 11:49:59 -07:00
|
|
|
|
2021-07-01 19:36:21 +02:00
|
|
|
static size_t width_without_escapes(wcstring ins) {
|
|
|
|
ssize_t width = 0;
|
|
|
|
// TODO: this is the same as fish_wcwidth_min_0 from screen.cpp
|
|
|
|
// Make that reusable (and add a wcswidth version).
|
|
|
|
for (auto c : ins) {
|
|
|
|
auto w = fish_wcwidth(c);
|
|
|
|
if (w > 0) width += w;
|
|
|
|
}
|
|
|
|
|
2021-08-10 06:52:51 +02:00
|
|
|
// ANSI escape sequences like \e\[31m contain printable characters. Subtract their width
|
|
|
|
// because they are not rendered.
|
2021-07-01 19:36:21 +02:00
|
|
|
size_t pos = 0;
|
|
|
|
while ((pos = ins.find('\x1B', pos)) != std::string::npos) {
|
|
|
|
auto len = escape_code_length(ins.c_str() + pos);
|
|
|
|
if (len) {
|
|
|
|
auto sub = ins.substr(pos, *len);
|
|
|
|
for (auto c : sub) {
|
|
|
|
auto w = fish_wcwidth(c);
|
|
|
|
if (w > 0) width -= w;
|
|
|
|
}
|
2021-07-17 22:35:30 +02:00
|
|
|
// Move us forward behind the escape code,
|
|
|
|
// it might include a second escape!
|
|
|
|
// E.g. SGR0 ("reset") is \e\(B\e\[m in xterm.
|
|
|
|
pos += *len - 1;
|
|
|
|
} else {
|
|
|
|
pos++;
|
2021-07-01 19:36:21 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return width;
|
|
|
|
}
|
|
|
|
|
2017-06-20 21:55:16 -07:00
|
|
|
/// This handles the `--style=xxx` flag.
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_1(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-20 21:55:16 -07:00
|
|
|
const wchar_t *cmd = argv[0];
|
|
|
|
|
|
|
|
if (opts->style_valid) {
|
2019-03-12 14:06:01 -07:00
|
|
|
if (std::wcscmp(w.woptarg, L"script") == 0) {
|
2017-06-20 21:55:16 -07:00
|
|
|
opts->escape_style = STRING_STYLE_SCRIPT;
|
2019-03-12 14:06:01 -07:00
|
|
|
} else if (std::wcscmp(w.woptarg, L"url") == 0) {
|
2017-06-20 21:55:16 -07:00
|
|
|
opts->escape_style = STRING_STYLE_URL;
|
2019-03-12 14:06:01 -07:00
|
|
|
} else if (std::wcscmp(w.woptarg, L"var") == 0) {
|
2017-06-20 21:55:16 -07:00
|
|
|
opts->escape_style = STRING_STYLE_VAR;
|
2019-03-12 14:06:01 -07:00
|
|
|
} else if (std::wcscmp(w.woptarg, L"regex") == 0) {
|
2018-11-16 20:21:05 -06:00
|
|
|
opts->escape_style = STRING_STYLE_REGEX;
|
2017-06-23 23:19:09 -07:00
|
|
|
} else {
|
2017-06-20 21:55:16 -07:00
|
|
|
string_error(streams, _(L"%ls: Invalid escape style '%ls'\n"), cmd, w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
string_unknown_option(parser, streams, cmd, argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_N(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->no_newline_valid) {
|
|
|
|
opts->no_newline = true;
|
|
|
|
return STATUS_CMD_OK;
|
2019-06-16 16:40:14 -07:00
|
|
|
} else if (opts->no_trim_newlines_valid) {
|
|
|
|
opts->no_trim_newlines = true;
|
|
|
|
return STATUS_CMD_OK;
|
2017-06-11 11:49:59 -07:00
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_a(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->all_valid) {
|
|
|
|
opts->all = true;
|
|
|
|
return STATUS_CMD_OK;
|
2020-04-18 15:25:08 +09:00
|
|
|
} else if (opts->allow_empty_valid) {
|
|
|
|
opts->allow_empty = true;
|
|
|
|
return STATUS_CMD_OK;
|
2017-06-11 11:49:59 -07:00
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_c(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2020-09-27 19:12:42 +02:00
|
|
|
if (opts->chars_to_trim_valid) {
|
2017-06-11 11:49:59 -07:00
|
|
|
opts->chars_to_trim = w.woptarg;
|
|
|
|
return STATUS_CMD_OK;
|
2020-09-27 19:12:42 +02:00
|
|
|
} else if (opts->char_to_pad_valid) {
|
|
|
|
if (wcslen(w.woptarg) != 1) {
|
2020-09-27 21:51:20 +02:00
|
|
|
string_error(streams, _(L"%ls: Padding should be a character '%ls'\n"), argv[0],
|
|
|
|
w.woptarg);
|
2020-09-27 19:12:42 +02:00
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
opts->char_to_pad = w.woptarg[0];
|
|
|
|
return STATUS_CMD_OK;
|
2017-06-11 11:49:59 -07:00
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_e(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2020-03-22 14:53:09 +00:00
|
|
|
if (opts->end_valid) {
|
|
|
|
opts->end = fish_wcstol(w.woptarg);
|
|
|
|
if (opts->end == 0 || opts->end == LONG_MIN || errno == ERANGE) {
|
|
|
|
string_error(streams, _(L"%ls: Invalid end value '%ls'\n"), argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
} else if (errno) {
|
|
|
|
string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
} else if (opts->entire_valid) {
|
2017-06-11 11:49:59 -07:00
|
|
|
opts->entire = true;
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_f(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->filter_valid) {
|
|
|
|
opts->filter = true;
|
|
|
|
return STATUS_CMD_OK;
|
2020-03-21 01:31:23 +09:00
|
|
|
} else if (opts->fields_valid) {
|
|
|
|
for (const wcstring &s : split_string(w.woptarg, L',')) {
|
2020-03-22 01:30:00 +09:00
|
|
|
wcstring_list_t range = split_string(s, L'-');
|
|
|
|
if (range.size() == 2) {
|
2020-04-06 17:53:33 +03:00
|
|
|
int begin = fish_wcstoi(range.at(0).c_str());
|
2020-06-07 17:22:36 -05:00
|
|
|
if (begin <= 0 || errno == ERANGE) {
|
2020-03-22 01:30:00 +09:00
|
|
|
string_error(streams, _(L"%ls: Invalid range value for field '%ls'\n"), argv[0],
|
|
|
|
w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
} else if (errno) {
|
|
|
|
string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
2020-04-06 17:53:33 +03:00
|
|
|
int end = fish_wcstoi(range.at(1).c_str());
|
2020-06-07 17:22:36 -05:00
|
|
|
if (end <= 0 || errno == ERANGE) {
|
2020-03-22 01:30:00 +09:00
|
|
|
string_error(streams, _(L"%ls: Invalid range value for field '%ls'\n"), argv[0],
|
|
|
|
w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
} else if (errno) {
|
|
|
|
string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
if (begin <= end) {
|
|
|
|
for (int i = begin; i <= end; i++) {
|
|
|
|
opts->fields.push_back(i);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for (int i = begin; i >= end; i--) {
|
|
|
|
opts->fields.push_back(i);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
2020-04-06 17:53:33 +03:00
|
|
|
int field = fish_wcstoi(s.c_str());
|
2020-06-07 17:22:36 -05:00
|
|
|
if (field <= 0 || errno == ERANGE) {
|
2020-03-22 01:30:00 +09:00
|
|
|
string_error(streams, _(L"%ls: Invalid fields value '%ls'\n"), argv[0],
|
|
|
|
w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
} else if (errno) {
|
|
|
|
string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
opts->fields.push_back(field);
|
2020-03-21 01:31:23 +09:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return STATUS_CMD_OK;
|
2017-06-11 11:49:59 -07:00
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2019-06-11 16:05:24 +02:00
|
|
|
static int handle_flag_g(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
|
|
|
const wgetopter_t &w, options_t *opts) {
|
|
|
|
if (opts->groups_only_valid) {
|
|
|
|
opts->groups_only = true;
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_i(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->ignore_case_valid) {
|
|
|
|
opts->ignore_case = true;
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
} else if (opts->index_valid) {
|
|
|
|
opts->index = true;
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_l(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->length_valid) {
|
|
|
|
opts->length = fish_wcstol(w.woptarg);
|
|
|
|
if (opts->length < 0 || opts->length == LONG_MIN || errno == ERANGE) {
|
|
|
|
string_error(streams, _(L"%ls: Invalid length value '%ls'\n"), argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
} else if (errno) {
|
|
|
|
string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2017-06-11 11:49:59 -07:00
|
|
|
return STATUS_CMD_OK;
|
|
|
|
} else if (opts->left_valid) {
|
|
|
|
opts->left = true;
|
|
|
|
return STATUS_CMD_OK;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2017-06-11 11:49:59 -07:00
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_m(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->max_valid) {
|
|
|
|
opts->max = fish_wcstol(w.woptarg);
|
|
|
|
if (opts->max < 0 || errno == ERANGE) {
|
|
|
|
string_error(streams, _(L"%ls: Invalid max value '%ls'\n"), argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
} else if (errno) {
|
|
|
|
string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
return STATUS_CMD_OK;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2017-06-11 11:49:59 -07:00
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_n(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->count_valid) {
|
|
|
|
opts->count = fish_wcstol(w.woptarg);
|
|
|
|
if (opts->count < 0 || errno == ERANGE) {
|
|
|
|
string_error(streams, _(L"%ls: Invalid count value '%ls'\n"), argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
} else if (errno) {
|
|
|
|
string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
} else if (opts->index_valid) {
|
|
|
|
opts->index = true;
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
} else if (opts->no_quoted_valid) {
|
|
|
|
opts->no_quoted = true;
|
|
|
|
return STATUS_CMD_OK;
|
2018-03-29 08:12:08 -05:00
|
|
|
} else if (opts->no_empty_valid) {
|
|
|
|
opts->no_empty = true;
|
|
|
|
return STATUS_CMD_OK;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2017-06-11 11:49:59 -07:00
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_q(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->quiet_valid) {
|
|
|
|
opts->quiet = true;
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_r(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->regex_valid) {
|
|
|
|
opts->regex = true;
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
} else if (opts->right_valid) {
|
|
|
|
opts->right = true;
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
2016-10-29 17:25:48 -07:00
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_s(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->start_valid) {
|
|
|
|
opts->start = fish_wcstol(w.woptarg);
|
|
|
|
if (opts->start == 0 || opts->start == LONG_MIN || errno == ERANGE) {
|
|
|
|
string_error(streams, _(L"%ls: Invalid start value '%ls'\n"), argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
} else if (errno) {
|
|
|
|
string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_v(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-03-14 15:06:14 -07:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->invert_valid) {
|
|
|
|
opts->invert_match = true;
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2021-07-01 19:36:56 +02:00
|
|
|
static int handle_flag_V(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
|
|
|
const wgetopter_t &w, options_t *opts) {
|
|
|
|
if (opts->visible_valid) {
|
|
|
|
opts->visible = true;
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
static int handle_flag_w(const wchar_t **argv, parser_t &parser, io_streams_t &streams,
|
2020-09-27 19:12:42 +02:00
|
|
|
const wgetopter_t &w, options_t *opts) {
|
|
|
|
long width = 0;
|
|
|
|
if (opts->width_valid) {
|
|
|
|
width = fish_wcstol(w.woptarg);
|
|
|
|
if (width < 0) {
|
|
|
|
string_error(streams, _(L"%ls: Invalid width value '%ls'\n"), argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
} else if (errno) {
|
|
|
|
string_error(streams, BUILTIN_ERR_NOT_NUMBER, argv[0], w.woptarg);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
opts->width = static_cast<size_t>(width);
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
|
|
|
string_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
|
2017-06-11 11:49:59 -07:00
|
|
|
/// This constructs the wgetopt() short options string based on which arguments are valid for the
|
|
|
|
/// subcommand. We have to do this because many short flags have multiple meanings and may or may
|
|
|
|
/// not require an argument depending on the meaning.
|
|
|
|
static wcstring construct_short_opts(options_t *opts) { //!OCLINT(high npath complexity)
|
|
|
|
wcstring short_opts(L":");
|
|
|
|
if (opts->all_valid) short_opts.append(L"a");
|
2020-09-27 19:12:42 +02:00
|
|
|
if (opts->char_to_pad_valid) short_opts.append(L"c:");
|
|
|
|
if (opts->chars_to_trim_valid) short_opts.append(L"c:");
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->count_valid) short_opts.append(L"n:");
|
|
|
|
if (opts->entire_valid) short_opts.append(L"e");
|
|
|
|
if (opts->filter_valid) short_opts.append(L"f");
|
2019-06-11 16:05:24 +02:00
|
|
|
if (opts->groups_only_valid) short_opts.append(L"g");
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->ignore_case_valid) short_opts.append(L"i");
|
|
|
|
if (opts->index_valid) short_opts.append(L"n");
|
|
|
|
if (opts->invert_valid) short_opts.append(L"v");
|
2021-07-01 19:36:56 +02:00
|
|
|
if (opts->visible_valid) short_opts.append(L"V");
|
2017-06-11 11:49:59 -07:00
|
|
|
if (opts->left_valid) short_opts.append(L"l");
|
|
|
|
if (opts->length_valid) short_opts.append(L"l:");
|
|
|
|
if (opts->max_valid) short_opts.append(L"m:");
|
|
|
|
if (opts->no_newline_valid) short_opts.append(L"N");
|
|
|
|
if (opts->no_quoted_valid) short_opts.append(L"n");
|
|
|
|
if (opts->quiet_valid) short_opts.append(L"q");
|
|
|
|
if (opts->regex_valid) short_opts.append(L"r");
|
|
|
|
if (opts->right_valid) short_opts.append(L"r");
|
|
|
|
if (opts->start_valid) short_opts.append(L"s:");
|
2020-03-22 14:53:09 +00:00
|
|
|
if (opts->end_valid) short_opts.append(L"e:");
|
2018-03-29 08:12:08 -05:00
|
|
|
if (opts->no_empty_valid) short_opts.append(L"n");
|
2019-06-16 16:40:14 -07:00
|
|
|
if (opts->no_trim_newlines_valid) short_opts.append(L"N");
|
2020-03-21 01:31:23 +09:00
|
|
|
if (opts->fields_valid) short_opts.append(L"f:");
|
2020-04-18 15:25:08 +09:00
|
|
|
if (opts->allow_empty_valid) short_opts.append(L"a");
|
2020-09-27 19:12:42 +02:00
|
|
|
if (opts->width_valid) short_opts.append(L"w:");
|
2017-06-11 11:49:59 -07:00
|
|
|
return short_opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Note that several long flags share the same short flag. That is okay. The caller is expected
|
|
|
|
// to indicate that a max of one of the long flags sharing a short flag is valid.
|
2019-06-15 22:30:31 -07:00
|
|
|
// Remember: adjust share/completions/string.fish when `string` options change
|
2019-11-18 18:34:50 -08:00
|
|
|
static const struct woption long_options[] = {{L"all", no_argument, nullptr, 'a'},
|
|
|
|
{L"chars", required_argument, nullptr, 'c'},
|
|
|
|
{L"count", required_argument, nullptr, 'n'},
|
|
|
|
{L"entire", no_argument, nullptr, 'e'},
|
2020-03-22 14:53:09 +00:00
|
|
|
{L"end", required_argument, nullptr, 'e'},
|
2019-11-18 18:34:50 -08:00
|
|
|
{L"filter", no_argument, nullptr, 'f'},
|
2019-06-11 16:05:24 +02:00
|
|
|
{L"groups-only", no_argument, nullptr, 'g'},
|
2019-11-18 18:34:50 -08:00
|
|
|
{L"ignore-case", no_argument, nullptr, 'i'},
|
|
|
|
{L"index", no_argument, nullptr, 'n'},
|
|
|
|
{L"invert", no_argument, nullptr, 'v'},
|
2021-07-01 19:36:56 +02:00
|
|
|
{L"visible", no_argument, nullptr, 'V'},
|
2019-11-18 18:34:50 -08:00
|
|
|
{L"left", no_argument, nullptr, 'l'},
|
|
|
|
{L"length", required_argument, nullptr, 'l'},
|
|
|
|
{L"max", required_argument, nullptr, 'm'},
|
|
|
|
{L"no-empty", no_argument, nullptr, 'n'},
|
|
|
|
{L"no-newline", no_argument, nullptr, 'N'},
|
|
|
|
{L"no-quoted", no_argument, nullptr, 'n'},
|
|
|
|
{L"quiet", no_argument, nullptr, 'q'},
|
|
|
|
{L"regex", no_argument, nullptr, 'r'},
|
|
|
|
{L"right", no_argument, nullptr, 'r'},
|
|
|
|
{L"start", required_argument, nullptr, 's'},
|
|
|
|
{L"style", required_argument, nullptr, 1},
|
|
|
|
{L"no-trim-newlines", no_argument, nullptr, 'N'},
|
2020-03-21 01:31:23 +09:00
|
|
|
{L"fields", required_argument, nullptr, 'f'},
|
2020-04-18 15:25:08 +09:00
|
|
|
{L"allow-empty", no_argument, nullptr, 'a'},
|
2020-09-27 19:12:42 +02:00
|
|
|
{L"width", required_argument, nullptr, 'w'},
|
2019-11-18 18:34:50 -08:00
|
|
|
{nullptr, 0, nullptr, 0}};
|
2017-06-11 11:49:59 -07:00
|
|
|
|
2018-10-01 09:59:22 -07:00
|
|
|
static const std::unordered_map<char, decltype(*handle_flag_N)> flag_to_function = {
|
2017-06-11 11:49:59 -07:00
|
|
|
{'N', handle_flag_N}, {'a', handle_flag_a}, {'c', handle_flag_c}, {'e', handle_flag_e},
|
2021-08-10 06:52:51 +02:00
|
|
|
{'f', handle_flag_f}, {'g', handle_flag_g}, {'i', handle_flag_i}, {'l', handle_flag_l},
|
|
|
|
{'m', handle_flag_m}, {'n', handle_flag_n}, {'q', handle_flag_q}, {'r', handle_flag_r},
|
|
|
|
{'s', handle_flag_s}, {'V', handle_flag_V}, {'v', handle_flag_v}, {'w', handle_flag_w},
|
|
|
|
{1, handle_flag_1}};
|
2016-10-29 17:25:48 -07:00
|
|
|
|
2017-06-11 11:49:59 -07:00
|
|
|
/// Parse the arguments for flags recognized by a specific string subcommand.
|
2021-02-13 18:41:09 -08:00
|
|
|
static int parse_opts(options_t *opts, int *optind, int n_req_args, int argc, const wchar_t **argv,
|
2017-06-11 11:49:59 -07:00
|
|
|
parser_t &parser, io_streams_t &streams) {
|
|
|
|
const wchar_t *cmd = argv[0];
|
|
|
|
wcstring short_opts = construct_short_opts(opts);
|
|
|
|
const wchar_t *short_options = short_opts.c_str();
|
2017-06-08 20:56:24 -07:00
|
|
|
int opt;
|
|
|
|
wgetopter_t w;
|
2019-11-18 18:34:50 -08:00
|
|
|
while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr)) != -1) {
|
2017-06-11 11:49:59 -07:00
|
|
|
auto fn = flag_to_function.find(opt);
|
|
|
|
if (fn != flag_to_function.end()) {
|
|
|
|
int retval = fn->second(argv, parser, streams, w, opts);
|
|
|
|
if (retval != STATUS_CMD_OK) return retval;
|
|
|
|
} else if (opt == ':') {
|
2020-01-08 17:33:36 +01:00
|
|
|
streams.err.append(L"string "); // clone of string_error
|
2020-01-16 15:14:21 -08:00
|
|
|
builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1],
|
|
|
|
false /* print_hints */);
|
2017-06-11 11:49:59 -07:00
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
} else if (opt == '?') {
|
|
|
|
string_unknown_option(parser, streams, cmd, argv[w.woptind - 1]);
|
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
} else {
|
|
|
|
DIE("unexpected retval from wgetopt_long");
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-11 11:49:59 -07:00
|
|
|
*optind = w.woptind;
|
|
|
|
|
|
|
|
// If the caller requires one or two mandatory args deal with that here.
|
|
|
|
if (n_req_args) {
|
|
|
|
opts->arg1 = string_get_arg_argv(optind, argv);
|
2019-09-17 22:00:08 -07:00
|
|
|
if (!opts->arg1 && n_req_args == 1) {
|
|
|
|
string_error(streams, BUILTIN_ERR_ARG_COUNT0, cmd);
|
2017-06-11 11:49:59 -07:00
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (n_req_args > 1) {
|
|
|
|
opts->arg2 = string_get_arg_argv(optind, argv);
|
|
|
|
if (!opts->arg2) {
|
2019-09-17 22:00:08 -07:00
|
|
|
string_error(streams, BUILTIN_ERR_MIN_ARG_COUNT1, cmd, n_req_args,
|
|
|
|
!!opts->arg2 + !!opts->arg1);
|
2017-06-11 11:49:59 -07:00
|
|
|
return STATUS_INVALID_ARGS;
|
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
|
|
|
|
2017-06-11 11:49:59 -07:00
|
|
|
// At this point we should not have optional args and be reading args from stdin.
|
|
|
|
if (string_args_from_stdin(streams) && argc > *optind) {
|
|
|
|
string_error(streams, BUILTIN_ERR_TOO_MANY_ARGUMENTS, cmd);
|
2017-05-02 00:39:50 -07:00
|
|
|
return STATUS_INVALID_ARGS;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
|
|
|
|
2017-06-11 11:49:59 -07:00
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string escape`: write the escaped form of every argument, one per line.
///
/// \return STATUS_CMD_OK if at least one argument was escaped, STATUS_CMD_ERROR if there were no
/// arguments, or the error status from option parsing.
static int string_escape(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    options_t opts;
    opts.no_quoted_valid = true;
    opts.style_valid = true;
    int optind;
    int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
    if (retval != STATUS_CMD_OK) return retval;

    // Currently, only the script style supports options.
    // Ignore them for other styles for now.
    escape_flags_t flags = 0;
    if (opts.escape_style == STRING_STYLE_SCRIPT) {
        flags = ESCAPE_ALL;
        if (opts.no_quoted) flags |= ESCAPE_NO_QUOTED;
    }

    int nesc = 0;
    arg_iterator_t aiter(argv, optind, streams);
    while (const wcstring *arg = aiter.nextstr()) {
        streams.out.append(escape_string(*arg, flags, opts.escape_style));
        streams.out.append(L'\n');
        nesc++;
    }

    return nesc > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string unescape`: write the unescaped form of every argument that can be
/// unescaped, one per line. Arguments for which unescape_string() fails are silently skipped.
///
/// \return STATUS_CMD_OK if at least one argument was unescaped, STATUS_CMD_ERROR if none was,
/// or the error status from option parsing.
static int string_unescape(parser_t &parser, io_streams_t &streams, int argc,
                           const wchar_t **argv) {
    options_t opts;
    opts.no_quoted_valid = true;
    opts.style_valid = true;
    int optind;
    int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
    if (retval != STATUS_CMD_OK) return retval;

    int nesc = 0;
    unescape_flags_t flags = 0;

    arg_iterator_t aiter(argv, optind, streams);
    while (const wcstring *arg = aiter.nextstr()) {
        wcstring result;
        if (unescape_string(*arg, &result, flags, opts.escape_style)) {
            streams.out.append(result);
            streams.out.append(L'\n');
            nesc++;
        }
    }

    return nesc > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Shared implementation of `string join` and `string join0`.
///
/// For `join` the separator is the first positional argument and the output ends with a newline;
/// for `join0` (\p is_join0 true) both the separator and the terminator are a NUL character.
///
/// \return STATUS_CMD_OK if more than one argument was seen, else STATUS_CMD_ERROR, or the error
/// status from option parsing.
static int string_join_maybe0(parser_t &parser, io_streams_t &streams, int argc,
                              const wchar_t **argv, bool is_join0) {
    options_t opts;
    opts.quiet_valid = true;
    int optind;
    const int status = parse_opts(&opts, &optind, is_join0 ? 0 : 1, argc, argv, parser, streams);
    if (status != STATUS_CMD_OK) return status;

    const wcstring sep = is_join0 ? wcstring(1, L'\0') : wcstring(opts.arg1);

    int seen = 0;
    arg_iterator_t aiter(argv, optind, streams);
    while (const wcstring *arg = aiter.nextstr()) {
        if (opts.quiet) {
            // Nothing is printed in quiet mode, so stop once the outcome is known.
            if (seen > 1) return STATUS_CMD_OK;
        } else {
            if (seen > 0) streams.out.append(sep);
            streams.out.append(*arg);
        }
        ++seen;
    }

    const bool wrote_output = !opts.quiet && seen > 0;
    if (wrote_output) {
        streams.out.push_back(is_join0 ? L'\0' : L'\n');
    }

    return seen > 1 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string join`: forwards to string_join_maybe0() in non-NUL mode.
static int string_join(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    constexpr bool is_join0 = false;
    return string_join_maybe0(parser, streams, argc, argv, is_join0);
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string join0`: forwards to string_join_maybe0() in NUL mode.
static int string_join0(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    constexpr bool is_join0 = true;
    return string_join_maybe0(parser, streams, argc, argv, is_join0);
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string length`: print the length of every argument, one number per line.
///
/// With --visible the width as computed by width_without_escapes() is reported instead of the
/// character count, and it is reported separately for every line of the argument.
///
/// \return STATUS_CMD_OK if at least one reported length was non-zero, else STATUS_CMD_ERROR, or
/// the error status from option parsing.
static int string_length(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    options_t opts;
    opts.quiet_valid = true;
    opts.visible_valid = true;
    int optind;
    const int status = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
    if (status != STATUS_CMD_OK) return status;

    int nonempty_count = 0;

    // Record one measured length. Returns true when --quiet is in effect and a non-empty string
    // has been seen, i.e. when the caller may stop and report success right away.
    auto tally = [&](size_t len) -> bool {
        if (len > 0) ++nonempty_count;
        if (opts.quiet) return nonempty_count > 0;
        streams.out.append(to_string(len));
        streams.out.append(L'\n');
        return false;
    };

    arg_iterator_t aiter(argv, optind, streams);
    while (const wcstring *arg = aiter.nextstr()) {
        if (!opts.visible) {
            if (tally(arg->length())) return STATUS_CMD_OK;
            continue;
        }

        // Visible length only makes sense line-wise: a line feed flushes the current tally and
        // starts a new one, so every line is reported on its own.
        for (auto &line : split_string(*arg, L'\n')) {
            // A carriage return moves back to the start of the line, so the widest segment
            // between carriage returns determines the overall width: "abcdef\r123" displays as
            // "123def", which is as wide as "abcdef".
            size_t widest = 0;
            for (auto &segment : split_string(line, L'\r')) {
                const size_t width = width_without_escapes(segment);
                widest = std::max(widest, width);
            }
            if (tally(widest)) return STATUS_CMD_OK;
        }
    }

    return nonempty_count > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
}
|
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
class string_matcher_t {
|
|
|
|
protected:
|
2021-04-20 11:28:34 -07:00
|
|
|
const options_t opts;
|
2015-09-21 11:24:49 -07:00
|
|
|
io_streams_t &streams;
|
2021-04-20 11:28:34 -07:00
|
|
|
int total_matched{0};
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
public:
|
2018-02-18 18:39:03 -08:00
|
|
|
string_matcher_t(options_t opts_, io_streams_t &streams_)
|
2021-04-20 11:28:34 -07:00
|
|
|
: opts(std::move(opts_)), streams(streams_) {}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2018-02-18 18:44:58 -08:00
|
|
|
virtual ~string_matcher_t() = default;
|
2018-01-23 19:53:24 +01:00
|
|
|
virtual bool report_matches(const wcstring &arg) = 0;
|
2018-08-09 19:46:11 -04:00
|
|
|
int match_count() const { return total_matched; }
|
2021-04-20 11:28:34 -07:00
|
|
|
|
|
|
|
virtual bool is_valid() const = 0;
|
2021-04-20 15:15:13 -07:00
|
|
|
virtual void clear_capture_vars() {}
|
2015-09-12 12:59:40 -07:00
|
|
|
};
|
|
|
|
|
2021-04-18 16:50:28 -07:00
|
|
|
class wildcard_matcher_t final : public string_matcher_t {
|
2016-04-19 19:49:15 -07:00
|
|
|
private:
|
2015-09-12 12:59:40 -07:00
|
|
|
wcstring wcpattern;
|
2016-04-19 19:49:15 -07:00
|
|
|
|
|
|
|
public:
|
2018-01-23 19:53:24 +01:00
|
|
|
wildcard_matcher_t(const wchar_t * /*argv0*/, const wcstring &pattern, const options_t &opts,
|
2017-06-11 11:49:59 -07:00
|
|
|
io_streams_t &streams)
|
2016-04-19 19:49:15 -07:00
|
|
|
: string_matcher_t(opts, streams), wcpattern(parse_util_unescape_wildcards(pattern)) {
|
|
|
|
if (opts.ignore_case) {
|
2019-09-22 15:33:08 -07:00
|
|
|
wcpattern = wcstolower(std::move(wcpattern));
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2018-09-16 13:03:15 +02:00
|
|
|
if (opts.entire) {
|
2019-01-04 08:45:53 +01:00
|
|
|
if (!wcpattern.empty()) {
|
|
|
|
if (wcpattern.front() != ANY_STRING) wcpattern.insert(0, 1, ANY_STRING);
|
|
|
|
if (wcpattern.back() != ANY_STRING) wcpattern.push_back(ANY_STRING);
|
|
|
|
} else {
|
|
|
|
// If the pattern is empty, this becomes one ANY_STRING that matches everything.
|
|
|
|
wcpattern.push_back(ANY_STRING);
|
|
|
|
}
|
2017-04-24 21:05:51 -07:00
|
|
|
}
|
2017-06-16 21:00:24 -07:00
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2018-02-18 18:50:35 -08:00
|
|
|
~wildcard_matcher_t() override = default;
|
2016-04-08 10:18:58 +08:00
|
|
|
|
2018-01-23 19:53:24 +01:00
|
|
|
bool report_matches(const wcstring &arg) override {
|
2017-06-16 21:00:24 -07:00
|
|
|
// Note: --all is a no-op for glob matching since the pattern is always matched
|
|
|
|
// against the entire argument.
|
|
|
|
bool match;
|
2016-04-08 10:18:58 +08:00
|
|
|
|
2017-06-16 21:00:24 -07:00
|
|
|
if (opts.ignore_case) {
|
2019-09-22 15:33:08 -07:00
|
|
|
match = wildcard_match(wcstolower(arg), wcpattern, false);
|
2017-06-16 21:00:24 -07:00
|
|
|
} else {
|
|
|
|
match = wildcard_match(arg, wcpattern, false);
|
|
|
|
}
|
|
|
|
if (match ^ opts.invert_match) {
|
|
|
|
total_matched++;
|
|
|
|
|
|
|
|
if (!opts.quiet) {
|
|
|
|
if (opts.index) {
|
2018-01-23 19:53:24 +01:00
|
|
|
streams.out.append_format(L"1 %lu\n", arg.length());
|
2016-04-19 19:49:15 -07:00
|
|
|
} else {
|
2017-06-16 21:00:24 -07:00
|
|
|
streams.out.append(arg);
|
|
|
|
streams.out.append(L'\n');
|
2017-06-11 11:49:59 -07:00
|
|
|
}
|
2017-06-16 21:00:24 -07:00
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2017-06-16 21:00:24 -07:00
|
|
|
return true;
|
|
|
|
}
|
2021-04-20 11:28:34 -07:00
|
|
|
|
|
|
|
bool is_valid() const override { return true; }
|
2015-09-12 12:59:40 -07:00
|
|
|
};
|
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
/// \return the PCRE2 error message for \p err_code, or an empty string if PCRE2 does not write
/// a message for that code.
static wcstring pcre2_strerror(int err_code) {
    // Zero-initialize the buffer so that it still holds a valid (empty) string if
    // pcre2_get_error_message() returns without writing to it.
    wchar_t buf[128] = {};
    pcre2_get_error_message(err_code, reinterpret_cast<PCRE2_UCHAR *>(buf),
                            sizeof(buf) / sizeof(buf[0]));
    return buf;
}
|
|
|
|
|
2021-07-22 10:43:25 -07:00
|
|
|
struct compiled_regex_t : noncopyable_t {
|
2021-04-20 11:28:34 -07:00
|
|
|
pcre2_code *code{nullptr};
|
|
|
|
pcre2_match_data *match{nullptr};
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2021-04-20 15:15:13 -07:00
|
|
|
// The list of named capture groups.
|
|
|
|
wcstring_list_t capture_group_names;
|
|
|
|
|
2018-01-23 19:53:24 +01:00
|
|
|
compiled_regex_t(const wchar_t *argv0, const wcstring &pattern, bool ignore_case,
|
2021-04-20 11:28:34 -07:00
|
|
|
io_streams_t &streams) {
|
2016-04-19 19:49:15 -07:00
|
|
|
// Disable some sequences that can lead to security problems.
|
2015-09-12 12:59:40 -07:00
|
|
|
uint32_t options = PCRE2_NEVER_UTF;
|
|
|
|
#if PCRE2_CODE_UNIT_WIDTH < 32
|
|
|
|
options |= PCRE2_NEVER_BACKSLASH_C;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
int err_code = 0;
|
|
|
|
PCRE2_SIZE err_offset = 0;
|
|
|
|
|
2019-11-18 18:34:50 -08:00
|
|
|
code = pcre2_compile(PCRE2_SPTR(pattern.c_str()), pattern.length(),
|
|
|
|
options | (ignore_case ? PCRE2_CASELESS : 0), &err_code, &err_offset,
|
|
|
|
nullptr);
|
|
|
|
if (code == nullptr) {
|
2016-04-19 19:49:15 -07:00
|
|
|
string_error(streams, _(L"%ls: Regular expression compile error: %ls\n"), argv0,
|
|
|
|
pcre2_strerror(err_code).c_str());
|
2018-01-23 19:53:24 +01:00
|
|
|
string_error(streams, L"%ls: %ls\n", argv0, pattern.c_str());
|
2015-09-21 11:24:49 -07:00
|
|
|
string_error(streams, L"%ls: %*ls\n", argv0, err_offset, L"^");
|
2015-09-12 12:59:40 -07:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-04-20 15:15:13 -07:00
|
|
|
this->capture_group_names = get_capture_group_names(code);
|
|
|
|
if (!validate_capture_group_names(streams)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-11-18 18:34:50 -08:00
|
|
|
match = pcre2_match_data_create_from_pattern(code, nullptr);
|
2017-02-13 20:37:27 -08:00
|
|
|
assert(match);
|
2021-04-20 15:15:13 -07:00
|
|
|
this->valid_ = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \return the list of capture group names from \p code.
|
|
|
|
static wcstring_list_t get_capture_group_names(const pcre2_code *code) {
|
|
|
|
PCRE2_SPTR name_table;
|
|
|
|
uint32_t name_entry_size;
|
|
|
|
uint32_t name_count;
|
|
|
|
|
|
|
|
pcre2_pattern_info(code, PCRE2_INFO_NAMETABLE, &name_table);
|
|
|
|
pcre2_pattern_info(code, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
|
|
|
|
pcre2_pattern_info(code, PCRE2_INFO_NAMECOUNT, &name_count);
|
|
|
|
|
|
|
|
struct name_table_entry_t {
|
|
|
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
|
|
|
uint8_t match_index_msb;
|
|
|
|
uint8_t match_index_lsb;
|
|
|
|
#if CHAR_BIT == PCRE2_CODE_UNIT_WIDTH
|
|
|
|
char name[];
|
|
|
|
#else
|
|
|
|
char8_t name[];
|
|
|
|
#endif
|
|
|
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
|
|
|
uint16_t match_index;
|
|
|
|
#if WCHAR_T_BITS == PCRE2_CODE_UNIT_WIDTH
|
|
|
|
wchar_t name[];
|
|
|
|
#else
|
|
|
|
char16_t name[];
|
|
|
|
#endif
|
|
|
|
#else
|
|
|
|
uint32_t match_index;
|
|
|
|
#if WCHAR_T_BITS == PCRE2_CODE_UNIT_WIDTH
|
|
|
|
wchar_t name[];
|
|
|
|
#else
|
|
|
|
char32_t name[];
|
|
|
|
#endif // WCHAR_T_BITS
|
|
|
|
#endif // PCRE2_CODE_UNIT_WIDTH
|
|
|
|
};
|
|
|
|
|
|
|
|
const auto *names = reinterpret_cast<const name_table_entry_t *>(name_table);
|
|
|
|
wcstring_list_t result;
|
|
|
|
result.reserve(name_count);
|
|
|
|
for (uint32_t i = 0; i < name_count; ++i) {
|
|
|
|
const auto &name_entry = names[i * name_entry_size];
|
|
|
|
result.emplace_back(name_entry.name);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if our capture group names are valid. If any are invalid then report an error to \p
|
|
|
|
/// streams. \return true if all names are valid.
|
|
|
|
bool validate_capture_group_names(io_streams_t &streams) {
|
|
|
|
for (const wcstring &name : this->capture_group_names) {
|
|
|
|
if (env_var_t::flags_for(name.c_str()) & env_var_t::flag_read_only) {
|
|
|
|
// Modification of read-only variables is not allowed
|
|
|
|
streams.err.append_format(
|
|
|
|
L"Modification of read-only variable \"%ls\" is not allowed\n", name.c_str());
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
~compiled_regex_t() {
|
2020-06-01 06:02:45 +08:00
|
|
|
pcre2_match_data_free(match);
|
|
|
|
pcre2_code_free(code);
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2021-04-20 11:28:34 -07:00
|
|
|
|
2021-04-20 15:15:13 -07:00
|
|
|
bool is_valid() const { return this->valid_; }
|
2021-04-20 11:28:34 -07:00
|
|
|
|
2021-04-20 15:15:13 -07:00
|
|
|
private:
|
|
|
|
bool valid_{false};
|
2015-09-12 12:59:40 -07:00
|
|
|
};
|
|
|
|
|
2021-04-18 16:50:28 -07:00
|
|
|
class pcre2_matcher_t final : public string_matcher_t {
|
2015-09-12 12:59:40 -07:00
|
|
|
const wchar_t *argv0;
|
|
|
|
compiled_regex_t regex;
|
2020-11-06 17:24:44 -06:00
|
|
|
parser_t &parser;
|
2020-12-04 18:45:08 +01:00
|
|
|
bool imported_vars = false;
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2020-11-09 22:30:32 -06:00
|
|
|
enum class match_result_t {
|
|
|
|
pcre2_error = -1,
|
|
|
|
no_match = 0,
|
|
|
|
match = 1,
|
|
|
|
};
|
|
|
|
|
|
|
|
match_result_t report_match(const wcstring &arg, int pcre2_rc) {
|
2016-04-19 19:49:15 -07:00
|
|
|
if (pcre2_rc == PCRE2_ERROR_NOMATCH) {
|
|
|
|
if (opts.invert_match && !opts.quiet) {
|
2016-05-31 23:14:03 +02:00
|
|
|
if (opts.index) {
|
2018-01-23 19:53:24 +01:00
|
|
|
streams.out.append_format(L"1 %lu\n", arg.length());
|
2016-05-31 23:14:03 +02:00
|
|
|
} else {
|
|
|
|
streams.out.append(arg);
|
|
|
|
streams.out.push_back(L'\n');
|
|
|
|
}
|
2016-04-08 10:18:58 +08:00
|
|
|
}
|
|
|
|
|
2020-11-09 22:30:32 -06:00
|
|
|
return opts.invert_match ? match_result_t::match : match_result_t::no_match;
|
2016-04-19 19:49:15 -07:00
|
|
|
} else if (pcre2_rc < 0) {
|
|
|
|
string_error(streams, _(L"%ls: Regular expression match error: %ls\n"), argv0,
|
|
|
|
pcre2_strerror(pcre2_rc).c_str());
|
2020-11-09 22:30:32 -06:00
|
|
|
return match_result_t::pcre2_error;
|
2016-04-19 19:49:15 -07:00
|
|
|
} else if (pcre2_rc == 0) {
|
2015-09-12 12:59:40 -07:00
|
|
|
// The output vector wasn't big enough. Should not happen.
|
2015-09-21 11:24:49 -07:00
|
|
|
string_error(streams, _(L"%ls: Regular expression internal error\n"), argv0);
|
2020-11-09 22:30:32 -06:00
|
|
|
return match_result_t::pcre2_error;
|
2017-04-24 21:05:51 -07:00
|
|
|
} else if (opts.invert_match) {
|
2020-11-09 22:30:32 -06:00
|
|
|
return match_result_t::no_match;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2016-04-08 10:18:58 +08:00
|
|
|
|
2019-11-04 17:33:37 +01:00
|
|
|
if (opts.entire && !opts.quiet) {
|
2017-04-24 21:05:51 -07:00
|
|
|
streams.out.append(arg);
|
|
|
|
streams.out.push_back(L'\n');
|
|
|
|
}
|
2016-04-08 10:18:58 +08:00
|
|
|
|
2015-09-12 12:59:40 -07:00
|
|
|
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex.match);
|
2019-06-11 16:05:24 +02:00
|
|
|
// If we have groups-only, we skip the first match, which is the full one.
|
|
|
|
for (int j = (opts.entire || opts.groups_only ? 1 : 0); j < pcre2_rc; j++) {
|
2016-04-19 19:49:15 -07:00
|
|
|
PCRE2_SIZE begin = ovector[2 * j];
|
|
|
|
PCRE2_SIZE end = ovector[2 * j + 1];
|
2016-04-08 10:18:58 +08:00
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
if (begin != PCRE2_UNSET && end != PCRE2_UNSET && !opts.quiet) {
|
|
|
|
if (opts.index) {
|
2019-11-18 17:08:16 -08:00
|
|
|
streams.out.append_format(L"%lu %lu", (begin + 1), (end - begin));
|
2017-04-24 21:05:51 -07:00
|
|
|
} else if (end > begin) {
|
|
|
|
// May have end < begin if \K is used.
|
2019-07-31 11:06:39 -07:00
|
|
|
streams.out.append(arg.substr(begin, end - begin));
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2016-04-08 10:18:58 +08:00
|
|
|
streams.out.push_back(L'\n');
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
|
|
|
}
|
2016-04-08 10:18:58 +08:00
|
|
|
|
2020-11-09 22:30:32 -06:00
|
|
|
return opts.invert_match ? match_result_t::no_match : match_result_t::match;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
|
|
|
|
2020-11-06 17:24:44 -06:00
|
|
|
class regex_importer_t {
|
|
|
|
private:
|
2021-04-20 15:15:13 -07:00
|
|
|
std::map<wcstring, wcstring_list_t> matches_;
|
|
|
|
env_stack_t &vars_;
|
2020-11-06 17:24:44 -06:00
|
|
|
const wcstring &haystack_;
|
|
|
|
const compiled_regex_t ®ex_;
|
2021-04-18 20:57:37 -07:00
|
|
|
const bool all_flag_;
|
2021-04-20 15:15:13 -07:00
|
|
|
bool do_import_{false};
|
2020-11-06 17:24:44 -06:00
|
|
|
|
|
|
|
public:
|
2021-04-20 15:15:13 -07:00
|
|
|
regex_importer_t(env_stack_t &vars, const wcstring &haystack, const compiled_regex_t ®ex,
|
2021-04-18 20:57:37 -07:00
|
|
|
bool all_flag)
|
2021-04-20 15:15:13 -07:00
|
|
|
: vars_(vars), haystack_(haystack), regex_(regex), all_flag_(all_flag) {
|
|
|
|
for (const wcstring &name : regex_.capture_group_names) {
|
|
|
|
matches_.emplace(name, wcstring_list_t{});
|
2020-11-06 17:24:44 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// This member function should be called each time a match is found
|
2021-04-18 16:50:28 -07:00
|
|
|
void import_vars() {
|
2021-04-20 15:15:13 -07:00
|
|
|
do_import_ = true;
|
2020-11-06 17:24:44 -06:00
|
|
|
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(regex_.match);
|
2021-04-18 16:50:28 -07:00
|
|
|
for (auto &kv : matches_) {
|
2020-11-06 17:24:44 -06:00
|
|
|
const auto &name = kv.first;
|
2021-04-18 16:50:28 -07:00
|
|
|
wcstring_list_t &vals = kv.second;
|
|
|
|
|
2020-11-06 17:24:44 -06:00
|
|
|
// A named group may actually correspond to multiple group numbers, each of which
|
|
|
|
// might have to be enumerated.
|
|
|
|
PCRE2_SPTR first = nullptr;
|
|
|
|
PCRE2_SPTR last = nullptr;
|
|
|
|
int entry_size = pcre2_substring_nametable_scan(
|
|
|
|
regex_.code, (PCRE2_SPTR)(name.c_str()), &first, &last);
|
|
|
|
if (entry_size <= 0) {
|
|
|
|
FLOGF(warning, L"PCRE2 failure retrieving named matches");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool value_found = false;
|
2021-04-18 16:50:28 -07:00
|
|
|
for (const auto *group_ptr = first; group_ptr <= last; group_ptr += entry_size) {
|
2020-11-06 17:24:44 -06:00
|
|
|
int group_num = group_ptr[0];
|
|
|
|
|
|
|
|
PCRE2_SIZE *capture = ovector + (2 * group_num);
|
|
|
|
PCRE2_SIZE begin = capture[0];
|
|
|
|
PCRE2_SIZE end = capture[1];
|
|
|
|
|
|
|
|
if (begin != PCRE2_UNSET && end != PCRE2_UNSET && end >= begin) {
|
2021-04-18 16:50:28 -07:00
|
|
|
vals.push_back(haystack_.substr(begin, end - begin));
|
2020-11-06 17:24:44 -06:00
|
|
|
value_found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there are multiple named groups and --all was used, we need to ensure that the
|
|
|
|
// indexes are always in sync between the variables. If an optional named group
|
|
|
|
// didn't match but its brethren did, we need to make sure to put *something* in the
|
|
|
|
// resulting array, and unfortunately fish doesn't support empty/null members so
|
|
|
|
// we're going to have to use an empty string as the sentinel value.
|
2021-04-18 20:57:37 -07:00
|
|
|
if (!value_found && all_flag_) {
|
2021-08-17 02:20:28 -07:00
|
|
|
vals.emplace_back();
|
2020-11-06 17:24:44 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
~regex_importer_t() {
|
2021-04-20 15:15:13 -07:00
|
|
|
if (!do_import_) return;
|
2021-04-18 16:50:28 -07:00
|
|
|
for (auto &kv : matches_) {
|
|
|
|
const wcstring &name = kv.first;
|
|
|
|
wcstring_list_t &value = kv.second;
|
2021-04-20 15:15:13 -07:00
|
|
|
vars_.set(name, ENV_DEFAULT, std::move(value));
|
2020-11-06 17:24:44 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
public:
|
2018-01-23 19:53:24 +01:00
|
|
|
pcre2_matcher_t(const wchar_t *argv0_, const wcstring &pattern, const options_t &opts,
|
2020-11-06 17:24:44 -06:00
|
|
|
io_streams_t &streams, parser_t &parser_)
|
2015-09-21 11:24:49 -07:00
|
|
|
: string_matcher_t(opts, streams),
|
2015-09-12 12:59:40 -07:00
|
|
|
argv0(argv0_),
|
2020-11-06 17:24:44 -06:00
|
|
|
regex(argv0_, pattern, opts.ignore_case, streams),
|
|
|
|
parser(parser_) {}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2018-02-18 18:50:35 -08:00
|
|
|
~pcre2_matcher_t() override = default;
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2018-01-23 19:53:24 +01:00
|
|
|
bool report_matches(const wcstring &arg) override {
|
2016-04-19 19:49:15 -07:00
|
|
|
// A return value of true means all is well (even if no matches were found), false indicates
|
|
|
|
// an unrecoverable error.
|
2021-04-20 11:28:34 -07:00
|
|
|
assert(regex.code && "report_matches should only be called if the regex was valid");
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2021-04-20 15:15:13 -07:00
|
|
|
regex_importer_t var_importer(this->parser.vars(), arg, this->regex, opts.all);
|
2020-11-06 17:24:44 -06:00
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
// See pcre2demo.c for an explanation of this logic.
|
2018-01-23 19:53:24 +01:00
|
|
|
PCRE2_SIZE arglen = arg.length();
|
2020-11-09 22:30:32 -06:00
|
|
|
auto rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, 0, 0,
|
2020-12-06 15:33:04 +01:00
|
|
|
regex.match, nullptr));
|
2021-04-20 15:15:13 -07:00
|
|
|
|
2020-12-04 18:45:08 +01:00
|
|
|
// We only import variables for the *first matching argument*
|
2021-04-20 15:15:13 -07:00
|
|
|
bool do_var_import = (rc == match_result_t::match && !imported_vars);
|
|
|
|
if (do_var_import) {
|
2021-04-18 16:50:28 -07:00
|
|
|
var_importer.import_vars();
|
2021-04-20 15:15:13 -07:00
|
|
|
imported_vars = true;
|
2020-12-04 18:45:08 +01:00
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2020-11-09 22:30:32 -06:00
|
|
|
switch (rc) {
|
|
|
|
case match_result_t::pcre2_error:
|
|
|
|
return false;
|
|
|
|
case match_result_t::no_match:
|
|
|
|
return true;
|
|
|
|
case match_result_t::match:
|
|
|
|
total_matched++;
|
|
|
|
}
|
2020-01-03 16:14:09 -08:00
|
|
|
|
|
|
|
if (opts.invert_match) return true;
|
2016-04-08 10:18:58 +08:00
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
// Report any additional matches.
|
2021-04-20 15:15:13 -07:00
|
|
|
for (auto *ovector = pcre2_get_ovector_pointer(regex.match); opts.all; total_matched++) {
|
2015-09-12 12:59:40 -07:00
|
|
|
uint32_t options = 0;
|
2016-04-19 19:49:15 -07:00
|
|
|
PCRE2_SIZE offset = ovector[1]; // start at end of previous match
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
if (ovector[0] == ovector[1]) {
|
2020-01-03 16:14:09 -08:00
|
|
|
if (ovector[0] == arglen) break;
|
2015-09-12 12:59:40 -07:00
|
|
|
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
|
|
|
}
|
|
|
|
|
2018-09-27 22:28:39 -04:00
|
|
|
rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, offset,
|
2019-11-18 18:34:50 -08:00
|
|
|
options, regex.match, nullptr));
|
2020-01-03 16:14:09 -08:00
|
|
|
|
2020-11-09 22:30:32 -06:00
|
|
|
if (rc == match_result_t::pcre2_error) {
|
|
|
|
// This shouldn't happen as we've already validated the regex above
|
2020-01-16 15:14:21 -08:00
|
|
|
return false;
|
2020-11-09 22:30:32 -06:00
|
|
|
}
|
|
|
|
|
2020-11-06 17:24:44 -06:00
|
|
|
// Call import_vars() before modifying the ovector
|
2021-04-20 15:15:13 -07:00
|
|
|
if (rc == match_result_t::match && do_var_import) {
|
2021-04-18 16:50:28 -07:00
|
|
|
var_importer.import_vars();
|
2020-11-06 17:24:44 -06:00
|
|
|
}
|
|
|
|
|
2020-11-09 22:30:32 -06:00
|
|
|
if (rc == match_result_t::no_match) {
|
2020-01-03 16:14:09 -08:00
|
|
|
if (options == 0 /* all matches found now */) break;
|
2015-09-12 12:59:40 -07:00
|
|
|
ovector[1] = offset + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
2021-04-20 11:28:34 -07:00
|
|
|
|
2021-04-20 15:15:13 -07:00
|
|
|
/// Override to clear our capture variables if we had no match.
|
|
|
|
void clear_capture_vars() override {
|
|
|
|
assert(!imported_vars && "Should not already have imported variables");
|
|
|
|
for (const wcstring &name : regex.capture_group_names) {
|
|
|
|
parser.vars().set_empty(name, ENV_DEFAULT);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-20 11:28:34 -07:00
|
|
|
bool is_valid() const override { return regex.is_valid(); }
|
2015-09-12 12:59:40 -07:00
|
|
|
};
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string match`: match every argument against a glob or, with --regex, a
/// PCRE2 pattern and report the matches.
///
/// \return STATUS_CMD_OK if anything matched, STATUS_CMD_ERROR if nothing matched, or
/// STATUS_INVALID_ARGS for bad flag combinations, an invalid pattern or a matching error.
static int string_match(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    const wchar_t *cmd = argv[0];

    options_t opts;
    opts.all_valid = true;
    opts.entire_valid = true;
    opts.groups_only_valid = true;
    opts.ignore_case_valid = true;
    opts.invert_valid = true;
    opts.quiet_valid = true;
    opts.regex_valid = true;
    opts.index_valid = true;
    int optind;
    const int status = parse_opts(&opts, &optind, 1, argc, argv, parser, streams);
    if (status != STATUS_CMD_OK) return status;
    const wchar_t *pattern = opts.arg1;

    // Report a pair of mutually exclusive flags and produce the status to return.
    auto reject_combo = [&](const wchar_t *why) -> int {
        streams.err.append_format(BUILTIN_ERR_COMBO2, cmd, why);
        return STATUS_INVALID_ARGS;
    };

    if (opts.entire && opts.index) {
        return reject_combo(_(L"--entire and --index are mutually exclusive"));
    }
    if (opts.invert_match && opts.groups_only) {
        return reject_combo(_(L"--invert and --groups-only are mutually exclusive"));
    }
    if (opts.entire && opts.groups_only) {
        return reject_combo(_(L"--entire and --groups-only are mutually exclusive"));
    }

    std::unique_ptr<string_matcher_t> matcher;
    if (opts.regex) {
        matcher = make_unique<pcre2_matcher_t>(cmd, pattern, opts, streams, parser);
    } else {
        matcher = make_unique<wildcard_matcher_t>(cmd, pattern, opts, streams);
    }
    if (!matcher->is_valid()) {
        // An error will have been printed by the constructor.
        return STATUS_INVALID_ARGS;
    }

    arg_iterator_t aiter(argv, optind, streams);
    while (const wcstring *arg = aiter.nextstr()) {
        const bool ok = matcher->report_matches(*arg);
        if (!ok) return STATUS_INVALID_ARGS;
        // In quiet mode the first match settles the result.
        if (opts.quiet && matcher->match_count() > 0) return STATUS_CMD_OK;
    }

    if (matcher->match_count() == 0) {
        matcher->clear_capture_vars();
    }

    return matcher->match_count() > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string pad`. Every input is padded with `opts.char_to_pad` until it is as
/// wide as the widest input (or `opts.width`, whichever is larger). Widths are measured with
/// width_without_escapes(). Padding goes on the left unless --right was given.
static int string_pad(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    options_t opts;
    opts.char_to_pad_valid = true;
    opts.right_valid = true;
    opts.width_valid = true;
    int optind;
    const int parse_status = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
    if (parse_status != STATUS_CMD_OK) return parse_status;

    // A padding character that occupies no columns can never fill the requested width.
    size_t pad_char_width = fish_wcwidth(opts.char_to_pad);
    if (pad_char_width == 0) {
        string_error(streams, _(L"%ls: Invalid padding character of width zero\n"), argv[0]);
        return STATUS_INVALID_ARGS;
    }

    // Left padding is the default.
    if (!opts.right) opts.left = true;

    // First pass: keep a copy of every input and remember the widest one.
    std::vector<wcstring> inputs;
    ssize_t max_width = 0;
    arg_iterator_t aiter_width(argv, optind, streams);
    while (const wcstring *arg = aiter_width.nextstr()) {
        inputs.push_back(*arg);
        ssize_t width = width_without_escapes(inputs.back());
        if (max_width < width) max_width = width;
    }

    // Second pass: emit each input padded out to the target width.
    ssize_t pad_width = max_width > opts.width ? max_width : opts.width;
    for (auto &input : inputs) {
        wcstring padded;
        ssize_t input_width = width_without_escapes(input);
        if (pad_width >= input_width) {
            // Use as many whole padding characters as fit; any columns left over (possible when
            // the padding character is wider than one column) are filled with spaces.
            ssize_t pad = (pad_width - input_width) / pad_char_width;
            ssize_t remaining_width = (pad_width - input_width) % pad_char_width;
            const wcstring fill(pad, opts.char_to_pad);
            const wcstring spaces(remaining_width, L' ');
            if (opts.left) {
                padded += fill;
                padded += spaces;
                padded += input;
            }
            if (opts.right) {
                padded += input;
                padded += spaces;
                padded += fill;
            }
        }
        padded.push_back(L'\n');
        streams.out.append(padded);
    }

    return STATUS_CMD_OK;
}
|
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
class string_replacer_t {
|
|
|
|
protected:
|
2015-09-12 12:59:40 -07:00
|
|
|
const wchar_t *argv0;
|
2017-06-11 11:49:59 -07:00
|
|
|
options_t opts;
|
2015-09-12 12:59:40 -07:00
|
|
|
int total_replaced;
|
2015-09-21 11:24:49 -07:00
|
|
|
io_streams_t &streams;
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
public:
|
2018-02-18 18:39:03 -08:00
|
|
|
string_replacer_t(const wchar_t *argv0_, options_t opts_, io_streams_t &streams_)
|
2020-03-13 13:45:41 -07:00
|
|
|
: argv0(argv0_), opts(std::move(opts_)), total_replaced(0), streams(streams_) {}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2018-02-18 18:44:58 -08:00
|
|
|
virtual ~string_replacer_t() = default;
|
2018-08-09 19:46:11 -04:00
|
|
|
int replace_count() const { return total_replaced; }
|
2018-01-23 19:53:24 +01:00
|
|
|
virtual bool replace_matches(const wcstring &arg) = 0;
|
2015-09-12 12:59:40 -07:00
|
|
|
};
|
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
class literal_replacer_t : public string_replacer_t {
|
2018-01-23 19:53:24 +01:00
|
|
|
const wcstring pattern;
|
|
|
|
const wcstring replacement;
|
2015-09-12 16:43:51 -07:00
|
|
|
size_t patlen;
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
public:
|
2019-12-21 13:50:58 -08:00
|
|
|
literal_replacer_t(const wchar_t *argv0, wcstring pattern_, const wchar_t *replacement_,
|
2017-06-11 11:49:59 -07:00
|
|
|
const options_t &opts, io_streams_t &streams)
|
2015-09-21 11:24:49 -07:00
|
|
|
: string_replacer_t(argv0, opts, streams),
|
2019-12-21 13:50:58 -08:00
|
|
|
pattern(std::move(pattern_)),
|
2016-04-19 19:49:15 -07:00
|
|
|
replacement(replacement_),
|
2018-01-23 19:53:24 +01:00
|
|
|
patlen(pattern.length()) {}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2018-02-18 18:50:35 -08:00
|
|
|
~literal_replacer_t() override = default;
|
2018-01-23 19:53:24 +01:00
|
|
|
bool replace_matches(const wcstring &arg) override;
|
2015-09-12 12:59:40 -07:00
|
|
|
};
|
|
|
|
|
2018-09-27 22:22:55 -04:00
|
|
|
/// Copy \p arg, expanding each backslash escape through read_unquoted_escape().
/// \return the expanded string, or none() as soon as an escape cannot be read.
static maybe_t<wcstring> interpret_escapes(const wcstring &arg) {
    wcstring result;
    result.reserve(arg.size());

    const wchar_t *const end = arg.c_str() + arg.size();
    for (const wchar_t *cursor = arg.c_str(); cursor < end;) {
        // Ordinary characters are copied through unchanged.
        if (*cursor != L'\\') {
            result.push_back(*cursor);
            cursor++;
            continue;
        }

        // Let the escape reader append the expansion and tell us how much input it consumed.
        auto escape_len = read_unquoted_escape(cursor, &result, true, false);
        if (!escape_len) {
            // Invalid escape.
            return none();
        }
        cursor += *escape_len;
    }
    return result;
}
|
|
|
|
|
|
|
|
class regex_replacer_t : public string_replacer_t {
|
|
|
|
compiled_regex_t regex;
|
2018-09-27 22:22:55 -04:00
|
|
|
maybe_t<wcstring> replacement;
|
2017-06-08 20:56:24 -07:00
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
public:
|
2018-01-23 19:53:24 +01:00
|
|
|
regex_replacer_t(const wchar_t *argv0, const wcstring &pattern, const wcstring &replacement_,
|
2017-06-11 11:49:59 -07:00
|
|
|
const options_t &opts, io_streams_t &streams)
|
2015-09-21 11:24:49 -07:00
|
|
|
: string_replacer_t(argv0, opts, streams),
|
2019-01-12 20:20:35 +01:00
|
|
|
regex(argv0, pattern, opts.ignore_case, streams) {
|
|
|
|
if (feature_test(features_t::string_replace_backslash)) {
|
|
|
|
replacement = replacement_;
|
|
|
|
} else {
|
|
|
|
replacement = interpret_escapes(replacement_);
|
|
|
|
}
|
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2018-01-23 19:53:24 +01:00
|
|
|
bool replace_matches(const wcstring &arg) override;
|
2016-11-01 20:00:09 -07:00
|
|
|
};
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2016-11-01 20:00:09 -07:00
|
|
|
/// A return value of true means all is well (even if no replacements were performed), false
|
|
|
|
/// indicates an unrecoverable error.
|
2018-01-23 19:53:24 +01:00
|
|
|
bool literal_replacer_t::replace_matches(const wcstring &arg) {
|
2017-04-27 21:53:39 -07:00
|
|
|
wcstring result;
|
|
|
|
bool replacement_occurred = false;
|
|
|
|
|
|
|
|
if (patlen == 0) {
|
|
|
|
replacement_occurred = true;
|
|
|
|
result = arg;
|
|
|
|
} else {
|
2019-03-12 14:06:01 -07:00
|
|
|
auto &cmp_func = opts.ignore_case ? wcsncasecmp : std::wcsncmp;
|
2018-01-23 19:53:24 +01:00
|
|
|
const wchar_t *cur = arg.c_str();
|
2018-08-26 00:43:40 -07:00
|
|
|
const wchar_t *end = cur + arg.size();
|
|
|
|
while (cur < end) {
|
2018-09-27 22:28:39 -04:00
|
|
|
if ((opts.all || !replacement_occurred) &&
|
|
|
|
cmp_func(cur, pattern.c_str(), patlen) == 0) {
|
2017-04-27 21:53:39 -07:00
|
|
|
result += replacement;
|
|
|
|
cur += patlen;
|
|
|
|
replacement_occurred = true;
|
|
|
|
total_replaced++;
|
|
|
|
} else {
|
2018-08-26 00:43:40 -07:00
|
|
|
result.push_back(*cur);
|
2017-04-27 21:53:39 -07:00
|
|
|
cur++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!opts.quiet && (!opts.filter || replacement_occurred)) {
|
|
|
|
streams.out.append(result);
|
|
|
|
streams.out.append(L'\n');
|
2016-11-01 20:00:09 -07:00
|
|
|
}
|
|
|
|
|
2017-04-27 21:53:39 -07:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// A return value of true means all is well (even if no replacements were performed), false
|
|
|
|
/// indicates an unrecoverable error.
|
2018-01-23 19:53:24 +01:00
|
|
|
bool regex_replacer_t::replace_matches(const wcstring &arg) {
|
2018-09-27 22:28:39 -04:00
|
|
|
if (!regex.code) return false; // pcre2_compile() failed
|
2018-09-27 22:22:55 -04:00
|
|
|
if (!replacement) return false; // replacement was an invalid string
|
2017-04-27 21:53:39 -07:00
|
|
|
|
2020-09-27 21:33:23 +02:00
|
|
|
// clang-format off
|
2020-09-20 10:33:04 +02:00
|
|
|
// SUBSTITUTE_OVERFLOW_LENGTH causes pcre to return the needed buffer length if the passed one is to small
|
|
|
|
// SUBSTITUTE_EXTENDED changes how substitution expressions are interpreted (`$` as the special character)
|
|
|
|
// SUBSTITUTE_UNSET_EMPTY treats unmatched capturing groups as empty instead of erroring.
|
|
|
|
// SUBSTITUTE_GLOBAL means more than one substitution happens.
|
2020-09-27 21:33:23 +02:00
|
|
|
// clang-format on
|
2016-11-01 20:00:09 -07:00
|
|
|
uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED |
|
2020-09-27 21:33:23 +02:00
|
|
|
PCRE2_SUBSTITUTE_UNSET_EMPTY | (opts.all ? PCRE2_SUBSTITUTE_GLOBAL : 0);
|
2018-01-23 19:53:24 +01:00
|
|
|
size_t arglen = arg.length();
|
2016-11-01 20:00:09 -07:00
|
|
|
PCRE2_SIZE bufsize = (arglen == 0) ? 16 : 2 * arglen;
|
2020-04-02 16:04:04 -07:00
|
|
|
auto output = static_cast<wchar_t *>(malloc(sizeof(wchar_t) * bufsize));
|
2016-11-01 20:00:09 -07:00
|
|
|
int pcre2_rc;
|
2018-01-23 19:53:24 +01:00
|
|
|
PCRE2_SIZE outlen = bufsize;
|
2016-11-01 20:00:09 -07:00
|
|
|
|
|
|
|
bool done = false;
|
|
|
|
while (!done) {
|
2017-02-13 20:37:27 -08:00
|
|
|
assert(output);
|
|
|
|
|
2018-01-23 19:53:24 +01:00
|
|
|
pcre2_rc = pcre2_substitute(regex.code, PCRE2_SPTR(arg.c_str()), arglen,
|
2016-11-01 20:00:09 -07:00
|
|
|
0, // start offset
|
|
|
|
options, regex.match,
|
2020-01-03 15:33:44 -08:00
|
|
|
nullptr, // match_data
|
2018-09-27 22:22:55 -04:00
|
|
|
PCRE2_SPTR(replacement->c_str()), replacement->length(),
|
2019-11-18 17:08:16 -08:00
|
|
|
reinterpret_cast<PCRE2_UCHAR *>(output), &outlen);
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2016-11-01 20:00:09 -07:00
|
|
|
if (pcre2_rc != PCRE2_ERROR_NOMEMORY || bufsize >= outlen) {
|
|
|
|
done = true;
|
2016-04-19 19:49:15 -07:00
|
|
|
} else {
|
2016-11-01 20:00:09 -07:00
|
|
|
bufsize = outlen;
|
2020-04-02 16:04:04 -07:00
|
|
|
auto new_output = static_cast<wchar_t *>(realloc(output, sizeof(wchar_t) * bufsize));
|
2016-11-14 21:31:51 -08:00
|
|
|
if (new_output) output = new_output;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2016-11-01 20:00:09 -07:00
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2016-11-01 20:00:09 -07:00
|
|
|
bool rc = true;
|
|
|
|
if (pcre2_rc < 0) {
|
|
|
|
string_error(streams, _(L"%ls: Regular expression substitute error: %ls\n"), argv0,
|
|
|
|
pcre2_strerror(pcre2_rc).c_str());
|
|
|
|
rc = false;
|
|
|
|
} else {
|
2019-03-14 15:12:14 -07:00
|
|
|
wcstring outstr(output, outlen);
|
2017-04-27 21:53:39 -07:00
|
|
|
bool replacement_occurred = pcre2_rc > 0;
|
|
|
|
if (!opts.quiet && (!opts.filter || replacement_occurred)) {
|
2018-01-23 19:53:24 +01:00
|
|
|
streams.out.append(outstr);
|
2016-11-01 20:00:09 -07:00
|
|
|
streams.out.append(L'\n');
|
|
|
|
}
|
|
|
|
total_replaced += pcre2_rc;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
2016-11-01 20:00:09 -07:00
|
|
|
|
|
|
|
free(output);
|
|
|
|
return rc;
|
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string replace`. Takes a pattern and a replacement as its two leading
/// positional arguments and applies them to each remaining input.
/// \return STATUS_CMD_OK if at least one replacement was counted, STATUS_CMD_ERROR if none was,
/// or STATUS_INVALID_ARGS if the replacer reported an unrecoverable error.
static int string_replace(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    options_t opts;
    opts.all_valid = true;
    opts.filter_valid = true;
    opts.ignore_case_valid = true;
    opts.quiet_valid = true;
    opts.regex_valid = true;
    int optind;
    const int parse_status = parse_opts(&opts, &optind, 2, argc, argv, parser, streams);
    if (parse_status != STATUS_CMD_OK) return parse_status;

    const wchar_t *pattern = opts.arg1;
    const wchar_t *replacement = opts.arg2;

    // Pick the back end: literal comparison by default, PCRE2 when --regex was given.
    std::unique_ptr<string_replacer_t> replacer;
    if (!opts.regex) {
        replacer = make_unique<literal_replacer_t>(argv[0], pattern, replacement, opts, streams);
    } else {
        replacer = make_unique<regex_replacer_t>(argv[0], pattern, replacement, opts, streams);
    }

    arg_iterator_t aiter(argv, optind, streams);
    for (const wcstring *arg = aiter.nextstr(); arg != nullptr; arg = aiter.nextstr()) {
        if (!replacer->replace_matches(*arg)) return STATUS_INVALID_ARGS;
        // In quiet mode only the status matters, so stop at the first replacement.
        if (opts.quiet && replacer->replace_count() > 0) return STATUS_CMD_OK;
    }

    if (replacer->replace_count() > 0) return STATUS_CMD_OK;
    return STATUS_CMD_ERROR;
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Shared implementation of `string split` and `string split0`.
/// When \p is_split0 is true the separator is a single NUL character and no separator argument
/// is consumed; otherwise the separator is the first positional argument.
/// \return STATUS_CMD_OK if something was split, STATUS_CMD_ERROR if not (or if a requested
/// field is missing and --allow-empty was not given), STATUS_INVALID_ARGS on bad option use.
static int string_split_maybe0(parser_t &parser, io_streams_t &streams, int argc,
                               const wchar_t **argv, bool is_split0) {
    const wchar_t *cmd = argv[0];
    options_t opts;
    opts.quiet_valid = true;
    opts.right_valid = true;
    opts.max_valid = true;
    opts.max = LONG_MAX;
    opts.no_empty_valid = true;
    opts.fields_valid = true;
    opts.allow_empty_valid = true;
    int optind;
    const int parse_status =
        parse_opts(&opts, &optind, is_split0 ? 0 : 1, argc, argv, parser, streams);
    if (parse_status != STATUS_CMD_OK) return parse_status;

    if (opts.allow_empty && opts.fields.size() == 0) {
        streams.err.append_format(BUILTIN_ERR_COMBO2, cmd,
                                  _(L"--allow-empty is only valid with --fields"));
        return STATUS_INVALID_ARGS;
    }

    const wcstring sep = is_split0 ? wcstring(1, L'\0') : wcstring(opts.arg1);

    // Phase one: split every input and remember the pieces.
    std::vector<wcstring_list_t> all_splits;
    size_t split_count = 0;
    size_t arg_count = 0;
    arg_iterator_t aiter(argv, optind, streams, !is_split0);
    while (const wcstring *arg = aiter.nextstr()) {
        wcstring_list_t splits;
        if (opts.right) {
            split_about(arg->rbegin(), arg->rend(), sep.rbegin(), sep.rend(), &splits, opts.max,
                        opts.no_empty);
        } else {
            split_about(arg->begin(), arg->end(), sep.begin(), sep.end(), &splits, opts.max,
                        opts.no_empty);
        }
        const size_t pieces = splits.size();
        all_splits.push_back(std::move(splits));
        // If we're quiet, we return early if we've found something to split.
        if (opts.quiet && pieces > 1) return STATUS_CMD_OK;
        split_count += pieces;
        arg_count++;
    }

    // Phase two: fix up ordering and print.
    for (auto &splits : all_splits) {
        // If we are from the right, split_about gave us reversed strings, in reversed order!
        if (opts.right) {
            for (auto &split : splits) {
                std::reverse(split.begin(), split.end());
            }
            std::reverse(splits.begin(), splits.end());
        }

        if (opts.quiet) continue;

        // split0 ignores a trailing \0, so a\0b\0 is two elements.
        // In contrast to split, where a\nb\n is three - "a", "b" and "".
        //
        // Remove the last element if it is empty.
        if (is_split0 && !splits.empty() && splits.back().empty()) {
            splits.pop_back();
        }

        if (opts.fields.size() == 0) {
            for (const wcstring &split : splits) {
                streams.out.append_with_separation(split, separation_type_t::explicitly);
            }
            continue;
        }

        const long available = static_cast<long>(splits.size());

        // Print nothing for this input and return error if any of the supplied
        // fields do not exist, unless `--allow-empty` is used.
        if (!opts.allow_empty) {
            for (const auto &field : opts.fields) {
                // field indexing starts from 1
                if (field - 1 >= available) {
                    return STATUS_CMD_ERROR;
                }
            }
        }
        for (const auto &field : opts.fields) {
            if (field - 1 < available) {
                streams.out.append_with_separation(splits.at(field - 1),
                                                   separation_type_t::explicitly);
            }
        }
    }

    // We split something if we have more split values than args.
    return split_count > arg_count ? STATUS_CMD_OK : STATUS_CMD_ERROR;
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string split`: the shared splitter with an explicit separator argument.
static int string_split(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    constexpr bool is_split0 = false;
    return string_split_maybe0(parser, streams, argc, argv, is_split0);
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string split0`: the shared splitter in NUL-separator mode.
static int string_split0(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    constexpr bool is_split0 = true;
    return string_split_maybe0(parser, streams, argc, argv, is_split0);
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string collect`. Each input is emitted as one explicitly separated element,
/// with trailing newlines removed unless newline trimming was disabled.
/// \return STATUS_CMD_OK if any characters were emitted, STATUS_CMD_ERROR otherwise.
static int string_collect(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    options_t opts;
    opts.allow_empty_valid = true;
    opts.no_trim_newlines_valid = true;
    int optind;
    const int parse_status = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
    if (parse_status != STATUS_CMD_OK) return parse_status;

    arg_iterator_t aiter(argv, optind, streams, /* don't split */ false);
    size_t appended = 0;
    while (const wcstring *arg = aiter.nextstr()) {
        size_t len = arg->size();
        if (!opts.no_trim_newlines) {
            // Keep everything up to and including the last character that is not a newline.
            const size_t last_kept = arg->find_last_not_of(L'\n');
            len = (last_kept == wcstring::npos) ? 0 : last_kept + 1;
        }
        streams.out.append_with_separation(arg->c_str(), len, separation_type_t::explicitly);
        appended += len;
    }

    // If we haven't printed anything and --allow-empty is set,
    // print something empty. Helps with empty elision:
    // echo (true | string collect --allow-empty)"bar"
    // prints "bar".
    if (appended == 0 && opts.allow_empty) {
        streams.out.append_with_separation(L"", 0, separation_type_t::explicitly);
    }

    if (appended > 0) return STATUS_CMD_OK;
    return STATUS_CMD_ERROR;
}
|
|
|
|
|
2017-03-07 15:39:21 +01:00
|
|
|
// Helper function to abstract the repeat logic from string_repeat
|
|
|
|
// returns the to_repeat string, repeated count times.
|
|
|
|
static wcstring wcsrepeat(const wcstring &to_repeat, size_t count) {
|
|
|
|
wcstring repeated;
|
|
|
|
repeated.reserve(to_repeat.length() * count);
|
|
|
|
|
|
|
|
for (size_t j = 0; j < count; j++) {
|
|
|
|
repeated += to_repeat;
|
|
|
|
}
|
|
|
|
|
|
|
|
return repeated;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Helper function to abstract the repeat until logic from string_repeat
|
|
|
|
// returns the to_repeat string, repeated until max char has been reached.
|
|
|
|
static wcstring wcsrepeat_until(const wcstring &to_repeat, size_t max) {
|
2018-05-13 19:26:51 +02:00
|
|
|
if (to_repeat.length() == 0) return wcstring();
|
2017-03-07 15:39:21 +01:00
|
|
|
size_t count = max / to_repeat.length();
|
|
|
|
size_t mod = max % to_repeat.length();
|
|
|
|
|
|
|
|
return wcsrepeat(to_repeat, count) + to_repeat.substr(0, mod);
|
|
|
|
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string repeat`. Each input is repeated `opts.count` times, or cut off at
/// `opts.max` characters when the count is zero or the repeated text would exceed the maximum.
/// \return STATUS_CMD_OK if any non-empty output was produced, STATUS_CMD_ERROR otherwise.
static int string_repeat(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    options_t opts;
    opts.count_valid = true;
    opts.max_valid = true;
    opts.quiet_valid = true;
    opts.no_newline_valid = true;
    int optind;
    const int parse_status = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
    if (parse_status != STATUS_CMD_OK) return parse_status;

    bool all_empty = true;
    bool first = true;

    arg_iterator_t aiter(argv, optind, streams);
    while (const wcstring *word = aiter.nextstr()) {
        // Separate consecutive results with a newline.
        if (!first && !opts.quiet) {
            streams.out.append(L'\n');
        }
        first = false;

        const bool exceeds_max =
            opts.max > 0 && word->length() * opts.count > static_cast<size_t>(opts.max);
        const bool limit_repeat = exceeds_max || !opts.count;
        const wcstring repeated =
            limit_repeat ? wcsrepeat_until(*word, opts.max) : wcsrepeat(*word, opts.count);

        if (!repeated.empty()) {
            // Early out if we can - see #7495.
            if (opts.quiet) return STATUS_CMD_OK;
            all_empty = false;
        }

        // Append if not quiet.
        if (!opts.quiet) {
            streams.out.append(repeated);
        }
    }

    // Historical behavior is to never append a newline if all strings were empty.
    const bool want_trailing_newline = !opts.quiet && !opts.no_newline && !all_empty;
    if (want_trailing_newline) {
        streams.out.append(L'\n');
    }

    if (all_empty) return STATUS_CMD_ERROR;
    return STATUS_CMD_OK;
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string sub`. Prints the substring of each input selected by --start and
/// either --length or --end. Positive start values are 1-based; negative start and end values
/// count back from the end of the string.
/// \return STATUS_CMD_OK if at least one input was processed, STATUS_CMD_ERROR if there were
/// none, STATUS_INVALID_ARGS if --end and --length are combined.
static int string_sub(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    const wchar_t *cmd = argv[0];

    options_t opts;
    opts.length_valid = true;
    opts.quiet_valid = true;
    opts.start_valid = true;
    opts.end_valid = true;
    opts.length = -1;
    int optind;
    const int parse_status = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
    if (parse_status != STATUS_CMD_OK) return parse_status;

    if (opts.length != -1 && opts.end != 0) {
        streams.err.append_format(BUILTIN_ERR_COMBO2, cmd,
                                  _(L"--end and --length are mutually exclusive"));
        return STATUS_INVALID_ARGS;
    }

    int nsub = 0;
    arg_iterator_t aiter(argv, optind, streams);
    while (const wcstring *s = aiter.nextstr()) {
        using size_type = wcstring::size_type;
        const size_type len = s->length();

        // Resolve the 0-based starting offset, clamped to the string length.
        size_type pos = 0;
        if (opts.start > 0) {
            pos = static_cast<size_type>(opts.start - 1);
        } else if (opts.start < 0) {
            assert(opts.start != LONG_MIN);  // checked above
            auto n = static_cast<size_type>(-opts.start);
            pos = n > len ? 0 : len - n;
        }
        if (pos > len) pos = len;

        // Resolve how many characters to take; npos means "through the end".
        size_type count = wcstring::npos;
        if (opts.length >= 0) {
            count = static_cast<size_type>(opts.length);
        } else if (opts.end != 0) {
            size_type n;
            if (opts.end > 0) {
                n = static_cast<size_type>(opts.end);
            } else {
                assert(opts.end != LONG_MIN);  // checked above
                n = static_cast<size_type>(-opts.end);
                n = n > len ? 0 : len - n;
            }
            count = n < pos ? 0 : n - pos;
        }

        // In quiet mode a single processed input is enough to report success.
        if (opts.quiet) return STATUS_CMD_OK;

        // Note that std::string permits count to extend past end of string.
        streams.out.append(s->substr(pos, count));
        streams.out.append(L'\n');
        nsub++;
    }

    return nsub > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
}
|
|
|
|
|
2021-02-13 18:41:09 -08:00
|
|
|
/// Implementation of `string trim`. Strips characters in `opts.chars_to_trim` from the left
/// and/or right end of each input.
/// \return STATUS_CMD_OK if anything was trimmed, STATUS_CMD_ERROR otherwise.
static int string_trim(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
    options_t opts;
    opts.chars_to_trim_valid = true;
    opts.left_valid = true;
    opts.right_valid = true;
    opts.quiet_valid = true;
    int optind;
    const int parse_status = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
    if (parse_status != STATUS_CMD_OK) return parse_status;

    // If neither left or right is specified, we do both.
    if (!(opts.left || opts.right)) {
        opts.left = true;
        opts.right = true;
    }

    size_t ntrim = 0;

    arg_iterator_t aiter(argv, optind, streams);
    while (const wcstring *arg = aiter.nextstr()) {
        // `begin` is the first character to keep on the left and `end` is the first character
        // to trim on the right, so the kept length is end - begin.
        const size_t full_len = arg->size();
        size_t begin = 0;
        size_t end = full_len;
        if (opts.right) {
            const size_t last_to_keep = arg->find_last_not_of(opts.chars_to_trim);
            end = (last_to_keep == wcstring::npos) ? 0 : last_to_keep + 1;
        }
        if (opts.left) {
            const size_t first_to_keep = arg->find_first_not_of(opts.chars_to_trim);
            // Nothing to keep at all: collapse to an empty range at `end`.
            begin = (first_to_keep == wcstring::npos) ? end : first_to_keep;
        }
        assert(begin <= end && end <= arg->size());
        ntrim += full_len - (end - begin);

        if (opts.quiet) {
            // Only the status matters; stop as soon as something was trimmed.
            if (ntrim > 0) return STATUS_CMD_OK;
        } else {
            streams.out.append(arg->substr(begin, end - begin));
            streams.out.append(L'\n');
        }
    }

    return ntrim > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
}
|
|
|
|
|
2018-01-11 16:04:28 +01:00
|
|
|
// A helper function for lower and upper.
|
2021-02-13 18:41:09 -08:00
|
|
|
static int string_transform(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv,
|
2018-09-27 22:28:39 -04:00
|
|
|
std::wint_t (*func)(std::wint_t)) {
|
2017-06-11 11:49:59 -07:00
|
|
|
options_t opts;
|
|
|
|
opts.quiet_valid = true;
|
|
|
|
int optind;
|
|
|
|
int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
|
|
|
|
if (retval != STATUS_CMD_OK) return retval;
|
2017-06-10 17:35:25 -07:00
|
|
|
|
|
|
|
int n_transformed = 0;
|
2017-12-21 12:42:57 -08:00
|
|
|
arg_iterator_t aiter(argv, optind, streams);
|
2018-05-28 12:05:47 +02:00
|
|
|
while (const wcstring *arg = aiter.nextstr()) {
|
2018-01-11 16:04:28 +01:00
|
|
|
wcstring transformed(*arg);
|
|
|
|
std::transform(transformed.begin(), transformed.end(), transformed.begin(), func);
|
|
|
|
if (transformed != *arg) n_transformed++;
|
2017-06-11 11:49:59 -07:00
|
|
|
if (!opts.quiet) {
|
2017-06-10 17:35:25 -07:00
|
|
|
streams.out.append(transformed);
|
|
|
|
streams.out.append(L'\n');
|
2020-11-29 12:06:48 +01:00
|
|
|
} else if (n_transformed > 0) {
|
|
|
|
return STATUS_CMD_OK;
|
2017-06-10 17:35:25 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return n_transformed > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
|
|
|
|
}
|
|
|
|
|
2018-01-11 16:04:28 +01:00
|
|
|
/// Implementation of `string lower`.
|
2021-02-13 18:41:09 -08:00
|
|
|
static int string_lower(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
|
2018-01-11 16:04:28 +01:00
|
|
|
return string_transform(parser, streams, argc, argv, std::towlower);
|
|
|
|
}
|
|
|
|
|
2017-06-11 11:49:59 -07:00
|
|
|
/// Implementation of `string upper`.
|
2021-02-13 18:41:09 -08:00
|
|
|
static int string_upper(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) {
|
2018-01-11 16:04:28 +01:00
|
|
|
return string_transform(parser, streams, argc, argv, std::towupper);
|
2017-06-10 17:35:25 -07:00
|
|
|
}
|
|
|
|
|
2020-09-27 21:33:23 +02:00
|
|
|
// Keep sorted alphabetically
|
2021-02-08 15:31:49 -06:00
|
|
|
static constexpr const struct string_subcommand {
|
2015-09-12 12:59:40 -07:00
|
|
|
const wchar_t *name;
|
2016-11-01 19:12:14 -07:00
|
|
|
int (*handler)(parser_t &, io_streams_t &, int argc, //!OCLINT(unused param)
|
2021-02-13 18:41:09 -08:00
|
|
|
const wchar_t **argv); //!OCLINT(unused param)
|
2020-09-27 21:33:23 +02:00
|
|
|
} string_subcommands[] = {
|
|
|
|
{L"collect", &string_collect}, {L"escape", &string_escape}, {L"join", &string_join},
|
|
|
|
{L"join0", &string_join0}, {L"length", &string_length}, {L"lower", &string_lower},
|
|
|
|
{L"match", &string_match}, {L"pad", &string_pad}, {L"repeat", &string_repeat},
|
|
|
|
{L"replace", &string_replace}, {L"split", &string_split}, {L"split0", &string_split0},
|
|
|
|
{L"sub", &string_sub}, {L"trim", &string_trim}, {L"unescape", &string_unescape},
|
|
|
|
{L"upper", &string_upper},
|
|
|
|
};
|
2021-07-15 13:15:24 -07:00
|
|
|
ASSERT_SORTED_BY_NAME(string_subcommands);
|
2015-09-12 12:59:40 -07:00
|
|
|
|
2016-04-19 19:49:15 -07:00
|
|
|
/// The string builtin, for manipulating strings.
|
2021-02-13 18:41:09 -08:00
|
|
|
maybe_t<int> builtin_string(parser_t &parser, io_streams_t &streams, const wchar_t **argv) {
|
|
|
|
const wchar_t *cmd = argv[0];
|
2015-09-12 12:59:40 -07:00
|
|
|
int argc = builtin_count_args(argv);
|
2016-04-19 19:49:15 -07:00
|
|
|
if (argc <= 1) {
|
2017-06-19 21:05:34 -07:00
|
|
|
streams.err.append_format(BUILTIN_ERR_MISSING_SUBCMD, cmd);
|
2019-06-25 17:15:48 +02:00
|
|
|
builtin_print_error_trailer(parser, streams.err, L"string");
|
2017-05-02 00:39:50 -07:00
|
|
|
return STATUS_INVALID_ARGS;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
|
|
|
|
2019-03-12 14:06:01 -07:00
|
|
|
if (std::wcscmp(argv[1], L"-h") == 0 || std::wcscmp(argv[1], L"--help") == 0) {
|
2019-10-20 11:38:17 +02:00
|
|
|
builtin_print_help(parser, streams, L"string");
|
2017-05-04 00:18:02 -07:00
|
|
|
return STATUS_CMD_OK;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
|
|
|
|
2020-09-27 21:33:23 +02:00
|
|
|
const wchar_t *subcmd_name = argv[1];
|
|
|
|
|
|
|
|
static auto begin = std::begin(string_subcommands);
|
|
|
|
static auto end = std::end(string_subcommands);
|
|
|
|
string_subcommand search{subcmd_name, 0};
|
|
|
|
auto binsearch = std::lower_bound(
|
|
|
|
begin, end, search, [&](const string_subcommand &cmd1, const string_subcommand &cmd2) {
|
|
|
|
return wcscmp(cmd1.name, cmd2.name) < 0;
|
|
|
|
});
|
|
|
|
const string_subcommand *subcmd = nullptr;
|
|
|
|
if (binsearch != end && wcscmp(subcmd_name, binsearch->name) == 0) subcmd = &*binsearch;
|
|
|
|
|
|
|
|
if (subcmd == nullptr) {
|
|
|
|
streams.err.append_format(BUILTIN_ERR_INVALID_SUBCMD, cmd, subcmd_name);
|
2019-06-25 17:15:48 +02:00
|
|
|
builtin_print_error_trailer(parser, streams.err, L"string");
|
2017-05-02 00:39:50 -07:00
|
|
|
return STATUS_INVALID_ARGS;
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|
|
|
|
|
2019-10-27 10:56:24 +01:00
|
|
|
if (argc >= 3 && (std::wcscmp(argv[2], L"-h") == 0 || std::wcscmp(argv[2], L"--help") == 0)) {
|
2020-09-27 21:33:23 +02:00
|
|
|
wcstring string_dash_subcommand = wcstring(argv[0]) + L"-" + subcmd_name;
|
2019-10-27 10:56:24 +01:00
|
|
|
builtin_print_help(parser, streams, string_dash_subcommand.c_str());
|
|
|
|
return STATUS_CMD_OK;
|
|
|
|
}
|
2015-09-12 12:59:40 -07:00
|
|
|
argc--;
|
|
|
|
argv++;
|
2015-09-21 11:24:49 -07:00
|
|
|
return subcmd->handler(parser, streams, argc, argv);
|
2015-09-12 12:59:40 -07:00
|
|
|
}
|