Report errors on invalid replacements in string replace

If the replacement in `string replace` is invalid, prior to this fix we would
enter into an infinite loop trying to parse it. Instead report errors correctly.

Fixes 
This commit is contained in:
ridiculousfish 2018-09-27 22:28:39 -04:00
parent ae54b66799
commit a1728d61af
4 changed files with 64 additions and 65 deletions

@ -133,7 +133,7 @@ class arg_iterator_t {
}
}
};
}
} // namespace
// This is used by the string subcommands to communicate with the option parser which flags are
// valid and get the result of parsing the command for flags.
@ -411,32 +411,23 @@ static wcstring construct_short_opts(options_t *opts) { //!OCLINT(high npath co
// Note that several long flags share the same short flag. That is okay. The caller is expected
// to indicate that a max of one of the long flags sharing a short flag is valid.
static const struct woption long_options[] = {{L"all", no_argument, NULL, 'a'},
{L"chars", required_argument, NULL, 'c'},
{L"count", required_argument, NULL, 'n'},
{L"entire", no_argument, NULL, 'e'},
{L"filter", no_argument, NULL, 'f'},
{L"ignore-case", no_argument, NULL, 'i'},
{L"index", no_argument, NULL, 'n'},
{L"invert", no_argument, NULL, 'v'},
{L"left", no_argument, NULL, 'l'},
{L"length", required_argument, NULL, 'l'},
{L"max", required_argument, NULL, 'm'},
{L"no-empty", no_argument, NULL, 'n'},
{L"no-newline", no_argument, NULL, 'N'},
{L"no-quoted", no_argument, NULL, 'n'},
{L"quiet", no_argument, NULL, 'q'},
{L"regex", no_argument, NULL, 'r'},
{L"right", no_argument, NULL, 'r'},
{L"start", required_argument, NULL, 's'},
{L"style", required_argument, NULL, 1},
{NULL, 0, NULL, 0}};
static const struct woption long_options[] = {
{L"all", no_argument, NULL, 'a'}, {L"chars", required_argument, NULL, 'c'},
{L"count", required_argument, NULL, 'n'}, {L"entire", no_argument, NULL, 'e'},
{L"filter", no_argument, NULL, 'f'}, {L"ignore-case", no_argument, NULL, 'i'},
{L"index", no_argument, NULL, 'n'}, {L"invert", no_argument, NULL, 'v'},
{L"left", no_argument, NULL, 'l'}, {L"length", required_argument, NULL, 'l'},
{L"max", required_argument, NULL, 'm'}, {L"no-empty", no_argument, NULL, 'n'},
{L"no-newline", no_argument, NULL, 'N'}, {L"no-quoted", no_argument, NULL, 'n'},
{L"quiet", no_argument, NULL, 'q'}, {L"regex", no_argument, NULL, 'r'},
{L"right", no_argument, NULL, 'r'}, {L"start", required_argument, NULL, 's'},
{L"style", required_argument, NULL, 1}, {NULL, 0, NULL, 0}};
static std::unordered_map<char, decltype(*handle_flag_N)> flag_to_function = {
{'N', handle_flag_N}, {'a', handle_flag_a}, {'c', handle_flag_c}, {'e', handle_flag_e},
{'f', handle_flag_f}, {'i', handle_flag_i}, {'l', handle_flag_l}, {'m', handle_flag_m},
{'n', handle_flag_n}, {'q', handle_flag_q}, {'r', handle_flag_r}, {'s', handle_flag_s},
{'v', handle_flag_v}, {1, handle_flag_1} };
{'v', handle_flag_v}, {1, handle_flag_1}};
/// Parse the arguments for flags recognized by a specific string subcommand.
static int parse_opts(options_t *opts, int *optind, int n_req_args, int argc, wchar_t **argv,
@ -689,8 +680,9 @@ struct compiled_regex_t {
int err_code = 0;
PCRE2_SIZE err_offset = 0;
code = pcre2_compile(PCRE2_SPTR(pattern.c_str()), pattern.length(),
options | (ignore_case ? PCRE2_CASELESS : 0), &err_code, &err_offset, 0);
code =
pcre2_compile(PCRE2_SPTR(pattern.c_str()), pattern.length(),
options | (ignore_case ? PCRE2_CASELESS : 0), &err_code, &err_offset, 0);
if (code == 0) {
string_error(streams, _(L"%ls: Regular expression compile error: %ls\n"), argv0,
pcre2_strerror(err_code).c_str());
@ -789,7 +781,7 @@ class pcre2_matcher_t : public string_matcher_t {
// See pcre2demo.c for an explanation of this logic.
PCRE2_SIZE arglen = arg.length();
int rc = report_match(
arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, 0, 0, regex.match, 0));
arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, 0, 0, regex.match, 0));
if (rc < 0) { // pcre2 match error.
return false;
} else if (rc == 0) { // no match
@ -815,8 +807,8 @@ class pcre2_matcher_t : public string_matcher_t {
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
}
rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, offset, options,
regex.match, 0));
rc = report_match(arg, pcre2_match(regex.code, PCRE2_SPTR(arg.c_str()), arglen, offset,
options, regex.match, 0));
if (rc < 0) {
return false;
}
@ -955,7 +947,8 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
const wchar_t *cur = arg.c_str();
const wchar_t *end = cur + arg.size();
while (cur < end) {
if ((opts.all || !replacement_occurred) && cmp_func(cur, pattern.c_str(), patlen) == 0) {
if ((opts.all || !replacement_occurred) &&
cmp_func(cur, pattern.c_str(), patlen) == 0) {
result += replacement;
cur += patlen;
replacement_occurred = true;
@ -978,7 +971,7 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
/// A return value of true means all is well (even if no replacements were performed), false
/// indicates an unrecoverable error.
bool regex_replacer_t::replace_matches(const wcstring &arg) {
if (!regex.code) return false; // pcre2_compile() failed
if (!regex.code) return false; // pcre2_compile() failed
if (!replacement) return false; // replacement was an invalid string
uint32_t options = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED |
@ -1076,9 +1069,11 @@ static int string_split_maybe0(parser_t &parser, io_streams_t &streams, int argc
arg_iterator_t aiter(argv, optind, streams, is_split0);
while (const wcstring *arg = aiter.nextstr()) {
if (opts.right) {
split_about(arg->rbegin(), arg->rend(), sep.rbegin(), sep.rend(), &splits, opts.max, opts.no_empty);
split_about(arg->rbegin(), arg->rend(), sep.rbegin(), sep.rend(), &splits, opts.max,
opts.no_empty);
} else {
split_about(arg->begin(), arg->end(), sep.begin(), sep.end(), &splits, opts.max, opts.no_empty);
split_about(arg->begin(), arg->end(), sep.begin(), sep.end(), &splits, opts.max,
opts.no_empty);
}
arg_count++;
}
@ -1247,7 +1242,8 @@ static int string_trim(parser_t &parser, io_streams_t &streams, int argc, wchar_
}
// A helper function for lower and upper.
static int string_transform(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv, std::wint_t (*func)(std::wint_t)) {
static int string_transform(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv,
std::wint_t (*func)(std::wint_t)) {
options_t opts;
opts.quiet_valid = true;
int optind;

@ -180,8 +180,7 @@ show_stackframe(const wchar_t msg_level, int frame_count, int skip_levels) {
#else // HAVE_BACKTRACE_SYMBOLS
void __attribute__((noinline))
show_stackframe(const wchar_t msg_level, int, int) {
void __attribute__((noinline)) show_stackframe(const wchar_t msg_level, int, int) {
debug_shared(msg_level, L"Sorry, but your system does not support backtraces");
}
#endif // HAVE_BACKTRACE_SYMBOLS
@ -331,8 +330,8 @@ char *wcs2str(const wchar_t *in) {
// Here we probably allocate a buffer probably much larger than necessary.
char *out = (char *)malloc(MAX_UTF8_BYTES * wcslen(in) + 1);
assert(out);
//Instead of returning the return value of wcs2str_internal, return `out` directly.
//This eliminates false warnings in coverity about resource leaks.
// Instead of returning the return value of wcs2str_internal, return `out` directly.
// This eliminates false warnings in coverity about resource leaks.
wcs2str_internal(in, out);
return out;
}
@ -521,18 +520,16 @@ void fish_setlocale() {
if (can_be_encoded(L'\u2026')) {
ellipsis_char = L'\u2026';
ellipsis_str = L"\u2026";
}
else {
ellipsis_char = L'$'; // "horizontal ellipsis"
} else {
ellipsis_char = L'$'; // "horizontal ellipsis"
ellipsis_str = L"...";
}
if (is_windows_subsystem_for_linux()) {
//neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though
//they can be *encoded* just fine. Use alternative glyphs.
// neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though
// they can be *encoded* just fine. Use alternative glyphs.
omitted_newline_char = can_be_encoded(L'\u00b6') ? L'\u00b6' : L'~'; // "pilcrow"
obfuscation_read_char = can_be_encoded(L'\u2022') ? L'\u2022' : L'*'; // "bullet"
}
else {
} else {
omitted_newline_char = can_be_encoded(L'\u23CE') ? L'\u23CE' : L'~'; // "return"
obfuscation_read_char = can_be_encoded(L'\u25CF') ? L'\u25CF' : L'#'; // "black circle"
}
@ -590,7 +587,6 @@ bool should_suppress_stderr_for_tests() {
return program_name && !wcscmp(program_name, TESTS_PROGRAM_NAME);
}
static void debug_shared(const wchar_t level, const wcstring &msg) {
pid_t current_pid = getpid();
@ -805,7 +801,7 @@ wcstring reformat_for_screen(const wcstring &msg) {
/// Escape a string in a fashion suitable for using as a URL. Store the result in out_str.
static void escape_string_url(const wcstring &in, wcstring &out) {
for (auto& c1 : in) {
for (auto &c1 : in) {
// This silliness is so we get the correct result whether chars are signed or unsigned.
unsigned int c2 = (unsigned int)c1 & 0xFF;
if (!(c2 & 0x80) &&
@ -1027,7 +1023,8 @@ static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring
case L';':
case L'"':
case L'~': {
bool char_is_normal = (c == L'~' && no_tilde) || (c == L'^' && no_caret) || (c == L'?' && no_qmark);
bool char_is_normal = (c == L'~' && no_tilde) || (c == L'^' && no_caret) ||
(c == L'?' && no_qmark);
if (!char_is_normal) {
need_escape = 1;
if (escape_all) out += L'\\';
@ -1346,7 +1343,12 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
int brace_count = 0;
bool errored = false;
enum { mode_unquoted, mode_single_quotes, mode_double_quotes, mode_braces } mode = mode_unquoted;
enum {
mode_unquoted,
mode_single_quotes,
mode_double_quotes,
mode_braces
} mode = mode_unquoted;
for (size_t input_position = 0; input_position < input_len && !errored; input_position++) {
const wchar_t c = input[input_position];
@ -1418,7 +1420,8 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in
// We can't parse them properly, but it shouldn't hurt,
// so we don't assert here.
// See #4954.
// assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we shouldn't be able to get here");
// assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we
// shouldn't be able to get here");
brace_count--;
brace_text_start = brace_text_start && brace_count > 0;
to_append_or_none = BRACE_END;
@ -1748,10 +1751,11 @@ bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &valu
value.compare(value.size() - suffix_size, suffix_size, proposed_suffix) == 0;
}
bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, const wcstring &value) {
bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix,
const wcstring &value) {
size_t suffix_size = proposed_suffix.size();
return suffix_size <= value.size() &&
wcsncasecmp(value.c_str() + (value.size() - suffix_size), proposed_suffix.c_str(), suffix_size) == 0;
return suffix_size <= value.size() && wcsncasecmp(value.c_str() + (value.size() - suffix_size),
proposed_suffix.c_str(), suffix_size) == 0;
}
/// Returns true if seq, represented as a subsequence, is contained within string.
@ -2061,9 +2065,7 @@ void setup_fork_guards() {
initial_pid = getpid();
}
void save_term_foreground_process_group() {
initial_fg_process_group = tcgetpgrp(STDIN_FILENO);
}
void save_term_foreground_process_group() { initial_fg_process_group = tcgetpgrp(STDIN_FILENO); }
void restore_term_foreground_process_group() {
if (initial_fg_process_group == -1) return;

@ -22,9 +22,9 @@
#include <mutex>
#include <sstream>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <tuple>
#include <vector>
#include "fallback.h" // IWYU pragma: keep
@ -187,7 +187,8 @@ extern struct termios shell_modes;
/// The character to use where the text has been truncated. Is an ellipsis on unicode system and a $
/// on other systems.
extern wchar_t ellipsis_char;
/// The character or string to use where text has been truncated (ellipsis if possible, otherwise ...)
/// The character or string to use where text has been truncated (ellipsis if possible, otherwise
/// ...)
extern const wchar_t *ellipsis_str;
/// Character representing an omitted newline at the end of text.
@ -328,7 +329,8 @@ bool string_prefixes_string(const wchar_t *proposed_prefix, const wchar_t *value
/// Test if a string is a suffix of another.
bool string_suffixes_string(const wcstring &proposed_suffix, const wcstring &value);
bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &value);
bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, const wcstring &value);
bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix,
const wcstring &value);
/// Test if a string prefixes another without regard to case. Returns true if a is a prefix of b.
bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix,
@ -534,7 +536,7 @@ class null_terminated_array_t {
void operator=(null_terminated_array_t rhs);
null_terminated_array_t(const null_terminated_array_t &);
typedef std::vector<std::basic_string<CharType_t> > string_list_t;
typedef std::vector<std::basic_string<CharType_t>> string_list_t;
size_t size() const {
size_t len = 0;
@ -882,7 +884,6 @@ struct enum_map {
const wchar_t *const str;
};
/// Given a string return the matching enum. Return the sentinal enum if no match is made. The map
/// must be sorted by the `str` member. A binary search is twice as fast as a linear search with 16
/// elements in the map.
@ -916,13 +917,13 @@ static const wchar_t *enum_to_str(T enum_val, const enum_map<T> map[]) {
return NULL;
};
template<typename... Args>
template <typename... Args>
using tuple_list = std::vector<std::tuple<Args...>>;
//Given a container mapping one X to many Y, return a list of {X,Y}
template<typename X, typename Y>
// Given a container mapping one X to many Y, return a list of {X,Y}
template <typename X, typename Y>
inline tuple_list<X, Y> flatten(const std::unordered_map<X, std::vector<Y>> &list) {
tuple_list<X, Y> results(list.size() * 1.5); //just a guess as to the initial size
tuple_list<X, Y> results(list.size() * 1.5); // just a guess as to the initial size
for (auto &kv : list) {
for (auto &v : kv.second) {
results.emplace_back(std::make_tuple(kv.first, v));
@ -1004,7 +1005,7 @@ struct hash<const wcstring> {
return hasher((wcstring)w);
}
};
}
} // namespace std
#endif
#endif