builtin string: don't print final newline if it's missing from stdin

A command like "printf nonewline | sed s/x/y/" does not print a
concluding newline, whereas "printf nnl | string replace x y" does.
This is an edge case -- usually the user input does have a newline at
the end -- but it seems still better for this command to just forward
the user's data.

Teach most string subcommands to check if stdin is missing the trailing
newline, and stop adding one in that case.
This does not apply when input is read from commandline arguments.

* Most subcommands stop adding the final newline, because they don't
  really care about newlines, so besides their normal processing,
  they just want to preserve user input. They are:
  * string collect
  * string escape/unescape
  * string join¹
  * string lower/upper
  * string pad
  * string replace
  * string repeat
  * string sub
  * string trim

* string match keeps adding the newline, following "grep". Additionally,
  for string match --regex, it's important to output capture groups
  separated by newlines, resulting in multiple output lines for an
  input line. So it is not obvious where to leave out the newline.

* string split/split0 keep adding the newline for the same reason --
  they are meant to output multiple elements for a single input line.

¹) string join0 is not changed because it already printed a trailing
   zero byte instead of the trailing newline. This is consistent
   with other tools like "find -print0".

Closes #3847
This commit is contained in:
Johannes Altmanninger 2021-11-22 21:08:56 +01:00
parent 4a3e55f69c
commit 745129e825
6 changed files with 85 additions and 34 deletions

View File

@ -77,6 +77,7 @@ Scripting improvements
- ``cd ""`` no longer crashes fish (:issue:`8147`).
- ``set --query`` can now query whether a variable is a pathvar via ``--path`` or ``--unpath`` (:issue:`8494`).
- Tilde characters (``~``) produced by custom completions are no longer escaped when applied to the command line, making it easier to use the output of a recursive ``complete -C`` in completion scripts (:issue:`4570`).
- Most ``string`` subcommands no longer print a final newline if such is missing from stdin (:issue:`3847`).
Interactive improvements
------------------------

View File

@ -79,6 +79,11 @@ class arg_iterator_t {
// Backing storage for the next() string.
wcstring storage_;
const io_streams_t &streams_;
// If set, we have consumed all of stdin and its last line is missing a newline character.
// This is an edge case -- we expect text input, which is conventionally terminated by a
// newline character. But if it isn't, we use this to avoid creating one out of thin air,
// to not corrupt input data.
bool missing_trailing_newline = false;
/// Reads the next argument from stdin, returning true if an argument was produced and false if
/// not. On true, the string is stored in storage_.
@ -94,6 +99,7 @@ class arg_iterator_t {
// If we still have buffer contents, flush them,
// in case there was no trailing sep.
if (buffer_.empty()) return false;
missing_trailing_newline = true;
storage_ = str2wcstring(buffer_);
buffer_.clear();
return true;
@ -131,6 +137,11 @@ class arg_iterator_t {
return nullptr;
}
}
/// Returns true if we should add a newline after printing output for the current item.
/// This is only ever false in an edge case, namely after we have consumed stdin and the
/// last line is missing a trailing newline.
bool want_newline() const { return !missing_trailing_newline; }
};
// This is used by the string subcommands to communicate with the option parser which flags are
@ -688,7 +699,9 @@ static int string_escape(parser_t &parser, io_streams_t &streams, int argc, cons
arg_iterator_t aiter(argv, optind, streams);
while (const wcstring *arg = aiter.nextstr()) {
streams.out.append(escape_string(*arg, flags, opts.escape_style));
streams.out.append(L'\n');
if (aiter.want_newline()) {
streams.out.append(L'\n');
}
nesc++;
}
@ -713,7 +726,9 @@ static int string_unescape(parser_t &parser, io_streams_t &streams, int argc,
wcstring result;
if (unescape_string(*arg, &result, flags, opts.escape_style)) {
streams.out.append(result);
streams.out.append(L'\n');
if (aiter.want_newline()) {
streams.out.append(L'\n');
}
nesc++;
}
}
@ -745,7 +760,11 @@ static int string_join_maybe0(parser_t &parser, io_streams_t &streams, int argc,
nargs++;
}
if (nargs > 0 && !opts.quiet) {
streams.out.push_back(is_join0 ? L'\0' : L'\n');
if (is_join0) {
streams.out.push_back(L'\0');
} else if (aiter.want_newline()) {
streams.out.push_back(L'\n');
}
}
return nargs > 1 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
@ -1323,7 +1342,9 @@ static int string_pad(parser_t &parser, io_streams_t &streams, int argc, const w
padded.append(pad, opts.char_to_pad);
}
}
padded.push_back(L'\n');
if (aiter_width.want_newline()) {
padded.push_back(L'\n');
}
streams.out.append(padded);
}
@ -1343,7 +1364,7 @@ class string_replacer_t {
virtual ~string_replacer_t() = default;
int replace_count() const { return total_replaced; }
virtual bool replace_matches(const wcstring &arg) = 0;
virtual bool replace_matches(const wcstring &arg, bool want_newline) = 0;
};
class literal_replacer_t : public string_replacer_t {
@ -1360,7 +1381,7 @@ class literal_replacer_t : public string_replacer_t {
patlen(pattern.length()) {}
~literal_replacer_t() override = default;
bool replace_matches(const wcstring &arg) override;
bool replace_matches(const wcstring &arg, bool want_newline) override;
};
static maybe_t<wcstring> interpret_escapes(const wcstring &arg) {
@ -1400,12 +1421,12 @@ class regex_replacer_t : public string_replacer_t {
}
}
bool replace_matches(const wcstring &arg) override;
bool replace_matches(const wcstring &arg, bool want_newline) override;
};
/// A return value of true means all is well (even if no replacements were performed), false
/// indicates an unrecoverable error.
bool literal_replacer_t::replace_matches(const wcstring &arg) {
bool literal_replacer_t::replace_matches(const wcstring &arg, bool want_newline) {
wcstring result;
bool replacement_occurred = false;
@ -1432,7 +1453,9 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
if (!opts.quiet && (!opts.filter || replacement_occurred)) {
streams.out.append(result);
streams.out.append(L'\n');
if (want_newline) {
streams.out.append(L'\n');
}
}
return true;
@ -1440,7 +1463,7 @@ bool literal_replacer_t::replace_matches(const wcstring &arg) {
/// A return value of true means all is well (even if no replacements were performed), false
/// indicates an unrecoverable error.
bool regex_replacer_t::replace_matches(const wcstring &arg) {
bool regex_replacer_t::replace_matches(const wcstring &arg, bool want_newline) {
if (!regex.code) return false; // pcre2_compile() failed
if (!replacement) return false; // replacement was an invalid string
@ -1488,7 +1511,9 @@ bool regex_replacer_t::replace_matches(const wcstring &arg) {
bool replacement_occurred = pcre2_rc > 0;
if (!opts.quiet && (!opts.filter || replacement_occurred)) {
streams.out.append(outstr);
streams.out.append(L'\n');
if (want_newline) {
streams.out.append(L'\n');
}
}
total_replaced += pcre2_rc;
}
@ -1520,7 +1545,7 @@ static int string_replace(parser_t &parser, io_streams_t &streams, int argc, con
arg_iterator_t aiter(argv, optind, streams);
while (const wcstring *arg = aiter.nextstr()) {
if (!replacer->replace_matches(*arg)) return STATUS_INVALID_ARGS;
if (!replacer->replace_matches(*arg, aiter.want_newline())) return STATUS_INVALID_ARGS;
if (opts.quiet && replacer->replace_count() > 0) return STATUS_CMD_OK;
}
@ -1601,12 +1626,12 @@ static int string_split_maybe0(parser_t &parser, io_streams_t &streams, int argc
for (const auto &field : opts.fields) {
if (field - 1 < (long)splits.size()) {
streams.out.append_with_separation(splits.at(field - 1),
separation_type_t::explicitly);
separation_type_t::explicitly, true);
}
}
} else {
for (const wcstring &split : splits) {
streams.out.append_with_separation(split, separation_type_t::explicitly);
streams.out.append_with_separation(split, separation_type_t::explicitly, true);
}
}
}
@ -1641,7 +1666,8 @@ static int string_collect(parser_t &parser, io_streams_t &streams, int argc, con
len -= 1;
}
}
streams.out.append_with_separation(s, len, separation_type_t::explicitly);
streams.out.append_with_separation(s, len, separation_type_t::explicitly,
aiter.want_newline());
appended += len;
}
@ -1650,7 +1676,9 @@ static int string_collect(parser_t &parser, io_streams_t &streams, int argc, con
// echo (true | string collect --allow-empty)"bar"
// prints "bar".
if (opts.allow_empty && appended == 0) {
streams.out.append_with_separation(L"", 0, separation_type_t::explicitly);
streams.out.append_with_separation(
L"", 0, separation_type_t::explicitly,
true /* historical behavior is to always print a newline */);
}
return appended > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
@ -1718,7 +1746,7 @@ static int string_repeat(parser_t &parser, io_streams_t &streams, int argc, cons
}
// Historical behavior is to never append a newline if all strings were empty.
if (!opts.quiet && !opts.no_newline && !all_empty) {
if (!opts.quiet && !opts.no_newline && !all_empty && aiter.want_newline()) {
streams.out.append(L'\n');
}
@ -1780,7 +1808,9 @@ static int string_sub(parser_t &parser, io_streams_t &streams, int argc, const w
// Note that std::string permits count to extend past end of string.
if (!opts.quiet) {
streams.out.append(s->substr(pos, count));
streams.out.append(L'\n');
if (aiter.want_newline()) {
streams.out.append(L'\n');
}
}
nsub++;
if (opts.quiet) return STATUS_CMD_OK;
@ -1823,7 +1853,9 @@ static int string_trim(parser_t &parser, io_streams_t &streams, int argc, const
ntrim += arg->size() - (end - begin);
if (!opts.quiet) {
streams.out.append(wcstring(*arg, begin, end - begin));
streams.out.append(L'\n');
if (aiter.want_newline()) {
streams.out.append(L'\n');
}
} else if (ntrim > 0) {
return STATUS_CMD_OK;
}
@ -1849,7 +1881,9 @@ static int string_transform(parser_t &parser, io_streams_t &streams, int argc, c
if (transformed != *arg) n_transformed++;
if (!opts.quiet) {
streams.out.append(transformed);
streams.out.append(L'\n');
if (aiter.want_newline()) {
streams.out.append(L'\n');
}
} else if (n_transformed > 0) {
return STATUS_CMD_OK;
}

View File

@ -309,13 +309,14 @@ shared_ptr<const io_data_t> io_chain_t::io_for_fd(int fd) const {
void output_stream_t::append_narrow_buffer(separated_buffer_t buffer) {
for (const auto &rhs_elem : buffer.elements()) {
append_with_separation(str2wcstring(rhs_elem.contents), rhs_elem.separation);
append_with_separation(str2wcstring(rhs_elem.contents), rhs_elem.separation, false);
}
}
void output_stream_t::append_with_separation(const wchar_t *s, size_t len, separation_type_t type) {
void output_stream_t::append_with_separation(const wchar_t *s, size_t len, separation_type_t type,
bool want_newline) {
append(s, len);
if (type == separation_type_t::explicitly) {
if (type == separation_type_t::explicitly && want_newline) {
append(L'\n');
}
}
@ -352,7 +353,8 @@ void buffered_output_stream_t::append(const wchar_t *s, size_t amt) {
}
void buffered_output_stream_t::append_with_separation(const wchar_t *s, size_t len,
separation_type_t type) {
separation_type_t type, bool want_newline) {
UNUSED(want_newline);
buffer_->append(wcs2string(s, len), type);
}

View File

@ -365,11 +365,14 @@ class output_stream_t : noncopyable_t, nonmovable_t {
virtual int flush_and_check_error();
/// An optional override point. This is for explicit separation.
virtual void append_with_separation(const wchar_t *s, size_t len, separation_type_t type);
/// \param want_newline this is true if the output item should be ended with a newline. This
/// is only relevant if we are printing the output to a stream,
virtual void append_with_separation(const wchar_t *s, size_t len, separation_type_t type,
bool want_newline);
/// The following are all convenience overrides.
void append_with_separation(const wcstring &s, separation_type_t type) {
append_with_separation(s.data(), s.size(), type);
void append_with_separation(const wcstring &s, separation_type_t type, bool want_newline) {
append_with_separation(s.data(), s.size(), type, want_newline);
}
/// Append a string.
@ -443,7 +446,8 @@ class buffered_output_stream_t final : public output_stream_t {
}
void append(const wchar_t *s, size_t amt) override;
void append_with_separation(const wchar_t *s, size_t len, separation_type_t type) override;
void append_with_separation(const wchar_t *s, size_t len, separation_type_t type,
bool want_newline) override;
int flush_and_check_error() override;
private:

View File

@ -40,6 +40,7 @@ echo \'{ hello , world }\'
for phrase in {good\,, beautiful ,morning}
echo -n "$phrase "
end | string trim
echo
for phrase in {goodbye\,,\ cruel\ ,world\n}
echo -n $phrase
end

View File

@ -601,12 +601,12 @@ and echo uppercasing a uppercase string did not fail as expected
# 'Check NUL'
# Note: We do `string escape` at the end to make a `\0` literal visible.
printf 'a\0b' | string escape
printf 'a\0c' | string match -e a | string escape
printf 'a\0d' | string split '' | string escape
printf 'a\0b' | string match -r '.*b$' | string escape
printf 'a\0b' | string replace b g | string escape
printf 'a\0b' | string replace -r b g | string escape
printf 'a\0b\n' | string escape
printf 'a\0c\n' | string match -e a | string escape
printf 'a\0d\n' | string split '' | string escape
printf 'a\0b\n' | string match -r '.*b$' | string escape
printf 'a\0b\n' | string replace b g | string escape
printf 'a\0b\n' | string replace -r b g | string escape
# TODO: These do not yet work!
# printf 'a\0b' | string match '*b' | string escape
# CHECK: a\x00b
@ -797,3 +797,12 @@ echo "foo1x foo2x foo3x" | string match -arg 'foo(\d)x'
# CHECK: 1
# CHECK: 2
# CHECK: 3
# Most subcommands preserve missing newline (#3847).
echo -n abc | string upper
echo '<eol>'
# CHECK: ABC<eol>
printf \<
printf my-password | string replace -ra . \*
printf \>\n
# CHECK: <***********>