Add separated_buffer_t and adopt it in output_stream_t

separated_buffer_t encapsulates the logic around discarding (which
was previously duplicated between output_stream_t and io_buffer_t),
and will also encapsulate the logic around explicitly separated
output.
This commit is contained in:
ridiculousfish 2018-05-29 21:11:34 -07:00
parent 5b9331ade0
commit 90a4af5112
5 changed files with 154 additions and 64 deletions

View File

@ -938,7 +938,7 @@ void exec_job(parser_t &parser, job_t *j) {
const bool stdout_is_to_buffer = stdout_io && stdout_io->io_mode == IO_BUFFER; const bool stdout_is_to_buffer = stdout_io && stdout_io->io_mode == IO_BUFFER;
const bool no_stdout_output = stdout_stream.empty(); const bool no_stdout_output = stdout_stream.empty();
const bool no_stderr_output = stderr_stream.empty(); const bool no_stderr_output = stderr_stream.empty();
const bool stdout_discarded = stdout_stream.output_discarded(); const bool stdout_discarded = stdout_stream.buffer().discarded();
if (!stdout_discarded && no_stdout_output && no_stderr_output) { if (!stdout_discarded && no_stdout_output && no_stderr_output) {
// The builtin produced no output and is not inside of a pipeline. No // The builtin produced no output and is not inside of a pipeline. No
@ -950,6 +950,12 @@ void exec_job(parser_t &parser, job_t *j) {
// The builtin produced no stderr, and its stdout is going to an // The builtin produced no stderr, and its stdout is going to an
// internal buffer. There is no need to fork. This helps out the // internal buffer. There is no need to fork. This helps out the
// performance quite a bit in complex completion code. // performance quite a bit in complex completion code.
// TODO: we're sloppy about handling explicitly separated output.
// Theoretically we could have explicitly separated output on stdout and
// also stderr output; in that case we ought to thread the exp-sep output
// through to the io buffer. We're getting away with this because the only
// thing that can output exp-sep output is `string split0` which doesn't
// also produce stderr.
debug(3, L"Skipping fork: buffered output for internal builtin '%ls'", debug(3, L"Skipping fork: buffered output for internal builtin '%ls'",
p->argv0()); p->argv0());
@ -960,8 +966,8 @@ void exec_job(parser_t &parser, job_t *j) {
// We are writing to normal stdout and stderr. Just do it - no need to fork. // We are writing to normal stdout and stderr. Just do it - no need to fork.
debug(3, L"Skipping fork: ordinary output for internal builtin '%ls'", debug(3, L"Skipping fork: ordinary output for internal builtin '%ls'",
p->argv0()); p->argv0());
const std::string outbuff = wcs2string(stdout_stream.buffer()); const std::string outbuff = wcs2string(stdout_stream.contents());
const std::string errbuff = wcs2string(stderr_stream.buffer()); const std::string errbuff = wcs2string(stderr_stream.contents());
bool builtin_io_done = do_builtin_io(outbuff.data(), outbuff.size(), bool builtin_io_done = do_builtin_io(outbuff.data(), outbuff.size(),
errbuff.data(), errbuff.size()); errbuff.data(), errbuff.size());
if (!builtin_io_done && errno != EPIPE) { if (!builtin_io_done && errno != EPIPE) {
@ -990,11 +996,11 @@ void exec_job(parser_t &parser, job_t *j) {
// in the child. // in the child.
// //
// These strings may contain embedded nulls, so don't treat them as C strings. // These strings may contain embedded nulls, so don't treat them as C strings.
const std::string outbuff_str = wcs2string(stdout_stream.buffer()); const std::string outbuff_str = wcs2string(stdout_stream.contents());
const char *outbuff = outbuff_str.data(); const char *outbuff = outbuff_str.data();
size_t outbuff_len = outbuff_str.size(); size_t outbuff_len = outbuff_str.size();
const std::string errbuff_str = wcs2string(stderr_stream.buffer()); const std::string errbuff_str = wcs2string(stderr_stream.contents());
const char *errbuff = errbuff_str.data(); const char *errbuff = errbuff_str.data();
size_t errbuff_len = errbuff_str.size(); size_t errbuff_len = errbuff_str.size();

View File

@ -4107,10 +4107,10 @@ static void run_one_string_test(const wchar_t *const *argv, int expected_rc,
if (rc != expected_rc) { if (rc != expected_rc) {
err(L"Test failed on line %lu: [%ls]: expected return code %d but got %d", __LINE__, err(L"Test failed on line %lu: [%ls]: expected return code %d but got %d", __LINE__,
args.c_str(), expected_rc, rc); args.c_str(), expected_rc, rc);
} else if (streams.out.buffer() != expected_out) { } else if (streams.out.contents() != expected_out) {
err(L"Test failed on line %lu: [%ls]: expected [%ls] but got [%ls]", __LINE__, args.c_str(), err(L"Test failed on line %lu: [%ls]: expected [%ls] but got [%ls]", __LINE__, args.c_str(),
escape_string(expected_out, ESCAPE_ALL).c_str(), escape_string(expected_out, ESCAPE_ALL).c_str(),
escape_string(streams.out.buffer(), ESCAPE_ALL).c_str()); escape_string(streams.out.contents(), ESCAPE_ALL).c_str());
} }
} }

View File

@ -34,11 +34,11 @@ void io_buffer_t::print() const {
void io_buffer_t::append_from_stream(const output_stream_t &stream) { void io_buffer_t::append_from_stream(const output_stream_t &stream) {
if (output_discarded()) if (output_discarded())
return; return;
if (stream.output_discarded()) { if (stream.buffer().discarded()) {
set_discard(); set_discard();
return; return;
} }
const std::string str = wcs2string(stream.buffer()); const std::string str = wcs2string(stream.contents());
out_buffer_append(str.data(), str.size()); out_buffer_append(str.data(), str.size());
} }

192
src/io.h
View File

@ -22,6 +22,127 @@ using std::tr1::shared_ptr;
#include "common.h" #include "common.h"
#include "env.h" #include "env.h"
/// The elements stored in a separated_buffer_t come in two flavors: those that were explicitly
/// separated (e.g. through `string split0`) and those whose separation is inferred. This enum
/// records which flavor an element is.
enum class separation_type_t {
    /// Separation of this element should be inferred, e.g. through IFS.
    inferred,
    /// This element was explicitly separated and should not be separated further.
    explicitly
};

/// A size-limited buffer holding a list of elements. Some elements may be explicitly separated,
/// while the others must be separated further by the user (e.g. via IFS). Once the configured
/// limit is exceeded, the buffer drops everything it holds and ignores all further appends.
template <typename StringType>
class separated_buffer_t {
    /// One entry of the buffer: a string plus how it was separated.
    struct element_t {
        StringType contents;
        separation_type_t separation;

        element_t(StringType str, separation_type_t sep)
            : contents(std::move(str)), separation(sep) {}

        /// \return true if this element was explicitly separated.
        bool is_explicitly_separated() const { return separation == separation_type_t::explicitly; }
    };

    /// Maximum amount of data to buffer. Zero means unlimited.
    size_t buffer_limit_;

    /// Sum of the sizes of all stored contents.
    size_t contents_size_ = 0;

    /// The stored elements, in append order.
    std::vector<element_t> elements_;

    /// Set once buffer_limit_ has been exceeded; from then on all input is dropped.
    bool discard_{false};

    /// Account for \p delta more units of data about to be stored. \return true if the data fits,
    /// false if the buffer is (or has just become) discarded.
    bool try_add_size(size_t delta) {
        if (discard_) return false;

        const size_t grown = contents_size_ + delta;
        // Unsigned wraparound shows up as a sum smaller than the addend.
        const bool wrapped = grown < delta;
        const bool over_limit = buffer_limit_ > 0 && grown > buffer_limit_;
        if (wrapped || over_limit) {
            set_discard();
            return false;
        }
        contents_size_ = grown;
        return true;
    }

    /// Copying a separated_buffer_t is not permitted.
    separated_buffer_t(const separated_buffer_t &) = delete;
    void operator=(const separated_buffer_t &) = delete;

   public:
    /// Create an empty buffer that holds at most \p limit units of data; 0 means no limit.
    separated_buffer_t(size_t limit) : buffer_limit_(limit) {}

    /// \return the configured limit, where 0 means no limit.
    size_t limit() const { return buffer_limit_; }

    /// \return the total size of the stored contents.
    size_t size() const { return contents_size_; }

    /// \return true if the contents have been discarded.
    bool discarded() const { return discard_; }

    /// Drop everything stored so far and mark the buffer as discarded.
    void set_discard() {
        elements_.clear();
        contents_size_ = 0;
        discard_ = true;
    }

    /// Flatten the buffer into one string. Every explicitly separated element is followed by a
    /// newline; inferred elements are copied as they are.
    StringType newline_serialized() const {
        StringType joined;
        joined.reserve(size());
        for (const element_t &item : elements_) {
            joined += item.contents;
            if (item.separation == separation_type_t::explicitly) joined.push_back('\n');
        }
        return joined;
    }

    /// \return the stored elements.
    const std::vector<element_t> &elements() const { return elements_; }

    /// Append the range [begin, end) as an element of separation type \p sep. Inferred data is
    /// merged into the last element when that element is itself not explicitly separated.
    template <typename Iterator>
    void append(Iterator begin, Iterator end, separation_type_t sep = separation_type_t::inferred) {
        if (!try_add_size(std::distance(begin, end))) return;

        const bool can_merge = sep == separation_type_t::inferred && !elements_.empty() &&
                               !elements_.back().is_explicitly_separated();
        if (!can_merge) {
            elements_.emplace_back(StringType(begin, end), sep);
            return;
        }
        elements_.back().contents.append(begin, end);
    }

    /// Append the string \p str as an element of separation type \p sep.
    void append(const StringType &str, separation_type_t sep = separation_type_t::inferred) {
        this->append(str.begin(), str.end(), sep);
    }

    /// Given that this is a narrow buffer, convert each element of the wide buffer \p rhs to
    /// narrow and append it, keeping its separation type. Only the elements of \p rhs are
    /// visited; its discarded flag is not examined here.
    template <typename RHSStringType>
    void append_wide_buffer(const separated_buffer_t<RHSStringType> &rhs) {
        const auto &wide_elements = rhs.elements();
        for (const auto &wide_elem : wide_elements) {
            append(wcs2string(wide_elem.contents), wide_elem.separation);
        }
    }
};
/// Describes what type of IO operation an io_data_t represents. /// Describes what type of IO operation an io_data_t represents.
enum io_mode_t { IO_FILE, IO_PIPE, IO_FD, IO_BUFFER, IO_CLOSE }; enum io_mode_t { IO_FILE, IO_PIPE, IO_FD, IO_BUFFER, IO_CLOSE };
@ -194,77 +315,40 @@ bool pipe_avoid_conflicts_with_io_chain(int fds[2], const io_chain_t &ios);
/// Class representing the output that a builtin can generate. /// Class representing the output that a builtin can generate.
class output_stream_t { class output_stream_t {
private: private:
/// Limit on how much data we'll buffer. Zero means no limit. /// Storage for our data.
size_t buffer_limit; separated_buffer_t<wcstring> buffer_;
/// True if we're discarding input.
bool discard;
// No copying. // No copying.
output_stream_t(const output_stream_t &s); output_stream_t(const output_stream_t &s) = delete;
void operator=(const output_stream_t &s); void operator=(const output_stream_t &s) = delete;
wcstring buffer_;
void check_for_overflow() {
if (buffer_limit && buffer_.size() > buffer_limit) {
discard = true;
buffer_.clear();
}
}
public: public:
output_stream_t(size_t buffer_limit_) : buffer_limit(buffer_limit_), discard(false) {} output_stream_t(size_t buffer_limit) : buffer_(buffer_limit) {}
void append(const wcstring &s) { void append(const wcstring &s) { buffer_.append(s.begin(), s.end()); }
if (discard) return;
buffer_.append(s);
check_for_overflow();
}
void append(const wchar_t *s) { const separated_buffer_t<wcstring> &buffer() const { return buffer_; }
if (discard) return;
buffer_.append(s);
check_for_overflow();
}
void append(wchar_t s) { void append(const wchar_t *s) { append(s, wcslen(s)); }
if (discard) return;
buffer_.push_back(s);
check_for_overflow();
}
void append(const wchar_t *s, size_t amt) { void append(wchar_t s) { append(&s, 1); }
if (discard) return;
buffer_.append(s, amt);
check_for_overflow();
}
void push_back(wchar_t c) { void append(const wchar_t *s, size_t amt) { buffer_.append(s, s + amt); }
if (discard) return;
buffer_.push_back(c); void push_back(wchar_t c) { append(c); }
check_for_overflow();
}
void append_format(const wchar_t *format, ...) { void append_format(const wchar_t *format, ...) {
if (discard) return;
va_list va; va_list va;
va_start(va, format); va_start(va, format);
::append_formatv(buffer_, format, va); append_formatv(format, va);
va_end(va); va_end(va);
check_for_overflow();
} }
void append_formatv(const wchar_t *format, va_list va_orig) { void append_formatv(const wchar_t *format, va_list va) { append(vformat_string(format, va)); }
if (discard) return;
::append_formatv(buffer_, format, va_orig);
check_for_overflow();
}
const wcstring &buffer() const { return buffer_; } bool empty() const { return buffer_.size() == 0; }
/// Function that returns true if we discarded the input because there was too much data. wcstring contents() const { return buffer_.newline_serialized(); }
bool output_discarded() const { return discard; }
bool empty() const { return buffer_.empty(); }
}; };
struct io_streams_t { struct io_streams_t {

View File

@ -328,7 +328,7 @@ parse_execution_result_t parse_execution_context_t::run_function_statement(
proc_set_last_status(err); proc_set_last_status(err);
if (!streams.err.empty()) { if (!streams.err.empty()) {
this->report_error(header, L"%ls", streams.err.buffer().c_str()); this->report_error(header, L"%ls", streams.err.contents().c_str());
result = parse_execution_errored; result = parse_execution_errored;
} }