From 369890cdd936f81c51347f6f0e94a60a5b1bfe30 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sun, 27 May 2018 17:14:12 -0700
Subject: [PATCH 1/8] Clean up io_buffer_t interface

---
 src/io.h | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/src/io.h b/src/io.h
index b385c451d..3d54679c0 100644
--- a/src/io.h
+++ b/src/io.h
@@ -101,7 +101,7 @@ class io_chain_t;
 class io_buffer_t : public io_pipe_t {
    private:
     /// True if we're discarding input.
-    bool discard;
+    bool discard{false};
     /// Limit on how much data we'll buffer. Zero means no limit.
     size_t buffer_limit;
     /// Buffer to save output in.
@@ -109,7 +109,6 @@ class io_buffer_t : public io_pipe_t {
 
     explicit io_buffer_t(int f, size_t limit)
         : io_pipe_t(IO_BUFFER, f, false /* not input */),
-          discard(false),
           buffer_limit(limit),
           out_buffer() {}
 
@@ -130,25 +129,23 @@ class io_buffer_t : public io_pipe_t {
     }
 
     /// Function to get a pointer to the buffer.
-    char *out_buffer_ptr(void) { return out_buffer.empty() ? NULL : &out_buffer.at(0); }
-
-    const char *out_buffer_ptr(void) const { return out_buffer.empty() ? NULL : &out_buffer.at(0); }
+    const char *out_buffer_ptr() const { return out_buffer.empty() ? NULL : &out_buffer.at(0); }
 
     /// Function to get the size of the buffer.
-    size_t out_buffer_size(void) const { return out_buffer.size(); }
+    size_t out_buffer_size() const { return out_buffer.size(); }
 
     /// Function that returns true if we discarded the input because there was too much data.
-    bool output_discarded(void) { return discard; }
+    bool output_discarded() { return discard; }
 
     /// Function to explicitly put the object in discard mode. Meant to be used when moving
     /// the results from an output_stream_t to an io_buffer_t.
-    void set_discard(void) {
+    void set_discard() {
         discard = true;
         out_buffer.clear();
     }
 
     /// This is used to transfer the buffer limit for this object to a output_stream_t object.
-    size_t get_buffer_limit(void) { return buffer_limit; }
+    size_t get_buffer_limit() { return buffer_limit; }
 
     /// Ensures that the pipes do not conflict with any fd redirections in the chain.
     bool avoid_conflicts_with_io_chain(const io_chain_t &ios);
@@ -213,10 +210,6 @@ class output_stream_t {
    public:
     output_stream_t(size_t buffer_limit_) : buffer_limit(buffer_limit_), discard(false) {}
 
-#if 0
-    void set_buffer_limit(size_t buffer_limit_) { buffer_limit = buffer_limit_; }
-#endif
-
     void append(const wcstring &s) {
         if (discard) return;
         buffer_.append(s);
@@ -265,7 +258,7 @@ class output_stream_t {
     const wcstring &buffer() const { return buffer_; }
 
     /// Function that returns true if we discarded the input because there was too much data.
-    bool output_discarded(void) { return discard; }
+    bool output_discarded() { return discard; }
 
     bool empty() const { return buffer_.empty(); }
 };

From 5b9331ade0b6c2944ec92a71416e2cf63d27fb11 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sun, 27 May 2018 23:56:20 -0700
Subject: [PATCH 2/8] Teach io_buffer_t to append from output_stream_t directly

This will simplify logic when we teach output_stream_t about explicitly
split outputs, i.e. for 'string split0'
---
 src/exec.cpp | 25 ++++++++++---------------
 src/io.cpp   | 11 +++++++++++
 src/io.h     | 10 +++++++---
 3 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/src/exec.cpp b/src/exec.cpp
index a3e9ce456..a12c33c9e 100644
--- a/src/exec.cpp
+++ b/src/exec.cpp
@@ -925,8 +925,8 @@ void exec_job(parser_t &parser, job_t *j) {
                     process_net_io_chain.get_io_for_fd(STDERR_FILENO);
 
                 assert(builtin_io_streams.get() != NULL);
-                const wcstring &stdout_buffer = builtin_io_streams->out.buffer();
-                const wcstring &stderr_buffer = builtin_io_streams->err.buffer();
+                const output_stream_t &stdout_stream = builtin_io_streams->out;
+                const output_stream_t &stderr_stream = builtin_io_streams->err;
 
                 // If we are outputting to a file, we have to actually do it, even if we have no
                 // output, so that we can truncate the file. Does not apply to /dev/null.
@@ -936,9 +936,9 @@ void exec_job(parser_t &parser, job_t *j) {
                     // We are handling reads directly in the main loop. Note that we may still end
                     // up forking.
                     const bool stdout_is_to_buffer = stdout_io && stdout_io->io_mode == IO_BUFFER;
-                    const bool no_stdout_output = stdout_buffer.empty();
-                    const bool no_stderr_output = stderr_buffer.empty();
-                    const bool stdout_discarded = builtin_io_streams->out.output_discarded();
+                    const bool no_stdout_output = stdout_stream.empty();
+                    const bool no_stderr_output = stderr_stream.empty();
+                    const bool stdout_discarded = stdout_stream.output_discarded();
 
                     if (!stdout_discarded && no_stdout_output && no_stderr_output) {
                         // The builtin produced no output and is not inside of a pipeline. No
@@ -954,19 +954,14 @@ void exec_job(parser_t &parser, job_t *j) {
                               p->argv0());
 
                         io_buffer_t *io_buffer = static_cast<io_buffer_t *>(stdout_io.get());
-                        if (stdout_discarded) {
-                            io_buffer->set_discard();
-                        } else {
-                            const std::string res = wcs2string(builtin_io_streams->out.buffer());
-                            io_buffer->out_buffer_append(res.data(), res.size());
-                        }
+                        io_buffer->append_from_stream(stdout_stream);
                         fork_was_skipped = true;
                     } else if (stdout_io.get() == NULL && stderr_io.get() == NULL) {
                         // We are writing to normal stdout and stderr. Just do it - no need to fork.
                         debug(3, L"Skipping fork: ordinary output for internal builtin '%ls'",
                               p->argv0());
-                        const std::string outbuff = wcs2string(stdout_buffer);
-                        const std::string errbuff = wcs2string(stderr_buffer);
+                        const std::string outbuff = wcs2string(stdout_stream.buffer());
+                        const std::string errbuff = wcs2string(stderr_stream.buffer());
                         bool builtin_io_done = do_builtin_io(outbuff.data(), outbuff.size(),
                                                              errbuff.data(), errbuff.size());
                         if (!builtin_io_done && errno != EPIPE) {
@@ -995,11 +990,11 @@ void exec_job(parser_t &parser, job_t *j) {
                     // in the child.
                     //
                     // These strings may contain embedded nulls, so don't treat them as C strings.
-                    const std::string outbuff_str = wcs2string(stdout_buffer);
+                    const std::string outbuff_str = wcs2string(stdout_stream.buffer());
                     const char *outbuff = outbuff_str.data();
                     size_t outbuff_len = outbuff_str.size();
 
-                    const std::string errbuff_str = wcs2string(stderr_buffer);
+                    const std::string errbuff_str = wcs2string(stderr_stream.buffer());
                     const char *errbuff = errbuff_str.data();
                     size_t errbuff_len = errbuff_str.size();
 
diff --git a/src/io.cpp b/src/io.cpp
index 995877a19..e18be8f75 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -31,6 +31,17 @@ void io_buffer_t::print() const {
              is_input ? "yes" : "no", (unsigned long)out_buffer_size());
 }
 
+void io_buffer_t::append_from_stream(const output_stream_t &stream) {
+    if (output_discarded())
+        return;
+    if (stream.output_discarded()) {
+        set_discard();
+        return;
+    }
+    const std::string str = wcs2string(stream.buffer());
+    out_buffer_append(str.data(), str.size());
+}
+
 void io_buffer_t::read() {
     exec_close(pipe_fd[1]);
 
diff --git a/src/io.h b/src/io.h
index 3d54679c0..7b789fb67 100644
--- a/src/io.h
+++ b/src/io.h
@@ -98,6 +98,7 @@ class io_pipe_t : public io_data_t {
 };
 
 class io_chain_t;
+class output_stream_t;
 class io_buffer_t : public io_pipe_t {
    private:
     /// True if we're discarding input.
@@ -121,8 +122,7 @@ class io_buffer_t : public io_pipe_t {
     void out_buffer_append(const char *ptr, size_t count) {
         if (discard) return;
         if (buffer_limit && out_buffer.size() + count > buffer_limit) {
-            discard = true;
-            out_buffer.clear();
+            set_discard();
             return;
         }
         out_buffer.insert(out_buffer.end(), ptr, ptr + count);
@@ -153,6 +153,10 @@ class io_buffer_t : public io_pipe_t {
     /// Close output pipe, and read from input pipe until eof.
     void read();
 
+    /// Appends data from a given output_stream_t.
+    /// Marks the receiver as discarded if the stream was discarded.
+    void append_from_stream(const output_stream_t &stream);
+
     /// Create a IO_BUFFER type io redirection, complete with a pipe and a vector<char> for output.
     /// The default file descriptor used is STDOUT_FILENO for buffering.
     ///
@@ -258,7 +262,7 @@ class output_stream_t {
     const wcstring &buffer() const { return buffer_; }
 
     /// Function that returns true if we discarded the input because there was too much data.
-    bool output_discarded() { return discard; }
+    bool output_discarded() const { return discard; }
 
     bool empty() const { return buffer_.empty(); }
 };

From 90a4af511235fcb5959913bbe18c835fcd89c38a Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Tue, 29 May 2018 21:11:34 -0700
Subject: [PATCH 3/8] Add separated_buffer_t and adopt it in output_stream_t

separated_buffer_t encapsulates the logic around discarding (which
was previously duplicated between output_stream_t and io_buffer_t),
and will also encapsulate the logic around explicitly separated
output.
---
 src/exec.cpp            |  16 ++--
 src/fish_tests.cpp      |   4 +-
 src/io.cpp              |   4 +-
 src/io.h                | 192 +++++++++++++++++++++++++++++-----------
 src/parse_execution.cpp |   2 +-
 5 files changed, 154 insertions(+), 64 deletions(-)

diff --git a/src/exec.cpp b/src/exec.cpp
index a12c33c9e..95d7ec573 100644
--- a/src/exec.cpp
+++ b/src/exec.cpp
@@ -938,7 +938,7 @@ void exec_job(parser_t &parser, job_t *j) {
                     const bool stdout_is_to_buffer = stdout_io && stdout_io->io_mode == IO_BUFFER;
                     const bool no_stdout_output = stdout_stream.empty();
                     const bool no_stderr_output = stderr_stream.empty();
-                    const bool stdout_discarded = stdout_stream.output_discarded();
+                    const bool stdout_discarded = stdout_stream.buffer().discarded();
 
                     if (!stdout_discarded && no_stdout_output && no_stderr_output) {
                         // The builtin produced no output and is not inside of a pipeline. No
@@ -950,6 +950,12 @@ void exec_job(parser_t &parser, job_t *j) {
                         // The builtin produced no stderr, and its stdout is going to an
                         // internal buffer. There is no need to fork. This helps out the
                         // performance quite a bit in complex completion code.
+                        // TODO: we're sloppy about handling explicitly separated output.
+                        // Theoretically we could have explicitly separated output on stdout and
+                        // also stderr output; in that case we ought to thread the exp-sep output
+                        // through to the io buffer. We're getting away with this because the only
+                        // thing that can output exp-sep output is `string split0` which doesn't
+                        // also produce stderr.
                         debug(3, L"Skipping fork: buffered output for internal builtin '%ls'",
                               p->argv0());
 
@@ -960,8 +966,8 @@ void exec_job(parser_t &parser, job_t *j) {
                         // We are writing to normal stdout and stderr. Just do it - no need to fork.
                         debug(3, L"Skipping fork: ordinary output for internal builtin '%ls'",
                               p->argv0());
-                        const std::string outbuff = wcs2string(stdout_stream.buffer());
-                        const std::string errbuff = wcs2string(stderr_stream.buffer());
+                        const std::string outbuff = wcs2string(stdout_stream.contents());
+                        const std::string errbuff = wcs2string(stderr_stream.contents());
                         bool builtin_io_done = do_builtin_io(outbuff.data(), outbuff.size(),
                                                              errbuff.data(), errbuff.size());
                         if (!builtin_io_done && errno != EPIPE) {
@@ -990,11 +996,11 @@ void exec_job(parser_t &parser, job_t *j) {
                     // in the child.
                     //
                     // These strings may contain embedded nulls, so don't treat them as C strings.
-                    const std::string outbuff_str = wcs2string(stdout_stream.buffer());
+                    const std::string outbuff_str = wcs2string(stdout_stream.contents());
                     const char *outbuff = outbuff_str.data();
                     size_t outbuff_len = outbuff_str.size();
 
-                    const std::string errbuff_str = wcs2string(stderr_stream.buffer());
+                    const std::string errbuff_str = wcs2string(stderr_stream.contents());
                     const char *errbuff = errbuff_str.data();
                     size_t errbuff_len = errbuff_str.size();
 
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index 070e5705c..65d3f55bb 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -4107,10 +4107,10 @@ static void run_one_string_test(const wchar_t *const *argv, int expected_rc,
     if (rc != expected_rc) {
         err(L"Test failed on line %lu: [%ls]: expected return code %d but got %d", __LINE__,
             args.c_str(), expected_rc, rc);
-    } else if (streams.out.buffer() != expected_out) {
+    } else if (streams.out.contents() != expected_out) {
         err(L"Test failed on line %lu: [%ls]: expected [%ls] but got [%ls]", __LINE__, args.c_str(),
             escape_string(expected_out, ESCAPE_ALL).c_str(),
-            escape_string(streams.out.buffer(), ESCAPE_ALL).c_str());
+            escape_string(streams.out.contents(), ESCAPE_ALL).c_str());
     }
 }
 
diff --git a/src/io.cpp b/src/io.cpp
index e18be8f75..d1573a9fb 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -34,11 +34,11 @@ void io_buffer_t::print() const {
 void io_buffer_t::append_from_stream(const output_stream_t &stream) {
     if (output_discarded())
         return;
-    if (stream.output_discarded()) {
+    if (stream.buffer().discarded()) {
         set_discard();
         return;
     }
-    const std::string str = wcs2string(stream.buffer());
+    const std::string str = wcs2string(stream.contents());
     out_buffer_append(str.data(), str.size());
 }
 
diff --git a/src/io.h b/src/io.h
index 7b789fb67..a58ccf362 100644
--- a/src/io.h
+++ b/src/io.h
@@ -22,6 +22,127 @@ using std::tr1::shared_ptr;
 #include "common.h"
 #include "env.h"
 
+/// separated_buffer_t is composed of a sequence of elements, some of which may be explicitly
+/// separated (e.g. through string split0) and some of which the separation is inferred. This enum
+/// tracks the type.
+enum class separation_type_t {
+    /// This element's separation should be inferred, e.g. through IFS.
+    inferred,
+    /// This element was explicitly separated and should not be separated further.
+    explicitly
+};
+
+/// A separated_buffer_t contains a list of elements, some of which may be separated explicitly and
+/// others which must be separated further by the user (e.g. via IFS).
+template <typename StringType>
+class separated_buffer_t {
+    struct element_t {
+        StringType contents;
+        separation_type_t separation;
+
+        element_t(StringType contents, separation_type_t sep)
+            : contents(std::move(contents)), separation(sep) {}
+
+        bool is_explicitly_separated() const { return separation == separation_type_t::explicitly; }
+    };
+
+    /// Limit on how much data we'll buffer. Zero means no limit.
+    size_t buffer_limit_;
+
+    /// Current size of all contents.
+    size_t contents_size_{0};
+
+    /// List of buffer elements.
+    std::vector<element_t> elements_;
+
+    /// True if we're discarding input because our buffer_limit has been exceeded.
+    bool discard = false;
+
+    /// Mark that we are about to add the given size \p delta to the buffer. \return true if we
+    /// succeed, false if we exceed buffer_limit.
+    bool try_add_size(size_t delta) {
+        if (discard) return false;
+        contents_size_ += delta;
+        if (contents_size_ < delta) {
+            // Overflow!
+            set_discard();
+            return false;
+        }
+        if (buffer_limit_ > 0 && contents_size_ > buffer_limit_) {
+            set_discard();
+            return false;
+        }
+        return true;
+    }
+
+    /// separated_buffer_t may not be copied.
+    separated_buffer_t(const separated_buffer_t &) = delete;
+    void operator=(const separated_buffer_t &) = delete;
+
+public:
+ /// Construct a separated_buffer_t with the given buffer limit \p limit, or 0 for no limit.
+ separated_buffer_t(size_t limit) : buffer_limit_(limit) {}
+
+ /// \return the buffer limit size, or 0 for no limit.
+ size_t limit() const { return buffer_limit_; }
+
+ /// \return the contents size.
+ size_t size() const { return contents_size_; }
+
+ /// \return whether the output has been discarded.
+ bool discarded() const { return discard; }
+
+ /// Mark the contents as discarded.
+ void set_discard() {
+     elements_.clear();
+     contents_size_ = 0;
+     discard = true;
+ }
+
+ /// Serialize the contents to a single string, where explicitly separated elements have a
+ /// newline appended.
+ StringType newline_serialized() const {
+     StringType result;
+     result.reserve(size());
+     for (const auto &elem : elements_) {
+         result.append(elem.contents);
+         if (elem.is_explicitly_separated()) {
+             result.push_back('\n');
+         }
+     }
+     return result;
+    }
+
+    /// \return the list of elements.
+    const std::vector<element_t> &elements() const { return elements_; }
+
+    /// Append an element with range [begin, end) and the given separation type \p sep.
+    template <typename Iterator>
+    void append(Iterator begin, Iterator end, separation_type_t sep = separation_type_t::inferred) {
+        if (!try_add_size(std::distance(begin, end))) return;
+        // Try merging with the last element.
+        if (sep == separation_type_t::inferred && !elements_.empty() && !elements_.back().is_explicitly_separated()) {
+            elements_.back().contents.append(begin, end);
+        } else {
+            elements_.emplace_back(StringType(begin, end), sep);
+        }
+    }
+
+    /// Append a string \p str with the given separation type \p sep.
+    void append(const StringType &str, separation_type_t sep = separation_type_t::inferred) {
+        append(str.begin(), str.end(), sep);
+    }
+
+    // Given that this is a narrow stream, convert a wide stream \p rhs to narrow and then append
+    // it.
+    template <typename RHSStringType>
+    void append_wide_buffer(const separated_buffer_t<RHSStringType> &rhs) {
+        for (const auto &rhs_elem : rhs.elements()) {
+            append(wcs2string(rhs_elem.contents), rhs_elem.separation);
+        }
+    }
+};
+
 /// Describes what type of IO operation an io_data_t represents.
 enum io_mode_t { IO_FILE, IO_PIPE, IO_FD, IO_BUFFER, IO_CLOSE };
 
@@ -194,77 +315,40 @@ bool pipe_avoid_conflicts_with_io_chain(int fds[2], const io_chain_t &ios);
 /// Class representing the output that a builtin can generate.
 class output_stream_t {
    private:
-    /// Limit on how much data we'll buffer. Zero means no limit.
-    size_t buffer_limit;
-    /// True if we're discarding input.
-    bool discard;
+    /// Storage for our data.
+    separated_buffer_t<wcstring> buffer_;
+
     // No copying.
-    output_stream_t(const output_stream_t &s);
-    void operator=(const output_stream_t &s);
-
-    wcstring buffer_;
-
-    void check_for_overflow() {
-        if (buffer_limit && buffer_.size() > buffer_limit) {
-            discard = true;
-            buffer_.clear();
-        }
-    }
+    output_stream_t(const output_stream_t &s) = delete;
+    void operator=(const output_stream_t &s) = delete;
 
    public:
-    output_stream_t(size_t buffer_limit_) : buffer_limit(buffer_limit_), discard(false) {}
+    output_stream_t(size_t buffer_limit) : buffer_(buffer_limit) {}
 
-    void append(const wcstring &s) {
-        if (discard) return;
-        buffer_.append(s);
-        check_for_overflow();
-    }
+    void append(const wcstring &s) { buffer_.append(s.begin(), s.end()); }
 
-    void append(const wchar_t *s) {
-        if (discard) return;
-        buffer_.append(s);
-        check_for_overflow();
-    }
+    const separated_buffer_t<wcstring> &buffer() const { return buffer_; }
 
-    void append(wchar_t s) {
-        if (discard) return;
-        buffer_.push_back(s);
-        check_for_overflow();
-    }
+    void append(const wchar_t *s) { append(s, wcslen(s)); }
 
-    void append(const wchar_t *s, size_t amt) {
-        if (discard) return;
-        buffer_.append(s, amt);
-        check_for_overflow();
-    }
+    void append(wchar_t s) { append(&s, 1); }
 
-    void push_back(wchar_t c) {
-        if (discard) return;
-        buffer_.push_back(c);
-        check_for_overflow();
-    }
+    void append(const wchar_t *s, size_t amt) { buffer_.append(s, s + amt); }
+
+    void push_back(wchar_t c) { append(c); }
 
     void append_format(const wchar_t *format, ...) {
-        if (discard) return;
         va_list va;
         va_start(va, format);
-        ::append_formatv(buffer_, format, va);
+        append_formatv(format, va);
         va_end(va);
-        check_for_overflow();
     }
 
-    void append_formatv(const wchar_t *format, va_list va_orig) {
-        if (discard) return;
-        ::append_formatv(buffer_, format, va_orig);
-        check_for_overflow();
-    }
+    void append_formatv(const wchar_t *format, va_list va) { append(vformat_string(format, va)); }
 
-    const wcstring &buffer() const { return buffer_; }
+    bool empty() const { return buffer_.size() == 0; }
 
-    /// Function that returns true if we discarded the input because there was too much data.
-    bool output_discarded() const { return discard; }
-
-    bool empty() const { return buffer_.empty(); }
+    wcstring contents() const { return buffer_.newline_serialized(); }
 };
 
 struct io_streams_t {
diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp
index b728435ec..15289ba56 100644
--- a/src/parse_execution.cpp
+++ b/src/parse_execution.cpp
@@ -328,7 +328,7 @@ parse_execution_result_t parse_execution_context_t::run_function_statement(
     proc_set_last_status(err);
 
     if (!streams.err.empty()) {
-        this->report_error(header, L"%ls", streams.err.buffer().c_str());
+        this->report_error(header, L"%ls", streams.err.contents().c_str());
         result = parse_execution_errored;
     }
 

From f998afaa23b15251694e11a5e11bd272f1822dae Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Mon, 28 May 2018 01:27:26 -0700
Subject: [PATCH 4/8] Adopt separated_buffer_t in io_buffer_t

---
 src/exec.cpp       | 15 +++++++++------
 src/fish_tests.cpp |  4 ++--
 src/io.cpp         | 21 +++++++++------------
 src/io.h           | 44 +++++++++-----------------------------------
 src/proc.cpp       | 10 ++++------
 5 files changed, 33 insertions(+), 61 deletions(-)

diff --git a/src/exec.cpp b/src/exec.cpp
index 95d7ec573..b9e56959e 100644
--- a/src/exec.cpp
+++ b/src/exec.cpp
@@ -540,7 +540,7 @@ void exec_job(parser_t &parser, job_t *j) {
         if ((io->io_mode == IO_BUFFER)) {
             io_buffer_t *io_buffer = static_cast<io_buffer_t *>(io.get());
             assert(!io_buffer->is_input);
-            stdout_read_limit = io_buffer->get_buffer_limit();
+            stdout_read_limit = io_buffer->buffer().limit();
         }
     }
 
@@ -891,8 +891,10 @@ void exec_job(parser_t &parser, job_t *j) {
 
                 block_output_io_buffer->read();
 
-                const char *buffer = block_output_io_buffer->out_buffer_ptr();
-                size_t count = block_output_io_buffer->out_buffer_size();
+                const std::string buffer_contents =
+                    block_output_io_buffer->buffer().newline_serialized();
+                const char *buffer = buffer_contents.data();
+                size_t count = buffer_contents.size();
                 if (count > 0) {
                     // We don't have to drain threads here because our child process is simple.
                     const char *fork_reason = p->type == INTERNAL_BLOCK_NODE ? "internal block io" : "internal function io";
@@ -1192,7 +1194,7 @@ static int exec_subshell_internal(const wcstring &cmd, wcstring_list_t *lst, boo
         io_buffer->read();
     }
 
-    if (io_buffer->output_discarded()) subcommand_status = STATUS_READ_TOO_MUCH;
+    if (io_buffer->buffer().discarded()) subcommand_status = STATUS_READ_TOO_MUCH;
 
     // If the caller asked us to preserve the exit status, restore the old status. Otherwise set the
     // status of the subcommand.
@@ -1203,8 +1205,9 @@ static int exec_subshell_internal(const wcstring &cmd, wcstring_list_t *lst, boo
         return subcommand_status;
     }
 
-    const char *begin = io_buffer->out_buffer_ptr();
-    const char *end = begin + io_buffer->out_buffer_size();
+    const std::string buffer_contents = io_buffer->buffer().newline_serialized();
+    const char *begin = buffer_contents.data();
+    const char *end = begin + buffer_contents.size();
     if (split_output) {
         const char *cursor = begin;
         while (cursor < end) {
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index 65d3f55bb..cfcddbcd1 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -929,9 +929,9 @@ static void test_1_cancellation(const wchar_t *src) {
     });
     parser_t::principal_parser().eval(src, io_chain, TOP);
     out_buff->read();
-    if (out_buff->out_buffer_size() != 0) {
+    if (out_buff->buffer().size() != 0) {
         err(L"Expected 0 bytes in out_buff, but instead found %lu bytes\n",
-            out_buff->out_buffer_size());
+            out_buff->buffer().size());
     }
     iothread_drain_all();
 }
diff --git a/src/io.cpp b/src/io.cpp
index d1573a9fb..ae4fa0478 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -27,19 +27,17 @@ void io_pipe_t::print() const {
 }
 
 void io_buffer_t::print() const {
-    fwprintf(stderr, L"buffer %p (input: %s, size %lu)\n", out_buffer_ptr(),
-             is_input ? "yes" : "no", (unsigned long)out_buffer_size());
+    fwprintf(stderr, L"buffer (input: %s, size %lu)\n",
+             is_input ? "yes" : "no", (unsigned long)buffer_.size());
 }
 
 void io_buffer_t::append_from_stream(const output_stream_t &stream) {
-    if (output_discarded())
-        return;
+    if (buffer_.discarded()) return;
     if (stream.buffer().discarded()) {
-        set_discard();
+        buffer_.set_discard();
         return;
     }
-    const std::string str = wcs2string(stream.contents());
-    out_buffer_append(str.data(), str.size());
+    buffer_.append_wide_buffer(stream.buffer());
 }
 
 void io_buffer_t::read() {
@@ -55,11 +53,10 @@ void io_buffer_t::read() {
         debug(4, L"io_buffer_t::read: blocking read on fd %d", pipe_fd[0]);
         while (1) {
             char b[4096];
-            long l;
-            l = read_blocked(pipe_fd[0], b, 4096);
-            if (l == 0) {
+            long len = read_blocked(pipe_fd[0], b, 4096);
+            if (len == 0) {
                 break;
-            } else if (l < 0) {
+            } else if (len < 0) {
                 // exec_read_io_buffer is only called on jobs that have exited, and will therefore
                 // never block. But a broken pipe seems to cause some flags to reset, causing the
                 // EOF flag to not be set. Therefore, EAGAIN is ignored and we exit anyway.
@@ -72,7 +69,7 @@ void io_buffer_t::read() {
 
                 break;
             } else {
-                out_buffer_append(b, l);
+                buffer_.append(&b[0], &b[len]);
             }
         }
     }
diff --git a/src/io.h b/src/io.h
index a58ccf362..ecc27be43 100644
--- a/src/io.h
+++ b/src/io.h
@@ -222,51 +222,25 @@ class io_chain_t;
 class output_stream_t;
 class io_buffer_t : public io_pipe_t {
    private:
-    /// True if we're discarding input.
-    bool discard{false};
-    /// Limit on how much data we'll buffer. Zero means no limit.
-    size_t buffer_limit;
-    /// Buffer to save output in.
-    std::vector<char> out_buffer;
+    separated_buffer_t<std::string> buffer_;
 
     explicit io_buffer_t(int f, size_t limit)
         : io_pipe_t(IO_BUFFER, f, false /* not input */),
-          buffer_limit(limit),
-          out_buffer() {}
+          buffer_(limit) {}
 
    public:
     void print() const override;
 
     ~io_buffer_t() override;
 
+    /// Access the underlying buffer.
+    const separated_buffer_t<std::string> &buffer() const { return buffer_; }
+
+    /// Access the underlying buffer.
+    separated_buffer_t<std::string> &buffer() { return buffer_; }
+
     /// Function to append to the buffer.
-    void out_buffer_append(const char *ptr, size_t count) {
-        if (discard) return;
-        if (buffer_limit && out_buffer.size() + count > buffer_limit) {
-            set_discard();
-            return;
-        }
-        out_buffer.insert(out_buffer.end(), ptr, ptr + count);
-    }
-
-    /// Function to get a pointer to the buffer.
-    const char *out_buffer_ptr() const { return out_buffer.empty() ? NULL : &out_buffer.at(0); }
-
-    /// Function to get the size of the buffer.
-    size_t out_buffer_size() const { return out_buffer.size(); }
-
-    /// Function that returns true if we discarded the input because there was too much data.
-    bool output_discarded() { return discard; }
-
-    /// Function to explicitly put the object in discard mode. Meant to be used when moving
-    /// the results from an output_stream_t to an io_buffer_t.
-    void set_discard() {
-        discard = true;
-        out_buffer.clear();
-    }
-
-    /// This is used to transfer the buffer limit for this object to a output_stream_t object.
-    size_t get_buffer_limit() { return buffer_limit; }
+    void append(const char *ptr, size_t count) { buffer_.append(ptr, ptr + count); }
 
     /// Ensures that the pipes do not conflict with any fd redirections in the chain.
     bool avoid_conflicts_with_io_chain(const io_chain_t &ios);
diff --git a/src/proc.cpp b/src/proc.cpp
index 1a8761a2a..9974bf686 100644
--- a/src/proc.cpp
+++ b/src/proc.cpp
@@ -758,19 +758,17 @@ static void read_try(job_t *j) {
         debug(3, L"proc::read_try('%ls')", j->command_wcstr());
         while (1) {
             char b[BUFFER_SIZE];
-            long l;
-
-            l = read_blocked(buff->pipe_fd[0], b, BUFFER_SIZE);
-            if (l == 0) {
+            long len = read_blocked(buff->pipe_fd[0], b, BUFFER_SIZE);
+            if (len == 0) {
                 break;
-            } else if (l < 0) {
+            } else if (len < 0) {
                 if (errno != EAGAIN) {
                     debug(1, _(L"An error occured while reading output from code block"));
                     wperror(L"read_try");
                 }
                 break;
             } else {
-                buff->out_buffer_append(b, l);
+                buff->append(b, len);
             }
         }
     }

From d34a300818e5d37c2b57cc9931804c2cfb8ecee0 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Tue, 29 May 2018 21:11:50 -0700
Subject: [PATCH 5/8] Add string split0

This adds a new string command split0, which splits on zero bytes.
split0 has superpowers because its output is not further split on
newlines when used in command substitutions.
---
 src/builtin_string.cpp | 75 +++++++++++++++++++++++-------------------
 src/exec.cpp           | 57 ++++++++++++++++++--------------
 src/io.h               |  7 ++--
 tests/string.err       |  6 ++++
 tests/string.in        | 18 ++++++++++
 tests/string.out       | 13 ++++++++
 6 files changed, 115 insertions(+), 61 deletions(-)

diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp
index 4d5eccbac..113738e61 100644
--- a/src/builtin_string.cpp
+++ b/src/builtin_string.cpp
@@ -75,25 +75,29 @@ class arg_iterator_t {
     int argidx_;
     // If not using argv, a string to store bytes that have been read but not yet returned.
     std::string buffer_;
+    // If set, when reading from a stream, split on zeros instead of newlines.
+    const bool split0_;
     // Backing storage for the next() string.
     wcstring storage_;
     const io_streams_t &streams_;
 
-    /// \return the next argument from stdin
-    const wchar_t *get_arg_stdin() {
+    /// Reads the next argument from stdin, returning true if an argument was produced and false if
+    /// not. On true, the string is stored in storage_.
+    bool get_arg_stdin() {
         assert(string_args_from_stdin(streams_) && "should not be reading from stdin");
-        // Read in chunks from fd until buffer has a line.
+        // Read in chunks from fd until buffer has a line (or zero if split0_ is set).
+        const char sep = split0_ ? '\0' : '\n';
         size_t pos;
-        while ((pos = buffer_.find('\n')) == std::string::npos) {
+        while ((pos = buffer_.find(sep)) == std::string::npos) {
             char buf[STRING_CHUNK_SIZE];
             long n = read_blocked(streams_.stdin_fd, buf, STRING_CHUNK_SIZE);
             if (n == 0) {
                 // If we still have buffer contents, flush them,
-                // in case there was no trailing '\n'.
-                if (buffer_.empty()) return NULL;
+                // in case there was no trailing sep.
+                if (buffer_.empty()) return false;
                 storage_ = str2wcstring(buffer_);
                 buffer_.clear();
-                return storage_.c_str();
+                return true;
             }
             if (n == -1) {
                 // Some error happened. We can't do anything about it,
@@ -101,20 +105,21 @@ class arg_iterator_t {
                 // (read_blocked already retries for EAGAIN and EINTR)
                 storage_ = str2wcstring(buffer_);
                 buffer_.clear();
-                return NULL;
+                return false;
             }
             buffer_.append(buf, n);
         }
 
-        // Split the buffer on the '\n' and return the first part.
+        // Split the buffer on the sep and return the first part.
         storage_ = str2wcstring(buffer_, pos);
         buffer_.erase(0, pos + 1);
-        return storage_.c_str();
+        return true;
     }
 
    public:
-    arg_iterator_t(const wchar_t *const *argv, int argidx, const io_streams_t &streams)
-        : argv_(argv), argidx_(argidx), streams_(streams) {}
+    arg_iterator_t(const wchar_t *const *argv, int argidx, const io_streams_t &streams,
+                   bool split0 = false)
+        : argv_(argv), argidx_(argidx), split0_(split0), streams_(streams) {}
 
     const wcstring *nextstr() {
         if (string_args_from_stdin(streams_)) {
@@ -1037,7 +1042,8 @@ static int string_replace(parser_t &parser, io_streams_t &streams, int argc, wch
     return replacer->replace_count() > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
 }
 
-static int string_split(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
+static int string_split_maybe0(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv,
+                               bool is_split0) {
     options_t opts;
     opts.quiet_valid = true;
     opts.right_valid = true;
@@ -1045,14 +1051,14 @@ static int string_split(parser_t &parser, io_streams_t &streams, int argc, wchar
     opts.max = LONG_MAX;
     opts.no_empty_valid = true;
     int optind;
-    int retval = parse_opts(&opts, &optind, 1, argc, argv, parser, streams);
+    int retval = parse_opts(&opts, &optind, is_split0 ? 0 : 1, argc, argv, parser, streams);
     if (retval != STATUS_CMD_OK) return retval;
 
-    const wcstring sep(opts.arg1);
+    const wcstring sep = is_split0 ? wcstring(1, L'\0') : wcstring(opts.arg1);
 
     wcstring_list_t splits;
     size_t arg_count = 0;
-    arg_iterator_t aiter(argv, optind, streams);
+    arg_iterator_t aiter(argv, optind, streams, is_split0);
     while (const wcstring *arg = aiter.nextstr()) {
         if (opts.right) {
             split_about(arg->rbegin(), arg->rend(), sep.rbegin(), sep.rend(), &splits, opts.max, opts.no_empty);
@@ -1070,15 +1076,24 @@ static int string_split(parser_t &parser, io_streams_t &streams, int argc, wchar
         std::reverse(splits.begin(), splits.end());
     }
 
+    const size_t split_count = splits.size();
     if (!opts.quiet) {
-        for (wcstring_list_t::const_iterator si = splits.begin(); si != splits.end(); ++si) {
-            streams.out.append(*si);
-            streams.out.append(L'\n');
+        auto &buff = streams.out.buffer();
+        for (const wcstring &split : splits) {
+            buff.append(split, separation_type_t::explicitly);
         }
     }
 
     // We split something if we have more split values than args.
-    return splits.size() > arg_count ? STATUS_CMD_OK : STATUS_CMD_ERROR;
+    return split_count > arg_count ? STATUS_CMD_OK : STATUS_CMD_ERROR;
+}
+
+static int string_split(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
+    return string_split_maybe0(parser, streams, argc, argv, false /* is_split0 */);
+}
+
+static int string_split0(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
+    return string_split_maybe0(parser, streams, argc, argv, true /* is_split0 */);
 }
 
 // Helper function to abstract the repeat logic from string_repeat
@@ -1256,19 +1271,13 @@ static const struct string_subcommand {
                    wchar_t **argv);                       //!OCLINT(unused param)
 }
 
-string_subcommands[] = {{L"escape", &string_escape},
-                        {L"join", &string_join},
-                        {L"length", &string_length},
-                        {L"match", &string_match},
-                        {L"replace", &string_replace},
-                        {L"split", &string_split},
-                        {L"sub", &string_sub},
-                        {L"trim", &string_trim},
-                        {L"lower", &string_lower},
-                        {L"upper", &string_upper},
-                        {L"repeat", &string_repeat},
-                        {L"unescape", &string_unescape},
-                        {NULL, NULL}};
+string_subcommands[] = {{L"escape", &string_escape},     {L"join", &string_join},
+                        {L"length", &string_length},     {L"match", &string_match},
+                        {L"replace", &string_replace},   {L"split", &string_split},
+                        {L"split0", &string_split0},     {L"sub", &string_sub},
+                        {L"trim", &string_trim},         {L"lower", &string_lower},
+                        {L"upper", &string_upper},       {L"repeat", &string_repeat},
+                        {L"unescape", &string_unescape}, {NULL, NULL}};
 
 /// The string builtin, for manipulating strings.
 int builtin_string(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
diff --git a/src/exec.cpp b/src/exec.cpp
index b9e56959e..25dc0f7fa 100644
--- a/src/exec.cpp
+++ b/src/exec.cpp
@@ -1204,34 +1204,41 @@ static int exec_subshell_internal(const wcstring &cmd, wcstring_list_t *lst, boo
     if (lst == NULL || io_buffer.get() == NULL) {
         return subcommand_status;
     }
+    // Walk over all the elements.
+    for (const auto &elem : io_buffer->buffer().elements()) {
+        if (elem.is_explicitly_separated()) {
+            // Just append this one.
+            lst->push_back(str2wcstring(elem.contents));
+            continue;
+        }
 
-    const std::string buffer_contents = io_buffer->buffer().newline_serialized();
-    const char *begin = buffer_contents.data();
-    const char *end = begin + buffer_contents.size();
-    if (split_output) {
-        const char *cursor = begin;
-        while (cursor < end) {
-            // Look for the next separator.
-            const char *stop = (const char *)memchr(cursor, '\n', end - cursor);
-            const bool hit_separator = (stop != NULL);
-            if (!hit_separator) {
-                // If it's not found, just use the end.
-                stop = end;
+        // Not explicitly separated. We have to split it explicitly.
+        assert(!elem.is_explicitly_separated() && "should not be explicitly separated");
+        const char *begin = elem.contents.data();
+        const char *end = begin + elem.contents.size();
+        if (split_output) {
+            const char *cursor = begin;
+            while (cursor < end) {
+                // Look for the next separator.
+                const char *stop = (const char *)memchr(cursor, '\n', end - cursor);
+                const bool hit_separator = (stop != NULL);
+                if (!hit_separator) {
+                    // If it's not found, just use the end.
+                    stop = end;
+                }
+                // Stop now points at the first character we do not want to copy.
+                lst->push_back(str2wcstring(cursor, stop - cursor));
+
+                // If we hit a separator, skip over it; otherwise we're at the end.
+                cursor = stop + (hit_separator ? 1 : 0);
             }
-            // Stop now points at the first character we do not want to copy.
-            const wcstring wc = str2wcstring(cursor, stop - cursor);
-            lst->push_back(wc);
-
-            // If we hit a separator, skip over it; otherwise we're at the end.
-            cursor = stop + (hit_separator ? 1 : 0);
+        } else {
+            // We're not splitting output, but we still want to trim off a trailing newline.
+            if (end != begin && end[-1] == '\n') {
+                --end;
+            }
+            lst->push_back(str2wcstring(begin, end - begin));
         }
-    } else {
-        // We're not splitting output, but we still want to trim off a trailing newline.
-        if (end != begin && end[-1] == '\n') {
-            --end;
-        }
-        const wcstring wc = str2wcstring(begin, end - begin);
-        lst->push_back(wc);
     }
 
     return subcommand_status;
diff --git a/src/io.h b/src/io.h
index ecc27be43..93ccab718 100644
--- a/src/io.h
+++ b/src/io.h
@@ -36,6 +36,7 @@ enum class separation_type_t {
 /// others which must be separated further by the user (e.g. via IFS).
 template <typename StringType>
 class separated_buffer_t {
+   public:
     struct element_t {
         StringType contents;
         separation_type_t separation;
@@ -46,6 +47,7 @@ class separated_buffer_t {
         bool is_explicitly_separated() const { return separation == separation_type_t::explicitly; }
     };
 
+   private:
     /// Limit on how much data we'll buffer. Zero means no limit.
     size_t buffer_limit_;
 
@@ -236,9 +238,6 @@ class io_buffer_t : public io_pipe_t {
     /// Access the underlying buffer.
     const separated_buffer_t<std::string> &buffer() const { return buffer_; }
 
-    /// Access the underlying buffer.
-    separated_buffer_t<std::string> &buffer() { return buffer_; }
-
     /// Function to append to the buffer.
     void append(const char *ptr, size_t count) { buffer_.append(ptr, ptr + count); }
 
@@ -301,6 +300,8 @@ class output_stream_t {
 
     void append(const wcstring &s) { buffer_.append(s.begin(), s.end()); }
 
+    separated_buffer_t<wcstring> &buffer() { return buffer_; }
+
     const separated_buffer_t<wcstring> &buffer() const { return buffer_; }
 
     void append(const wchar_t *s) { append(s, wcslen(s)); }
diff --git a/tests/string.err b/tests/string.err
index 98c1702b8..c7cd024fc 100644
--- a/tests/string.err
+++ b/tests/string.err
@@ -294,3 +294,9 @@ string repeat -l fakearg
 
 ####################
 # Check NUL
+
+####################
+# string split0
+
+####################
+# string split0 in functions
diff --git a/tests/string.in b/tests/string.in
index cfef38b68..60dcaaf01 100644
--- a/tests/string.in
+++ b/tests/string.in
@@ -340,4 +340,22 @@ printf 'a\0b' | string replace -r b g | string escape
 # TODO: These do not yet work!
 # printf 'a\0b' | string match '*b' | string escape
 
+logmsg string split0
+count (echo -ne 'abcdefghi' | string split0)
+count (echo -ne 'abc\x00def\x00ghi\x00' | string split0)
+count (echo -ne 'abc\x00def\x00ghi\x00\x00' | string split0)
+count (echo -ne 'abc\x00def\x00ghi' | string split0)
+count (echo -ne 'abc\ndef\x00ghi\x00' | string split0)
+count (echo -ne 'abc\ndef\nghi' | string split0)
+
+logmsg string split0 in functions
+# This function outputs some newline-separated content, and some
+# explicitly separated content.
+function dualsplit
+  echo alpha
+  echo beta
+  echo -ne 'gamma\x00delta' | string split0
+end
+count (dualsplit)
+
 exit 0
diff --git a/tests/string.out b/tests/string.out
index 7ff40deba..bded690e7 100644
--- a/tests/string.out
+++ b/tests/string.out
@@ -433,3 +433,16 @@ d
 a\x00b
 a\x00g
 a\x00g
+
+####################
+# string split0
+1
+3
+4
+3
+2
+1
+
+####################
+# string split0 in functions
+4

From b1176323e715fe05fc24579cec15d721071c8419 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sun, 24 Jun 2018 13:07:14 -0700
Subject: [PATCH 6/8] Document string split0

---
 doc_src/string.txt | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/doc_src/string.txt b/doc_src/string.txt
index 04b44884d..6d6b7c908 100644
--- a/doc_src/string.txt
+++ b/doc_src/string.txt
@@ -14,6 +14,8 @@ string replace [(-a | --all)] [(-f | --filter)] [(-i | --ignore-case)] [(-r | --
                [(-q | --quiet)] PATTERN REPLACEMENT [STRING...]
 string split [(-m | --max) MAX] [(-n | --no-empty)] [(-q | --quiet)] [(-r | --right)] SEP
              [STRING...]
+string split0 [(-m | --max) MAX] [(-n | --no-empty)] [(-q | --quiet)] [(-r | --right)]
+              [STRING...]
 string sub [(-s | --start) START] [(-l | --length) LENGTH] [(-q | --quiet)]
            [STRING...]
 string trim [(-l | --left)] [(-r | --right)] [(-c | --chars CHARS)]
@@ -93,6 +95,12 @@ Exit status: 0 if at least one replacement was performed, or 1 otherwise.
 
 See also `read --delimiter`.
 
+\subsection string-split0 "split0" subcommand
+
+`string split0` splits each STRING on the zero byte (NUL). Options are the same as `string split` except that no separator is given.
+
+`split0` has the important property that its output is not further split when used in a command substitution, allowing for the command substitution to produce elements containing newlines. This is most useful when used with Unix tools that produce zero bytes, such as `find -print0` or `sort -z`. See split0 examples below.
+
 \subsection string-sub "sub" subcommand
 
 `string sub` prints a substring of each string argument. The start of the substring can be specified with `-s` or `--start` followed by a 1-based index value. Positive index values are relative to the start of the string and negative index values are relative to the end of the string. The default start value is 1. The length of the substring can be specified with `-l` or `--length`. If the length is not specified, the substring continues to the end of each STRING. Exit status: 0 if at least one substring operation was performed, 1 otherwise.
@@ -240,6 +248,13 @@ foo2
 <outp>0xBadC0de</outp>
 \endfish
 
+\subsection string-example-split0 Split0 Examples
+
+\fish{cli-dark}
+# Count files in a directory, without being confused by newlines.
+>_ count (find . -print0 | string split0)
+\endfish
+
 \subsection string-example-replace-literal Replace Literal Examples
 
 \fish{cli-dark}

From 73c747d162e1f6168294abb9376e0c2154f64bca Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sun, 24 Jun 2018 14:03:13 -0700
Subject: [PATCH 7/8] Add string join0

string join0 joins its arguments using the NUL byte, which complements
string split0. For example, it allows piping a variable through sort -z.
---
 doc_src/string.txt     | 16 ++++++++++++++--
 src/builtin_string.cpp | 30 +++++++++++++++++++-----------
 tests/string.err       |  3 +++
 tests/string.in        |  6 ++++++
 tests/string.out       |  6 ++++++
 5 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/doc_src/string.txt b/doc_src/string.txt
index 6d6b7c908..02f4bd074 100644
--- a/doc_src/string.txt
+++ b/doc_src/string.txt
@@ -4,6 +4,7 @@
 \fish{synopsis}
 string escape [(-n | --no-quoted)] [--style=xxx] [STRING...]
 string join [(-q | --quiet)] SEP [STRING...]
+string join0 [(-q | --quiet)] [STRING...]
 string length [(-q | --quiet)] [STRING...]
 string lower [(-q | --quiet)] [STRING...]
 string match [(-a | --all)] [(-e | --entire)] [(-i | --ignore-case)] [(-r | --regex)]
@@ -51,6 +52,10 @@ The third is `--style=url` which ensures the string can be used as a URL by hex
 
 `string join` joins its STRING arguments into a single string separated by SEP, which can be an empty string. Exit status: 0 if at least one join was performed, or 1 otherwise.
 
+\subsection string-join0 "join0" subcommand
+
+`string join0` joins its STRING arguments into a single string separated by the zero byte (NUL), and adds a trailing NUL. This is most useful in conjunction with tools that accept NUL-delimited input, such as `sort -z`. Exit status: 0 if at least one join was performed, or 1 otherwise.
+
 \subsection string-length "length" subcommand
 
 `string length` reports the length of each string argument in characters. Exit status: 0 if at least one non-empty STRING was given, or 1 otherwise.
@@ -248,11 +253,18 @@ foo2
 <outp>0xBadC0de</outp>
 \endfish
 
-\subsection string-example-split0 Split0 Examples
+\subsection string-example-split0 NUL Delimited Examples
 
 \fish{cli-dark}
-# Count files in a directory, without being confused by newlines.
+>_ # Count files in a directory, without being confused by newlines.
 >_ count (find . -print0 | string split0)
+<outp>42</outp>
+
+>_ # Sort a list of elements which may contain newlines
+>_ set foo beta alpha\ngamma
+>_ set foo (string join0 $foo | sort -z | string split0)
+>_ string escape $foo[1]
+<outp>alpha\ngamma</outp>
 \endfish
 
 \subsection string-example-replace-literal Replace Literal Examples
diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp
index 113738e61..01f079517 100644
--- a/src/builtin_string.cpp
+++ b/src/builtin_string.cpp
@@ -542,14 +542,15 @@ static int string_unescape(parser_t &parser, io_streams_t &streams, int argc, wc
     DIE("should never reach this statement");
 }
 
-static int string_join(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
+static int string_join_maybe0(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv,
+                              bool is_join0) {
     options_t opts;
     opts.quiet_valid = true;
     int optind;
-    int retval = parse_opts(&opts, &optind, 1, argc, argv, parser, streams);
+    int retval = parse_opts(&opts, &optind, is_join0 ? 0 : 1, argc, argv, parser, streams);
     if (retval != STATUS_CMD_OK) return retval;
 
-    const wchar_t *sep = opts.arg1;
+    const wcstring sep = is_join0 ? wcstring(1, L'\0') : wcstring(opts.arg1);
     int nargs = 0;
     arg_iterator_t aiter(argv, optind, streams);
     while (const wcstring *arg = aiter.nextstr()) {
@@ -562,12 +563,20 @@ static int string_join(parser_t &parser, io_streams_t &streams, int argc, wchar_
         nargs++;
     }
     if (nargs > 0 && !opts.quiet) {
-        streams.out.push_back(L'\n');
+        streams.out.push_back(is_join0 ? L'\0' : L'\n');
     }
 
     return nargs > 1 ? STATUS_CMD_OK : STATUS_CMD_ERROR;
 }
 
+static int string_join(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
+    return string_join_maybe0(parser, streams, argc, argv, false /* is_join0 */);
+}
+
+static int string_join0(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
+    return string_join_maybe0(parser, streams, argc, argv, true /* is_join0 */);
+}
+
 static int string_length(parser_t &parser, io_streams_t &streams, int argc, wchar_t **argv) {
     options_t opts;
     opts.quiet_valid = true;
@@ -1271,13 +1280,12 @@ static const struct string_subcommand {
                    wchar_t **argv);                       //!OCLINT(unused param)
 }
 
-string_subcommands[] = {{L"escape", &string_escape},     {L"join", &string_join},
-                        {L"length", &string_length},     {L"match", &string_match},
-                        {L"replace", &string_replace},   {L"split", &string_split},
-                        {L"split0", &string_split0},     {L"sub", &string_sub},
-                        {L"trim", &string_trim},         {L"lower", &string_lower},
-                        {L"upper", &string_upper},       {L"repeat", &string_repeat},
-                        {L"unescape", &string_unescape}, {NULL, NULL}};
+string_subcommands[] = {
+    {L"escape", &string_escape}, {L"join", &string_join},         {L"join0", &string_join0},
+    {L"length", &string_length}, {L"match", &string_match},       {L"replace", &string_replace},
+    {L"split", &string_split},   {L"split0", &string_split0},     {L"sub", &string_sub},
+    {L"trim", &string_trim},     {L"lower", &string_lower},       {L"upper", &string_upper},
+    {L"repeat", &string_repeat}, {L"unescape", &string_unescape}, {NULL, NULL}};
 
 /// The string builtin, for manipulating strings.
 int builtin_string(parser_t &parser, io_streams_t &streams, wchar_t **argv) {
diff --git a/tests/string.err b/tests/string.err
index c7cd024fc..aebb82052 100644
--- a/tests/string.err
+++ b/tests/string.err
@@ -298,5 +298,8 @@ string repeat -l fakearg
 ####################
 # string split0
 
+####################
+# string join0
+
 ####################
 # string split0 in functions
diff --git a/tests/string.in b/tests/string.in
index 60dcaaf01..e28f98bab 100644
--- a/tests/string.in
+++ b/tests/string.in
@@ -348,6 +348,12 @@ count (echo -ne 'abc\x00def\x00ghi' | string split0)
 count (echo -ne 'abc\ndef\x00ghi\x00' | string split0)
 count (echo -ne 'abc\ndef\nghi' | string split0)
 
+logmsg string join0
+set tmp beta alpha\ngamma
+count (string join \n $tmp)
+count (string join0 $tmp)
+count (string join0 $tmp | string split0)
+
 logmsg string split0 in functions
 # This function outputs some newline-separated content, and some
 # explicitly separated content.
diff --git a/tests/string.out b/tests/string.out
index bded690e7..5fee2fbe1 100644
--- a/tests/string.out
+++ b/tests/string.out
@@ -443,6 +443,12 @@ a\x00g
 2
 1
 
+####################
+# string join0
+3
+2
+2
+
 ####################
 # string split0 in functions
 4

From 62d73bee5e4cf6fd10299f66982bdb2beb2faf5f Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Sun, 1 Jul 2018 16:35:09 -0700
Subject: [PATCH 8/8] Add string split0 and join0 to changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ca637b2d6..b046698b6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -59,6 +59,7 @@ This section is for changes merged to the `major` branch that are not also merge
 - The universal variables file no longer contains the MAC address. It is now at the fixed location `.config/fish/fish_universal_variables` (#1912).
 - `alias` now has a `-s` and `--save` option to save the function generated by the alias using `funcsave` (#4878).
 - Path completions now support expansions, meaning expressions like `python ~/<TAB>` now provides file suggestions just like any other relative or absolute path. (This includes support for other expansions, too.)
+- The `string` builtin has new commands `split0` and `join0` for working with NUL-delimited output.
 
 ## Other significant changes
 - Command substitution output is now limited to 10 MB by default (#3822).