fish-shell/src/wcstringutil.h

// Helper functions for working with wcstring.
#ifndef FISH_WCSTRINGUTIL_H
#define FISH_WCSTRINGUTIL_H

#include <algorithm>
#include <cstring>
#include <string>
#include <utility>

#include "common.h"
#include "expand.h"

/// Test if a string prefixes another.
/// Overloads cover wide (wcstring / wchar_t *) and narrow (std::string / char *) arguments.
/// \return true if \p proposed_prefix is a prefix of \p value.
bool string_prefixes_string(const wcstring &proposed_prefix, const wcstring &value);
bool string_prefixes_string(const wchar_t *proposed_prefix, const wcstring &value);
bool string_prefixes_string(const wchar_t *proposed_prefix, const wchar_t *value);
bool string_prefixes_string(const char *proposed_prefix, const std::string &value);
bool string_prefixes_string(const char *proposed_prefix, const char *value);

/// Test if a string is a suffix of another.
/// \return true if \p proposed_suffix is a suffix of \p value.
bool string_suffixes_string(const wcstring &proposed_suffix, const wcstring &value);
bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &value);
/// Going by its name, the same test without regard to case.
/// NOTE(review): the implementation is not visible from this header — confirm.
bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix,
                                             const wcstring &value);

/// Test if a string prefixes another without regard to case.
/// \return true if \p proposed_prefix is a prefix of \p value, ignoring case.
bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix,
                                             const wcstring &value);

/// Case-insensitive string search, modeled after std::string::find().
/// \param haystack the string to search in.
/// \param needle the string to search for.
/// \param fuzzy indicates this is being used for fuzzy matching and case insensitivity is
/// expanded to include symbolic characters (#3584).
/// \return the offset of the first case-insensitive matching instance of `needle` within
/// `haystack`, or `npos` (as with std::string::find()) if no results were found.
size_t ifind(const wcstring &haystack, const wcstring &needle, bool fuzzy = false);
size_t ifind(const std::string &haystack, const std::string &needle, bool fuzzy = false);

// Ways that a string may fuzzily match another.
// NOTE(review): the numeric order of the enumerators appears to be significant, since
// string_fuzzy_match_string() describes its limit as "at or below" a type — confirm before
// reordering or inserting values.
enum fuzzy_match_type_t {
    // We match the string exactly: FOOBAR matches FOOBAR.
    fuzzy_match_exact = 0,

    // We match a prefix of the string: FO matches FOOBAR.
    fuzzy_match_prefix,

    // We match the string exactly, but in a case insensitive way: foobar matches FOOBAR.
    fuzzy_match_case_insensitive,

    // We match a prefix of the string, in a case insensitive way: foo matches FOOBAR.
    fuzzy_match_prefix_case_insensitive,

    // We match a substring of the string: OOBA matches FOOBAR.
    fuzzy_match_substring,

    // We match a substring of the string, in a case insensitive way: ooBA matches FOOBAR.
    fuzzy_match_substring_case_insensitive,

    // A subsequence match with insertions only: FBR matches FOOBAR.
    fuzzy_match_subsequence_insertions_only,

    // We don't match the string.
    fuzzy_match_none
};

/// Indicates whether a match type requires replacing the entire token.
/// \return false for exact and prefix matches; true for every other match type, including
/// fuzzy_match_none.
static inline bool match_type_requires_full_replacement(fuzzy_match_type_t t) {
    switch (t) {
        // The token typed so far is literally the start of the result, so it can be kept.
        case fuzzy_match_exact:
        case fuzzy_match_prefix:
            return false;

        // Everything else differs from the typed text in case or position.
        case fuzzy_match_case_insensitive:
        case fuzzy_match_prefix_case_insensitive:
        case fuzzy_match_substring:
        case fuzzy_match_substring_case_insensitive:
        case fuzzy_match_subsequence_insertions_only:
        case fuzzy_match_none:
            return true;
    }
    // Only reachable with a value that is not a declared enumerator.
    DIE("Unreachable");
    return false;
}

/// Indicates whether a match shares a prefix with the string it matches.
/// \return true for the exact and prefix match types (case sensitive or not); false for
/// substring, subsequence and non-matches.
static inline bool match_type_shares_prefix(fuzzy_match_type_t t) {
    switch (t) {
        // Matches anchored at the start of the string.
        case fuzzy_match_exact:
        case fuzzy_match_prefix:
        case fuzzy_match_case_insensitive:
        case fuzzy_match_prefix_case_insensitive:
            return true;

        // Matches that are not anchored at the start, or no match at all.
        case fuzzy_match_substring:
        case fuzzy_match_substring_case_insensitive:
        case fuzzy_match_subsequence_insertions_only:
        case fuzzy_match_none:
            return false;
    }
    // Only reachable with a value that is not a declared enumerator.
    DIE("Unreachable");
    return false;
}

/// Describes how one string fuzzily matches another: the kind of match plus two distance values.
/// This is the type returned by string_fuzzy_match_string().
struct string_fuzzy_match_t {
    // The kind of match that was found.
    enum fuzzy_match_type_t type;

    // Strength of the match. The value depends on the type. Lower is stronger.
    size_t match_distance_first;
    size_t match_distance_second;

    // Constructor. Both distances default to 0 when omitted.
    explicit string_fuzzy_match_t(enum fuzzy_match_type_t t, size_t distance_first = 0,
                                  size_t distance_second = 0);

    // Return -1, 0, 1 if this match is (respectively) better than, equal to, or worse than rhs.
    int compare(const string_fuzzy_match_t &rhs) const;
};

/// Compute a fuzzy match for a string. If \p limit_type is not fuzzy_match_none, limit the type to
/// matches at or below that type.
/// \return a string_fuzzy_match_t describing the match that was found.
string_fuzzy_match_t string_fuzzy_match_string(const wcstring &string,
                                               const wcstring &match_against,
                                               fuzzy_match_type_t limit_type = fuzzy_match_none);

/// Split a string by a separator character.
/// \param val the string to split.
/// \param sep the separator character.
/// \return the resulting pieces as a list of strings.
wcstring_list_t split_string(const wcstring &val, wchar_t sep);

/// Join a list of strings by a separator character.
/// \param vals the strings to join.
/// \param sep the separator character.
/// \return the joined string.
wcstring join_strings(const wcstring_list_t &vals, wchar_t sep);

/// Format a signed long as a wide string.
/// \return the text that format_long_safe() writes for \p x.
inline wcstring to_string(long x) {
    // format_long_safe() fills a caller-provided buffer.
    wchar_t digits[64];
    format_long_safe(digits, x);

    wcstring rendered(digits);
    return rendered;
}

/// Format an unsigned long long as a wide string.
/// \return the text that format_ullong_safe() writes for \p x.
inline wcstring to_string(unsigned long long x) {
    // format_ullong_safe() fills a caller-provided buffer.
    wchar_t digits[64];
    format_ullong_safe(digits, x);

    wcstring rendered(digits);
    return rendered;
}

/// Format an int as a wide string by widening it and deferring to the long overload.
inline wcstring to_string(int x) {
    const long widened = x;
    return to_string(widened);
}

/// Format a size_t as a wide string by widening it and deferring to the unsigned long long
/// overload.
inline wcstring to_string(size_t x) {
    const unsigned long long widened = x;
    return to_string(widened);
}

/// Interpret a narrow string as a boolean by looking only at its first character.
/// \return true if \p x starts with one of 'Y', 'T', 'y', 't' or '1'; false otherwise, including
/// when \p x is empty.
inline bool bool_from_string(const std::string &x) {
    if (x.empty()) return false;

    const char lead = x.front();
    return lead == 'Y' || lead == 'T' || lead == 'y' || lead == 't' || lead == '1';
}

inline bool bool_from_string(const wcstring &x) {
    return !x.empty() && std::wcschr(L"YTyt1", x.at(0));
}

/// @typedef wcstring_range represents a range in a wcstring.
/// The first element is the location, the second is the count.
typedef std::pair<wcstring::size_type, wcstring::size_type> wcstring_range;

/// wcstring equivalent of wcstok(). Supports NUL. For convenience and wcstok() compatibility, the
/// first character of each token separator is replaced with NUL.
/// @param str the string to tokenize; taken by non-const reference because separators are
///            overwritten in place, as described above.
/// @param needle presumably the set of separator characters, as with wcstok() — confirm against
///               the implementation.
/// @param last presumably the range returned by the previous call, with the default (0, 0)
///             starting a new scan — confirm against the implementation.
/// @return Returns a pair of (pos, count).
///         This will be (npos, npos) when it's done. In the form of (pos, npos)
///         when the token is already known to be the final token.
/// @note The final token may not necessarily return (pos, npos).
wcstring_range wcstring_tok(wcstring &str, const wcstring &needle,
                            wcstring_range last = wcstring_range(0, 0));

/// Splits the haystack range about each occurrence of the needle range, performing at most
/// \p max splits, and appends the resulting pieces to \p output.
/// With forward iterators this is an ordinary split. With reverse iterators the pieces come out
/// reversed, and in reversed order!
/// An empty needle splits between individual elements (characters).
/// At most max + 1 entries are appended (the pieces from max splits plus the remainder).
/// \param no_empty if set, empty pieces are not appended; a skipped empty piece still counts
/// against \p max.
template <typename ITER>
void split_about(ITER haystack_start, ITER haystack_end, ITER needle_start, ITER needle_end,
                 wcstring_list_t *output, long max = LONG_MAX, bool no_empty = false) {
    // Neither of these changes while we scan, so work them out once.
    const bool split_every_element = (needle_start == needle_end);
    const auto needle_length = std::distance(needle_start, needle_end);

    ITER piece_start = haystack_start;
    for (long splits_left = max; splits_left > 0 && piece_start != haystack_end; --splits_left) {
        ITER piece_end = split_every_element
                             ? piece_start + 1
                             : std::search(piece_start, haystack_end, needle_start, needle_end);
        // No further separator; whatever is left becomes the trailing piece below.
        if (piece_end == haystack_end) break;

        const bool piece_is_empty = (piece_start == piece_end);
        if (!(no_empty && piece_is_empty)) {
            output->emplace_back(piece_start, piece_end);
        }
        // Resume just past the needle (which has length zero when splitting every element).
        piece_start = piece_end + needle_length;
    }

    // Trailing piece, possibly empty.
    if (!(no_empty && piece_start == haystack_end)) {
        output->emplace_back(piece_start, haystack_end);
    }
}

/// Ellipsis style selector; this is the type of truncate()'s \p etype parameter.
enum class ellipsis_type {
    // Going by the name, no ellipsis at all — confirm in truncate()'s implementation.
    None,
    // Prefer niceness over minimalness
    Prettiest,
    // Make every character count ($ instead of ...)
    Shortest,
};

/// Truncate \p input subject to \p max_len, using the ellipsis style \p etype
/// (ellipsis_type::Prettiest when omitted).
/// NOTE(review): the unit of max_len and whether the ellipsis counts towards it are not visible
/// from this header — confirm in the implementation.
wcstring truncate(const wcstring &input, int max_len,
                  ellipsis_type etype = ellipsis_type::Prettiest);
/// Trim \p input and return the result. \p input is taken by value, so callers may move into it.
/// NOTE(review): presumably strips leading and trailing whitespace, or the characters listed in
/// \p any_of for the second overload — confirm in the implementation.
wcstring trim(wcstring input);
wcstring trim(wcstring input, const wchar_t *any_of);

/// Converts a string to lowercase.
/// \return the lowercased string; \p input is taken by value.
wcstring wcstolower(wcstring input);

// Out-of-line helper for wcs2string_callback.
// It is called with the offending character when wcrtomb() reports a conversion failure.
void wcs2string_bad_char(wchar_t);

/// Callback-driven variant of wcs2string.
/// Converts the \p len wide characters at \p input one at a time and hands each converted chunk
/// to \p func as a (const char*, size_t) pair.
/// If \p func returns false, conversion stops immediately; otherwise it continues.
/// \return false if the callback returned false, otherwise true.
template <typename Func>
bool wcs2string_callback(const wchar_t *input, size_t len, const Func &func) {
    mbstate_t state = {};
    char converted[MB_LEN_MAX];

    for (size_t i = 0; i < len; i++) {
        wchar_t wc = input[i];

        // TODO: this doesn't seem sound.
        // INTERNAL_SEPARATOR characters produce no output at all.
        if (wc == INTERNAL_SEPARATOR) continue;

        // Characters in the 256-wide window starting at ENCODE_DIRECT_BASE are emitted as the
        // single byte given by their offset into that window.
        if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
            converted[0] = wc - ENCODE_DIRECT_BASE;
            if (!func(converted, 1)) return false;
            continue;
        }

        // Single-byte locale (C/POSIX/ISO-8859): anything that does not fit in one byte is
        // emitted as a question mark.
        if (MB_CUR_MAX == 1) {
            if (wc & ~0xFF) wc = '?';
            converted[0] = wc;
            if (!func(converted, 1)) return false;
            continue;
        }

        // Multibyte locale: let wcrtomb() do the encoding.
        std::memset(converted, 0, sizeof converted);
        const size_t nbytes = std::wcrtomb(converted, wc, &state);
        if (nbytes != static_cast<size_t>(-1)) {
            if (!func(converted, nbytes)) return false;
        } else {
            // Report the character, emit nothing for it, and reset the conversion state.
            wcs2string_bad_char(wc);
            std::memset(&state, 0, sizeof(state));
        }
    }
    return true;
}

/// Walks a newline-separated string one line at a time.
/// The iterated collection is held by reference and must outlive the iterator.
template <typename Collection>
class line_iterator_t {
    // Copy of the most recently extracted line (without its newline).
    Collection line_buf;

    // The collection being iterated. Held by reference, not copied.
    const Collection &source;

    // Start of the next line to extract.
    typename Collection::const_iterator cursor;

   public:
    /// Construct from a collection (presumably std::string or std::wcstring).
    line_iterator_t(const Collection &coll) : source(coll), cursor(coll.cbegin()) {}

    /// \return the line produced by the last successful call to next().
    const Collection &line() const { return line_buf; }

    /// Extracts the next line into the storage returned by line().
    /// \return true on success, false if we have exhausted the string; in that case the stored
    /// line is left unchanged.
    bool next() {
        const auto stop = source.cend();
        if (cursor == stop) return false;

        const auto eol = std::find(cursor, stop, '\n');
        line_buf.assign(cursor, eol);

        // Resume after the newline, if there was one.
        cursor = eol;
        if (eol != stop) ++cursor;
        return true;
    }
};

#endif
restyle remaining modules to match project style For this change I decided to bundle the remaining modules that need to be resytyled because only two were large enough to warrant doing on their own. Reduces lint errors from 225 to 162 (-28%). Line count from 3073 to 2465 (-20%). Another step in resolving issue #2902. 2016-05-04 06:18:24 +08:00			`// Helper functions for working with wcstring.`
Implement new `read --null` flag The `--null` flag to `read` makes it split incoming lines on NUL instead of newlines. This is intended for processing the output of a command that uses NUL separators (such as `find -print0`). Fixes #1694. 2014-09-22 10:18:56 +08:00			`#ifndef FISH_WCSTRINGUTIL_H`
			`#define FISH_WCSTRINGUTIL_H`

Extract split_about from string Put it into wcstringutil for use with builtin_read. 2017-07-27 21:05:35 +08:00			`#include <algorithm>`
Introduce wcs2string_callback This is like wcs2string, but instead of returning a std::string, it invokes a user-supplied function with each converted character. The idea is to allow interleaved conversion and output. 2020-07-30 08:16:51 +08:00			`#include <cstring>`
Initial pass with Include What You Use 2015-07-25 23:14:25 +08:00			`#include <string>`
Implement new `read --null` flag The `--null` flag to `read` makes it split incoming lines on NUL instead of newlines. This is intended for processing the output of a command that uses NUL separators (such as `find -print0`). Fixes #1694. 2014-09-22 10:18:56 +08:00			`#include <utility>`
add better support for IWYU and fix things Remove the "make iwyu" build target. Move the functionality into the recently introduced lint.fish script. Fix a lot, but not all, of the include-what-you-use errors. Specifically, it fixes all of the IWYU errors on my OS X server but only removes some of them on my Ubuntu 14.04 server. Fixes #2957 2016-04-21 14:00:54 +08:00
Implement new `read --null` flag The `--null` flag to `read` makes it split incoming lines on NUL instead of newlines. This is intended for processing the output of a command that uses NUL separators (such as `find -print0`). Fixes #1694. 2014-09-22 10:18:56 +08:00			`#include "common.h"`
Introduce wcs2string_callback This is like wcs2string, but instead of returning a std::string, it invokes a user-supplied function with each converted character. The idea is to allow interleaved conversion and output. 2020-07-30 08:16:51 +08:00			`#include "expand.h"`
Implement new `read --null` flag The `--null` flag to `read` makes it split incoming lines on NUL instead of newlines. This is intended for processing the output of a command that uses NUL separators (such as `find -print0`). Fixes #1694. 2014-09-22 10:18:56 +08:00
Migrate a bunch of code out of common.h Put it into wcstringutil, path, or a new file null_terminated_array. 2020-01-16 05:16:43 +08:00			`/// Test if a string prefixes another. Returns true if a is a prefix of b.`
			`bool string_prefixes_string(const wcstring &proposed_prefix, const wcstring &value);`
			`bool string_prefixes_string(const wchar_t *proposed_prefix, const wcstring &value);`
			`bool string_prefixes_string(const wchar_t proposed_prefix, const wchar_t value);`
			`bool string_prefixes_string(const char *proposed_prefix, const std::string &value);`
			`bool string_prefixes_string(const char proposed_prefix, const char value);`

			`/// Test if a string is a suffix of another.`
			`bool string_suffixes_string(const wcstring &proposed_suffix, const wcstring &value);`
			`bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &value);`
			`bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix,`
			`const wcstring &value);`

			`/// Test if a string prefixes another without regard to case. Returns true if a is a prefix of b.`
			`bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix,`
			`const wcstring &value);`

			`/// Case-insensitive string search, modeled after std::string::find().`
			`/// \param fuzzy indicates this is being used for fuzzy matching and case insensitivity is`
			`/// expanded to include symbolic characters (#3584).`
			/// \return the offset of the first case-insensitive matching instance of `needle` within
			/// `haystack`, or `string::npos()` if no results were found.
			`size_t ifind(const wcstring &haystack, const wcstring &needle, bool fuzzy = false);`
			`size_t ifind(const std::string &haystack, const std::string &needle, bool fuzzy = false);`

Migrate string_fuzzy_match from common.h to wcstringutil.h This is a more appropriate location for this functionality. Also take this opportunity to clean up subsequence_in_string. 2020-11-28 07:43:07 +08:00			`// Ways that a string may fuzzily match another.`
			`enum fuzzy_match_type_t {`
			`// We match the string exactly: FOOBAR matches FOOBAR.`
			`fuzzy_match_exact = 0,`

			`// We match a prefix of the string: FO matches FOOBAR.`
			`fuzzy_match_prefix,`

			`// We match the string exactly, but in a case insensitive way: foobar matches FOOBAR.`
			`fuzzy_match_case_insensitive,`

			`// We match a prefix of the string, in a case insensitive way: foo matches FOOBAR.`
			`fuzzy_match_prefix_case_insensitive,`

			`// We match a substring of the string: OOBA matches FOOBAR.`
			`fuzzy_match_substring,`

			`// We match a substring of the string: ooBA matches FOOBAR.`
			`fuzzy_match_substring_case_insensitive,`

			`// A subsequence match with insertions only: FBR matches FOOBAR.`
			`fuzzy_match_subsequence_insertions_only,`

			`// We don't match the string.`
			`fuzzy_match_none`
			`};`

			`/// Indicates where a match type requires replacing the entire token.`
			`static inline bool match_type_requires_full_replacement(fuzzy_match_type_t t) {`
			`switch (t) {`
			`case fuzzy_match_exact:`
			`case fuzzy_match_prefix: {`
			`return false;`
			`}`
			`case fuzzy_match_case_insensitive:`
			`case fuzzy_match_prefix_case_insensitive:`
			`case fuzzy_match_substring:`
			`case fuzzy_match_substring_case_insensitive:`
			`case fuzzy_match_subsequence_insertions_only:`
			`case fuzzy_match_none: {`
			`return true;`
			`}`
			`default: {`
			`DIE("Unreachable");`
			`return false;`
			`}`
			`}`
			`}`

			`/// Indicates where a match shares a prefix with the string it matches.`
			`static inline bool match_type_shares_prefix(fuzzy_match_type_t t) {`
			`switch (t) {`
			`case fuzzy_match_exact:`
			`case fuzzy_match_prefix:`
			`case fuzzy_match_case_insensitive:`
			`case fuzzy_match_prefix_case_insensitive: {`
			`return true;`
			`}`
			`case fuzzy_match_substring:`
			`case fuzzy_match_substring_case_insensitive:`
			`case fuzzy_match_subsequence_insertions_only:`
			`case fuzzy_match_none: {`
			`return false;`
			`}`
			`default: {`
			`DIE("Unreachable");`
			`return false;`
			`}`
			`}`
			`}`

			`/// Test if string is a fuzzy match to another.`
			`struct string_fuzzy_match_t {`
			`enum fuzzy_match_type_t type;`

			`// Strength of the match. The value depends on the type. Lower is stronger.`
			`size_t match_distance_first;`
			`size_t match_distance_second;`

			`// Constructor.`
			`explicit string_fuzzy_match_t(enum fuzzy_match_type_t t, size_t distance_first = 0,`
			`size_t distance_second = 0);`

			`// Return -1, 0, 1 if this match is (respectively) better than, equal to, or worse than rhs.`
			`int compare(const string_fuzzy_match_t &rhs) const;`
			`};`

			`/// Compute a fuzzy match for a string. If maximum_match is not fuzzy_match_none, limit the type to`
			`/// matches at or below that type.`
			`string_fuzzy_match_t string_fuzzy_match_string(const wcstring &string,`
			`const wcstring &match_against,`
			`fuzzy_match_type_t limit_type = fuzzy_match_none);`

Migrate a bunch of code out of common.h Put it into wcstringutil, path, or a new file null_terminated_array. 2020-01-16 05:16:43 +08:00			`/// Split a string by a separator character.`
			`wcstring_list_t split_string(const wcstring &val, wchar_t sep);`

			`/// Join a list of strings by a separator character.`
			`wcstring join_strings(const wcstring_list_t &vals, wchar_t sep);`

			`inline wcstring to_string(long x) {`
			`wchar_t buff[64];`
			`format_long_safe(buff, x);`
			`return wcstring(buff);`
			`}`

			`inline wcstring to_string(unsigned long long x) {`
			`wchar_t buff[64];`
			`format_ullong_safe(buff, x);`
			`return wcstring(buff);`
			`}`

			`inline wcstring to_string(int x) { return to_string(static_cast<long>(x)); }`

			`inline wcstring to_string(size_t x) { return to_string(static_cast<unsigned long long>(x)); }`

			`inline bool bool_from_string(const std::string &x) {`
			`if (x.empty()) return false;`
			`switch (x.front()) {`
			`case 'Y':`
			`case 'T':`
			`case 'y':`
			`case 't':`
			`case '1':`
			`return true;`
			`default:`
			`return false;`
			`}`
			`}`

			`inline bool bool_from_string(const wcstring &x) {`
			`return !x.empty() && std::wcschr(L"YTyt1", x.at(0));`
			`}`

Be a bit more consistent and proper. 2016-06-06 12:30:24 +08:00			`/// @typedef wcstring_range represents a range in a wcstring.`
			`/// The first element is the location, the second is the count.`
Implement new `read --null` flag The `--null` flag to `read` makes it split incoming lines on NUL instead of newlines. This is intended for processing the output of a command that uses NUL separators (such as `find -print0`). Fixes #1694. 2014-09-22 10:18:56 +08:00			`typedef std::pair<wcstring::size_type, wcstring::size_type> wcstring_range;`

restyle remaining modules to match project style For this change I decided to bundle the remaining modules that need to be resytyled because only two were large enough to warrant doing on their own. Reduces lint errors from 225 to 162 (-28%). Line count from 3073 to 2465 (-20%). Another step in resolving issue #2902. 2016-05-04 06:18:24 +08:00			`/// wcstring equivalent of wcstok(). Supports NUL. For convenience and wcstok() compatibility, the`
			`/// first character of each token separator is replaced with NUL.`
Be a bit more consistent and proper. 2016-06-06 12:30:24 +08:00			`/// @return Returns a pair of (pos, count).`
			`/// This will be (npos, npos) when it's done. In the form of (pos, npos)`
			`/// when the token is already known to be the final token.`
			`/// @note The final token may not necessarily return (pos, npos).`
Reformat all files This runs build_tools/style.fish, which runs clang-format on C++, fish_indent on fish and (new) black on python. If anything is wrong with the formatting, we should fix the tools, but automated formatting is worth it. 2019-05-05 18:09:25 +08:00			`wcstring_range wcstring_tok(wcstring &str, const wcstring &needle,`
restyle remaining modules to match project style For this change I decided to bundle the remaining modules that need to be resytyled because only two were large enough to warrant doing on their own. Reduces lint errors from 225 to 162 (-28%). Line count from 3073 to 2465 (-20%). Another step in resolving issue #2902. 2016-05-04 06:18:24 +08:00			`wcstring_range last = wcstring_range(0, 0));`
Implement new `read --null` flag The `--null` flag to `read` makes it split incoming lines on NUL instead of newlines. This is intended for processing the output of a command that uses NUL separators (such as `find -print0`). Fixes #1694. 2014-09-22 10:18:56 +08:00
Extract split_about from string Put it into wcstringutil for use with builtin_read. 2017-07-27 21:05:35 +08:00			`/// Given iterators into a string (forward or reverse), splits the haystack iterators`
			`/// about the needle sequence, up to max times. Inserts splits into the output array.`
			`/// If the iterators are forward, this does the normal thing.`
			`/// If the iterators are backward, this returns reversed strings, in reversed order!`
			`/// If the needle is empty, split on individual elements (characters).`
Add line-delimited read presets with --line and --all-lines Refer to changes in doc_src/read.txt for more info. Closes #4861. 2018-04-17 19:57:33 +08:00			`/// Max output entries will be max + 1 (after max splits)`
Extract split_about from string Put it into wcstringutil for use with builtin_read. 2017-07-27 21:05:35 +08:00			`template <typename ITER>`
			`void split_about(ITER haystack_start, ITER haystack_end, ITER needle_start, ITER needle_end,`
Reformat all files This runs build_tools/style.fish, which runs clang-format on C++, fish_indent on fish and (new) black on python. If anything is wrong with the formatting, we should fix the tools, but automated formatting is worth it. 2019-05-05 18:09:25 +08:00			`wcstring_list_t *output, long max = LONG_MAX, bool no_empty = false) {`
Extract split_about from string Put it into wcstringutil for use with builtin_read. 2017-07-27 21:05:35 +08:00			`long remaining = max;`
			`ITER haystack_cursor = haystack_start;`
			`while (remaining > 0 && haystack_cursor != haystack_end) {`
			`ITER split_point;`
			`if (needle_start == needle_end) { // empty needle, we split on individual elements`
			`split_point = haystack_cursor + 1;`
			`} else {`
			`split_point = std::search(haystack_cursor, haystack_end, needle_start, needle_end);`
			`}`
			`if (split_point == haystack_end) { // not found`
			`break;`
			`}`
Optimize split_about 2018-04-17 10:49:26 +08:00			`if (!no_empty \|\| haystack_cursor != split_point) {`
			`output->emplace_back(haystack_cursor, split_point);`
Default `string split` to keeping empty entries with option to remove The official fish documentation makes no mention of how `string split` treats empty tokens, e.g. splitting 'key1##key2' on '#' or (more confusingly) splitting '/path' on '/'. With this commit, `string split` now has an option to exclude zero-length substrings from the resulting array with a new `--no-empty/-n`. The default behavior of preserving empty entries is kept so as to avoid breakage. 2018-03-29 21:12:08 +08:00			`}`
Extract split_about from string Put it into wcstringutil for use with builtin_read. 2017-07-27 21:05:35 +08:00			`remaining--;`
			`// Need to skip over the needle for the next search note that the needle may be empty.`
			`haystack_cursor = split_point + std::distance(needle_start, needle_end);`
			`}`
			`// Trailing component, possibly empty.`
Optimize split_about 2018-04-17 10:49:26 +08:00			`if (!no_empty \|\| haystack_cursor != haystack_end) {`
			`output->emplace_back(haystack_cursor, haystack_end);`
			`}`
Extract split_about from string Put it into wcstringutil for use with builtin_read. 2017-07-27 21:05:35 +08:00			`}`
Add wcstringutil.h truncate function 2018-03-10 04:52:12 +08:00
			`enum class ellipsis_type {`
			`None,`
Reformat all files This runs build_tools/style.fish, which runs clang-format on C++, fish_indent on fish and (new) black on python. If anything is wrong with the formatting, we should fix the tools, but automated formatting is worth it. 2019-05-05 18:09:25 +08:00			`// Prefer niceness over minimalness`
Add wcstringutil.h truncate function 2018-03-10 04:52:12 +08:00			`Prettiest,`
Reformat all files This runs build_tools/style.fish, which runs clang-format on C++, fish_indent on fish and (new) black on python. If anything is wrong with the formatting, we should fix the tools, but automated formatting is worth it. 2019-05-05 18:09:25 +08:00			`// Make every character count ($ instead of ...)`
Add wcstringutil.h truncate function 2018-03-10 04:52:12 +08:00			`Shortest,`
			`};`

Reformat all files This runs build_tools/style.fish, which runs clang-format on C++, fish_indent on fish and (new) black on python. If anything is wrong with the formatting, we should fix the tools, but automated formatting is worth it. 2019-05-05 18:09:25 +08:00			`wcstring truncate(const wcstring &input, int max_len,`
			`ellipsis_type etype = ellipsis_type::Prettiest);`
Use move semantics in trim and history_item_t 2019-08-26 04:37:06 +08:00			`wcstring trim(wcstring input);`
			`wcstring trim(wcstring input, const wchar_t *any_of);`
Add wcstringutil.h truncate function 2018-03-10 04:52:12 +08:00
Revert "use std::tolower" This reverts commit a3db4128bc18c09cddf8b03cb3e908a2815d3216. This broke the build. 2019-09-23 06:33:08 +08:00			`/// Converts a string to lowercase.`
			`wcstring wcstolower(wcstring input);`

Introduce wcs2string_callback This is like wcs2string, but instead of returning a std::string, it invokes a user-supplied function with each converted character. The idea is to allow interleaved conversion and output. 2020-07-30 08:16:51 +08:00			`// Out-of-line helper for wcs2string_callback.`
			`void wcs2string_bad_char(wchar_t);`

			`/// Implementation of wcs2string that accepts a callback.`
			`/// This invokes \p func with (const char*, size_t) pairs.`
			`/// If \p func returns false, it stops; otherwise it continues.`
			`/// \return false if the callback returned false, otherwise true.`
			`template <typename Func>`
			`bool wcs2string_callback(const wchar_t *input, size_t len, const Func &func) {`
			`mbstate_t state = {};`
			`char converted[MB_LEN_MAX];`

			`for (size_t i = 0; i < len; i++) {`
			`wchar_t wc = input[i];`
			`// TODO: this doesn't seem sound.`
			`if (wc == INTERNAL_SEPARATOR) {`
			`// do nothing`
			`} else if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {`
			`converted[0] = wc - ENCODE_DIRECT_BASE;`
			`if (!func(converted, 1)) return false;`
			`} else if (MB_CUR_MAX == 1) { // single-byte locale (C/POSIX/ISO-8859)`
			// If `wc` contains a wide character we emit a question-mark.
			`if (wc & ~0xFF) {`
			`wc = '?';`
			`}`
			`converted[0] = wc;`
			`if (!func(converted, 1)) return false;`
			`} else {`
			`std::memset(converted, 0, sizeof converted);`
			`size_t len = std::wcrtomb(converted, wc, &state);`
			`if (len == static_cast<size_t>(-1)) {`
			`wcs2string_bad_char(wc);`
			`std::memset(&state, 0, sizeof(state));`
			`} else {`
			`if (!func(converted, len)) return false;`
			`}`
			`}`
			`}`
			`return true;`
			`}`

Migrate a bunch of code out of common.h Put it into wcstringutil, path, or a new file null_terminated_array. 2020-01-16 05:16:43 +08:00			`/// Support for iterating over a newline-separated string.`
			`template <typename Collection>`
			`class line_iterator_t {`
			`// Storage for each line.`
			`Collection storage;`

			`// The collection we're iterating. Note we hold this by reference.`
			`const Collection &coll;`

			`// The current location in the iteration.`
			`typename Collection::const_iterator current;`

			`public:`
			`/// Construct from a collection (presumably std::string or std::wcstring).`
			`line_iterator_t(const Collection &coll) : coll(coll), current(coll.cbegin()) {}`

			`/// Access the storage in which the last line was stored.`
			`const Collection &line() const { return storage; }`

			`/// Advances to the next line. \return true on success, false if we have exhausted the string.`
			`bool next() {`
			`if (current == coll.end()) return false;`
			`auto newline_or_end = std::find(current, coll.cend(), '\n');`
			`storage.assign(current, newline_or_end);`
			`current = newline_or_end;`

			`// Skip the newline.`
			`if (current != coll.cend()) ++current;`
			`return true;`
			`}`
			`};`

Implement new `read --null` flag The `--null` flag to `read` makes it split incoming lines on NUL instead of newlines. This is intended for processing the output of a command that uses NUL separators (such as `find -print0`). Fixes #1694. 2014-09-22 10:18:56 +08:00			`#endif`