Port common.{h,cpp} to Rust

Most of it is duplicated, hence untested. Functions like mbrtowc are not exposed by the libc crate, so declare them ourselves. Since we don't know the definition of C macros, add two big hacks to make this work: 1. Replace MB_LEN_MAX and mbstate_t with values (resp. types) that should be large enough for any implementation. 2. Detect the definition of MB_CUR_MAX in the build script. This requires more changes for each new libc. We could also use this approach for hack 1. Additionally, this commit brings a small behavior change to read_unquoted_escape(): we cannot decode surrogate code points like \UDE01 into a Rust char, so use � (\UFFFD, replacement character) instead. Previously, we added such code points to a wcstring; it looks like they were ignored when printed.
2025-03-03 18:04:27 +08:00 · 2023-03-26 17:23:05 +02:00 · 2023-03-26 17:23:05 +02:00 · 05bad5eda1
commit 05bad5eda1
parent 998cb7f1cd
33 changed files with 1837 additions and 556 deletions
--- a/fish-rust/Cargo.lock
+++ b/fish-rust/Cargo.lock
@ -368,6 +368,7 @@ dependencies = [
 "autocxx",
 "autocxx-build",
 "bitflags",
+ "cc",
 "cxx",
 "cxx-build",
 "cxx-gen",
--- a/fish-rust/Cargo.toml
+++ b/fish-rust/Cargo.toml
@ -26,6 +26,7 @@ widestring = "1.0.2"

 [build-dependencies]
 autocxx-build = "0.23.1"
+cc = { git = "https://github.com/mqudsi/cc-rs", branch = "fish" }
 cxx-build = { git = "https://github.com/fish-shell/cxx", branch = "fish" }
 cxx-gen = { git = "https://github.com/fish-shell/cxx", branch = "fish" }
 miette = { version = "5", features = ["fancy"] }
--- a/fish-rust/build.rs
+++ b/fish-rust/build.rs
@ -1,6 +1,8 @@
 use miette::miette;

 fn main() -> miette::Result<()> {
+    cc::Build::new().file("src/compat.c").compile("libcompat.a");
+
    let rust_dir = std::env::var("CARGO_MANIFEST_DIR").expect("Env var CARGO_MANIFEST_DIR missing");
    let target_dir =
        std::env::var("FISH_RUST_TARGET_DIR").unwrap_or(format!("{}/{}", rust_dir, "target/"));
@ -25,6 +27,7 @@ fn main() -> miette::Result<()> {
    let source_files = vec![
        "src/abbrs.rs",
        "src/event.rs",
+        "src/common.rs",
        "src/fd_monitor.rs",
        "src/fd_readable_set.rs",
        "src/fds.rs",
--- a/fish-rust/src/common.rs
+++ b/fish-rust/src/common.rs
--- a/fish-rust/src/compat.c
+++ b/fish-rust/src/compat.c
@ -0,0 +1,3 @@
+#include <stdlib.h>
+
+/* Expose the MB_CUR_MAX macro as a real function so that it can be called over FFI. */
+size_t C_MB_CUR_MAX(void) { return MB_CUR_MAX; }
--- a/fish-rust/src/compat.rs
+++ b/fish-rust/src/compat.rs
@ -0,0 +1,8 @@
+/// Returns the C library's `MB_CUR_MAX` value by calling the `C_MB_CUR_MAX` shim.
+///
+/// Named after the C macro it stands in for, hence the `non_snake_case` allowance.
+#[allow(non_snake_case)]
+pub fn MB_CUR_MAX() -> usize {
+    // The shim takes no arguments and only returns a value.
+    unsafe { C_MB_CUR_MAX() }
+}
+
+extern "C" {
+    // Defined in src/compat.c, which the build script compiles.
+    fn C_MB_CUR_MAX() -> usize;
+}
--- a/fish-rust/src/env.rs
+++ b/fish-rust/src/env.rs
@ -38,6 +38,11 @@ pub mod flags {
            c_int(i32::from(val.bits()))
        }
    }
+    /// Converts an `EnvMode` into its raw flag bits.
+    impl From<EnvMode> for u16 {
+        fn from(val: EnvMode) -> Self {
+            val.bits()
+        }
+    }
 }

 /// Return values for `env_stack_t::set()`.
--- a/fish-rust/src/expand.rs
+++ b/fish-rust/src/expand.rs
@ -1,39 +1,34 @@
-use crate::wchar::{EXPAND_RESERVED_BASE, EXPAND_RESERVED_END};
+use crate::common::{char_offset, EXPAND_RESERVED_BASE, EXPAND_RESERVED_END};
+use crate::wchar::wstr;
+use widestring_suffix::widestrs;

-/// Private use area characters used in expansions
-#[repr(u32)]
-pub enum ExpandChars {
-    /// Character representing a home directory.
-    HomeDirectory = EXPAND_RESERVED_BASE as u32,
-    /// Character representing process expansion for %self.
-    ProcessExpandSelf,
-    /// Character representing variable expansion.
-    VariableExpand,
-    /// Character representing variable expansion into a single element.
-    VariableExpandSingle,
-    /// Character representing the start of a bracket expansion.
-    BraceBegin,
-    /// Character representing the end of a bracket expansion.
-    BraceEnd,
-    /// Character representing separation between two bracket elements.
-    BraceSep,
-    /// Character that takes the place of any whitespace within non-quoted text in braces
-    BraceSpace,
-    /// Separate subtokens in a token with this character.
-    InternalSeparator,
-    /// Character representing an empty variable expansion. Only used transitively while expanding
-    /// variables.
-    VariableExpandEmpty,
-}
+/// Character representing a home directory.
+pub const HOME_DIRECTORY: char = char_offset(EXPAND_RESERVED_BASE, 0);
+/// Character representing process expansion for %self.
+pub const PROCESS_EXPAND_SELF: char = char_offset(EXPAND_RESERVED_BASE, 1);
+/// Character representing variable expansion.
+pub const VARIABLE_EXPAND: char = char_offset(EXPAND_RESERVED_BASE, 2);
+/// Character representing variable expansion into a single element.
+pub const VARIABLE_EXPAND_SINGLE: char = char_offset(EXPAND_RESERVED_BASE, 3);
+/// Character representing the start of a bracket expansion.
+pub const BRACE_BEGIN: char = char_offset(EXPAND_RESERVED_BASE, 4);
+/// Character representing the end of a bracket expansion.
+pub const BRACE_END: char = char_offset(EXPAND_RESERVED_BASE, 5);
+/// Character representing separation between two bracket elements.
+pub const BRACE_SEP: char = char_offset(EXPAND_RESERVED_BASE, 6);
+/// Character that takes the place of any whitespace within non-quoted text in braces
+pub const BRACE_SPACE: char = char_offset(EXPAND_RESERVED_BASE, 7);
+/// Separate subtokens in a token with this character.
+pub const INTERNAL_SEPARATOR: char = char_offset(EXPAND_RESERVED_BASE, 8);
+/// Character representing an empty variable expansion. Only used transitively while expanding
+/// variables.
+pub const VARIABLE_EXPAND_EMPTY: char = char_offset(EXPAND_RESERVED_BASE, 9);

 const _: () = assert!(
-    EXPAND_RESERVED_END as u32 > ExpandChars::VariableExpandEmpty as u32,
+    EXPAND_RESERVED_END as u32 > VARIABLE_EXPAND_EMPTY as u32,
    "Characters used in expansions must stay within private use area"
 );

-impl From<ExpandChars> for char {
-    fn from(val: ExpandChars) -> Self {
-        // We know this is safe because we limit the the range of this enum
-        unsafe { char::from_u32_unchecked(val as _) }
-    }
-}
+/// The string represented by PROCESS_EXPAND_SELF
+#[widestrs]
+pub const PROCESS_EXPAND_SELF_STR: &wstr = "%self"L;
--- a/fish-rust/src/ffi.rs
+++ b/fish-rust/src/ffi.rs
@ -53,8 +53,6 @@ include_cpp! {
    generate!("env_var_t")
    generate!("make_pipes_ffi")

-    generate!("valid_var_name_char")
-
    generate!("get_flog_file_fd")
    generate!("log_extra_to_flog_file")

@ -100,9 +98,6 @@ include_cpp! {
    generate!("re::regex_t")
    generate!("re::regex_result_ffi")
    generate!("re::try_compile_ffi")
-    generate!("wcs2string")
-    generate!("wcs2zstring")
-    generate!("str2wcstring")

    generate!("signal_handle")
    generate!("signal_check_cancel")
--- a/fish-rust/src/flog.rs
+++ b/fish-rust/src/flog.rs
@ -188,7 +188,15 @@ macro_rules! FLOG {
        }
    };
 }
-pub(crate) use FLOG;
+
+// TODO implement.
+// Placeholder for a printf-style logging macro: for now the category and every following
+// argument (including the format string) are forwarded unchanged to `FLOG!`.
+macro_rules! FLOGF {
+    ($category:ident, $($elem:expr),+) => {
+        crate::flog::FLOG!($category, $($elem),*);
+    }
+}
+
+pub(crate) use {FLOG, FLOGF};

 /// For each category, if its name matches the wildcard, set its enabled to the given sense.
 fn apply_one_wildcard(wc_esc: &wstr, sense: bool) {
--- a/fish-rust/src/lib.rs
+++ b/fish-rust/src/lib.rs
@ -12,6 +12,7 @@ mod common;
 mod abbrs;
 mod builtins;
 mod color;
+mod compat;
 mod env;
 mod event;
 mod expand;
@ -51,6 +52,7 @@ mod wchar_ext;
 mod wchar_ffi;
 mod wcstringutil;
 mod wgetopt;
+mod wildcard;
 mod wutil;

 // Don't use `#[cfg(test)]` here to make sure ffi tests are built and tested
--- a/fish-rust/src/path.rs
+++ b/fish-rust/src/path.rs
@ -1,5 +1,5 @@
 use crate::{
-    expand::ExpandChars::HomeDirectory,
+    expand::HOME_DIRECTORY,
    wchar::{wstr, WExt, WString, L},
 };

@ -12,7 +12,7 @@ pub fn path_apply_working_directory(path: &wstr, working_directory: &wstr) -> WS

    // We're going to make sure that if we want to prepend the wd, that the string has no leading
    // "/".
-    let prepend_wd = path.char_at(0) != '/' && path.char_at(0) != HomeDirectory.into();
+    let prepend_wd = path.char_at(0) != '/' && path.char_at(0) != HOME_DIRECTORY;

    if !prepend_wd {
        // No need to prepend the wd, so just return the path we were given.
--- a/fish-rust/src/tokenizer.rs
+++ b/fish-rust/src/tokenizer.rs
@ -1,7 +1,8 @@
 //! A specialized tokenizer for tokenizing the fish language. In the future, the tokenizer should be
 //! extended to support marks, tokenizing multiple strings and disposing of unused string segments.

-use crate::ffi::{valid_var_name_char, wcharz_t};
+use crate::common::valid_var_name_char;
+use crate::ffi::wcharz_t;
 use crate::future_feature_flags::{feature_test, FeatureFlag};
 use crate::parse_constants::SOURCE_OFFSET_INVALID;
 use crate::redirection::RedirectionMode;
@ -1357,7 +1358,7 @@ pub fn variable_assignment_equals_pos(txt: &wstr) -> Option<usize> {
    // TODO bracket indexing
    for (i, c) in txt.chars().enumerate() {
        if !found_potential_variable {
-            if !valid_var_name_char(c as wchar_t) {
+            if !valid_var_name_char(c) {
                return None;
            }
            found_potential_variable = true;
@ -1365,7 +1366,7 @@ pub fn variable_assignment_equals_pos(txt: &wstr) -> Option<usize> {
            if c == '=' {
                return Some(i);
            }
-            if !valid_var_name_char(c as wchar_t) {
+            if !valid_var_name_char(c) {
                return None;
            }
        }
--- a/fish-rust/src/wchar.rs
+++ b/fish-rust/src/wchar.rs
@ -4,6 +4,7 @@
 //!   - wstr: a string slice without a nul terminator. Like `&str` but wide chars.
 //!   - WString: an owning string without a nul terminator. Like `String` but wide chars.

+use crate::common::{ENCODE_DIRECT_BASE, ENCODE_DIRECT_END};
 pub use widestring::{Utf32Str as wstr, Utf32String as WString};

 /// Pull in our extensions.
@ -30,43 +31,6 @@ pub(crate) use L;
 /// Note: the resulting string is NOT nul-terminated.
 pub use widestring_suffix::widestrs;

-// Use Unicode "non-characters" for internal characters as much as we can. This
-// gives us 32 "characters" for internal use that we can guarantee should not
-// appear in our input stream. See http://www.unicode.org/faq/private_use.html.
-pub const RESERVED_CHAR_BASE: char = '\u{FDD0}';
-pub const RESERVED_CHAR_END: char = '\u{FDF0}';
-// Split the available non-character values into two ranges to ensure there are
-// no conflicts among the places we use these special characters.
-pub const EXPAND_RESERVED_BASE: char = RESERVED_CHAR_BASE;
-pub const EXPAND_RESERVED_END: char = match char::from_u32(EXPAND_RESERVED_BASE as u32 + 16u32) {
-    Some(c) => c,
-    None => panic!("private use codepoint in expansion region should be valid char"),
-};
-pub const WILDCARD_RESERVED_BASE: char = EXPAND_RESERVED_END;
-pub const WILDCARD_RESERVED_END: char = match char::from_u32(WILDCARD_RESERVED_BASE as u32 + 16u32)
-{
-    Some(c) => c,
-    None => panic!("private use codepoint in wildcard region should be valid char"),
-};
-
-// These are in the Unicode private-use range. We really shouldn't use this
-// range but have little choice in the matter given how our lexer/parser works.
-// We can't use non-characters for these two ranges because there are only 66 of
-// them and we need at least 256 + 64.
-//
-// If sizeof(wchar_t)==4 we could avoid using private-use chars; however, that
-// would result in fish having different behavior on machines with 16 versus 32
-// bit wchar_t. It's better that fish behave the same on both types of systems.
-//
-// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
-// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
-// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
-pub const ENCODE_DIRECT_BASE: char = '\u{F600}';
-pub const ENCODE_DIRECT_END: char = match char::from_u32(ENCODE_DIRECT_BASE as u32 + 256) {
-    Some(c) => c,
-    None => panic!("private use codepoint in encode direct region should be valid char"),
-};
-
 /// Encode a literal byte in a UTF-32 character. This is required for e.g. the echo builtin, whose
 /// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g.
 /// UTF-8 by the terminal. If we were to interpret each of those bytes as a codepoint and encode it
@ -78,3 +42,16 @@ pub fn encode_byte_to_char(byte: u8) -> char {
    char::from_u32(u32::from(ENCODE_DIRECT_BASE) + u32::from(byte))
        .expect("private-use codepoint should be valid char")
 }
+
+/// Recover the raw byte stored in a directly-encoded UTF-32 character.
+///
+/// Returns `None` when `c` lies outside `ENCODE_DIRECT_BASE..ENCODE_DIRECT_END`.
+pub fn decode_byte_from_char(c: char) -> Option<u8> {
+    if !(ENCODE_DIRECT_BASE..ENCODE_DIRECT_END).contains(&c) {
+        return None;
+    }
+    // The offset from the base of the direct-encoding range is the original byte.
+    let offset = u32::from(c) - u32::from(ENCODE_DIRECT_BASE);
+    Some(u8::try_from(offset).unwrap())
+}
--- a/fish-rust/src/wcstringutil.rs
+++ b/fish-rust/src/wcstringutil.rs
@ -1,6 +1,66 @@
 //! Helper functions for working with wcstring.

-use crate::wchar::{wstr, WString};
+use crate::compat::MB_CUR_MAX;
+use crate::expand::INTERNAL_SEPARATOR;
+use crate::flog::FLOGF;
+use crate::wchar::{decode_byte_from_char, wstr, WString, L};
+use crate::wutil::encoding::{wcrtomb, zero_mbstate, AT_LEAST_MB_LEN_MAX};
+
+/// Implementation of wcs2string that accepts a callback.
+///
+/// Each character of `input` is converted to its narrow representation and `func` is invoked
+/// with the resulting byte slice. If `func` returns false, conversion stops immediately;
+/// otherwise it continues with the next character.
+///
+/// Returns false if the callback returned false, otherwise true.
+pub fn wcs2string_callback(input: &wstr, mut func: impl FnMut(&[u8]) -> bool) -> bool {
+    let mut state = zero_mbstate();
+    let mut converted = [0_u8; AT_LEAST_MB_LEN_MAX];
+
+    for c in input.chars() {
+        // TODO: this doesn't seem sound.
+        if c == INTERNAL_SEPARATOR {
+            // Internal separators produce no output.
+            continue;
+        }
+
+        if let Some(byte) = decode_byte_from_char(c) {
+            // A directly-encoded byte is emitted verbatim.
+            converted[0] = byte;
+            if !func(&converted[..1]) {
+                return false;
+            }
+            continue;
+        }
+
+        if MB_CUR_MAX() == 1 {
+            // single-byte locale (C/POSIX/ISO-8859)
+            // If `c` does not fit in a single byte we emit a question-mark.
+            converted[0] = u8::try_from(u32::from(c)).unwrap_or(b'?');
+            if !func(&converted[..1]) {
+                return false;
+            }
+            continue;
+        }
+
+        converted = [0; AT_LEAST_MB_LEN_MAX];
+        // SAFETY: the output pointer covers the whole `converted` buffer, whose size
+        // (AT_LEAST_MB_LEN_MAX) is chosen to be at least MB_LEN_MAX on any implementation, and
+        // `state` is a live conversion state that is only ever zeroed or updated by libc.
+        let len = unsafe {
+            wcrtomb(
+                converted.as_mut_ptr().cast(),
+                c as libc::wchar_t,
+                std::ptr::addr_of_mut!(state),
+            )
+        };
+        // A result of (size_t)-1 means the character could not be converted.
+        if len == usize::MAX {
+            wcs2string_bad_char(c);
+            // Start over with a fresh conversion state after a failure.
+            state = zero_mbstate();
+        } else if !func(&converted[..len]) {
+            return false;
+        }
+    }
+    true
+}
+
+/// Logs, under the `char_encoding` category, that `c` could not be converted to a narrow
+/// (multibyte) representation.
+fn wcs2string_bad_char(c: char) {
+    FLOGF!(
+        char_encoding,
+        L!("Wide character U+%4X has no narrow representation"),
+        c
+    );
+}

 /// Joins strings with a separator.
 pub fn join_strings(strs: &[&wstr], sep: char) -> WString {
--- a/fish-rust/src/wildcard.rs
+++ b/fish-rust/src/wildcard.rs
@ -0,0 +1,13 @@
+// Enumeration of all wildcard types.
+
+use crate::common::{char_offset, WILDCARD_RESERVED_BASE};
+
+/// Character representing any character except '/' (slash).
+pub const ANY_CHAR: char = char_offset(WILDCARD_RESERVED_BASE, 0);
+/// Character representing any character string not containing '/' (slash).
+pub const ANY_STRING: char = char_offset(WILDCARD_RESERVED_BASE, 1);
+/// Character representing any character string.
+pub const ANY_STRING_RECURSIVE: char = char_offset(WILDCARD_RESERVED_BASE, 2);
+/// This is a special pseudo-char that is not used other than to mark the
+/// end of the special characters so we can sanity check the range.
+pub const ANY_SENTINEL: char = char_offset(WILDCARD_RESERVED_BASE, 3);
--- a/fish-rust/src/wutil/encoding.rs
+++ b/fish-rust/src/wutil/encoding.rs
@ -0,0 +1,19 @@
+// Hand-written declarations of the C library's wide/multibyte conversion functions.
+// Callers must pass pointers that are valid for the respective C function; `ps`/`p` point to an
+// `mbstate_t` conversion state (see the opaque stand-in type below).
+extern "C" {
+    /// Converts the wide character `wc` to its multibyte form, writing the bytes to `s`.
+    pub fn wcrtomb(s: *mut libc::c_char, wc: libc::wchar_t, ps: *mut mbstate_t) -> usize;
+    /// Converts at most `n` bytes starting at `s` into a wide character stored in `pwc`.
+    pub fn mbrtowc(
+        pwc: *mut libc::wchar_t,
+        s: *const libc::c_char,
+        n: usize,
+        p: *mut mbstate_t,
+    ) -> usize;
+}
+
+// HACK This should be mbstate_t from libc but that's not exposed.  Since it's only written by
+// libc, we define it as opaque type that should be large enough for all implementations.
+// The stand-in is 16 * 8 = 128 bytes.
+pub type mbstate_t = [u64; 16];
+/// Returns an all-zero conversion state, used as the initial state for a new conversion.
+pub fn zero_mbstate() -> mbstate_t {
+    [0; 16]
+}
+
+// HACK This should be the MB_LEN_MAX macro from libc but that's not easy to get.
+pub const AT_LEAST_MB_LEN_MAX: usize = 32;
--- a/fish-rust/src/wutil/mod.rs
+++ b/fish-rust/src/wutil/mod.rs
@ -1,3 +1,4 @@
+pub mod encoding;
 pub mod errors;
 pub mod gettext;
 mod normalize_path;
@ -6,6 +7,7 @@ pub mod wcstod;
 pub mod wcstoi;
 mod wrealpath;

+use crate::common::fish_reserved_codepoint;
 pub(crate) use gettext::{wgettext, wgettext_fmt};
 pub use normalize_path::*;
 pub(crate) use printf::sprintf;
@ -28,3 +30,21 @@ pub fn perror(s: &str) {
    let _ = stderr.write_all(slice);
    let _ = stderr.write_all(b"\n");
 }
+
+// Half-open [START, END) bounds of the three Unicode private use areas.
+const PUA1_START: char = '\u{E000}';
+const PUA1_END: char = '\u{F900}';
+const PUA2_START: char = '\u{F0000}';
+const PUA2_END: char = '\u{FFFFE}';
+const PUA3_START: char = '\u{100000}';
+const PUA3_END: char = '\u{10FFFE}';
+
+/// Returns true if the code point is in any of the Unicode private use areas.
+///
+/// All three ranges are checked: the one in the Basic Multilingual Plane as well as the two
+/// supplementary private use planes.
+fn fish_is_pua(c: char) -> bool {
+    (PUA1_START..PUA1_END).contains(&c)
+        || (PUA2_START..PUA2_END).contains(&c)
+        || (PUA3_START..PUA3_END).contains(&c)
+}
+
+/// Alphanumeric test that never accepts fish's reserved code points or private use characters.
+/// We need this because there are too many implementations that don't return the proper answer
+/// for some code points. See issue #3050.
+pub fn fish_iswalnum(c: char) -> bool {
+    if fish_reserved_codepoint(c) || fish_is_pua(c) {
+        return false;
+    }
+    c.is_alphanumeric()
+}
--- a/fish-rust/src/wutil/wrealpath.rs
+++ b/fish-rust/src/wutil/wrealpath.rs
@ -4,13 +4,8 @@ use std::{
    os::unix::prelude::{OsStrExt, OsStringExt},
 };

-use cxx::let_cxx_string;
-
-use crate::{
-    ffi::{str2wcstring, wcs2zstring},
-    wchar::{wstr, WString},
-    wchar_ffi::{WCharFromFFI, WCharToFFI},
-};
+use crate::common::{str2wcstring, wcs2zstring};
+use crate::wchar::{wstr, WString};

 /// Wide character realpath. The last path component does not need to be valid. If an error occurs,
 /// `wrealpath()` returns `None`
@ -19,7 +14,7 @@ pub fn wrealpath(pathname: &wstr) -> Option<WString> {
        return None;
    }

-    let mut narrow_path: Vec<u8> = wcs2zstring(&pathname.to_ffi()).from_ffi();
+    let mut narrow_path: Vec<u8> = wcs2zstring(pathname).into();

    // Strip trailing slashes. This is treats "/a//" as equivalent to "/a" if /a is a non-directory.
    while narrow_path.len() > 1 && narrow_path[narrow_path.len() - 1] == b'/' {
@ -68,7 +63,5 @@ pub fn wrealpath(pathname: &wstr) -> Option<WString> {
        }
    };

-    let_cxx_string!(s = real_path);
-
-    Some(str2wcstring(&s).from_ffi())
+    Some(str2wcstring(&real_path))
 }
--- a/src/ast.cpp
+++ b/src/ast.cpp
@ -67,9 +67,8 @@ static parse_keyword_t keyword_for_token(token_type_t tok, const wcstring &token
        if (!needs_expand) {
            result = keyword_with_name(token);
        } else {
-            wcstring storage;
-            if (unescape_string(token, &storage, 0)) {
-                result = keyword_with_name(storage);
+            if (auto unescaped = unescape_string(token, 0)) {
+                result = keyword_with_name(*unescaped);
            }
        }
    }
--- a/src/builtins/complete.cpp
+++ b/src/builtins/complete.cpp
@ -204,12 +204,11 @@ maybe_t<int> builtin_complete(parser_t &parser, io_streams_t &streams, const wch
            }
            case 'p':
            case 'c': {
-                wcstring tmp;
-                if (unescape_string(w.woptarg, &tmp, UNESCAPE_SPECIAL)) {
+                if (auto tmp = unescape_string(w.woptarg, UNESCAPE_SPECIAL)) {
                    if (opt == 'p')
-                        path.push_back(tmp);
+                        path.push_back(*tmp);
                    else
-                        cmd_to_complete.push_back(tmp);
+                        cmd_to_complete.push_back(*tmp);
                } else {
                    streams.err.append_format(_(L"%ls: Invalid token '%ls'\n"), cmd, w.woptarg);
                    return STATUS_INVALID_ARGS;
--- a/src/builtins/read.cpp
+++ b/src/builtins/read.cpp
@ -531,14 +531,13 @@ maybe_t<int> builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t

        if (opts.tokenize) {
            auto tok = new_tokenizer(buff.c_str(), TOK_ACCEPT_UNFINISHED);
-            wcstring out;
            if (opts.array) {
                // Array mode: assign each token as a separate element of the sole var.
                wcstring_list_t tokens;
                while (auto t = tok->next()) {
                    auto text = *tok->text_of(*t);
-                    if (unescape_string(text, &out, UNESCAPE_DEFAULT)) {
-                        tokens.push_back(out);
+                    if (auto out = unescape_string(text, UNESCAPE_DEFAULT)) {
+                        tokens.push_back(*out);
                    } else {
                        tokens.push_back(text);
                    }
@ -549,8 +548,8 @@ maybe_t<int> builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t
                std::unique_ptr<tok_t> t;
                while ((vars_left() - 1 > 0) && (t = tok->next())) {
                    auto text = *tok->text_of(*t);
-                    if (unescape_string(text, &out, UNESCAPE_DEFAULT)) {
-                        parser.set_var_and_fire(*var_ptr++, opts.place, out);
+                    if (auto out = unescape_string(text, UNESCAPE_DEFAULT)) {
+                        parser.set_var_and_fire(*var_ptr++, opts.place, *out);
                    } else {
                        parser.set_var_and_fire(*var_ptr++, opts.place, text);
                    }
--- a/src/builtins/string.cpp
+++ b/src/builtins/string.cpp
@ -737,10 +737,9 @@ static int string_unescape(parser_t &parser, io_streams_t &streams, int argc,

    arg_iterator_t aiter(argv, optind, streams);
    while (const wcstring *arg = aiter.nextstr()) {
-        wcstring result;
        wcstring sep = aiter.want_newline() ? L"\n" : L"";
-        if (unescape_string(*arg, &result, flags, opts.escape_style)) {
-            streams.out.append(result + sep);
+        if (auto result = unescape_string(*arg, flags, opts.escape_style)) {
+            streams.out.append(*result + sep);
            nesc++;
        }
    }
--- a/src/common.cpp
+++ b/src/common.cpp
@ -33,6 +33,7 @@
 #include <memory>

 #include "common.h"
+#include "common.rs.h"
 #include "expand.h"
 #include "fallback.h"  // IWYU pragma: keep
 #include "flog.h"
@ -119,17 +120,6 @@ long convert_digit(wchar_t d, int base) {
 /// Test whether the char is a valid hex digit as used by the `escape_string_*()` functions.
 static bool is_hex_digit(int c) { return std::strchr("0123456789ABCDEF", c) != nullptr; }

-/// This is a specialization of `convert_digit()` that only handles base 16 and only uppercase.
-static long convert_hex_digit(wchar_t d) {
-    if ((d <= L'9') && (d >= L'0')) {
-        return d - L'0';
-    } else if ((d <= L'Z') && (d >= L'A')) {
-        return 10 + d - L'A';
-    }
-
-    return -1;
-}
-
 bool is_windows_subsystem_for_linux() {
 #if defined(WSL)
    return true;
@ -749,38 +739,6 @@ static void escape_string_url(const wcstring &in, wcstring &out) {
    }
 }

-/// Reverse the effects of `escape_string_url()`. By definition the string has consist of just ASCII
-/// chars.
-static bool unescape_string_url(const wchar_t *in, wcstring *out) {
-    std::string result;
-    result.reserve(out->size());
-    for (wchar_t c = *in; c; c = *++in) {
-        if (c > 0x7F) return false;  // invalid character means we can't decode the string
-        if (c == '%') {
-            int c1 = in[1];
-            if (c1 == 0) return false;  // found unexpected end of string
-            if (c1 == '%') {
-                result.push_back('%');
-                in++;
-            } else {
-                int c2 = in[2];
-                if (c2 == 0) return false;  // string ended prematurely
-                long d1 = convert_digit(c1, 16);
-                if (d1 < 0) return false;
-                long d2 = convert_digit(c2, 16);
-                if (d2 < 0) return false;
-                result.push_back(16 * d1 + d2);
-                in += 2;
-            }
-        } else {
-            result.push_back(c);
-        }
-    }
-
-    *out = str2wcstring(result);
-    return true;
-}
-
 /// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str.
 static void escape_string_var(const wcstring &in, wcstring &out) {
    bool prev_was_hex_encoded = false;
@ -812,46 +770,6 @@ static void escape_string_var(const wcstring &in, wcstring &out) {
    }
 }

-/// Reverse the effects of `escape_string_var()`. By definition the string has consist of just ASCII
-/// chars.
-static bool unescape_string_var(const wchar_t *in, wcstring *out) {
-    std::string result;
-    result.reserve(out->size());
-    bool prev_was_hex_encoded = false;
-    for (wchar_t c = *in; c; c = *++in) {
-        if (c > 0x7F) return false;  // invalid character means we can't decode the string
-        if (c == '_') {
-            int c1 = in[1];
-            if (c1 == 0) {
-                if (prev_was_hex_encoded) break;
-                return false;  // found unexpected escape char at end of string
-            }
-            if (c1 == '_') {
-                result.push_back('_');
-                in++;
-            } else if (is_hex_digit(c1)) {
-                int c2 = in[2];
-                if (c2 == 0) return false;  // string ended prematurely
-                long d1 = convert_hex_digit(c1);
-                if (d1 < 0) return false;
-                long d2 = convert_hex_digit(c2);
-                if (d2 < 0) return false;
-                result.push_back(16 * d1 + d2);
-                in += 2;
-                prev_was_hex_encoded = true;
-            }
-            // No "else" clause because if the first char after an underscore is not another
-            // underscore or a valid hex character then the underscore is there to improve
-            // readability after we've encoded a character not valid in a var name.
-        } else {
-            result.push_back(c);
-        }
-    }
-
-    *out = str2wcstring(result);
-    return true;
-}
-
 wcstring escape_string_for_double_quotes(wcstring in) {
    // We need to escape backslashes, double quotes, and dollars only.
    wcstring result = std::move(in);
@ -1130,12 +1048,6 @@ wcstring escape_string(const wcstring &in, escape_flags_t flags, escape_string_s
    return result;
 }

-/// Helper to return the last character in a string, or none.
-static maybe_t<wchar_t> string_last_char(const wcstring &str) {
-    if (str.empty()) return none();
-    return str.back();
-}
-
 /// Given a null terminated string starting with a backslash, read the escape as if it is unquoted,
 /// appending to result. Return the number of characters consumed, or none on error.
 maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete,
@ -1329,320 +1241,30 @@ maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, boo
    return in_pos;
 }

-/// Returns the unescaped version of input_str into output_str (by reference). Returns true if
-/// successful. If false, the contents of output_str are unchanged.
-static bool unescape_string_internal(const wchar_t *const input, const size_t input_len,
-                                     wcstring *output_str, unescape_flags_t flags) {
-    // Set up result string, which we'll swap with the output on success.
-    wcstring result;
-    result.reserve(input_len);
-
-    const bool unescape_special = static_cast<bool>(flags & UNESCAPE_SPECIAL);
-    const bool allow_incomplete = static_cast<bool>(flags & UNESCAPE_INCOMPLETE);
-    const bool ignore_backslashes = static_cast<bool>(flags & UNESCAPE_NO_BACKSLASHES);
-
-    // The positions of open braces.
-    std::vector<size_t> braces;
-    // The positions of variable expansions or brace ","s.
-    // We only read braces as expanders if there's a variable expansion or "," in them.
-    std::vector<size_t> vars_or_seps;
-    int brace_count = 0;
-
-    bool errored = false;
-    enum {
-        mode_unquoted,
-        mode_single_quotes,
-        mode_double_quotes,
-    } mode = mode_unquoted;
-
-    for (size_t input_position = 0; input_position < input_len && !errored; input_position++) {
-        const wchar_t c = input[input_position];
-        // Here's the character we'll append to result, or none() to suppress it.
-        maybe_t<wchar_t> to_append_or_none = c;
-        if (mode == mode_unquoted) {
-            switch (c) {
-                case L'\\': {
-                    if (!ignore_backslashes) {
-                        // Backslashes (escapes) are complicated and may result in errors, or
-                        // appending INTERNAL_SEPARATORs, so we have to handle them specially.
-                        auto escape_chars = read_unquoted_escape(
-                            input + input_position, &result, allow_incomplete, unescape_special);
-                        if (!escape_chars.has_value()) {
-                            // A none() return indicates an error.
-                            errored = true;
-                        } else {
-                            // Skip over the characters we read, minus one because the outer loop
-                            // will increment it.
-                            assert(*escape_chars > 0);
-                            input_position += *escape_chars - 1;
-                        }
-                        // We've already appended, don't append anything else.
-                        to_append_or_none = none();
-                    }
-                    break;
-                }
-                case L'~': {
-                    if (unescape_special && (input_position == 0)) {
-                        to_append_or_none = HOME_DIRECTORY;
-                    }
-                    break;
-                }
-                case L'%': {
-                    // Note that this only recognizes %self if the string is literally %self.
-                    // %self/foo will NOT match this.
-                    if (unescape_special && input_position == 0 &&
-                        !std::wcscmp(input, PROCESS_EXPAND_SELF_STR)) {
-                        to_append_or_none = PROCESS_EXPAND_SELF;
-                        input_position += PROCESS_EXPAND_SELF_STR_LEN - 1;  // skip over 'self's
-                    }
-                    break;
-                }
-                case L'*': {
-                    if (unescape_special) {
-                        // In general, this is ANY_STRING. But as a hack, if the last appended char
-                        // is ANY_STRING, delete the last char and store ANY_STRING_RECURSIVE to
-                        // reflect the fact that ** is the recursive wildcard.
-                        if (string_last_char(result) == ANY_STRING) {
-                            assert(!result.empty());
-                            result.resize(result.size() - 1);
-                            to_append_or_none = ANY_STRING_RECURSIVE;
-                        } else {
-                            to_append_or_none = ANY_STRING;
-                        }
-                    }
-                    break;
-                }
-                case L'?': {
-                    if (unescape_special && !feature_test(feature_flag_t::qmark_noglob)) {
-                        to_append_or_none = ANY_CHAR;
-                    }
-                    break;
-                }
-                case L'$': {
-                    if (unescape_special) {
-                        bool is_cmdsub =
-                            input_position + 1 < input_len && input[input_position + 1] == L'(';
-                        if (!is_cmdsub) {
-                            to_append_or_none = VARIABLE_EXPAND;
-                            vars_or_seps.push_back(input_position);
-                        }
-                    }
-                    break;
-                }
-                case L'{': {
-                    if (unescape_special) {
-                        brace_count++;
-                        to_append_or_none = BRACE_BEGIN;
-                        // We need to store where the brace *ends up* in the output.
-                        braces.push_back(result.size());
-                    }
-                    break;
-                }
-                case L'}': {
-                    if (unescape_special) {
-                        // HACK: The completion machinery sometimes hands us partial tokens.
-                        // We can't parse them properly, but it shouldn't hurt,
-                        // so we don't assert here.
-                        // See #4954.
-                        // assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we
-                        // shouldn't be able to get here");
-                        brace_count--;
-                        to_append_or_none = BRACE_END;
-                        if (!braces.empty()) {
-                            // HACK: To reduce accidental use of brace expansion, treat a brace
-                            // with zero or one items as literal input. See #4632. (The hack is
-                            // doing it here and like this.)
-                            if (vars_or_seps.empty() || vars_or_seps.back() < braces.back()) {
-                                result[braces.back()] = L'{';
-                                // We also need to turn all spaces back.
-                                for (size_t i = braces.back() + 1; i < result.size(); i++) {
-                                    if (result[i] == BRACE_SPACE) result[i] = L' ';
-                                }
-                                to_append_or_none = L'}';
-                            }
-
-                            // Remove all seps inside the current brace pair, so if we have a
-                            // surrounding pair we only get seps inside *that*.
-                            if (!vars_or_seps.empty()) {
-                                while (!vars_or_seps.empty() && vars_or_seps.back() > braces.back())
-                                    vars_or_seps.pop_back();
-                            }
-                            braces.pop_back();
-                        }
-                    }
-                    break;
-                }
-                case L',': {
-                    if (unescape_special && brace_count > 0) {
-                        to_append_or_none = BRACE_SEP;
-                        vars_or_seps.push_back(input_position);
-                    }
-                    break;
-                }
-                case L' ': {
-                    if (unescape_special && brace_count > 0) {
-                        to_append_or_none = BRACE_SPACE;
-                    }
-                    break;
-                }
-                case L'\'': {
-                    mode = mode_single_quotes;
-                    to_append_or_none =
-                        unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
-                    break;
-                }
-                case L'\"': {
-                    mode = mode_double_quotes;
-                    to_append_or_none =
-                        unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
-                    break;
-                }
-                default: {
-                    break;
-                }
-            }
-        } else if (mode == mode_single_quotes) {
-            if (c == L'\\') {
-                // A backslash may or may not escape something in single quotes.
-                switch (input[input_position + 1]) {
-                    case '\\':
-                    case L'\'': {
-                        to_append_or_none = input[input_position + 1];
-                        input_position += 1;  // skip over the backslash
-                        break;
-                    }
-                    case L'\0': {
-                        if (!allow_incomplete) {
-                            errored = true;
-                        } else {
-                            // PCA this line had the following cryptic comment: 'We may ever escape
-                            // a NULL character, but still appending a \ in case I am wrong.' Not
-                            // sure what it means or the importance of this.
-                            input_position += 1; /* Skip over the backslash */
-                            to_append_or_none = L'\\';
-                        }
-                        break;
-                    }
-                    default: {
-                        // Literal backslash that doesn't escape anything! Leave things alone; we'll
-                        // append the backslash itself.
-                        break;
-                    }
-                }
-            } else if (c == L'\'') {
-                to_append_or_none =
-                    unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
-                mode = mode_unquoted;
-            }
-        } else if (mode == mode_double_quotes) {
-            switch (c) {
-                case L'"': {
-                    mode = mode_unquoted;
-                    to_append_or_none =
-                        unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
-                    break;
-                }
-                case '\\': {
-                    switch (input[input_position + 1]) {
-                        case L'\0': {
-                            if (!allow_incomplete) {
-                                errored = true;
-                            } else {
-                                to_append_or_none = L'\0';
-                            }
-                            break;
-                        }
-                        case '\\':
-                        case L'$':
-                        case '"': {
-                            to_append_or_none = input[input_position + 1];
-                            input_position += 1; /* Skip over the backslash */
-                            break;
-                        }
-                        case '\n': {
-                            /* Swallow newline */
-                            to_append_or_none = none();
-                            input_position += 1; /* Skip over the backslash */
-                            break;
-                        }
-                        default: {
-                            /* Literal backslash that doesn't escape anything! Leave things alone;
-                             * we'll append the backslash itself */
-                            break;
-                        }
-                    }
-                    break;
-                }
-                case '$': {
-                    if (unescape_special) {
-                        to_append_or_none = VARIABLE_EXPAND_SINGLE;
-                        vars_or_seps.push_back(input_position);
-                    }
-                    break;
-                }
-                default: {
-                    break;
-                }
-            }
-        }
-
-        // Now maybe append the char.
-        if (to_append_or_none.has_value()) {
-            result.push_back(*to_append_or_none);
-        }
-    }
-
-    // Return the string by reference, and then success.
-    if (!errored) {
-        *output_str = std::move(result);
-    }
-    return !errored;
-}
-
 bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special) {
    assert(str != nullptr);
    wcstring output;
-    bool success = unescape_string_internal(str->c_str(), str->size(), &output, escape_special);
-    if (success) {
-        *str = std::move(output);
+    if (auto unescaped = unescape_string(str->c_str(), str->size(), escape_special)) {
+        *str = std::move(*unescaped);  // steal the temporary's buffer instead of copying it
+        return true;
    }
-    return success;
+    return false;
 }

-bool unescape_string(const wchar_t *input, size_t len, wcstring *output,
-                     unescape_flags_t escape_special, escape_string_style_t style) {
-    bool success = false;
-    switch (style) {
-        case STRING_STYLE_SCRIPT: {
-            success = unescape_string_internal(input, len, output, escape_special);
-            break;
-        }
-        case STRING_STYLE_URL: {
-            success = unescape_string_url(input, output);
-            break;
-        }
-        case STRING_STYLE_VAR: {
-            success = unescape_string_var(input, output);
-            break;
-        }
-        case STRING_STYLE_REGEX: {
-            // unescaping PCRE2 is not needed/supported, the PCRE2 engine is responsible for that
-            success = false;
-            break;
-        }
-    }
-    if (!success) output->clear();
-    return success;
+std::unique_ptr<wcstring> unescape_string(const wchar_t *input, unescape_flags_t escape_special,
+                                          escape_string_style_t style) {
+    return unescape_string(input, std::wcslen(input), escape_special, style);
 }

-bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special,
-                     escape_string_style_t style) {
-    return unescape_string(input, std::wcslen(input), output, escape_special, style);
+std::unique_ptr<wcstring> unescape_string(const wchar_t *input, size_t len,
+                                          unescape_flags_t escape_special,
+                                          escape_string_style_t style) {
+    return rust_unescape_string(input, len, escape_special, style);
 }

-bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special,
-                     escape_string_style_t style) {
-    return unescape_string(input.c_str(), input.size(), output, escape_special, style);
+std::unique_ptr<wcstring> unescape_string(const wcstring &input, unescape_flags_t escape_special,
+                                          escape_string_style_t style) {
+    return unescape_string(input.c_str(), input.size(), escape_special, style);
 }

 wcstring format_size(long long sz) {
--- a/src/common.h
+++ b/src/common.h
@ -521,15 +521,15 @@ bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special);

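-/// Reverse the effects of calling `escape_string`. Returns the unescaped value by reference. On
-/// failure, the output is set to an empty string.
+/// Reverse the effects of calling `escape_string`. Returns the unescaped string on success, or
+/// nullptr on failure.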
-bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special,
-                     escape_string_style_t style = STRING_STYLE_SCRIPT);
+std::unique_ptr<wcstring> unescape_string(const wchar_t *input, unescape_flags_t escape_special,
+                                          escape_string_style_t style = STRING_STYLE_SCRIPT);

-bool unescape_string(const wchar_t *input, size_t len, wcstring *output,
-                     unescape_flags_t escape_special,
-                     escape_string_style_t style = STRING_STYLE_SCRIPT);
+std::unique_ptr<wcstring> unescape_string(const wchar_t *input, size_t len,
+                                          unescape_flags_t escape_special,
+                                          escape_string_style_t style = STRING_STYLE_SCRIPT);

-bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special,
-                     escape_string_style_t style = STRING_STYLE_SCRIPT);
+std::unique_ptr<wcstring> unescape_string(const wcstring &input, unescape_flags_t escape_special,
+                                          escape_string_style_t style = STRING_STYLE_SCRIPT);

 /// Write the given paragraph of output, redoing linebreaks to fit \p termsize.
 wcstring reformat_for_screen(const wcstring &msg, const termsize_t &termsize);
--- a/src/complete.cpp
+++ b/src/complete.cpp
@ -1469,8 +1469,8 @@ void completer_t::escape_opening_brackets(const wcstring &argument) {
    if (!have_unquoted_unescaped_bracket) return;
    // Since completion_apply_to_command_line will escape the completion, we need to provide an
    // unescaped version.
-    wcstring unescaped_argument;
-    if (!unescape_string(argument, &unescaped_argument, UNESCAPE_INCOMPLETE)) return;
+    auto unescaped_argument = unescape_string(argument, UNESCAPE_INCOMPLETE);
+    if (!unescaped_argument) return;
    for (completion_t &comp : completions.get_list()) {
        if (comp.flags & COMPLETE_REPLACES_TOKEN) continue;
        comp.flags |= COMPLETE_REPLACES_TOKEN;
@ -1482,7 +1482,7 @@ void completer_t::escape_opening_brackets(const wcstring &argument) {
        if (comp.flags & COMPLETE_DONT_ESCAPE) {
            FLOG(warning, L"unexpected completion flag");
        }
-        comp.completion = unescaped_argument + comp.completion;
+        comp.completion = *unescaped_argument + comp.completion;
    }
 }

@ -1494,9 +1494,8 @@ void completer_t::mark_completions_duplicating_arguments(const wcstring &cmd,
    wcstring_list_t arg_strs;
    for (const auto &arg : args) {
        wcstring argstr = *arg.get_source(cmd);
-        wcstring argstr_unesc;
-        if (unescape_string(argstr, &argstr_unesc, UNESCAPE_DEFAULT)) {
-            arg_strs.push_back(std::move(argstr_unesc));
+        if (auto argstr_unesc = unescape_string(argstr, UNESCAPE_DEFAULT)) {
+            arg_strs.push_back(std::move(*argstr_unesc));
        }
    }
    std::sort(arg_strs.begin(), arg_strs.end());
@ -1668,11 +1667,14 @@ void completer_t::perform_for_commandline(wcstring cmdline) {
        source_range_t command_range = {cmd_tok.offset - bias, cmd_tok.length};

        wcstring exp_command = *cmd_tok.get_source(cmdline);
-        bool unescaped =
-            expand_command_token(ctx, exp_command) &&
-            unescape_string(previous_argument, &arg_data.previous_argument, UNESCAPE_DEFAULT) &&
-            unescape_string(current_argument, &arg_data.current_argument, UNESCAPE_INCOMPLETE);
+        std::unique_ptr<wcstring> prev;
+        std::unique_ptr<wcstring> cur;
+        bool unescaped = expand_command_token(ctx, exp_command) &&
+                         (prev = unescape_string(previous_argument, UNESCAPE_DEFAULT)) &&
+                         (cur = unescape_string(current_argument, UNESCAPE_INCOMPLETE));
        if (unescaped) {
+            arg_data.previous_argument = std::move(*prev);  // prev/cur are not read again
+            arg_data.current_argument = std::move(*cur);
            // Have to walk over the command and its entire wrap chain. If any command
            // disables do_file, then they all do.
            walk_wrap_chain(exp_command, *effective_cmdline, command_range, &arg_data);
--- a/src/env.cpp
+++ b/src/env.cpp
@ -472,11 +472,11 @@ void env_init(const struct config_paths_t *paths, bool do_uvars, bool default_pa
        for (const auto &kv : table) {
            if (string_prefixes_string(prefix, kv.first)) {
                wcstring escaped_name = kv.first.substr(prefix_len);
-                wcstring name;
-                if (unescape_string(escaped_name, &name, unescape_flags_t{}, STRING_STYLE_VAR)) {
-                    wcstring key = name;
+                if (auto name =
+                        unescape_string(escaped_name, unescape_flags_t{}, STRING_STYLE_VAR)) {
+                    wcstring key = *name;
                    wcstring replacement = join_strings(kv.second.as_list(), L' ');
-                    abbrs->add(std::move(name), std::move(key), std::move(replacement),
+                    abbrs->add(std::move(*name), std::move(key), std::move(replacement),
                               abbrs_position_t::command, from_universal);
                }
            }
--- a/src/env_universal_common.cpp
+++ b/src/env_universal_common.cpp
@ -800,9 +800,11 @@ bool env_universal_t::populate_1_variable(const wchar_t *input, env_var_t::env_v

    // Parse out the value into storage, and decode it into a variable.
    storage->clear();
-    if (!unescape_string(colon + 1, storage, 0)) {
+    auto unescaped = unescape_string(colon + 1, 0);
+    if (!unescaped) {
        return false;
    }
+    *storage = std::move(*unescaped);  // hand the buffer to the caller's storage without copying
    env_var_t var{decode_serialized(*storage), flags};

    // Parse out the key and write into the map.
--- a/src/expand.cpp
+++ b/src/expand.cpp
@ -971,7 +971,8 @@ expand_result_t expander_t::stage_variables(wcstring input, completion_receiver_
    // We accept incomplete strings here, since complete uses expand_string to expand incomplete
    // strings from the commandline.
    wcstring next;
-    unescape_string(input, &next, UNESCAPE_SPECIAL | UNESCAPE_INCOMPLETE);
+    if (auto unescaped = unescape_string(input, UNESCAPE_SPECIAL | UNESCAPE_INCOMPLETE))
+        next = std::move(*unescaped);  // on failure `next` stays empty, as before

    if (flags & expand_flag::skip_variables) {
        for (auto &i : next) {
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@ -376,27 +376,26 @@ static void test_unescape_sane() {
        {L"\"abcd\\n\"", L"abcd\\n"}, {L"\\143", L"c"},
        {L"'\\143'", L"\\143"},       {L"\\n", L"\n"}  // \n normally becomes newline
    };
-    wcstring output;
    for (const auto &test : tests) {
-        bool ret = unescape_string(test.input, &output, UNESCAPE_DEFAULT);
-        if (!ret) {
+        auto output = unescape_string(test.input, UNESCAPE_DEFAULT);
+        if (!output) {
            err(L"Failed to unescape '%ls'\n", test.input);
-        } else if (output != test.expected) {
+        } else if (*output != test.expected) {
            err(L"In unescaping '%ls', expected '%ls' but got '%ls'\n", test.input, test.expected,
-                output.c_str());
+                output->c_str());
        }
    }

    // Test for overflow.
-    if (unescape_string(L"echo \\UFFFFFF", &output, UNESCAPE_DEFAULT)) {
+    if (unescape_string(L"echo \\UFFFFFF", UNESCAPE_DEFAULT)) {
        err(L"Should not have been able to unescape \\UFFFFFF\n");
    }
-    if (unescape_string(L"echo \\U110000", &output, UNESCAPE_DEFAULT)) {
+    if (unescape_string(L"echo \\U110000", UNESCAPE_DEFAULT)) {
        err(L"Should not have been able to unescape \\U110000\n");
    }
 #if WCHAR_MAX != 0xffff
    // TODO: Make this work on MS Windows.
-    if (!unescape_string(L"echo \\U10FFFF", &output, UNESCAPE_DEFAULT)) {
+    if (!unescape_string(L"echo \\U10FFFF", UNESCAPE_DEFAULT)) {
        err(L"Should have been able to unescape \\U10FFFF\n");
    }
 #endif
@ -408,8 +407,6 @@ static void test_escape_crazy() {
    say(L"Testing escaping and unescaping");
    wcstring random_string;
    wcstring escaped_string;
-    wcstring unescaped_string;
-    bool unescaped_success;
    for (size_t i = 0; i < ESCAPE_TEST_COUNT; i++) {
        random_string.clear();
        while (random() % ESCAPE_TEST_LENGTH) {
@ -417,14 +414,14 @@ static void test_escape_crazy() {
        }

        escaped_string = escape_string(random_string);
-        unescaped_success = unescape_string(escaped_string, &unescaped_string, UNESCAPE_DEFAULT);
+        auto unescaped_string = unescape_string(escaped_string, UNESCAPE_DEFAULT);

-        if (!unescaped_success) {
+        if (!unescaped_string) {
            err(L"Failed to unescape string <%ls>", escaped_string.c_str());
            break;
-        } else if (unescaped_string != random_string) {
+        } else if (*unescaped_string != random_string) {
            err(L"Escaped and then unescaped string '%ls', but got back a different string '%ls'",
-                random_string.c_str(), unescaped_string.c_str());
+                random_string.c_str(), unescaped_string->c_str());
            break;
        }
    }
@ -432,12 +429,12 @@ static void test_escape_crazy() {
    // Verify that ESCAPE_NO_PRINTABLES also escapes backslashes so we don't regress on issue #3892.
    random_string = L"line 1\\n\nline 2";
    escaped_string = escape_string(random_string, ESCAPE_NO_PRINTABLES | ESCAPE_NO_QUOTED);
-    unescaped_success = unescape_string(escaped_string, &unescaped_string, UNESCAPE_DEFAULT);
-    if (!unescaped_success) {
+    auto unescaped_string = unescape_string(escaped_string, UNESCAPE_DEFAULT);
+    if (!unescaped_string) {
        err(L"Failed to unescape string <%ls>", escaped_string.c_str());
-    } else if (unescaped_string != random_string) {
+    } else if (*unescaped_string != random_string) {
        err(L"Escaped and then unescaped string '%ls', but got back a different string '%ls'",
-            random_string.c_str(), unescaped_string.c_str());
+            random_string.c_str(), unescaped_string->c_str());
    }
 }

--- a/src/parse_util.cpp
+++ b/src/parse_util.cpp
@ -960,8 +960,8 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen
    parser_test_error_bits_t err = 0;

    auto check_subtoken = [&arg_src, &out_errors, source_start](size_t begin, size_t end) -> int {
-        wcstring unesc;
-        if (!unescape_string(arg_src.c_str() + begin, end - begin, &unesc, UNESCAPE_SPECIAL)) {
+        auto maybe_unesc = unescape_string(arg_src.c_str() + begin, end - begin, UNESCAPE_SPECIAL);
+        if (!maybe_unesc) {
            if (out_errors) {
                const wchar_t *fmt = L"Invalid token '%ls'";
                if (arg_src.length() == 2 && arg_src[0] == L'\\' &&
@ -975,6 +975,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen
            }
            return 1;
        }
+        const wcstring &unesc = *maybe_unesc;

        parser_test_error_bits_t err = 0;
        // Check for invalid variable expansions.
--- a/src/wildcard.cpp
+++ b/src/wildcard.cpp
@ -60,7 +60,9 @@ bool wildcard_has(const wchar_t *str, size_t len) {
        return false;
    }
    wcstring unescaped;
-    unescape_string(str, len, &unescaped, UNESCAPE_SPECIAL);
+    if (auto tmp = unescape_string(str, len, UNESCAPE_SPECIAL)) {  // (ptr, len) overload
+        unescaped = std::move(*tmp);
+    }
    return wildcard_has_internal(unescaped);
 }

--- a/tests/checks/basic.fish
+++ b/tests/checks/basic.fish
@ -158,6 +158,9 @@ echo -e 'abc\x211def'
 #CHECK: abc!def
 #CHECK: abc!1def

+echo \UDE01
+#CHECK: �
+
 # Comments allowed in between lines (#1987)
 echo before comment \
  # comment