Slightly refactor unescape_string_xxx() functions

* Since we already have an allocation of length wstr.len(), it's probably better to allocate the result (whose length is always less than or equal to the input length) up-front rather than risk thrashing the Vec allocation, * There's no need to compare c2 against '\0' since that will just cause to_digit(16) to return None anyway, * Our convert_hex_digit() specialization of to_digit(16) that only checks capital letters A-F without also checking lowercase a-f isn't significantly faster than just using to_digit(16), and we already assert that the input *wasn't* a lowercase a-f before making the call, so there's no point in using a special function to handle that.
2024-11-25 09:39:52 +08:00 · 2023-04-26 15:18:27 -05:00 · 2023-04-26 15:18:27 -05:00 · 67124dfb11
commit 67124dfb11
parent c55ec59e22
1 changed files with 11 additions and 30 deletions
--- a/fish-rust/src/common.rs
+++ b/fish-rust/src/common.rs
@ -687,10 +687,10 @@ fn unescape_string_internal(input: &wstr, flags: UnescapeFlags) -> Option<WStrin
    Some(result)
 }

-/// Reverse the effects of `escape_string_url()`. By definition the string has consist of just ASCII
-/// chars.
+/// Reverse the effects of `escape_string_url()`. By definition the input should consist of just
+/// ASCII chars.
 fn unescape_string_url(input: &wstr) -> Option<WString> {
-    let mut result: Vec<u8> = vec![];
+    let mut result: Vec<u8> = Vec::with_capacity(input.len());
    let mut i = 0;
    while i < input.len() {
        let c = input.char_at(i);
@ -705,12 +705,9 @@ fn unescape_string_url(input: &wstr) -> Option<WString> {
                result.push(b'%');
                i += 1;
            } else {
-                let c2 = input.char_at(i + 2);
-                if c2 == '\0' {
-                    return None; // string ended prematurely
-                }
                let d1 = c1.to_digit(16)?;
-                let d2 = c2.to_digit(16)?;
+                let c2 = input.char_at(i + 2);
+                let d2 = c2.to_digit(16)?; // also fails if '\0' i.e. premature end
                result.push((16 * d1 + d2) as u8);
                i += 2;
            }
@ -723,10 +720,10 @@ fn unescape_string_url(input: &wstr) -> Option<WString> {
    Some(str2wcstring(&result))
 }

-/// Reverse the effects of `escape_string_var()`. By definition the string has consist of just ASCII
-/// chars.
+/// Reverse the effects of `escape_string_var()`. By definition the string should consist of just
+/// ASCII chars.
 fn unescape_string_var(input: &wstr) -> Option<WString> {
-    let mut result: Vec<u8> = vec![];
+    let mut result: Vec<u8> = Vec::with_capacity(input.len());
    let mut prev_was_hex_encoded = false;
    let mut i = 0;
    while i < input.len() {
@ -741,17 +738,13 @@ fn unescape_string_var(input: &wstr) -> Option<WString> {
                    break;
                }
                return None; // found unexpected escape char at end of string
-            }
-            if c1 == '_' {
+            } else if c1 == '_' {
                result.push(b'_');
                i += 1;
            } else if ('0'..='9').contains(&c1) || ('A'..='F').contains(&c1) {
+                let d1 = c1.to_digit(16)?;
                let c2 = input.char_at(i + 2);
-                if c2 == '\0' {
-                    return None; // string ended prematurely
-                }
-                let d1 = convert_hex_digit(c1)?;
-                let d2 = convert_hex_digit(c2)?;
+                let d2 = c2.to_digit(16)?; // also fails if '\0' i.e. premature end
                result.push((16 * d1 + d2) as u8);
                i += 2;
                prev_was_hex_encoded = true;
@ -946,18 +939,6 @@ pub fn read_unquoted_escape(
    Some(in_pos)
 }

-/// This is a specialization of `char::to_digit()` that only handles base 16 and only uppercase.
-fn convert_hex_digit(d: char) -> Option<u32> {
-    let val = if ('0'..='9').contains(&d) {
-        u32::from(d) - u32::from('0')
-    } else if ('A'..='Z').contains(&d) {
-        10 + u32::from(d) - u32::from('A')
-    } else {
-        return None;
-    };
-    Some(val)
-}
-
 pub const fn char_offset(base: char, offset: u32) -> char {
    match char::from_u32(base as u32 + offset) {
        Some(c) => c,