Large cleanup and refactoring of unescape() function.

2025-03-21 18:25:14 +08:00 · 2013-11-24 22:57:49 -08:00 · 2013-11-24 22:57:49 -08:00 · 9f6223311e
commit 9f6223311e
parent 90b78326d3
13 changed files with 633 additions and 117 deletions
--- a/builtin_complete.cpp
+++ b/builtin_complete.cpp
@ -423,8 +423,8 @@ static int builtin_complete(parser_t &parser, wchar_t **argv)
            case 'p':
            case 'c':
            {
-                wcstring tmp = woptarg;
-                if (unescape_string(tmp, 1))
+                wcstring tmp;
+                if (unescape_string(woptarg, &tmp, UNESCAPE_SPECIAL))
                {
                    if (opt=='p')
                        path.push_back(tmp);
--- a/common.cpp
+++ b/common.cpp
@ -72,6 +72,7 @@ parts of fish.
 #include "util.cpp"
 #include "fallback.cpp"

+#define NOT_A_WCHAR WEOF

 struct termios shell_modes;

@ -1125,6 +1126,513 @@ wcstring escape_string(const wcstring &in, escape_flags_t flags)
    return result;
 }

+/* Returns the final character of str, or NOT_A_WCHAR when str is empty */
+static wint_t string_last_char(const wcstring &str)
+{
+    if (str.empty())
+    {
+        return NOT_A_WCHAR;
+    }
+    return str.at(str.size() - 1);
+}
+
+/*
+   Given a null terminated string starting with a backslash, read the escape as if it is unquoted, appending the decoded character (if any) to result.
+
+   input: points at the backslash that starts the escape.
+   result: receives the decoded character. Nothing is appended for backslash-newline or for an allowed incomplete escape.
+   allow_incomplete: if true, a backslash directly followed by the terminating null is not an error; only the backslash is consumed.
+   unescape_special: if true, an INTERNAL_SEPARATOR is appended before a character that is escaped literally (the default case below).
+
+   Return the number of characters consumed (including the backslash), or 0 on error.
+*/
+static size_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete, bool unescape_special)
+{
+    if (input[0] != L'\\')
+    {
+        // not an escape
+        return 0;
+    }
+
+    /* Here's the character we'll ultimately append. Note that L'\0' is a valid thing to append. */
+    wchar_t result_char = NOT_A_WCHAR;
+
+    bool errored = false;
+    size_t in_pos = 1; //in_pos always tracks the next character to read (and therefore the number of characters read so far)
+    const wchar_t c = input[in_pos++];
+    switch (c)
+    {
+
+        /* A null character after a backslash is an error */
+        case L'\0':
+        {
+            /* Adjust in_pos to only include the backslash */
+            assert(in_pos > 0);
+            in_pos--;
+
+            /* It's an error, unless we're allowing incomplete escapes */
+            if (! allow_incomplete)
+                errored = true;
+            break;
+        }
+
+        /* Numeric escape sequences. No prefix means octal escape, otherwise hexadecimal. */
+        case L'0':
+        case L'1':
+        case L'2':
+        case L'3':
+        case L'4':
+        case L'5':
+        case L'6':
+        case L'7':
+        case L'u':
+        case L'U':
+        case L'x':
+        case L'X':
+        {
+            /* res accumulates the value; chars is the maximum number of digits to read; the initial values are the ones \X uses (two hex digits) */
+            long long res=0;
+            size_t chars=2;
+            int base=16;
+
+            bool byte_literal = false;
+            wchar_t max_val = ASCII_MAX;
+
+            switch (c)
+            {
+                case L'u':
+                {
+                    chars=4;
+                    max_val = UCS2_MAX;
+                    break;
+                }
+
+                case L'U':
+                {
+                    chars=8;
+                    max_val = WCHAR_MAX;
+                    break;
+                }
+
+                case L'x':
+                {
+                    chars = 2;
+                    max_val = ASCII_MAX;
+                    break;
+                }
+
+                case L'X':
+                {
+                    byte_literal = true;
+                    max_val = BYTE_MAX;
+                    break;
+                }
+
+                default:
+                {
+                    base=8;
+                    chars=3;
+                    // note that in_pos currently is just after the first post-backslash character; we want to start our escape from there
+                    assert(in_pos > 0);
+                    in_pos--;
+                    break;
+                }
+            }
+
+            /* Read at most 'chars' digits; a negative result from convert_digit ends the number early */
+            /* NOTE(review): if no digit follows the prefix (e.g. "\x" then a non-hex character), res stays 0 and a NUL (or ENCODE_DIRECT_BASE for \X) is appended rather than reporting an error - confirm this is intended */
+            for (size_t i=0; i<chars; i++)
+            {
+                long d = convert_digit(input[in_pos],base);
+                if (d < 0)
+                {
+                    break;
+                }
+
+                res=(res*base)+d;
+                in_pos++;
+            }
+
+            /* Values above the limit for this escape kind are errors; \X values are offset by ENCODE_DIRECT_BASE */
+            if (res <= max_val)
+            {
+                result_char = (wchar_t)((byte_literal ? ENCODE_DIRECT_BASE : 0)+res);
+            }
+            else
+            {
+                errored = true;
+            }
+
+            break;
+        }
+
+        /* \a means bell (alert) */
+        case L'a':
+        {
+            result_char = L'\a';
+            break;
+        }
+
+        /* \b means backspace */
+        case L'b':
+        {
+            result_char = L'\b';
+            break;
+        }
+
+        /* \cX means control sequence X */
+        case L'c':
+        {
+            /* The character after 'c' selects the control code: 'a'/'A' maps to 1, 'b'/'B' to 2, and so on. A terminating null falls into the error branch. */
+            /* NOTE(review): the accepted ranges extend 32 characters past 'a' and 'A', i.e. beyond 'z' and 'Z' - confirm whether that is deliberate */
+            const wchar_t sequence_char = input[in_pos++];
+            if (sequence_char >= L'a' && sequence_char <= (L'a'+32))
+            {
+                result_char = sequence_char-L'a'+1;
+            }
+            else if (sequence_char >= L'A' && sequence_char <= (L'A'+32))
+            {
+                result_char = sequence_char-L'A'+1;
+            }
+            else
+            {
+                errored = true;
+            }
+            break;
+        }
+
+        /* \e means escape (0x1b) */
+        case L'e':
+        {
+            result_char = L'\x1b';
+            break;
+        }
+
+        /*
+          \f means form feed
+        */
+        case L'f':
+        {
+            result_char = L'\f';
+            break;
+        }
+
+        /*
+          \n means newline
+        */
+        case L'n':
+        {
+            result_char = L'\n';
+            break;
+        }
+
+        /*
+          \r means carriage return
+        */
+        case L'r':
+        {
+            result_char = L'\r';
+            break;
+        }
+
+        /*
+          \t means tab
+         */
+        case L't':
+        {
+            result_char = L'\t';
+            break;
+        }
+
+        /*
+          \v means vertical tab
+        */
+        case L'v':
+        {
+            result_char = L'\v';
+            break;
+        }
+
+        /* If a backslash is followed by an actual newline, swallow them both */
+        case L'\n':
+        {
+            result_char = NOT_A_WCHAR;
+            break;
+        }
+
+        /* Any other character is taken literally, preceded by an INTERNAL_SEPARATOR when unescape_special is set */
+        default:
+        {
+            if (unescape_special)
+                result->push_back(INTERNAL_SEPARATOR);
+            result_char = c;
+            break;
+        }
+    }
+
+    /* Append the decoded character unless we failed or the escape produces no output */
+    if (! errored && result_char != NOT_A_WCHAR)
+    {
+        result->push_back(result_char);
+    }
+    return errored ? 0 : in_pos;
+}
+
+/*
+   Unescapes input (input_len characters) into output_str.
+
+   input must be null terminated at or before input[input_len]: the quoted modes peek at input[input_position + 1] and the unquoted escape reader scans forward from a backslash.
+
+   flags:
+     UNESCAPE_SPECIAL    - replace special syntax (leading ~ and %, *, ?, $, braces, commas inside braces) with the corresponding marker constants, and emit INTERNAL_SEPARATOR for quotes and literal escapes.
+     UNESCAPE_INCOMPLETE - tolerate a backslash at the very end of the input.
+
+   Returns true on success, in which case output_str receives the result. On failure output_str is left untouched and false is returned. Note that an unterminated quote is not treated as an error here.
+*/
+static bool unescape_string_internal(const wchar_t * const input, const size_t input_len, wcstring *output_str, unescape_flags_t flags)
+{
+    /* Set up result string, which we'll swap with the output on success */
+    wcstring result;
+    result.reserve(input_len);
+
+    const bool unescape_special = !!(flags & UNESCAPE_SPECIAL);
+    const bool allow_incomplete = !!(flags & UNESCAPE_INCOMPLETE);
+
+    /* Nesting depth of unquoted braces; only maintained when unescape_special is set */
+    int bracket_count = 0;
+
+    bool errored = false;
+    enum
+    {
+        mode_unquoted,
+        mode_single_quotes,
+        mode_double_quotes
+    } mode = mode_unquoted;
+
+    for (size_t input_position = 0; input_position < input_len && ! errored; input_position++)
+    {
+        const wchar_t c = input[input_position];
+        /* Here's the character we'll append to result, or NOT_A_WCHAR to suppress it */
+        wchar_t to_append = c;
+        if (mode == mode_unquoted)
+        {
+
+            switch (c)
+            {
+                case L'\\':
+                {
+                    /* Backslashes (escapes) are complicated and may result in errors, or appending INTERNAL_SEPARATORs, so we have to handle them specially */
+                    size_t escape_chars = read_unquoted_escape(input + input_position, &result, allow_incomplete, unescape_special);
+                    if (escape_chars == 0)
+                    {
+                        /* A 0 return indicates an error */
+                        errored = true;
+                    }
+                    else
+                    {
+                        /* Skip over the characters we read, minus one because the outer loop will increment it */
+                        assert(escape_chars > 0);
+                        input_position += escape_chars - 1;
+                    }
+                    /* We've already appended, don't append anything else */
+                    to_append = NOT_A_WCHAR;
+                    break;
+                }
+
+                case L'~':
+                {
+                    /* Only a tilde at the very start of the string is special */
+                    if (unescape_special && (input_position == 0))
+                    {
+                        to_append = HOME_DIRECTORY;
+                    }
+                    break;
+                }
+
+                case L'%':
+                {
+                    /* Only a percent sign at the very start of the string is special */
+                    if (unescape_special && (input_position == 0))
+                    {
+                        to_append = PROCESS_EXPAND;
+                    }
+                    break;
+                }
+
+                case L'*':
+                {
+                    if (unescape_special)
+                    {
+                        /* In general, this is ANY_STRING. But as a hack, if the last appended char is ANY_STRING, delete the last char and store ANY_STRING_RECURSIVE to reflect the fact that ** is the recursive wildcard. */
+                        if (string_last_char(result) == ANY_STRING)
+                        {
+                            assert(result.size() > 0);
+                            result.resize(result.size() - 1);
+                            to_append = ANY_STRING_RECURSIVE;
+                        }
+                        else
+                        {
+                            to_append = ANY_STRING;
+                        }
+                    }
+                    break;
+                }
+
+                case L'?':
+                {
+                    if (unescape_special)
+                    {
+                        to_append = ANY_CHAR;
+                    }
+                    break;
+                }
+
+                case L'$':
+                {
+                    if (unescape_special)
+                    {
+                        to_append = VARIABLE_EXPAND;
+                    }
+                    break;
+                }
+
+                case L'{':
+                {
+                    if (unescape_special)
+                    {
+                        bracket_count++;
+                        to_append = BRACKET_BEGIN;
+                    }
+                    break;
+                }
+
+                case L'}':
+                {
+                    if (unescape_special)
+                    {
+                        bracket_count--;
+                        to_append = BRACKET_END;
+                    }
+                    break;
+                }
+
+                case L',':
+                {
+                    /* If the last character was a separator, then treat this as a literal comma */
+                    if (unescape_special && bracket_count > 0 && string_last_char(result) != BRACKET_SEP)
+                    {
+                        to_append = BRACKET_SEP;
+                    }
+                    break;
+                }
+
+                case L'\'':
+                {
+                    mode = mode_single_quotes;
+                    to_append = unescape_special ? INTERNAL_SEPARATOR : NOT_A_WCHAR;
+                    break;
+                }
+
+                case L'\"':
+                {
+                    mode = mode_double_quotes;
+                    to_append = unescape_special ? INTERNAL_SEPARATOR : NOT_A_WCHAR;
+                    break;
+                }
+            }
+        }
+        else if (mode == mode_single_quotes)
+        {
+            if (c == L'\\')
+            {
+                /* A backslash may or may not escape something in single quotes */
+                switch (input[input_position + 1])
+                {
+                    case '\\':
+                    case L'\'':
+                    {
+                        to_append = input[input_position + 1];
+                        input_position += 1; /* Skip over the backslash */
+                        break;
+                    }
+
+                    case L'\0':
+                    {
+                        if (!allow_incomplete)
+                        {
+                            errored = true;
+                        }
+                        else
+                        {
+                            // PCA this line had the following cryptic comment:
+                            // 'We may ever escape a NULL character, but still appending a \ in case I am wrong.'
+                            // Not sure what it means or the importance of this
+                            input_position += 1; /* Skip over the backslash */
+                            to_append = L'\\';
+                        }
+                    }
+                    break;
+
+                    default:
+                    {
+                        /* Literal backslash that doesn't escape anything! Leave things alone; we'll append the backslash itself */
+                        break;
+                    }
+                }
+            }
+            else if (c == L'\'')
+            {
+                to_append = unescape_special ? INTERNAL_SEPARATOR : NOT_A_WCHAR;
+                mode = mode_unquoted;
+            }
+        }
+        else if (mode == mode_double_quotes)
+        {
+            switch (c)
+            {
+                case L'"':
+                {
+                    mode = mode_unquoted;
+                    to_append = unescape_special ? INTERNAL_SEPARATOR : NOT_A_WCHAR;
+                    break;
+                }
+
+                case '\\':
+                {
+                    switch (input[input_position + 1])
+                    {
+                        case L'\0':
+                        {
+                            if (!allow_incomplete)
+                            {
+                                errored = true;
+                            }
+                            else
+                            {
+                                to_append = L'\0';
+                            }
+                        }
+                        break;
+
+                        case '\\':
+                        case L'$':
+                        case '"':
+                        {
+                            to_append = input[input_position + 1];
+                            input_position += 1; /* Skip over the backslash */
+                            break;
+                        }
+
+                        case '\n':
+                        {
+                            /* Swallow both the backslash and the newline. Without advancing past the newline here, the next loop iteration would append it as an ordinary character. */
+                            to_append = NOT_A_WCHAR;
+                            input_position += 1; /* Skip over the newline */
+                            break;
+                        }
+
+                        default:
+                        {
+                            /* Literal backslash that doesn't escape anything! Leave things alone; we'll append the backslash itself */
+                            break;
+                        }
+                    }
+                    break;
+                }
+
+                case '$':
+                {
+                    if (unescape_special)
+                    {
+                        to_append = VARIABLE_EXPAND_SINGLE;
+                    }
+                    break;
+                }
+
+            }
+        }
+
+        /* Now maybe append the char */
+        if (to_append != NOT_A_WCHAR)
+        {
+            result.push_back(to_append);
+        }
+    }
+
+    /* Return the string by reference, and then success */
+    if (! errored)
+    {
+        output_str->swap(result);
+    }
+    return ! errored;
+}
+
 wchar_t *unescape(const wchar_t * orig, int flags)
 {
    int out_pos;
@ -1681,19 +2189,33 @@ wchar_t *unescape(const wchar_t * orig, int flags)
    return in;
 }

+/* Unescapes *str in place. str must not be NULL. On success *str is replaced by the unescaped text and true is returned; on failure *str is left as it was and false is returned. */
-bool unescape_string(wcstring &str, int escape_special)
+bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special)
 {
-    bool success = false;
-    wchar_t *result = unescape(str.c_str(), escape_special);
-    if (result)
+    assert(str != NULL);
+    wcstring output;
+    bool success = unescape_string_internal(str->c_str(), str->size(), &output, escape_special);
+    if (success)
    {
-        str.replace(str.begin(), str.end(), result);
-        free(result);
-        success = true;
+        str->swap(output);
    }
    return success;
 }

+/* Unescapes the null terminated string input into *output. Returns true on success; on failure *output is emptied and false is returned. */
+bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special)
+{
+    if (unescape_string_internal(input, wcslen(input), output, escape_special))
+    {
+        return true;
+    }
+    output->clear();
+    return false;
+}
+
+/* Unescapes input into *output. Returns true on success; on failure *output is emptied and false is returned. */
+bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special)
+{
+    if (unescape_string_internal(input.c_str(), input.size(), output, escape_special))
+    {
+        return true;
+    }
+    output->clear();
+    return false;
+}


 void common_handle_winch(int signal)
--- a/common.h
+++ b/common.h
@ -59,15 +59,19 @@ typedef std::vector<wcstring> wcstring_list_t;
 */
 #define BYTE_MAX 0xffu

-/**
-  Escape special fish syntax characters like the semicolon
- */
-#define UNESCAPE_SPECIAL 1
+/* Flags for the unescape_string functions. Values are bit flags and may be combined with |. */
+enum
+{
+    /* Default behavior */
+    UNESCAPE_DEFAULT = 0,

-/**
-  Allow incomplete escape sequences
- */
-#define UNESCAPE_INCOMPLETE 2
+    /* Convert special fish syntax (wildcards, $, braces, a leading ~ or %) into internal marker characters, and mark quotes and literal escapes with INTERNAL_SEPARATOR */
+    UNESCAPE_SPECIAL = 1 << 0,
+
+    /* Allow incomplete escape sequences, i.e. a backslash at the very end of the input */
+    UNESCAPE_INCOMPLETE = 1 << 1
+};
+typedef unsigned int unescape_flags_t;

 /* Flags for the escape() and escape_string() functions */
 enum
@ -715,16 +719,14 @@ wcstring escape_string(const wcstring &in, escape_flags_t flags);
   character and a few more into constants which are defined in a
   private use area of Unicode. This assumes wchar_t is a unicode
   character set.
-
-   The result must be free()d. The original string is not modified. If
-   an invalid sequence is specified, 0 is returned.
-
 */
-wchar_t *unescape(const wchar_t * in,
-                  int escape_special);

-bool unescape_string(wcstring &str,
-                     int escape_special);
+/** Unescapes a string in-place. A true result indicates the string was unescaped, a false result indicates the string was unmodified. */
+bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special);
+
+/** Unescapes a string, returning the unescaped value by reference. On failure, the output is set to an empty string. */
+bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special);
+bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special);


 /**
--- a/complete.cpp
+++ b/complete.cpp
@ -1982,13 +1982,10 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, completion_
        {
            bool do_file = false;

-            wcstring current_command_unescape = current_command;
-            wcstring prev_token_unescape = prev_token;
-            wcstring current_token_unescape = current_token;
-
-            if (unescape_string(current_command_unescape, 0) &&
-                    unescape_string(prev_token_unescape, 0) &&
-                    unescape_string(current_token_unescape, UNESCAPE_INCOMPLETE))
+            wcstring current_command_unescape, prev_token_unescape, current_token_unescape;
+            if (unescape_string(current_command, &current_command_unescape, UNESCAPE_DEFAULT) &&
+                unescape_string(prev_token, &prev_token_unescape, UNESCAPE_DEFAULT) &&
+                unescape_string(current_token, &current_token_unescape, UNESCAPE_INCOMPLETE))
            {
                do_file = completer.complete_param(current_command_unescape,
                                                   prev_token_unescape,
--- a/env_universal_common.cpp
+++ b/env_universal_common.cpp
@ -601,16 +601,13 @@ static void parse_message(wchar_t *msg,
        tmp = wcschr(name, L':');
        if (tmp)
        {
-            wchar_t *val;
            const wcstring key(name, tmp - name);

-            val = tmp+1;
-            val = unescape(val, 0);
-
-            if (val != NULL)
-                env_universal_common_set(key.c_str(), val, exportv);
-
-            free(val);
+            wcstring val;
+            if (unescape_string(tmp + 1, &val, 0))
+            {
+                env_universal_common_set(key.c_str(), val.c_str(), exportv);
+            }
        }
        else
        {
--- a/expand.cpp
+++ b/expand.cpp
@ -828,7 +828,7 @@ static int expand_pid(const wcstring &instr_with_sep,
 }


-void expand_variable_error(parser_t &parser, const wchar_t *token, size_t token_pos, int error_pos)
+void expand_variable_error(parser_t &parser, const wcstring &token, size_t token_pos, int error_pos)
 {
    size_t stop_pos = token_pos+1;

@ -836,7 +836,7 @@ void expand_variable_error(parser_t &parser, const wchar_t *token, size_t token_
    {
        case BRACKET_BEGIN:
        {
-            wchar_t *cpy = wcsdup(token);
+            wchar_t *cpy = wcsdup(token.c_str());
            *(cpy+token_pos)=0;
            wchar_t *name = &cpy[stop_pos+1];
            wchar_t *end = wcschr(name, BRACKET_END);
@ -1465,26 +1465,6 @@ static int expand_cmdsubst(parser_t &parser, const wcstring &input, std::vector<
    return 1;
 }

-/**
-   Wrapper around unescape funtion. Issues an error() on failiure.
-*/
-__attribute__((unused))
-static wchar_t *expand_unescape(parser_t &parser, const wchar_t * in, int escape_special)
-{
-    wchar_t *res = unescape(in, escape_special);
-    if (!res)
-        parser.error(SYNTAX_ERROR, -1, L"Unexpected end of string");
-    return res;
-}
-
-static wcstring expand_unescape_string(const wcstring &in, int escape_special)
-{
-    wcstring tmp = in;
-    unescape_string(tmp, escape_special);
-    /* Need to detect error here */
-    return tmp;
-}
-
 /* Given that input[0] is HOME_DIRECTORY or tilde (ugh), return the user's name. Return the empty string if it is just a tilde. Also return by reference the index of the first character of the remaining part of the string (e.g. the subsequent slash) */
 static wcstring get_home_directory_name(const wcstring &input, size_t *out_tail_idx)
 {
@ -1669,8 +1649,8 @@ int expand_string(const wcstring &input, std::vector<completion_t> &output, expa
         expand_string to expand incomplete strings from the
         commandline.
         */
-        int unescape_flags = UNESCAPE_SPECIAL | UNESCAPE_INCOMPLETE;
-        wcstring next = expand_unescape_string(in->at(i).completion, unescape_flags);
+        wcstring next;
+        unescape_string(in->at(i).completion, &next, UNESCAPE_SPECIAL | UNESCAPE_INCOMPLETE);

        if (EXPAND_SKIP_VARIABLES & flags)
        {
--- a/expand.h
+++ b/expand.h
@ -199,7 +199,7 @@ int expand_is_clean(const wchar_t *in);
   \param token_pos The position where the expansion begins
   \param error_pos The position on the line to report to the error function.
 */
-void expand_variable_error(parser_t &parser, const wchar_t *token, size_t token_pos, int error_pos);
+void expand_variable_error(parser_t &parser, const wcstring &token, size_t token_pos, int error_pos);

 /**
   Testing function for getting all process names.
--- a/fish_indent.cpp
+++ b/fish_indent.cpp
@ -106,8 +106,8 @@ static int indent(wcstring &out, const wcstring &in, int flags)
                    int next_indent = indent;
                    is_command = 0;

-                    wcstring unesc = last;
-                    unescape_string(unesc, UNESCAPE_SPECIAL);
+                    wcstring unesc;
+                    unescape_string(last, &unesc, UNESCAPE_SPECIAL);

                    if (parser_keywords_is_block(unesc))
                    {
--- a/fish_pager.cpp
+++ b/fish_pager.cpp
@ -1146,7 +1146,7 @@ static void read_array(FILE* file, wcstring_list_t &comp)
        {
            buffer.push_back(0);
            wcstring wcs = str2wcstring(&buffer.at(0));
-            if (unescape_string(wcs, false))
+            if (unescape_string_in_place(&wcs, false))
            {
                comp.push_back(wcs);
            }
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@ -65,8 +65,7 @@
 /**
   The number of tests to run
 */
-//#define ESCAPE_TEST_COUNT 1000000
-#define ESCAPE_TEST_COUNT 10000
+#define ESCAPE_TEST_COUNT 100000
 /**
   The average length of strings to unescape
 */
@ -118,45 +117,65 @@ static void err(const wchar_t *blah, ...)
    wprintf(L"\n");
 }

+/* Test sane escapes: unescape a table of hand-written inputs with default flags and compare each result against its expected value */
+static void test_unescape_sane()
+{
+    const struct test_t {const wchar_t * input; const wchar_t * expected;} tests[] =
+    {
+        {L"abcd", L"abcd"},
+        {L"'abcd'", L"abcd"},
+        {L"'abcd\\n'", L"abcd\\n"}, // \n is not an escape inside single quotes
+        {L"\"abcd\\n\"", L"abcd\\n"}, // ...nor inside double quotes
+        {L"\"ab\\\"cd\"", L"ab\"cd"}, // \" inside double quotes is a literal quote
+        {L"\\143", L"c"},
+        {L"'\\143'", L"\\143"},
+        {L"\\n", L"\n"} // \n normally becomes newline
+    };
+    wcstring output;
+    for (size_t i=0; i < sizeof tests / sizeof *tests; i++)
+    {
+        bool ret = unescape_string(tests[i].input, &output, UNESCAPE_DEFAULT);
+        if (! ret)
+        {
+            err(L"Failed to unescape '%ls'\n", tests[i].input);
+        }
+        else if (output != tests[i].expected)
+        {
+            err(L"In unescaping '%ls', expected '%ls' but got '%ls'\n", tests[i].input, tests[i].expected, output.c_str());
+        }
+    }
+}
+
 /**
   Test the escaping/unescaping code by escaping/unescaping random
   strings and verifying that the original string comes back.
 */
-static void test_escape()
+
+static void test_escape_crazy()
 {
-    int i;
-    wcstring sb;
-
    say(L"Testing escaping and unescaping");
-
-    for (i=0; i<ESCAPE_TEST_COUNT; i++)
+    /* The strings are declared outside the loop and reused on every iteration */
+    wcstring random_string;
+    wcstring escaped_string;
+    wcstring unescaped_string;
+    for (size_t i=0; i<ESCAPE_TEST_COUNT; i++)
    {
-        const wchar_t *o, *e, *u;
-
-        sb.clear();
+        random_string.clear();
+        /* Keep appending characters in the range [1, ESCAPE_TEST_CHAR] until rand() % ESCAPE_TEST_LENGTH comes up zero */
        while (rand() % ESCAPE_TEST_LENGTH)
        {
-            sb.push_back((rand() %ESCAPE_TEST_CHAR) +1);
+            random_string.push_back((rand() % ESCAPE_TEST_CHAR) +1);
        }
-        o = (const wchar_t *)sb.c_str();
-        e = escape(o, 1);
-        u = unescape(e, 0);
-        if (!o || !e || !u)
+
+        /* Round trip: escape, then unescape with default flags; the result must equal the original */
+        escaped_string = escape_string(random_string, ESCAPE_ALL);
+        bool unescaped_success = unescape_string(escaped_string, &unescaped_string, UNESCAPE_DEFAULT);
+
+        if (! unescaped_success)
        {
-            err(L"Escaping cycle of string %ls produced null pointer on %ls", o, e?L"unescaping":L"escaping");
-
+            err(L"Failed to unescape string <%ls>", escaped_string.c_str());
        }
-
-
-        if (wcscmp(o, u))
+        else if (unescaped_string != random_string)
        {
-            err(L"Escaping cycle of string %ls produced different string %ls", o, u);
-
-
+            err(L"Escaped and then unescaped string '%ls', but got back a different string '%ls'", random_string.c_str(), unescaped_string.c_str());
        }
-        free((void *)e);
-        free((void *)u);
-
    }
 }

@ -1836,8 +1855,9 @@ int main(int argc, char **argv)
    reader_init();
    env_init();

+    test_unescape_sane();
+    test_escape_crazy();
    test_format();
-    test_escape();
    test_convert();
    test_convert_nulls();
    test_tok();
--- a/highlight.cpp
+++ b/highlight.cpp
@ -826,8 +826,8 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di
        outSuggestion.clear();

        /* Unescape the parameter */
-        wcstring unescaped_dir = escaped_dir;
-        bool unescaped = unescape_string(unescaped_dir, UNESCAPE_INCOMPLETE);
+        wcstring unescaped_dir;
+        bool unescaped = unescape_string(escaped_dir, &unescaped_dir, UNESCAPE_INCOMPLETE);

        /* Determine the quote type we got from the input directory. */
        wchar_t quote = L'\0';
@ -1404,12 +1404,13 @@ void highlight_shell(const wcstring &buff, std::vector<int> &color, size_t pos,
        if (tok_begin && tok_end)
        {
            wcstring token(tok_begin, tok_end-tok_begin);
-            const wcstring_list_t working_directory_list(1, working_directory);
-            if (unescape_string(token, 1))
+            if (unescape_string_in_place(&token, UNESCAPE_SPECIAL))
            {
                /* Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY. Put it back. */
                if (! token.empty() && token.at(0) == HOME_DIRECTORY)
                    token.at(0) = L'~';
+
+                const wcstring_list_t working_directory_list(1, working_directory);
                if (is_potential_path(token, working_directory_list, PATH_EXPAND_TILDE))
                {
                    for (ptrdiff_t i=tok_begin-cbuff; i < (tok_end-cbuff); i++)
--- a/history.cpp
+++ b/history.cpp
@ -1731,8 +1731,9 @@ void history_t::add_with_file_detection(const wcstring &str)
            const wchar_t *token_cstr = tok_last(&tokenizer);
            if (token_cstr)
            {
-                wcstring potential_path = token_cstr;
-                if (unescape_string(potential_path, false) && string_could_be_path(potential_path))
+                wcstring potential_path;
+                bool unescaped = unescape_string(token_cstr, &potential_path, UNESCAPE_DEFAULT);
+                if (unescaped && string_could_be_path(potential_path))
                {
                    potential_paths.push_back(potential_path);

--- a/parser.cpp
+++ b/parser.cpp
@ -2728,8 +2728,6 @@ const wchar_t *parser_get_block_command(int type)
 */
 int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset)
 {
-    wchar_t *unesc;
-    wchar_t *pos;
    int err=0;

    wchar_t *paran_begin, *paran_end;
@ -2791,8 +2789,8 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha
        }
    }

-    unesc = unescape(arg_cpy, 1);
-    if (!unesc)
+    wcstring unesc;
+    if (! unescape_string(arg_cpy, &unesc, UNESCAPE_SPECIAL))
    {
        if (out)
        {
@ -2805,26 +2803,25 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha
    }
    else
    {
-        /*
-          Check for invalid variable expansions
-        */
-        for (pos = unesc; *pos; pos++)
+        /* Check for invalid variable expansions */
+        const size_t unesc_size = unesc.size();
+        for (size_t idx = 0; idx < unesc_size; idx++)
        {
-            switch (*pos)
+            switch (unesc.at(idx))
            {
                case VARIABLE_EXPAND:
                case VARIABLE_EXPAND_SINGLE:
                {
-                    wchar_t n = *(pos+1);
+                    wchar_t next_char = (idx + 1 < unesc_size ? unesc.at(idx + 1) : L'\0');

-                    if (n != VARIABLE_EXPAND &&
-                            n != VARIABLE_EXPAND_SINGLE &&
-                            !wcsvarchr(n))
+                    if (next_char != VARIABLE_EXPAND &&
+                            next_char != VARIABLE_EXPAND_SINGLE &&
+                            ! wcsvarchr(next_char))
                    {
                        err=1;
                        if (out)
                        {
-                            expand_variable_error(*this, unesc, pos-unesc, offset);
+                            expand_variable_error(*this, unesc, idx, offset);
                            print_errors(*out, prefix);
                        }
                    }
@ -2837,7 +2834,6 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha

    free(arg_cpy);

-    free(unesc);
    return err;

 }