Set of changes to improve detection of escape sequences for prompt width computation. Addresses #767
2025-03-27 14:45:13 +08:00 · 2013-09-29 02:48:35 -07:00 · 2013-09-29 02:48:35 -07:00 · 991c900fc6
commit 991c900fc6
parent 0d2af9e742
3 changed files with 192 additions and 154 deletions
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@ -550,6 +550,16 @@ static void test_utils()
    if (begin != a + wcslen(L"echo (echo (")) err(L"parse_util_cmdsubst_extent failed on line %ld", (long)__LINE__);
 }
 /* Exercises escape_code_length() on an empty string, on plain text, and on a few escape sequences */
 static void test_escape_sequences(void)
 {
    /* Reports a failure, tagged with the line of the check itself, when the measured length differs from the expected one */
 #define EXPECT_ESCAPE_LENGTH(str, expected) \
    do { if (escape_code_length(str) != (expected)) err(L"test_escape_sequences failed on line %d\n", __LINE__); } while (0)
    say(L"Testing escape codes");
    /* Inputs that do not begin with <esc> have no escape code at their start */
    EXPECT_ESCAPE_LENGTH(L"", 0);
    EXPECT_ESCAPE_LENGTH(L"abcd", 0);
    /* <esc>[2J is four characters long */
    EXPECT_ESCAPE_LENGTH(L"\x1b[2J", 4);
    /* Only the escape code is measured; the trailing "ABC" is not part of it */
    EXPECT_ESCAPE_LENGTH(L"\x1b[38;5;123mABC", strlen("\x1b[38;5;123m"));
    /* <esc>@ is a two-character sequence */
    EXPECT_ESCAPE_LENGTH(L"\x1b@", 2);
 #undef EXPECT_ESCAPE_LENGTH
 }
 class lru_node_test_t : public lru_node_t
 {
 public:
@ -1834,6 +1844,7 @@ int main(int argc, char **argv)
    test_fork();
    test_parser();
    test_utils();
    test_escape_sequences();
    test_lru();
    test_expand();
    test_fuzzy_match();
--- a/screen.cpp
+++ b/screen.cpp
@ -92,11 +92,9 @@ public:
   specified position of the specified wide character string. All of
   \c seq must match, but str may be longer than seq.
 */
-static int try_sequence(const char *seq, const wchar_t *str)
+static size_t try_sequence(const char *seq, const wchar_t *str)
 {
-    int i;
+    for (size_t i=0; ; i++)
    for (i=0;; i++)
    {
        if (!seq[i])
            return i;
@ -121,29 +119,6 @@ static size_t next_tab_stop(size_t in)
    return ((in/tab_width)+1)*tab_width;
 }
 // term256 support: recognize the 256-color escape codes directly.
 // They look like \x1b[38;5;<num>m or \x1b[48;5;<num>m.
 // Returns the number of characters in the escape code at the start of str,
 // or 0 if str does not start with one.
 static int is_term256_escape(const wchar_t *str)
 {
    // One of the two fixed prefixes is required
    int pos = try_sequence("\x1b[38;5;", str);
    if (pos == 0)
    {
        pos = try_sequence("\x1b[48;5;", str);
        if (pos == 0)
            return 0;
    }
    // The prefix must be followed by at least one digit
    if (! iswdigit(str[pos]))
        return 0;
    // Skip the whole run of digits
    do
    {
        pos++;
    }
    while (iswdigit(str[pos]));
    // The digits must be terminated by 'm', which counts toward the length
    if (str[pos] != L'm')
        return 0;
    return pos + 1;
 }
 /* Like fish_wcwidth, but returns 0 for control characters instead of -1 */
 static int fish_wcwidth_min_0(wchar_t wc)
 {
@ -157,6 +132,178 @@ static bool allow_soft_wrap(void)
    return !! auto_right_margin;
 }
 /*
    Returns the number of characters in the escape code starting at 'code',
    or 0 if 'code' does not begin with \x1b or the sequence is not recognized.
    'code' must not be NULL.
    The following forms are tried in order, and the first match wins:
      1. terminfo color escapes (only when cur_term is set)
      2. terminfo attribute escapes (only when cur_term is set)
      3. <esc>k ... <esc>\ when string_prefixes_string(L"screen", TERM) holds
      4. CSI sequences: <esc>[ ... <final character>
      5. two character sequences: <esc> followed by something in [@, _]
 */
 size_t escape_code_length(const wchar_t *code)
 {
    assert(code != NULL);
    /* The only escape codes we recognize start with \x1b */
    if (code[0] != L'\x1b')
        return 0;
    if (cur_term != NULL)
    {
        /*
         Detect these terminfo color escapes with parameter
         value 0..7, all of which don't move the cursor
         */
        char * const esc[] =
        {
            set_a_foreground,
            set_a_background,
            set_foreground,
            set_background,
        };
        for (size_t p=0; p < sizeof esc / sizeof *esc; p++)
        {
            if (!esc[p])
                continue;
            for (size_t k=0; k<8; k++)
            {
                /* tparm can return NULL on error; try_sequence would dereference it */
                const char *expanded = tparm(esc[p],k);
                if (expanded == NULL)
                    continue;
                size_t len = try_sequence(expanded, code);
                if (len)
                    return len;
            }
        }
        /*
         Detect these semi-common terminfo escapes without any
         parameter values, all of which don't move the cursor
         */
        char * const esc2[] =
        {
            enter_bold_mode,
            exit_attribute_mode,
            enter_underline_mode,
            exit_underline_mode,
            enter_standout_mode,
            exit_standout_mode,
            flash_screen,
            enter_subscript_mode,
            exit_subscript_mode,
            enter_superscript_mode,
            exit_superscript_mode,
            enter_blink_mode,
            enter_italics_mode,
            exit_italics_mode,
            enter_reverse_mode,
            enter_shadow_mode,
            exit_shadow_mode,
            enter_secure_mode
        };
        for (size_t p=0; p < sizeof esc2 / sizeof *esc2; p++)
        {
            if (!esc2[p])
                continue;
            /*
             Test both padded and unpadded version, just to
             be safe. Most versions of tparm don't actually
             seem to do anything these days.
             */
            const char *padded = tparm(esc2[p]);
            size_t padded_len = (padded != NULL) ? try_sequence(padded, code) : 0;
            size_t len = maxi(padded_len, try_sequence(esc2[p], code));
            if (len)
                return len;
        }
    }
    if (code[1] == L'k')
    {
        /* This looks like the escape sequence for setting a screen name */
        const env_var_t term_name = env_get_string(L"TERM");
        if (!term_name.missing() && string_prefixes_string(L"screen", term_name))
        {
            const wchar_t * const screen_name_end_sentinel = L"\x1b\\";
            const wchar_t *screen_name_end = wcsstr(&code[2], screen_name_end_sentinel);
            if (screen_name_end == NULL)
            {
                /* No terminator: consider just <esc>k to be the code */
                return 2;
            }
            /* The terminating <esc>\ is part of the sequence */
            const wchar_t *escape_sequence_end = screen_name_end + wcslen(screen_name_end_sentinel);
            return escape_sequence_end - code;
        }
    }
    if (code[1] == L'[')
    {
        /*
         Generic VT100 CSI-style sequence: <esc>[, followed by zero or more
         ASCII characters NOT in the range [@, ~], followed by one character
         in that range. This also covers the shortest form <esc>[X, which
         yields a length of 3.
         */
        // Start at 2 to skip over <esc>[
        size_t cursor = 2;
        for (; code[cursor] != L'\0'; cursor++)
        {
            wchar_t c = code[cursor];
            /* If we're not in ASCII, just stop */
            if (c > 127)
                break;
            /* If we're the end character, then consume it and then stop */
            if (c >= L'@' && c <= L'~')
            {
                cursor++;
                break;
            }
        }
        /* cursor now indexes just beyond the end of the sequence, or at the character (possibly the terminating zero) that stopped the scan */
        return cursor;
    }
    /* Generic VT100 two byte sequence: <esc> followed by something in the range @ through _ */
    if (code[1] >= L'@' && code[1] <= L'_')
        return 2;
    /* Not a sequence we know how to measure */
    return 0;
 }
 /* Information about a prompt layout */
 struct prompt_layout_t
 {
@ -178,7 +325,7 @@ struct prompt_layout_t
 static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
 {
    size_t current_line_width = 0;
-    size_t j, k;
+    size_t j;
    prompt_layout_t prompt_layout = {};
    prompt_layout.line_count = 1;
@ -187,134 +334,12 @@ static prompt_layout_t calc_prompt_layout(const wchar_t *prompt)
    {
        if (prompt[j] == L'\x1b')
        {
-            /*
+            /* This is the start of an escape code. Skip over it if it's at least one character long. */
-             This is the start of an escape code. Try to guess its width.
+            size_t escape_len = escape_code_length(&prompt[j]);
-             */
+            if (escape_len > 0)
            size_t p;
            int len=0;
            bool found = false;
            /*
             Detect these terminfo color escapes with parameter
             value 0..7, all of which don't move the cursor
             */
            char * const esc[] =
            {
-                set_a_foreground,
+                j += escape_len - 1;
                set_a_background,
                set_foreground,
                set_background,
            }
            ;
            /*
             Detect these semi-common terminfo escapes without any
             parameter values, all of which don't move the cursor
             */
            char * const esc2[] =
            {
                enter_bold_mode,
                exit_attribute_mode,
                enter_underline_mode,
                exit_underline_mode,
                enter_standout_mode,
                exit_standout_mode,
                flash_screen,
                enter_subscript_mode,
                exit_subscript_mode,
                enter_superscript_mode,
                exit_superscript_mode,
                enter_blink_mode,
                enter_italics_mode,
                exit_italics_mode,
                enter_reverse_mode,
                enter_shadow_mode,
                exit_shadow_mode,
                enter_standout_mode,
                exit_standout_mode,
                enter_secure_mode
            }
            ;
            for (p=0; p < sizeof esc / sizeof *esc && !found; p++)
            {
                if (!esc[p])
                    continue;
                for (k=0; k<8; k++)
                {
                    len = try_sequence(tparm(esc[p],k), &prompt[j]);
                    if (len)
                    {
                        j += (len-1);
                        found = true;
                        break;
                    }
                }
            }
            /* PCA for term256 support, let's just detect the escape codes directly */
            if (! found)
            {
                len = is_term256_escape(&prompt[j]);
                if (len)
                {
                    j += (len - 1);
                    found = true;
                }
            }
            for (p=0; p < (sizeof(esc2)/sizeof(char *)) && !found; p++)
            {
                if (!esc2[p])
                    continue;
                /*
                 Test both padded and unpadded version, just to
                 be safe. Most versions of tparm don't actually
                 seem to do anything these days.
                 */
                len = maxi(try_sequence(tparm(esc2[p]), &prompt[j]),
                           try_sequence(esc2[p], &prompt[j]));
                if (len)
                {
                    j += (len-1);
                    found = true;
                }
            }
            if (!found)
            {
                if (prompt[j+1] == L'k')
                {
                    const env_var_t term_name = env_get_string(L"TERM");
                    if (!term_name.missing() && string_prefixes_string(L"screen", term_name))
                    {
                        const wchar_t *end;
                        j+=2;
                        found = true;
                        end = wcsstr(&prompt[j], L"\x1b\\");
                        if (end)
                        {
                            /*
                             You'd thing this should be
                             '(end-prompt)+2', in order to move j
                             past the end of the string, but there is
                             a 'j++' at the end of each lap, so j
                             should always point to the last menged
                             character, e.g. +1.
                             */
                            j = (end-prompt)+1;
                        }
                        else
                        {
                            break;
                        }
                    }
                }
            }
        }
        else if (prompt[j] == L'\t')
        {
--- a/screen.h
+++ b/screen.h
@ -227,5 +227,7 @@ enum screen_reset_mode_t
 void s_reset(screen_t *s, screen_reset_mode_t mode);
 /* Returns the length of an escape code. Exposed for testing purposes only. */
 size_t escape_code_length(const wchar_t *code);
 #endif