From f758d395356b81f8dd088855b59ce907585ac6bc Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Sun, 27 Sep 2020 21:51:20 +0200 Subject: [PATCH] string pad: handle padding characters of width > 1 If the padding is not divisible by the char's width without remainder, we pad the remainder with spaces, so the total width of the output is correct. Also add completions, changelog entry, adjust documentation, add examples with emoji and some tests. Apply some minor style nitpicks and avoid extra allocations of the input strings. --- CHANGELOG.rst | 1 + doc_src/cmds/string-pad.rst | 28 ++++++++++++++-------------- share/completions/string.fish | 6 +++++- src/builtin_string.cpp | 32 ++++++++++++++++++++++---------- tests/checks/string.fish | 29 ++++++++++++++++++++++++----- 5 files changed, 66 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b153b0c22..03a002bc7 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -42,6 +42,7 @@ Syntax changes and new commands Scripting improvements ---------------------- +- New command ``string pad`` allows to extend text to a fixed width (#7340). - ``string sub`` has a new ``--end`` option to specify the end index of a substring (#6765). - ``string split`` has a new ``--fields`` option to specify fields to diff --git a/doc_src/cmds/string-pad.rst b/doc_src/cmds/string-pad.rst index 42fdcf11a..8f2492715 100644 --- a/doc_src/cmds/string-pad.rst +++ b/doc_src/cmds/string-pad.rst @@ -1,5 +1,5 @@ -string-pad - pad characters before and after string -=================================================== +string-pad - pad strings to a fixed width +========================================= Synopsis -------- @@ -17,15 +17,13 @@ Description .. BEGIN DESCRIPTION -``string pad`` pads each STRING with CHAR to the given width. +``string pad`` extends each STRING to the given width by adding CHAR to the left. 
-The default behavior is left padding with spaces and default width is the length of string (hence, no padding). +If ``-r`` or ``--right`` is given, add the padding after a string. -If ``-r`` or ``--right`` is given, only pad after string. +If ``-c`` or ``--char`` is given, pad with CHAR instead of whitespace. -The ``-c`` or ``--char`` switch causes padding with the character CHAR instead of default whitespace character. - -If ``-w`` or ``--width`` is given, pad the string to given width. Width less than the string width will result in an unchanged string. +The output is padded to the maximum width of all input strings. If ``-w`` or ``--width`` is given, use at least that. .. END DESCRIPTION @@ -36,15 +34,17 @@ Examples :: - >_ string pad -w 10 -c ' ' 'abc' + >_ string pad -w 10 abc abc - >_ string pad --right --width 12 --char=z foo barbaz - foozzzzzzzzz - barbazzzzzzz + >_ string pad --right --char=🐟 "fish are pretty" "rich. " + fish are pretty + rich. 🐟🐟🐟🐟 - >_ string pad -w 6 --char=- foo | string pad --right -w 9 --char=- - ---foo--- + >_ string pad -w 6 -c- " | " "|||" " | " | string pad -r -w 9 -c- + --- | --- + ---|||--- + --- | --- .. 
END EXAMPLES diff --git a/share/completions/string.fish b/share/completions/string.fish index c60ccc1c5..fa764982a 100644 --- a/share/completions/string.fish +++ b/share/completions/string.fish @@ -2,7 +2,7 @@ # This follows a strict command-then-options approach, so we can just test the number of tokens complete -f -c string complete -f -c string -n "test (count (commandline -opc)) -le 2" -s h -l help -d "Display help and exit" -complete -f -c string -n "test (count (commandline -opc)) -ge 2; and not contains -- (commandline -opc)[2] escape collect" -s q -l quiet -d "Do not print output" +complete -f -c string -n "test (count (commandline -opc)) -ge 2; and not contains -- (commandline -opc)[2] escape collect pad" -s q -l quiet -d "Do not print output" complete -f -c string -n "test (count (commandline -opc)) -lt 2" -a lower complete -f -c string -n "test (count (commandline -opc)) -lt 2" -a upper complete -f -c string -n "test (count (commandline -opc)) -lt 2" -a length @@ -48,3 +48,7 @@ complete -f -c string -n "test (count (commandline -opc)) -lt 2" -a repeat complete -x -c string -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] repeat" -s n -l count -xa "(seq 1 10)" -d "Repetition count" complete -x -c string -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] repeat" -s m -l max -xa "(seq 1 10)" -d "Maximum number of printed chars" complete -f -c string -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] repeat" -s N -l no-newline -d "Remove newline" +complete -f -c string -n "test (count (commandline -opc)) -lt 2" -a pad +complete -x -c string -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] pad" -s r -l right -d "Pad right instead of left" +complete -x -c string -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] pad" -s c -l char -x -d "Character to use for padding" +complete -x -c string -n "test (count 
(commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] pad" -s w -l width -x -d "Integer width of the result, default is maximum width of inputs" diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp index b89b34d40..4f22da517 100644 --- a/src/builtin_string.cpp +++ b/src/builtin_string.cpp @@ -183,7 +183,7 @@ using options_t = struct options_t { //!OCLINT(too many fields) long end = 0; size_t width = 0; - wchar_t char_to_pad = ' '; + wchar_t char_to_pad = L' '; std::vector fields; @@ -252,7 +252,8 @@ static int handle_flag_c(wchar_t **argv, parser_t &parser, io_streams_t &streams return STATUS_CMD_OK; } else if (opts->char_to_pad_valid) { if (wcslen(w.woptarg) != 1) { - string_error(streams, _(L"%ls: Padding should be a character '%ls'\n"), argv[0], w.woptarg); + string_error(streams, _(L"%ls: Padding should be a character '%ls'\n"), argv[0], + w.woptarg); return STATUS_INVALID_ARGS; } opts->char_to_pad = w.woptarg[0]; @@ -980,6 +981,12 @@ static int string_pad(parser_t &parser, io_streams_t &streams, int argc, wchar_t int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); if (retval != STATUS_CMD_OK) return retval; + size_t pad_char_width = fish_wcwidth(opts.char_to_pad); + if (pad_char_width == 0) { + string_error(streams, _(L"%ls: Invalid padding character of width zero\n"), argv[0]); + return STATUS_INVALID_ARGS; + } + // Pad left by default if (!opts.right) { opts.left = true; @@ -987,31 +994,36 @@ static int string_pad(parser_t &parser, io_streams_t &streams, int argc, wchar_t // Find max width of strings and keep the inputs size_t max_width = 0; - std::vector all_inputs; + std::vector inputs; arg_iterator_t aiter_width(argv, optind, streams); while (const wcstring *arg = aiter_width.nextstr()) { wcstring input_string = *arg; size_t width = fish_wcswidth(input_string); if (width > max_width) max_width = width; - all_inputs.push_back(input_string); + inputs.push_back(std::move(input_string)); } size_t pad_width = max_width > 
opts.width ? max_width : opts.width; - for (auto &input : all_inputs) { - wcstring padded = input; - size_t padded_width = fish_wcswidth(padded); + for (auto &input : inputs) { + wcstring padded; + size_t padded_width = fish_wcswidth(input); if (pad_width >= padded_width) { - size_t pad = pad_width - padded_width; + size_t pad = (pad_width - padded_width) / pad_char_width; + size_t remaining_width = (pad_width - padded_width) % pad_char_width; if (opts.left) { - padded.insert(0, pad, opts.char_to_pad); + padded.append(pad, opts.char_to_pad); + padded.append(remaining_width, L' '); + padded.append(input); } if (opts.right) { + padded.append(input); + padded.append(remaining_width, L' '); padded.append(pad, opts.char_to_pad); } } + padded.push_back(L'\n'); streams.out.append(padded); - streams.out.append(L'\n'); } return STATUS_CMD_OK; diff --git a/tests/checks/string.fish b/tests/checks/string.fish index 6efc705bb..511082521 100644 --- a/tests/checks/string.fish +++ b/tests/checks/string.fish @@ -45,10 +45,7 @@ string length -q ""; and echo not zero length; or echo zero length string pad foo # CHECK: foo -string pad -r -w 4 foo -# CHECK: foo - -string pad -r -w 7 -c '-' foo +string pad -r -w 7 -c - foo # CHECK: foo---- string pad --width 7 -c '=' foo @@ -57,14 +54,36 @@ string pad --width 7 -c '=' foo echo \|(string pad --width 10 --right foo)\| # CHECK: |foo | +# Pad string with multi-width emoji. string pad -w 4 -c . 🐟 # CHECK: ..🐟 +# Pad with multi-width character. +string pad -w 3 -c 🐟 . +# CHECK: 🐟. + +# Multi-width pad with remainder, complemented with a space. +string pad -w 4 -c 🐟 k kk +# CHECK: 🐟 k +# CHECK: 🐟kk + +# Pad to the maximum length. string pad -c . long longer longest # CHECK: ...long # CHECK: .longer # CHECK: longest +# This tests current behavior where the max width of an argument overrules +# the width parameter. This could be changed if needed. 
+string pad -c_ --width 5 longer-than-width-param x +# CHECK: longer-than-width-param +# CHECK: ______________________x + +# Current behavior is that only a single padding character is supported. +# We can support longer strings in future without breaking compatibility. +string pad -c ab -w4 . +# CHECKERR: string pad: Padding should be a character 'ab' + string sub --length 2 abcde # CHECK: ab @@ -644,5 +663,5 @@ echo $status # CHECK: 0 # Unmatched capturing groups are treated as empty -echo 'az' | string replace -r -- 'a(b.+)?z' 'a:$1z' +echo az | string replace -r -- 'a(b.+)?z' 'a:$1z' # CHECK: a:z