string pad: handle padding characters of width > 1

If the padding is not divisible by the char's width without remainder,
we pad the remainder with spaces, so the total width of the output is correct.

Also add completions, changelog entry, adjust documentation, add examples
with emoji and some tests.  Apply some minor style nitpicks and avoid extra
allocations of the input strings.
This commit is contained in:
Johannes Altmanninger 2020-09-27 21:51:20 +02:00
parent 5ae03414d7
commit f758d39535
5 changed files with 66 additions and 30 deletions

View File

@ -42,6 +42,7 @@ Syntax changes and new commands
Scripting improvements
----------------------
- New command ``string pad`` allows extending text to a fixed width (#7340).
- ``string sub`` has a new ``--end`` option to specify the end index of
a substring (#6765).
- ``string split`` has a new ``--fields`` option to specify fields to

View File

@ -1,5 +1,5 @@
string-pad - pad characters before and after string
===================================================
string-pad - pad strings to a fixed width
=========================================
Synopsis
--------
@ -17,15 +17,13 @@ Description
.. BEGIN DESCRIPTION
``string pad`` pads each STRING with CHAR to the given width.
``string pad`` extends each STRING to the given width by adding CHAR to the left.
The default behavior is left padding with spaces, and the default width is the length of the string (hence, no padding).
If ``-r`` or ``--right`` is given, add the padding after a string.
If ``-r`` or ``--right`` is given, only pad after the string.
If ``-c`` or ``--char`` is given, pad with CHAR instead of whitespace.
The ``-c`` or ``--char`` switch causes padding with the character CHAR instead of default whitespace character.
If ``-w`` or ``--width`` is given, pad the string to given width. Width less than the string width will result in an unchanged string.
The output is padded to the maximum width of all input strings. If ``-w`` or ``--width`` is given, use at least that.
.. END DESCRIPTION
@ -36,15 +34,17 @@ Examples
::
>_ string pad -w 10 -c ' ' 'abc'
>_ string pad -w 10 abc
abc
>_ string pad --right --width 12 --char=z foo barbaz
foozzzzzzzzz
barbazzzzzzz
>_ string pad --right --char=🐟 "fish are pretty" "rich. "
fish are pretty
rich. 🐟🐟🐟🐟
>_ string pad -w 6 --char=- foo | string pad --right -w 9 --char=-
---foo---
>_ string pad -w 6 -c- " | " "|||" " | " | string pad -r -w 9 -c-
--- | ---
---|||---
--- | ---
.. END EXAMPLES

View File

@ -2,7 +2,7 @@
# This follows a strict command-then-options approach, so we can just test the number of tokens
complete -f -c string
complete -f -c string -n "test (count (commandline -opc)) -le 2" -s h -l help -d "Display help and exit"
complete -f -c string -n "test (count (commandline -opc)) -ge 2; and not contains -- (commandline -opc)[2] escape collect" -s q -l quiet -d "Do not print output"
complete -f -c string -n "test (count (commandline -opc)) -ge 2; and not contains -- (commandline -opc)[2] escape collect pad" -s q -l quiet -d "Do not print output"
complete -f -c string -n "test (count (commandline -opc)) -lt 2" -a lower
complete -f -c string -n "test (count (commandline -opc)) -lt 2" -a upper
complete -f -c string -n "test (count (commandline -opc)) -lt 2" -a length
@ -48,3 +48,7 @@ complete -f -c string -n "test (count (commandline -opc)) -lt 2" -a repeat
complete -x -c string -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] repeat" -s n -l count -xa "(seq 1 10)" -d "Repetition count"
complete -x -c string -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] repeat" -s m -l max -xa "(seq 1 10)" -d "Maximum number of printed chars"
complete -f -c string -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] repeat" -s N -l no-newline -d "Remove newline"
complete -f -c string -n "test (count (commandline -opc)) -lt 2" -a pad
complete -x -c string -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] pad" -s r -l right -d "Pad right instead of left"
complete -x -c string -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] pad" -s c -l char -x -d "Character to use for padding"
complete -x -c string -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] pad" -s w -l width -x -d "Integer width of the result, default is maximum width of inputs"

View File

@ -183,7 +183,7 @@ using options_t = struct options_t { //!OCLINT(too many fields)
long end = 0;
size_t width = 0;
wchar_t char_to_pad = ' ';
wchar_t char_to_pad = L' ';
std::vector<int> fields;
@ -252,7 +252,8 @@ static int handle_flag_c(wchar_t **argv, parser_t &parser, io_streams_t &streams
return STATUS_CMD_OK;
} else if (opts->char_to_pad_valid) {
if (wcslen(w.woptarg) != 1) {
string_error(streams, _(L"%ls: Padding should be a character '%ls'\n"), argv[0], w.woptarg);
string_error(streams, _(L"%ls: Padding should be a character '%ls'\n"), argv[0],
w.woptarg);
return STATUS_INVALID_ARGS;
}
opts->char_to_pad = w.woptarg[0];
@ -980,6 +981,12 @@ static int string_pad(parser_t &parser, io_streams_t &streams, int argc, wchar_t
int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams);
if (retval != STATUS_CMD_OK) return retval;
size_t pad_char_width = fish_wcwidth(opts.char_to_pad);
if (pad_char_width == 0) {
string_error(streams, _(L"%ls: Invalid padding character of width zero\n"), argv[0]);
return STATUS_INVALID_ARGS;
}
// Pad left by default
if (!opts.right) {
opts.left = true;
@ -987,31 +994,36 @@ static int string_pad(parser_t &parser, io_streams_t &streams, int argc, wchar_t
// Find max width of strings and keep the inputs
size_t max_width = 0;
std::vector<wcstring> all_inputs;
std::vector<wcstring> inputs;
arg_iterator_t aiter_width(argv, optind, streams);
while (const wcstring *arg = aiter_width.nextstr()) {
wcstring input_string = *arg;
size_t width = fish_wcswidth(input_string);
if (width > max_width) max_width = width;
all_inputs.push_back(input_string);
inputs.push_back(std::move(input_string));
}
size_t pad_width = max_width > opts.width ? max_width : opts.width;
for (auto &input : all_inputs) {
wcstring padded = input;
size_t padded_width = fish_wcswidth(padded);
for (auto &input : inputs) {
wcstring padded;
size_t padded_width = fish_wcswidth(input);
if (pad_width >= padded_width) {
size_t pad = pad_width - padded_width;
size_t pad = (pad_width - padded_width) / pad_char_width;
size_t remaining_width = (pad_width - padded_width) % pad_char_width;
if (opts.left) {
padded.insert(0, pad, opts.char_to_pad);
padded.append(pad, opts.char_to_pad);
padded.append(remaining_width, L' ');
padded.append(input);
}
if (opts.right) {
padded.append(input);
padded.append(remaining_width, L' ');
padded.append(pad, opts.char_to_pad);
}
}
padded.push_back(L'\n');
streams.out.append(padded);
streams.out.append(L'\n');
}
return STATUS_CMD_OK;

View File

@ -45,10 +45,7 @@ string length -q ""; and echo not zero length; or echo zero length
string pad foo
# CHECK: foo
string pad -r -w 4 foo
# CHECK: foo
string pad -r -w 7 -c '-' foo
string pad -r -w 7 -c - foo
# CHECK: foo----
string pad --width 7 -c '=' foo
@ -57,14 +54,36 @@ string pad --width 7 -c '=' foo
echo \|(string pad --width 10 --right foo)\|
# CHECK: |foo |
# Pad string with multi-width emoji.
string pad -w 4 -c . 🐟
# CHECK: ..🐟
# Pad with multi-width character.
string pad -w 3 -c 🐟 .
# CHECK: 🐟.
# Multi-width pad with remainder, complemented with a space.
string pad -w 4 -c 🐟 k kk
# CHECK: 🐟 k
# CHECK: 🐟kk
# Pad to the maximum length.
string pad -c . long longer longest
# CHECK: ...long
# CHECK: .longer
# CHECK: longest
# This tests current behavior where the max width of an argument overrules
# the width parameter. This could be changed if needed.
string pad -c_ --width 5 longer-than-width-param x
# CHECK: longer-than-width-param
# CHECK: ______________________x
# Current behavior is that only a single padding character is supported.
# We can support longer strings in the future without breaking compatibility.
string pad -c ab -w4 .
# CHECKERR: string pad: Padding should be a character 'ab'
string sub --length 2 abcde
# CHECK: ab
@ -644,5 +663,5 @@ echo $status
# CHECK: 0
# Unmatched capturing groups are treated as empty
echo 'az' | string replace -r -- 'a(b.+)?z' 'a:$1z'
echo az | string replace -r -- 'a(b.+)?z' 'a:$1z'
# CHECK: a:z