read: Support arrays, character splitting

Enhance the `read` builtin to support creating an array with the --array
flag. With --array, only a single variable name is allowed and the
entire input is tokenized and placed into that variable as an array.

Also add custom behavior if IFS is empty or unset. In that event, split
the input on every character, instead of the previous behavior of doing
no splitting at all.
This commit is contained in:
Kevin Ballard 2014-07-13 22:36:26 -07:00
parent 72e8489d50
commit 973dd6ffbd
7 changed files with 180 additions and 10 deletions

View File

@ -2301,6 +2301,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
int exit_res=STATUS_BUILTIN_OK;
const wchar_t *mode_name = READ_MODE_NAME;
int shell = 0;
int array = 0;
woptind=0;
@ -2345,6 +2346,10 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
L"shell", no_argument, 0, 's'
}
,
{
L"array", no_argument, 0, 'a'
}
,
{
L"help", no_argument, 0, 'h'
}
@ -2359,7 +2364,7 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
int opt = wgetopt_long(argc,
argv,
L"xglUup:c:hm:s",
L"xglUup:c:hm:sa",
long_options,
&opt_index);
if (opt == -1)
@ -2414,6 +2419,10 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
shell = 1;
break;
case 'a':
array = 1;
break;
case 'h':
builtin_print_help(parser, argv[0], stdout_buffer);
return STATUS_BUILTIN_OK;
@ -2446,6 +2455,14 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
return STATUS_BUILTIN_ERROR;
}
if (array && woptind+1 != argc)
{
append_format(stderr_buffer, _(L"%ls: --array option requires a single variable name.\n"), argv[0]);
builtin_print_help(parser, argv[0], stderr_buffer);
return STATUS_BUILTIN_ERROR;
}
/*
Verify all variable names
*/
@ -2580,18 +2597,64 @@ static int builtin_read(parser_t &parser, wchar_t **argv)
wchar_t *state;
env_var_t ifs = env_get_string(L"IFS");
if (ifs.missing())
ifs = L"";
nxt = wcstok(buff, (i<argc-1)?ifs.c_str():L"", &state);
while (i<argc)
if (ifs.missing_or_empty())
{
env_set(argv[i], nxt != 0 ? nxt: L"", place);
/* Every character is a separate token */
size_t bufflen = wcslen(buff);
if (array)
{
if (bufflen > 0)
{
wcstring chars(bufflen+(bufflen-1), ARRAY_SEP);
for (size_t j=0; j<bufflen; ++j)
{
chars[j*2] = buff[j];
}
env_set(argv[i], chars.c_str(), place);
}
else
{
env_set(argv[i], NULL, place);
}
}
else
{
size_t j = 0;
for (; i+1 < argc; ++i)
{
env_set(argv[i], j < bufflen ? (wchar_t[2]){buff[j], 0} : L"", place);
if (j < bufflen) ++j;
}
if (i < argc) env_set(argv[i], &buff[j], place);
}
}
else if (array)
{
wcstring tokens;
tokens.reserve(wcslen(buff));
bool empty = true;
i++;
if (nxt != 0)
nxt = wcstok(0, (i<argc-1)?ifs.c_str():L"", &state);
for (nxt = wcstok(buff, ifs.c_str(), &state); nxt != 0; nxt = wcstok(0, ifs.c_str(), &state))
{
if (! tokens.empty()) tokens.push_back(ARRAY_SEP);
tokens.append(nxt);
empty = false;
}
env_set(argv[i], empty ? NULL : tokens.c_str(), place);
}
else
{
nxt = wcstok(buff, (i<argc-1)?ifs.c_str():L"", &state);
while (i<argc)
{
env_set(argv[i], nxt != 0 ? nxt: L"", place);
i++;
if (nxt != 0)
nxt = wcstok(0, (i<argc-1)?ifs.c_str():L"", &state);
}
}
}

View File

@ -19,11 +19,17 @@ The following options are available:
- <code>-u</code> or <code>--unexport</code> prevents the variables from being exported to child processes (default behaviour).
- <code>-U</code> or <code>--universal</code> causes the specified shell variable to be made universal.
- <code>-x</code> or <code>--export</code> exports the variables to child processes.
- <code>-a</code> or <code>--array</code> stores the result as an array.
\c read reads a single line of input from stdin, breaks it into tokens
based on the <tt>IFS</tt> shell variable, and then assigns one
token to each variable specified in <tt>VARIABLES</tt>. If there are more
tokens than variables, the complete remainder is assigned to the last variable.
As a special case, if \c IFS is empty or unset, each character of the
input is considered a separate token.
If \c -a or \c --array is provided, only one variable name is allowed and the
tokens are stored as an array in this variable.
See the documentation for \c set for more details on the scoping rules for
variables.

0
tests/read.err Normal file
View File

72
tests/read.in Normal file
View File

@ -0,0 +1,72 @@
#
# Test read builtin and IFS
#
# Command substitution with the default IFS: the two lines of output are
# counted as two elements.
count (echo one\ntwo)
# With IFS set to a tab only, the count is still two.
set -l IFS \t
count (echo one\ntwo)
# With IFS defined but holding no value, the whole output counts as a single
# element.
set -l IFS
count (echo one\ntwo)
# Erase the local IFS override so the following tests start from the default.
set -le IFS
# Print, on a single line, the element count of every variable named in $argv
# followed by each of that variable's values wrapped in single quotes.
# --no-scope-shadowing is what lets this helper dereference the caller's local
# variables by name (see the fish `function` documentation).
function print_vars --no-scope-shadowing
# Starts out with no value; becomes '' once the first variable has been
# printed, so later variables are preceded by an extra (empty) argument.
set -l space
set -l IFS \n # ensure our command substitution works right
for var in $argv
# $$var dereferences the variable whose name is stored in $var.
echo -n $space (count $$var) \'$$var\'
set space ''
end
# Terminate the output line.
echo
end
# Blank line to separate this group of cases in the output.
echo
# Default IFS, plain variables: tokens are assigned one per variable, the last
# variable receives the complete remainder, and variables left without a token
# are set to the empty string.
echo 'hello there' | read -l one two
print_vars one two
echo 'hello there' | read -l one
print_vars one
# Empty input still defines every variable (as an empty string).
echo '' | read -l one
print_vars one
echo '' | read -l one two
print_vars one two
echo 'test' | read -l one two three
print_vars one two three
# Blank line to separate this group of cases in the output.
echo
# IFS defined with no value: every character is its own token; the last
# variable receives whatever characters remain.
set -l IFS
echo 'hello' | read -l one
print_vars one
echo 'hello' | read -l one two
print_vars one two
echo 'hello' | read -l one two three
print_vars one two three
echo '' | read -l one
print_vars one
# Fewer characters than variables: the surplus variables are set to ''.
echo 't' | read -l one two
print_vars one two
echo 't' | read -l one two three
print_vars one two three
# Whitespace is not skipped in this mode: the leading space is a token itself.
echo ' t' | read -l one two
print_vars one two
# Erase the local IFS override again.
set -le IFS
# Blank line to separate this group of cases in the output.
echo
# --array with the default IFS: every token becomes one element of the single
# target variable.
echo 'hello there' | read -la ary
print_vars ary
echo 'hello' | read -la ary
print_vars ary
echo 'this is a bunch of words' | read -la ary
print_vars ary
# Leading separators do not produce empty elements.
echo ' one two three' | read -la ary
print_vars ary
# Empty input yields an array with zero elements.
echo '' | read -la ary
print_vars ary
# Blank line to separate this group of cases in the output.
echo
# --array with IFS defined but holding no value: one element per input
# character.
set -l IFS
echo 'hello' | read -la ary
print_vars ary
echo 'h' | read -la ary
print_vars ary
# Empty input yields an array with zero elements.
echo '' | read -la ary
print_vars ary
# Erase the local IFS override again.
set -le IFS

27
tests/read.out Normal file
View File

@ -0,0 +1,27 @@
2
2
1
1 'hello' 1 'there'
1 'hello there'
1 ''
1 '' 1 ''
1 'test' 1 '' 1 ''
1 'hello'
1 'h' 1 'ello'
1 'h' 1 'e' 1 'llo'
1 ''
1 't' 1 ''
1 't' 1 '' 1 ''
1 ' ' 1 't'
2 'hello' 'there'
1 'hello'
6 'this' 'is' 'a' 'bunch' 'of' 'words'
3 'one' 'two' 'three'
0
5 'h' 'e' 'l' 'l' 'o'
1 'h'
0

1
tests/read.status Normal file
View File

@ -0,0 +1 @@
0

View File

@ -1,5 +1,6 @@
Testing high level script functionality
File printf.in tested ok
File read.in tested ok
File test1.in tested ok
File test2.in tested ok
File test3.in tested ok