fish-shell/builtin_test.cpp
2012-03-15 20:40:57 -07:00

688 lines
26 KiB
C++

/** \file builtin_test.cpp Functions defining the test builtin
Functions used for implementing the test builtin.
Implemented from scratch (yes, really) by way of IEEE 1003.1 as reference.
*/
#include "config.h"
#include "common.h"
#include "builtin.h"
#include "wutil.h"
#include "proc.h"
#include <sys/stat.h>
#include <memory>
enum {
BUILTIN_TEST_SUCCESS = STATUS_BUILTIN_OK,
BUILTIN_TEST_FAIL = STATUS_BUILTIN_ERROR
};
int builtin_test( parser_t &parser, wchar_t **argv );
static const wchar_t * const condstr[] = {
L"!", L"&&", L"||", L"==", L"!=", L"<", L">", L"-nt", L"-ot", L"-ef", L"-eq",
L"-ne", L"-lt", L"-gt", L"-le", L"-ge", L"=~"
};
namespace test_expressions {
enum token_t {
test_unknown, // arbitrary string
test_bang, // "!", inverts sense
test_filetype_b, // "-b", for block special files
test_filetype_c, // "-c" for character special files
test_filetype_d, // "-d" for directories
test_filetype_e, // "-e" for files that exist
test_filetype_f, // "-f" for for regular files
test_filetype_g, // "-g" for set-group-id
test_filetype_h, // "-h" for symbolic links
test_filetype_L, // "-L", same as -h
test_filetype_p, // "-p", for FIFO
test_filetype_S, // "-S", socket
test_filesize_s, // "-s", size greater than zero
test_filedesc_t, // "-t", whether the fd is associated with a terminal
test_fileperm_r, // "-r", read permission
test_fileperm_u, // "-u", whether file is setuid
test_fileperm_w, // "-w", whether file write permission is allowed
test_fileperm_x, // "-x", whether file execute/search is allowed
test_string_n, // "-n", non-empty string
test_string_z, // "-z", true if length of string is 0
test_string_equal, // "=", true if strings are identical
test_string_not_equal, // "!=", true if strings are not identical
test_number_equal, // "-eq", true if numbers are equal
test_number_not_equal, // "-ne", true if numbers are not equal
test_number_greater, // "-gt", true if first number is larger than second
test_number_greater_equal, // "-ge", true if first number is at least second
test_number_lesser, // "-lt", true if first number is smaller than second
test_number_lesser_equal, // "-le", true if first number is at most second
test_combine_and, // "-a", true if left and right are both true
test_combine_or, // "-o", true if either left or right is true
test_paren_open, // "(", open paren
test_paren_close, // ")", close paren
};
static bool binary_primary_evaluate(test_expressions::token_t token, const wcstring &left, const wcstring &right, wcstring_list_t &errors);
static bool unary_primary_evaluate(test_expressions::token_t token, const wcstring &arg, wcstring_list_t &errors);
enum {
UNARY_PRIMARY = 1 << 0,
BINARY_PRIMARY = 1 << 1
};
static const struct token_info_t { token_t tok; const wchar_t *string; unsigned int flags; } token_infos[] =
{
{test_unknown, L"", 0},
{test_bang, L"!", 0},
{test_filetype_b, L"-b", UNARY_PRIMARY},
{test_filetype_c, L"-c", UNARY_PRIMARY},
{test_filetype_d, L"-d", UNARY_PRIMARY},
{test_filetype_e, L"-e", UNARY_PRIMARY},
{test_filetype_f, L"-f", UNARY_PRIMARY},
{test_filetype_g, L"-g", UNARY_PRIMARY},
{test_filetype_h, L"-h", UNARY_PRIMARY},
{test_filetype_L, L"-L", UNARY_PRIMARY},
{test_filetype_p, L"-p", UNARY_PRIMARY},
{test_filetype_S, L"-S", UNARY_PRIMARY},
{test_filesize_s, L"-s", UNARY_PRIMARY},
{test_filedesc_t, L"-t", UNARY_PRIMARY},
{test_fileperm_r, L"-r", UNARY_PRIMARY},
{test_fileperm_u, L"-u", UNARY_PRIMARY},
{test_fileperm_w, L"-w", UNARY_PRIMARY},
{test_fileperm_x, L"-x", UNARY_PRIMARY},
{test_string_n, L"-n", UNARY_PRIMARY},
{test_string_z, L"-z", UNARY_PRIMARY},
{test_string_equal, L"=", BINARY_PRIMARY},
{test_string_not_equal, L"!=", BINARY_PRIMARY},
{test_number_equal, L"-eq", BINARY_PRIMARY},
{test_number_not_equal, L"-ne", BINARY_PRIMARY},
{test_number_greater, L"-gt", BINARY_PRIMARY},
{test_number_greater_equal, L"-ge", BINARY_PRIMARY},
{test_number_lesser, L"-lt", BINARY_PRIMARY},
{test_number_lesser_equal, L"-le", BINARY_PRIMARY},
{test_combine_and, L"-a", 0},
{test_combine_or, L"-o", 0},
{test_paren_open, L"(", 0},
{test_paren_close, L")", 0}
};
const token_info_t *token_for_string(const wcstring &str) {
for (size_t i=0; i < sizeof token_infos / sizeof *token_infos; i++) {
if (str == token_infos[i].string) {
return &token_infos[i];
}
}
return &token_infos[0];
}
/* Grammar.
<expr> = <combining_expr>
<combining_expr> = <unary_expr> and/or <combining_expr> |
<combining_expr>
<unary_expr> = bang <unary_expr> |
<primary>
<primary> = <unary_primary> arg |
arg <binary_primary> arg |
'(' <expr> ')'
*/
class expression;
class test_parser {
private:
wcstring_list_t strings;
wcstring_list_t errors;
expression *error(const wchar_t *fmt, ...);
void add_error(const wchar_t *fmt, ...);
const wcstring &arg(unsigned int idx) { return strings.at(idx); }
public:
test_parser(const wcstring_list_t &val) : strings(val)
{ }
expression *parse_expression(unsigned int start, unsigned int end);
expression *parse_combining_expression(unsigned int start, unsigned int end);
expression *parse_unary_expression(unsigned int start, unsigned int end);
expression *parse_primary(unsigned int start, unsigned int end);
expression *parse_parenthentical(unsigned int start, unsigned int end);
expression *parse_unary_primary(unsigned int start, unsigned int end);
expression *parse_binary_primary(unsigned int start, unsigned int end);
static expression *parse_args(const wcstring_list_t &args, wcstring &err);
};
struct range_t {
unsigned int start;
unsigned int end;
range_t(unsigned s, unsigned e) : start(s), end(e) { }
};
/* Base class for expressions */
class expression {
protected:
expression(token_t what, range_t where) : token(what), range(where) { }
public:
const token_t token;
range_t range;
virtual ~expression() { }
// evaluate returns true if the expression is true (i.e. BUILTIN_TEST_SUCCESS)
virtual bool evaluate(wcstring_list_t &errors) = 0;
};
typedef std::auto_ptr<expression> expr_ref_t;
/* Single argument like -n foo */
class unary_primary : public expression {
public:
wcstring arg;
unary_primary(token_t tok, range_t where, const wcstring &what) : expression(tok, where), arg(what) { }
bool evaluate(wcstring_list_t &errors);
};
/* Two argument primary like foo != bar */
class binary_primary : public expression {
public:
wcstring arg_left;
wcstring arg_right;
binary_primary(token_t tok, range_t where, const wcstring &left, const wcstring &right) : expression(tok, where), arg_left(left), arg_right(right)
{ }
bool evaluate(wcstring_list_t &errors);
};
/* Unary operator like bang */
class unary_operator : public expression {
public:
expr_ref_t subject;
unary_operator(token_t tok, range_t where, expr_ref_t &exp) : expression(tok, where), subject(exp) { }
bool evaluate(wcstring_list_t &errors);
};
/* Combining expression. Contains a list of AND or OR expressions. It takes more than two so that we don't have to worry about precedence in the parser. */
class combining_expression : public expression {
public:
const std::vector<expression *> subjects;
const std::vector<token_t> combiners;
combining_expression(token_t tok, range_t where, const std::vector<expression *> &exprs, const std::vector<token_t> &combs) : expression(tok, where), subjects(exprs), combiners(combs)
{
/* We should have one more subject than combiner */
assert(subjects.size() == combiners.size() + 1);
}
/* We are responsible for destroying our expressions */
virtual ~combining_expression() {
for (size_t i=0; i < subjects.size(); i++) {
delete subjects[i];
}
}
bool evaluate(wcstring_list_t &errors);
};
/* Parenthetical expression */
class parenthetical_expression : public expression {
public:
expr_ref_t contents;
parenthetical_expression(token_t tok, range_t where, expr_ref_t &expr) : expression(tok, where), contents(expr) { }
virtual bool evaluate(wcstring_list_t &errors);
};
void test_parser::add_error(const wchar_t *fmt, ...) {
assert(fmt != NULL);
va_list va;
va_start(va, fmt);
this->errors.push_back(vformat_string(fmt, va));
va_end(va);
}
expression *test_parser::error(const wchar_t *fmt, ...) {
assert(fmt != NULL);
va_list va;
va_start(va, fmt);
this->errors.push_back(vformat_string(fmt, va));
va_end(va);
return NULL;
}
expression *test_parser::parse_unary_expression(unsigned int start, unsigned int end) {
if (start >= end) {
return error(L"Missing argument at index %u", start);
}
token_t tok = token_for_string(arg(start))->tok;
if (tok == test_bang) {
expr_ref_t subject(parse_unary_expression(start + 1, end));
if (subject.get()) {
return new unary_operator(tok, range_t(start, subject->range.end), subject);
} else {
return NULL;
}
} else {
return parse_primary(start, end);
}
}
/* Parse a combining expression (AND, OR) */
expression *test_parser::parse_combining_expression(unsigned int start, unsigned int end) {
if (start >= end)
return NULL;
std::vector<expression *> subjects;
std::vector<token_t> combiners;
unsigned int idx = start;
while (idx < end) {
if (! subjects.empty()) {
/* This is not the first expression, so we expect a combiner. */
token_t combiner = token_for_string(arg(idx))->tok;
if (combiner != test_combine_and && combiner != test_combine_or) {
/* Not a combiner, we're done */
break;
}
combiners.push_back(combiner);
idx++;
}
/* Parse another expression */
expression *expr = parse_unary_expression(idx, end);
if (! expr) {
add_error(L"Missing argument at index %u", idx);
break;
}
/* Go to the end of this expression */
idx = expr->range.end;
subjects.push_back(expr);
}
if (! subjects.empty()) {
/* Our new expression takes ownership of all expressions we created. The token we pass is irrelevant. */
return new combining_expression(test_combine_and, range_t(start, idx), subjects, combiners);
} else {
/* No subjects */
return NULL;
}
}
expression *test_parser::parse_unary_primary(unsigned int start, unsigned int end) {
/* We need two arguments */
if (start >= end) {
return error(L"Missing argument at index %u", start);
}
if (start + 1 >= end) {
return error(L"Missing argument at index %u", start + 1);
}
/* All our unary primaries are prefix, so the operator is at start. */
const token_info_t *info = token_for_string(arg(start));
if (! (info->flags & UNARY_PRIMARY))
return NULL;
return new unary_primary(info->tok, range_t(start, start + 2), arg(start + 1));
}
expression *test_parser::parse_binary_primary(unsigned int start, unsigned int end) {
/* We need three arguments */
for (unsigned int idx = start; idx < start + 3; idx++) {
if (idx >= end) {
return error(L"Missing argument at index %u", idx);
}
}
/* All our binary primaries are infix, so the operator is at start + 1. */
const token_info_t *info = token_for_string(arg(start + 1));
if (! (info->flags & BINARY_PRIMARY))
return NULL;
return new binary_primary(info->tok, range_t(start, start + 3), arg(start), arg(start + 2));
}
expression *test_parser::parse_parenthentical(unsigned int start, unsigned int end) {
/* We need at least three arguments: open paren, argument, close paren */
if (start + 3 >= end)
return NULL;
/* Must start with an open expression */
const token_info_t *open_paren = token_for_string(arg(start));
if (open_paren->tok != test_paren_open)
return NULL;
/* Parse a subexpression */
expression *subexr_ptr = parse_expression(start + 1, end);
if (! subexr_ptr)
return NULL;
expr_ref_t subexpr(subexr_ptr);
/* Parse a close paren */
unsigned close_index = subexpr->range.end;
assert(close_index <= end);
if (close_index == end) {
return error(L"Missing close paren at index %u", close_index);
}
const token_info_t *close_paren = token_for_string(arg(close_index));
if (close_paren->tok != test_paren_close) {
return error(L"Expected close paren at index %u", close_index);
}
/* Success */
return new parenthetical_expression(test_paren_open, range_t(start, close_index+1), subexpr);
}
expression *test_parser::parse_primary(unsigned int start, unsigned int end) {
if (start >= end) {
return error(L"Missing argument at index %u", start);
}
expression *expr = NULL;
if (! expr) expr = parse_parenthentical(start, end);
if (! expr) expr = parse_unary_primary(start, end);
if (! expr) expr = parse_binary_primary(start, end);
return expr;
}
expression *test_parser::parse_expression(unsigned int start, unsigned int end) {
if (start >= end) {
return error(L"Missing argument at index %u", start);
}
return parse_combining_expression(start, end);
}
expression *test_parser::parse_args(const wcstring_list_t &args, wcstring &err) {
/* Empty list and one-arg list should be handled by caller */
assert(args.size() > 1);
test_parser parser(args);
expression *result = parser.parse_expression(0, (unsigned int)args.size());
/* Handle errors */
bool errored = false;
for (size_t i = 0; i < parser.errors.size(); i++) {
err.append(L"test: ");
err.append(parser.errors.at(i));
err.push_back(L'\n');
errored = true;
// For now we only show the first error
break;
}
if (! errored && result) {
/* It's also an error if there are any unused arguments. This is not detected by parse_expression() */
assert(result->range.end <= args.size());
if (result->range.end < args.size()) {
append_format(err, L"test: unexpected argument at index %lu: '%ls'\n", (unsigned long)result->range.end, args.at(result->range.end).c_str());
delete result;
result = NULL;
errored = true;
}
}
return result;
}
bool unary_primary::evaluate(wcstring_list_t &errors) {
return unary_primary_evaluate(token, arg, errors);
}
bool binary_primary::evaluate(wcstring_list_t &errors) {
return binary_primary_evaluate(token, arg_left, arg_right, errors);
}
bool unary_operator::evaluate(wcstring_list_t &errors) {
switch (token) {
case test_bang:
assert(subject.get());
return ! subject->evaluate(errors);
default:
errors.push_back(format_string(L"Unknown token type in %s", __func__));
return false;
}
}
bool combining_expression::evaluate(wcstring_list_t &errors) {
switch (token) {
case test_combine_and:
case test_combine_or:
{
/* One-element case */
if (subjects.size() == 1)
return subjects.at(0)->evaluate(errors);
/* Evaluate our lists, remembering that AND has higher precedence than OR. We can visualize this as a sequence of OR expressions of AND expressions. */
assert(combiners.size() + 1 == subjects.size());
assert(! subjects.empty());
size_t idx = 0, max = subjects.size();
bool or_result = false;
while (idx < max) {
if (or_result) {
/* Short circuit */
break;
}
/* Evaluate a stream of AND starting at given subject index. It may only have one element. */
bool and_result = true;
for (; idx < max; idx++) {
/* Evaluate it, short-circuiting */
and_result = and_result && subjects.at(idx)->evaluate(errors);
/* If the combiner at this index (which corresponding to how we combine with the next subject) is not AND, then exit the loop */
if (idx + 1 < max && combiners.at(idx) != test_combine_and) {
idx++;
break;
}
}
/* OR it in */
or_result = or_result || and_result;
}
return or_result;
}
default:
errors.push_back(format_string(L"Unknown token type in %s", __func__));
return BUILTIN_TEST_FAIL;
}
}
bool parenthetical_expression::evaluate(wcstring_list_t &errors) {
return contents->evaluate(errors);
}
/* IEEE 1003.1 says nothing about what it means for two strings to be "algebraically equal". For example, should we interpret 0x10 as 0, 10, or 16? Here we use only base 10 and use wcstoll, which allows for leading + and -, and leading whitespace. This matches bash. */
static bool parse_number(const wcstring &arg, long long *out) {
const wchar_t *str = arg.c_str();
wchar_t *endptr = NULL;
*out = wcstoll(str, &endptr, 10);
return endptr && *endptr == L'\0';
}
static bool binary_primary_evaluate(test_expressions::token_t token, const wcstring &left, const wcstring &right, wcstring_list_t &errors) {
using namespace test_expressions;
long long left_num, right_num;
switch (token) {
case test_string_equal:
return left == right;
case test_string_not_equal:
return left != right;
case test_number_equal:
return parse_number(left, &left_num) && parse_number(right, &right_num) && left_num == right_num;
case test_number_not_equal:
return parse_number(left, &left_num) && parse_number(right, &right_num) && left_num != right_num;
case test_number_greater:
return parse_number(left, &left_num) && parse_number(right, &right_num) && left_num > right_num;
case test_number_greater_equal:
return parse_number(left, &left_num) && parse_number(right, &right_num) && left_num >= right_num;
case test_number_lesser:
return parse_number(left, &left_num) && parse_number(right, &right_num) && left_num < right_num;
case test_number_lesser_equal:
return parse_number(left, &left_num) && parse_number(right, &right_num) && left_num <= right_num;
default:
errors.push_back(format_string(L"Unknown token type in %s", __func__));
return false;
}
}
static bool unary_primary_evaluate(test_expressions::token_t token, const wcstring &arg, wcstring_list_t &errors) {
using namespace test_expressions;
struct stat buf;
long long num;
switch (token) {
case test_filetype_b: // "-b", for block special files
return !wstat(arg, &buf) && S_ISBLK(buf.st_mode);
case test_filetype_c: // "-c" for character special files
return !wstat(arg, &buf) && S_ISCHR(buf.st_mode);
case test_filetype_d: // "-d" for directories
return !wstat(arg, &buf) && S_ISDIR(buf.st_mode);
case test_filetype_e: // "-e" for files that exist
return !wstat(arg, &buf);
case test_filetype_f: // "-f" for for regular files
return !wstat(arg, &buf) && S_ISREG(buf.st_mode);
case test_filetype_g: // "-g" for set-group-id
return !wstat(arg, &buf) && (S_ISGID & buf.st_mode);
case test_filetype_h: // "-h" for symbolic links
case test_filetype_L: // "-L", same as -h
return !lwstat(arg, &buf) && S_ISLNK(buf.st_mode);
case test_filetype_p: // "-p", for FIFO
return !wstat(arg, &buf) && S_ISFIFO(buf.st_mode);
case test_filetype_S: // "-S", socket
return !wstat(arg, &buf) && S_ISSOCK(buf.st_mode);
case test_filesize_s: // "-s", size greater than zero
return !wstat(arg, &buf) && buf.st_size > 0;
case test_filedesc_t: // "-t", whether the fd is associated with a terminal
return parse_number(arg, &num) && num == (int)num && isatty((int)num);
case test_fileperm_r: // "-r", read permission
return !waccess(arg, R_OK);
case test_fileperm_u: // "-u", whether file is setuid
return !wstat(arg, &buf) && (S_ISUID & buf.st_mode);
case test_fileperm_w: // "-w", whether file write permission is allowed
return !waccess(arg, W_OK);
case test_fileperm_x: // "-x", whether file execute/search is allowed
return !waccess(arg, X_OK);
case test_string_n: // "-n", non-empty string
return ! arg.empty();
case test_string_z: // "-z", true if length of string is 0
return arg.empty();
default:
errors.push_back(format_string(L"Unknown token type in %s", __func__));
return false;
}
}
};
/*
* Evaluate a conditional expression given the arguments.
* If fromtest is set, the caller is the test or [ builtin;
* with the pointer giving the name of the command.
* for POSIX conformance this supports a more limited range
* of functionality.
*
* Return status is the final shell status, i.e. 0 for true,
* 1 for false and 2 for error.
*/
int builtin_test( parser_t &parser, wchar_t **argv )
{
using namespace test_expressions;
/* The first argument should be the name of the command ('test') */
if (! argv[0])
return BUILTIN_TEST_FAIL;
size_t argc = 0;
while (argv[argc + 1])
argc++;
const wcstring_list_t args(argv + 1, argv + 1 + argc);
if (argc == 0) {
// Per 1003.1, exit false
return BUILTIN_TEST_FAIL;
} else if (argc == 1) {
// Per 1003.1, exit true if the arg is non-empty
return args.at(0).empty() ? BUILTIN_TEST_FAIL : BUILTIN_TEST_SUCCESS;
} else {
// Try parsing. If expr is not nil, we are responsible for deleting it.
wcstring err;
expression *expr = test_parser::parse_args(args, err);
if (! expr) {
#if 0
printf("Oops! test was given args:\n");
for (size_t i=0; i < argc; i++) {
printf("\t%ls\n", args.at(i).c_str());
}
printf("and returned parse error: %ls\n", err.c_str());
#endif
builtin_show_error(err);
return BUILTIN_TEST_FAIL;
} else {
wcstring_list_t eval_errors;
bool result = expr->evaluate(eval_errors);
if (! eval_errors.empty()) {
printf("test returned eval errors:\n");
for (size_t i=0; i < eval_errors.size(); i++) {
printf("\t%ls\n", eval_errors.at(i).c_str());
}
}
delete expr;
return result ? BUILTIN_TEST_SUCCESS : BUILTIN_TEST_FAIL;
}
}
return 1;
}