Port test_tokenizer

Johannes Altmanninger 2023-09-17 13:42:24 +02:00
parent a809672412
commit 408161f4d6
5 changed files with 147 additions and 165 deletions

View File

@@ -15,6 +15,7 @@ mod redirection_ffi {
        type wcharz_t = super::wcharz_t;
    }

    #[derive(Debug)]
    enum RedirectionMode {
        overwrite, // normal redirection: > file.txt
        append, // appending redirection: >> file.txt

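A note on the #[derive(Debug)] added here (and on TokenType and TokenizerError further down): assert_eq! formats both operands with {:?} when a comparison fails, so every type the ported test compares needs a Debug impl. A minimal sketch of that requirement, with hypothetical names:

    // assert_eq! needs PartialEq for the comparison and Debug for the
    // failure message that prints both operands.
    #[derive(Debug, PartialEq)]
    enum Mode {
        Overwrite,
        Append,
    }

    fn main() {
        assert_eq!(Mode::Append, Mode::Append);
    }
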
View File

@@ -3,3 +3,5 @@ mod common;
mod fd_monitor;
#[cfg(test)]
mod string_escape;
#[cfg(test)]
mod tokenizer;
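
With the module registered under #[cfg(test)] here, the port runs in the crate's normal test harness; assuming the standard cargo layout of the Rust sources, it can be exercised on its own with a name filter:

    cargo test tokenizer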

View File

@@ -0,0 +1,141 @@
use crate::redirection::RedirectionMode;
use crate::tokenizer::{PipeOrRedir, TokFlags, TokenType, Tokenizer, TokenizerError};
use crate::wchar::prelude::*;
use libc::{STDERR_FILENO, STDOUT_FILENO};

#[test]
fn test_tokenizer() {
    {
        let s = L!("alpha beta");
        let mut t = Tokenizer::new(s, TokFlags(0));

        let token = t.next(); // alpha
        assert!(token.is_some());
        let token = token.unwrap();
        assert_eq!(token.type_, TokenType::string);
        assert_eq!(token.length, 5);
        assert_eq!(t.text_of(&token), "alpha");

        let token = t.next(); // beta
        assert!(token.is_some());
        let token = token.unwrap();
        assert_eq!(token.type_, TokenType::string);
        assert_eq!(token.offset, 6);
        assert_eq!(token.length, 4);
        assert_eq!(t.text_of(&token), "beta");

        assert!(t.next().is_none());
    }

    let s = L!(concat!(
        "string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ",
        "){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect ",
        "&| &> ",
        "&&& ||| ",
        "&& || & |",
        "Compress_Newlines\n \n\t\n \nInto_Just_One",
    ));
    type tt = TokenType;
    #[rustfmt::skip]
    let types = [
        tt::string, tt::redirect, tt::string, tt::redirect, tt::string, tt::string, tt::string,
        tt::string, tt::string, tt::pipe, tt::redirect, tt::andand, tt::background, tt::oror,
        tt::pipe, tt::andand, tt::oror, tt::background, tt::pipe, tt::string, tt::end, tt::string,
    ];

    {
        let t = Tokenizer::new(s, TokFlags(0));
        let mut actual_types = vec![];
        for token in t {
            actual_types.push(token.type_);
        }
        assert_eq!(&actual_types[..], types);
    }

    // Test some errors.
    {
        let mut t = Tokenizer::new(L!("abc\\"), TokFlags(0));
        let token = t.next().unwrap();
        assert_eq!(token.type_, TokenType::error);
        assert_eq!(token.error, TokenizerError::unterminated_escape);
        assert_eq!(token.error_offset_within_token, 3);
    }
    {
        let mut t = Tokenizer::new(L!("abc )defg(hij"), TokFlags(0));
        let _token = t.next().unwrap();
        let token = t.next().unwrap();
        assert_eq!(token.type_, TokenType::error);
        assert_eq!(token.error, TokenizerError::closing_unopened_subshell);
        assert_eq!(token.offset, 4);
        assert_eq!(token.error_offset_within_token, 0);
    }
    {
        let mut t = Tokenizer::new(L!("abc defg(hij (klm)"), TokFlags(0));
        let _token = t.next().unwrap();
        let token = t.next().unwrap();
        assert_eq!(token.type_, TokenType::error);
        assert_eq!(token.error, TokenizerError::unterminated_subshell);
        assert_eq!(token.error_offset_within_token, 4);
    }
    {
        let mut t = Tokenizer::new(L!("abc defg[hij (klm)"), TokFlags(0));
        let _token = t.next().unwrap();
        let token = t.next().unwrap();
        assert_eq!(token.type_, TokenType::error);
        assert_eq!(token.error, TokenizerError::unterminated_slice);
        assert_eq!(token.error_offset_within_token, 4);
    }

    // Test some redirection parsing.
    macro_rules! pipe_or_redir {
        ($s:literal) => {
            PipeOrRedir::try_from(L!($s)).unwrap()
        };
    }
    assert!(pipe_or_redir!("|").is_pipe);
    assert!(pipe_or_redir!("0>|").is_pipe);
    assert_eq!(pipe_or_redir!("0>|").fd, 0);
    assert!(pipe_or_redir!("2>|").is_pipe);
    assert_eq!(pipe_or_redir!("2>|").fd, 2);
    assert!(pipe_or_redir!(">|").is_pipe);
    assert_eq!(pipe_or_redir!(">|").fd, STDOUT_FILENO);
    assert!(!pipe_or_redir!(">").is_pipe);
    assert_eq!(pipe_or_redir!(">").fd, STDOUT_FILENO);
    assert_eq!(pipe_or_redir!("2>").fd, STDERR_FILENO);
    assert_eq!(pipe_or_redir!("9999999999999>").fd, -1);
    assert_eq!(pipe_or_redir!("9999999999999>&2").fd, -1);
    assert_eq!(pipe_or_redir!("9999999999999>&2").is_valid(), false);
    assert_eq!(pipe_or_redir!("9999999999999>&2").is_valid(), false);
    assert!(pipe_or_redir!("&|").is_pipe);
    assert!(pipe_or_redir!("&|").stderr_merge);
    assert!(!pipe_or_redir!("&>").is_pipe);
    assert!(pipe_or_redir!("&>").stderr_merge);
    assert!(pipe_or_redir!("&>>").stderr_merge);
    assert!(pipe_or_redir!("&>?").stderr_merge);

    macro_rules! get_redir_mode {
        ($s:literal) => {
            pipe_or_redir!($s).mode
        };
    }
    assert_eq!(get_redir_mode!("<"), RedirectionMode::input);
    assert_eq!(get_redir_mode!(">"), RedirectionMode::overwrite);
    assert_eq!(get_redir_mode!("2>"), RedirectionMode::overwrite);
    assert_eq!(get_redir_mode!(">>"), RedirectionMode::append);
    assert_eq!(get_redir_mode!("2>>"), RedirectionMode::append);
    assert_eq!(get_redir_mode!("2>?"), RedirectionMode::noclob);
    assert_eq!(
        get_redir_mode!("9999999999999999>?"),
        RedirectionMode::noclob
    );
    assert_eq!(get_redir_mode!("2>&3"), RedirectionMode::fd);
    assert_eq!(get_redir_mode!("3<&0"), RedirectionMode::fd);
    assert_eq!(get_redir_mode!("3</tmp/filetxt"), RedirectionMode::input);
}
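
One design note on the test above: where the C++ version used lambdas, the port defines pipe_or_redir! and get_redir_mode! as macros taking $s:literal, presumably so each call site can skip the L! wrapper (L! itself only accepts string literals). A hypothetical function form would push that wrapper to every assertion:

    // Hypothetical alternative: a plain function, with L! at each call site.
    fn pipe_or_redir(s: &wstr) -> PipeOrRedir {
        PipeOrRedir::try_from(s).unwrap()
    }
    // usage: assert!(pipe_or_redir(L!("|")).is_pipe);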

View File

@@ -23,6 +23,7 @@ mod tokenizer_ffi {
    }

    /// Token types. XXX Why this isn't ParseTokenType, I'm not really sure.
    #[derive(Debug)]
    enum TokenType {
        /// Error reading token
        error,
@@ -44,6 +45,7 @@ mod tokenizer_ffi {
        comment,
    }

    #[derive(Debug, Eq, PartialEq)]
    enum TokenizerError {
        none,
        unterminated_quote,
@@ -1146,7 +1148,7 @@ impl PipeOrRedir {
    // \return if we are "valid". Here "valid" means only that the source fd did not overflow.
    // For example 99999999999> is invalid.
-    fn is_valid(&self) -> bool {
+    pub fn is_valid(&self) -> bool {
        self.fd >= 0
    }
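
The comment above states the invariant that the new test pins down with its 9999999999999> cases: a source fd too large to represent is stored as -1, and is_valid (made pub so the test can reach it) only checks the sign. A standalone sketch of that convention, assuming the fd is an i32 as in RawFd:

    // Hypothetical model of the overflow rule: an fd that does not fit
    // in i32 is recorded as -1, which is exactly what is_valid() rejects.
    fn parse_fd(digits: &str) -> i32 {
        digits.parse::<i32>().unwrap_or(-1)
    }

    fn main() {
        assert_eq!(parse_fd("2"), 2);
        assert_eq!(parse_fd("9999999999999"), -1); // overflow => not valid
    }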

View File

@@ -530,169 +530,6 @@ static void test_convert_nulls() {
    }
}

/// Test the tokenizer.
static void test_tokenizer() {
    say(L"Testing tokenizer");
    {
        const wchar_t *str = L"alpha beta";
        auto t = new_tokenizer(str, 0);
        std::unique_ptr<tok_t> token{};

        token = t->next(); // alpha
        do_test(token);
        do_test(token->type_ == token_type_t::string);
        do_test(token->offset == 0);
        do_test(token->length == 5);
        do_test(*t->text_of(*token) == L"alpha");

        token = t->next(); // beta
        do_test(token);
        do_test(token->type_ == token_type_t::string);
        do_test(token->offset == 6);
        do_test(token->length == 4);
        do_test(*t->text_of(*token) == L"beta");

        token = t->next();
        do_test(!token);
    }

    const wchar_t *str =
        L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells "
        L"){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect "
        L"&| &> "
        L"&&& ||| "
        L"&& || & |"
        L"Compress_Newlines\n \n\t\n \nInto_Just_One";
    using tt = token_type_t;
    const token_type_t types[] = {
        tt::string, tt::redirect, tt::string, tt::redirect, tt::string, tt::string,
        tt::string, tt::string, tt::string, tt::pipe, tt::redirect, tt::andand,
        tt::background, tt::oror, tt::pipe, tt::andand, tt::oror, tt::background,
        tt::pipe, tt::string, tt::end, tt::string};

    say(L"Test correct tokenization");
    {
        auto t = new_tokenizer(str, 0);
        size_t i = 0;
        while (auto token = t->next()) {
            if (i >= sizeof types / sizeof *types) {
                err(L"Too many tokens returned from tokenizer");
                std::fwprintf(stdout, L"Got excess token type %ld\n", (long)token->type_);
                break;
            }
            if (types[i] != token->type_) {
                err(L"Tokenization error:");
                std::fwprintf(
                    stdout,
                    L"Token number %zu of string \n'%ls'\n, expected type %ld, got token type "
                    L"%ld\n",
                    i + 1, str, (long)types[i], (long)token->type_);
            }
            i++;
        }
        if (i < sizeof types / sizeof *types) {
            err(L"Too few tokens returned from tokenizer");
        }
    }

    // Test some errors.
    {
        auto t = new_tokenizer(L"abc\\", 0);
        auto token = t->next();
        do_test(token);
        do_test(token->type_ == token_type_t::error);
        do_test(token->error == tokenizer_error_t::unterminated_escape);
        do_test(token->error_offset_within_token == 3);
    }
    {
        auto t = new_tokenizer(L"abc )defg(hij", 0);
        auto token = t->next();
        do_test(token);
        token = t->next();
        do_test(token);
        do_test(token->type_ == token_type_t::error);
        do_test(token->error == tokenizer_error_t::closing_unopened_subshell);
        do_test(token->offset == 4);
        do_test(token->error_offset_within_token == 0);
    }
    {
        auto t = new_tokenizer(L"abc defg(hij (klm)", 0);
        auto token = t->next();
        do_test(token);
        token = t->next();
        do_test(token);
        do_test(token->type_ == token_type_t::error);
        do_test(token->error == tokenizer_error_t::unterminated_subshell);
        do_test(token->error_offset_within_token == 4);
    }
    {
        auto t = new_tokenizer(L"abc defg[hij (klm)", 0);
        auto token = t->next();
        do_test(token);
        token = t->next();
        do_test(token);
        do_test(token->type_ == token_type_t::error);
        do_test(token->error == tokenizer_error_t::unterminated_slice);
        do_test(token->error_offset_within_token == 4);
    }

    // Test some redirection parsing.
    auto pipe_or_redir = [](const wchar_t *s) { return pipe_or_redir_from_string(s); };
    do_test(pipe_or_redir(L"|")->is_pipe);
    do_test(pipe_or_redir(L"0>|")->is_pipe);
    do_test(pipe_or_redir(L"0>|")->fd == 0);
    do_test(pipe_or_redir(L"2>|")->is_pipe);
    do_test(pipe_or_redir(L"2>|")->fd == 2);
    do_test(pipe_or_redir(L">|")->is_pipe);
    do_test(pipe_or_redir(L">|")->fd == STDOUT_FILENO);
    do_test(!pipe_or_redir(L">")->is_pipe);
    do_test(pipe_or_redir(L">")->fd == STDOUT_FILENO);
    do_test(pipe_or_redir(L"2>")->fd == STDERR_FILENO);
    do_test(pipe_or_redir(L"9999999999999>")->fd == -1);
    do_test(pipe_or_redir(L"9999999999999>&2")->fd == -1);
    do_test(pipe_or_redir(L"9999999999999>&2")->is_valid() == false);
    do_test(pipe_or_redir(L"9999999999999>&2")->is_valid() == false);
    do_test(pipe_or_redir(L"&|")->is_pipe);
    do_test(pipe_or_redir(L"&|")->stderr_merge);
    do_test(!pipe_or_redir(L"&>")->is_pipe);
    do_test(pipe_or_redir(L"&>")->stderr_merge);
    do_test(pipe_or_redir(L"&>>")->stderr_merge);
    do_test(pipe_or_redir(L"&>?")->stderr_merge);

    auto get_redir_mode = [](const wchar_t *s) -> maybe_t<redirection_mode_t> {
        if (auto redir = pipe_or_redir_from_string(s)) {
            return redir->mode;
        }
        return none();
    };
    if (get_redir_mode(L"<") != redirection_mode_t::input)
        err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
    if (get_redir_mode(L">") != redirection_mode_t::overwrite)
        err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
    if (get_redir_mode(L"2>") != redirection_mode_t::overwrite)
        err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
    if (get_redir_mode(L">>") != redirection_mode_t::append)
        err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
    if (get_redir_mode(L"2>>") != redirection_mode_t::append)
        err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
    if (get_redir_mode(L"2>?") != redirection_mode_t::noclob)
        err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
    if (get_redir_mode(L"9999999999999999>?") != redirection_mode_t::noclob)
        err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
    if (get_redir_mode(L"2>&3") != redirection_mode_t::fd)
        err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
    if (get_redir_mode(L"3<&0") != redirection_mode_t::fd)
        err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
    if (get_redir_mode(L"3</tmp/filetxt") != redirection_mode_t::input)
        err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
}

static void test_iothread() {
    say(L"Testing iothreads");
    std::atomic<int> shared_int{0};
@@ -5552,7 +5389,6 @@ static const test_t s_tests[]{
    {TEST_GROUP("convert_ascii"), test_convert_ascii},
    {TEST_GROUP("perf_convert_ascii"), perf_convert_ascii, true},
    {TEST_GROUP("convert_nulls"), test_convert_nulls},
    {TEST_GROUP("tokenizer"), test_tokenizer},
    {TEST_GROUP("iothread"), test_iothread},
    {TEST_GROUP("pthread"), test_pthread},
    {TEST_GROUP("debounce"), test_debounce},