Work towards refactoring tokenizer to be a real object

This commit is contained in:
ridiculousfish 2012-11-21 17:48:35 -08:00
parent e73be48d96
commit f545fb2491
12 changed files with 123 additions and 169 deletions

View File

@ -143,7 +143,6 @@ static void write_part(const wchar_t *begin,
int cut_at_cursor, int cut_at_cursor,
int tokenize) int tokenize)
{ {
tokenizer tok;
wcstring out; wcstring out;
wchar_t *buff; wchar_t *buff;
size_t pos; size_t pos;
@ -155,9 +154,8 @@ static void write_part(const wchar_t *begin,
buff = wcsndup(begin, end-begin); buff = wcsndup(begin, end-begin);
// fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end ); // fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end );
out.clear(); out.clear();
tokenizer_t tok(buff, TOK_ACCEPT_UNFINISHED);
for (tok_init(&tok, buff, TOK_ACCEPT_UNFINISHED); for (; tok_has_next(&tok);
tok_has_next(&tok);
tok_next(&tok)) tok_next(&tok))
{ {
if ((cut_at_cursor) && if ((cut_at_cursor) &&

View File

@ -1765,7 +1765,6 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty
completer_t completer(cmd, type); completer_t completer(cmd, type);
const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end; const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end;
tokenizer tok;
const wchar_t *current_token=0, *prev_token=0; const wchar_t *current_token=0, *prev_token=0;
wcstring current_command; wcstring current_command;
int on_command=0; int on_command=0;
@ -1807,9 +1806,8 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty
int had_cmd=0; int had_cmd=0;
int end_loop=0; int end_loop=0;
tok_init(&tok, buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
while (tok_has_next(&tok) && !end_loop) while (tok_has_next(&tok) && !end_loop)
{ {

View File

@ -84,7 +84,6 @@ static void insert_tabs(wcstring &out, int indent)
*/ */
static int indent(wcstring &out, const wcstring &in, int flags) static int indent(wcstring &out, const wcstring &in, int flags)
{ {
tokenizer tok;
int res=0; int res=0;
int is_command = 1; int is_command = 1;
int indent = 0; int indent = 0;
@ -92,8 +91,7 @@ static int indent(wcstring &out, const wcstring &in, int flags)
int prev_type = 0; int prev_type = 0;
int prev_prev_type = 0; int prev_prev_type = 0;
tok_init(&tok, in.c_str(), TOK_SHOW_COMMENTS); tokenizer_t tok(in.c_str(), TOK_SHOW_COMMENTS);
for (; tok_has_next(&tok); tok_next(&tok)) for (; tok_has_next(&tok); tok_next(&tok))
{ {
int type = tok_last_type(&tok); int type = tok_last_type(&tok);

View File

@ -289,13 +289,12 @@ static void test_convert()
*/ */
static void test_tok() static void test_tok()
{ {
tokenizer t;
say(L"Testing tokenizer"); say(L"Testing tokenizer");
say(L"Testing invalid input"); say(L"Testing invalid input");
tok_init(&t, 0, 0); tokenizer_t t(NULL, 0);
if (tok_last_type(&t) != TOK_ERROR) if (tok_last_type(&t) != TOK_ERROR)
{ {
@ -326,14 +325,12 @@ static void test_tok()
const int types[] = const int types[] =
{ {
TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END
} };
;
size_t i;
say(L"Test correct tokenization"); say(L"Test correct tokenization");
for (i=0, tok_init(&t, str, 0); i<(sizeof(types)/sizeof(int)); i++,tok_next(&t)) tokenizer_t t(str, 0);
{ for (size_t i=0; i < sizeof types / sizeof *types; i++, tok_next(&t)) {
if (types[i] != tok_last_type(&t)) if (types[i] != tok_last_type(&t))
{ {
err(L"Tokenization error:"); err(L"Tokenization error:");

View File

@ -691,8 +691,8 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command
int arg_pos = -1; int arg_pos = -1;
bool had_cmd = false; bool had_cmd = false;
tokenizer tok; tokenizer_t tok(str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
for (tok_init(&tok, str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tok_has_next(&tok); tok_next(&tok)) for (; tok_has_next(&tok); tok_next(&tok))
{ {
int last_type = tok_last_type(&tok); int last_type = tok_last_type(&tok);
@ -955,10 +955,8 @@ static void tokenize(const wchar_t * const buff, std::vector<int> &color, const
std::fill(color.begin(), color.end(), -1); std::fill(color.begin(), color.end(), -1);
tokenizer tok; tokenizer_t tok(buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
for (tok_init(&tok, buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS); for (; tok_has_next(&tok); tok_next(&tok))
tok_has_next(&tok);
tok_next(&tok))
{ {
int last_type = tok_last_type(&tok); int last_type = tok_last_type(&tok);

View File

@ -1415,10 +1415,8 @@ void history_t::add_with_file_detection(const wcstring &str)
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
path_list_t potential_paths; path_list_t potential_paths;
tokenizer tokenizer; tokenizer_t tokenizer(str.c_str(), TOK_SQUASH_ERRORS);
for (tok_init(&tokenizer, str.c_str(), TOK_SQUASH_ERRORS); for (; tok_has_next(&tokenizer); tok_next(&tokenizer))
tok_has_next(&tokenizer);
tok_next(&tokenizer))
{ {
int type = tok_last_type(&tokenizer); int type = tok_last_type(&tokenizer);
if (type == TOK_STRING) if (type == TOK_STRING)

View File

@ -326,8 +326,6 @@ static void job_or_process_extent(const wchar_t *buff,
wchar_t *buffcpy; wchar_t *buffcpy;
int finished=0; int finished=0;
tokenizer tok;
CHECK(buff,); CHECK(buff,);
if (a) if (a)
@ -365,9 +363,8 @@ static void job_or_process_extent(const wchar_t *buff,
DIE_MEM(); DIE_MEM();
} }
for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED); tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED);
tok_has_next(&tok) && !finished; for (; tok_has_next(&tok) && !finished; tok_next(&tok))
tok_next(&tok))
{ {
int tok_begin = tok_get_pos(&tok); int tok_begin = tok_get_pos(&tok);
@ -440,8 +437,6 @@ void parse_util_token_extent(const wchar_t *buff,
long pos; long pos;
wchar_t *buffcpy; wchar_t *buffcpy;
tokenizer tok;
const wchar_t *a = NULL, *b = NULL, *pa = NULL, *pb = NULL; const wchar_t *a = NULL, *b = NULL, *pa = NULL, *pb = NULL;
CHECK(buff,); CHECK(buff,);
@ -474,9 +469,8 @@ void parse_util_token_extent(const wchar_t *buff,
DIE_MEM(); DIE_MEM();
} }
for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
tok_has_next(&tok); for (; tok_has_next(&tok); tok_next(&tok))
tok_next(&tok))
{ {
size_t tok_begin = tok_get_pos(&tok); size_t tok_begin = tok_get_pos(&tok);
size_t tok_end = tok_begin; size_t tok_end = tok_begin;
@ -711,9 +705,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_
wchar_t last_quote = '\0'; wchar_t last_quote = '\0';
int unfinished; int unfinished;
tokenizer tok; tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
tok_init(&tok, cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
for (; tok_has_next(&tok); tok_next(&tok)) for (; tok_has_next(&tok); tok_next(&tok))
{ {
if (tok_get_pos(&tok) > pos) if (tok_get_pos(&tok) > pos)

View File

@ -525,7 +525,6 @@ static int parser_is_pipe_forbidden(const wcstring &word)
*/ */
static const wchar_t *parser_find_end(const wchar_t * buff) static const wchar_t *parser_find_end(const wchar_t * buff)
{ {
tokenizer tok;
int had_cmd=0; int had_cmd=0;
int count = 0; int count = 0;
int error=0; int error=0;
@ -533,9 +532,8 @@ static const wchar_t *parser_find_end(const wchar_t * buff)
CHECK(buff, 0); CHECK(buff, 0);
for (tok_init(&tok, buff, 0); tokenizer_t tok(buff, 0);
tok_has_next(&tok) && !error; for (; tok_has_next(&tok) && !error; tok_next(&tok))
tok_next(&tok))
{ {
int last_type = tok_last_type(&tok); int last_type = tok_last_type(&tok);
switch (last_type) switch (last_type)
@ -796,7 +794,6 @@ void parser_t::print_errors_stderr()
int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args) int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
{ {
tokenizer tok;
expand_flags_t eflags = 0; expand_flags_t eflags = 0;
if (! show_errors) if (! show_errors)
@ -808,8 +805,8 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
eval_args may be called while evaluating another command, so we eval_args may be called while evaluating another command, so we
save the previous tokenizer and restore it on exit save the previous tokenizer and restore it on exit
*/ */
tokenizer *previous_tokenizer=current_tokenizer; tokenizer_t * const previous_tokenizer = current_tokenizer;
int previous_pos=current_tokenizer_pos; const int previous_pos = current_tokenizer_pos;
int do_loop=1; int do_loop=1;
CHECK(line, 1); CHECK(line, 1);
@ -819,10 +816,10 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
if (this->parser_type == PARSER_TYPE_GENERAL) if (this->parser_type == PARSER_TYPE_GENERAL)
proc_push_interactive(0); proc_push_interactive(0);
tokenizer_t tok(line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
current_tokenizer = &tok; current_tokenizer = &tok;
current_tokenizer_pos = 0; current_tokenizer_pos = 0;
tok_init(&tok, line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
error_code=0; error_code=0;
for (; do_loop && tok_has_next(&tok) ; tok_next(&tok)) for (; do_loop && tok_has_next(&tok) ; tok_next(&tok))
@ -1319,7 +1316,7 @@ job_t *parser_t::job_get_from_pid(int pid)
*/ */
void parser_t::parse_job_argument_list(process_t *p, void parser_t::parse_job_argument_list(process_t *p,
job_t *j, job_t *j,
tokenizer *tok, tokenizer_t *tok,
std::vector<completion_t> &args, std::vector<completion_t> &args,
bool unskip) bool unskip)
{ {
@ -1718,7 +1715,7 @@ f
*/ */
int parser_t::parse_job(process_t *p, int parser_t::parse_job(process_t *p,
job_t *j, job_t *j,
tokenizer *tok) tokenizer_t *tok)
{ {
std::vector<completion_t> args; // The list that will become the argc array for the program std::vector<completion_t> args; // The list that will become the argc array for the program
int use_function = 1; // May functions be considered when checking what action this command represents int use_function = 1; // May functions be considered when checking what action this command represents
@ -2185,7 +2182,6 @@ int parser_t::parse_job(process_t *p,
const wchar_t *end=parser_find_end(tok_string(tok) + const wchar_t *end=parser_find_end(tok_string(tok) +
current_tokenizer_pos); current_tokenizer_pos);
tokenizer subtok;
int make_sub_block = j->first_process != p; int make_sub_block = j->first_process != p;
if (!end) if (!end)
@ -2202,9 +2198,8 @@ int parser_t::parse_job(process_t *p,
{ {
int done=0; int done=0;
for (tok_init(&subtok, end, 0); tokenizer_t subtok(end, 0);
!done && tok_has_next(&subtok); for (; ! done && tok_has_next(&subtok); tok_next(&subtok))
tok_next(&subtok))
{ {
switch (tok_last_type(&subtok)) switch (tok_last_type(&subtok))
@ -2388,7 +2383,7 @@ static bool job_should_skip_elseif(const job_t *job, const block_t *current_bloc
\param tok The tokenizer to read tokens from \param tok The tokenizer to read tokens from
*/ */
void parser_t::eval_job(tokenizer *tok) void parser_t::eval_job(tokenizer_t *tok)
{ {
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
job_t *j; job_t *j;
@ -2630,7 +2625,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type
const wchar_t * const cmd = cmdStr.c_str(); const wchar_t * const cmd = cmdStr.c_str();
size_t forbid_count; size_t forbid_count;
int code; int code;
tokenizer *previous_tokenizer=current_tokenizer; tokenizer_t *previous_tokenizer=current_tokenizer;
block_t *start_current_block = current_block; block_t *start_current_block = current_block;
/* Record the current chain so we can put it back later */ /* Record the current chain so we can put it back later */
@ -2676,8 +2671,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type
this->push_block(new scope_block_t(block_type)); this->push_block(new scope_block_t(block_type));
current_tokenizer = new tokenizer; current_tokenizer = new tokenizer_t(cmd, 0);
tok_init(current_tokenizer, cmd, 0);
error_code = 0; error_code = 0;
@ -2907,19 +2901,17 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha
int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *prefix) int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *prefix)
{ {
tokenizer tok; tokenizer_t *const previous_tokenizer = current_tokenizer;
tokenizer *previous_tokenizer = current_tokenizer; const int previous_pos = current_tokenizer_pos;
int previous_pos = current_tokenizer_pos;
int do_loop = 1; int do_loop = 1;
int err = 0; int err = 0;
CHECK(buff, 1); CHECK(buff, 1);
current_tokenizer = &tok;
for (tok_init(&tok, buff, 0); tokenizer_t tok(buff, 0);
do_loop && tok_has_next(&tok); current_tokenizer = &tok;
tok_next(&tok)) for (; do_loop && tok_has_next(&tok); tok_next(&tok))
{ {
current_tokenizer_pos = tok_get_pos(&tok); current_tokenizer_pos = tok_get_pos(&tok);
switch (tok_last_type(&tok)) switch (tok_last_type(&tok))
@ -2970,7 +2962,7 @@ int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *pre
tok_destroy(&tok); tok_destroy(&tok);
current_tokenizer=previous_tokenizer; current_tokenizer = previous_tokenizer;
current_tokenizer_pos = previous_pos; current_tokenizer_pos = previous_pos;
error_code=0; error_code=0;
@ -2985,7 +2977,6 @@ int parser_t::test(const wchar_t * buff,
{ {
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
tokenizer tok;
/* /*
Set to one if a command name has been given for the currently Set to one if a command name has been given for the currently
parsed process specification parsed process specification
@ -2994,8 +2985,8 @@ int parser_t::test(const wchar_t * buff,
int err=0; int err=0;
int unfinished = 0; int unfinished = 0;
tokenizer *previous_tokenizer=current_tokenizer; tokenizer_t * const previous_tokenizer=current_tokenizer;
int previous_pos=current_tokenizer_pos; const int previous_pos=current_tokenizer_pos;
int block_pos[BLOCK_MAX_COUNT] = {}; int block_pos[BLOCK_MAX_COUNT] = {};
block_type_t block_type[BLOCK_MAX_COUNT] = {}; block_type_t block_type[BLOCK_MAX_COUNT] = {};
@ -3043,11 +3034,10 @@ int parser_t::test(const wchar_t * buff,
} }
tokenizer_t tok(buff, 0);
current_tokenizer = &tok; current_tokenizer = &tok;
for (tok_init(&tok, buff, 0); for (;; tok_next(&tok))
;
tok_next(&tok))
{ {
current_tokenizer_pos = tok_get_pos(&tok); current_tokenizer_pos = tok_get_pos(&tok);

View File

@ -295,7 +295,7 @@ struct profile_item_t
wcstring cmd; wcstring cmd;
}; };
struct tokenizer; struct tokenizer_t;
class parser_t class parser_t
{ {
@ -316,7 +316,7 @@ private:
wcstring err_buff; wcstring err_buff;
/** Pointer to the current tokenizer */ /** Pointer to the current tokenizer */
tokenizer *current_tokenizer; tokenizer_t *current_tokenizer;
/** String for representing the current line */ /** String for representing the current line */
wcstring lineinfo; wcstring lineinfo;
@ -344,10 +344,10 @@ private:
parser_t(const parser_t&); parser_t(const parser_t&);
parser_t& operator=(const parser_t&); parser_t& operator=(const parser_t&);
void parse_job_argument_list(process_t *p, job_t *j, tokenizer *tok, std::vector<completion_t>&, bool); void parse_job_argument_list(process_t *p, job_t *j, tokenizer_t *tok, std::vector<completion_t>&, bool);
int parse_job(process_t *p, job_t *j, tokenizer *tok); int parse_job(process_t *p, job_t *j, tokenizer_t *tok);
void skipped_exec(job_t * j); void skipped_exec(job_t * j);
void eval_job(tokenizer *tok); void eval_job(tokenizer_t *tok);
int parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset); int parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset);
void print_errors(wcstring &target, const wchar_t *prefix); void print_errors(wcstring &target, const wchar_t *prefix);
void print_errors_stderr(); void print_errors_stderr();

View File

@ -1821,7 +1821,6 @@ static void handle_token_history(int forward, int reset)
const wchar_t *str=0; const wchar_t *str=0;
long current_pos; long current_pos;
tokenizer tok;
if (reset) if (reset)
{ {
@ -1895,10 +1894,8 @@ static void handle_token_history(int forward, int reset)
{ {
//debug( 3, L"new '%ls'", data->token_history_buff.c_str() ); //debug( 3, L"new '%ls'", data->token_history_buff.c_str() );
tokenizer_t tok(data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
for (tok_init(&tok, data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED); for (; tok_has_next(&tok); tok_next(&tok))
tok_has_next(&tok);
tok_next(&tok))
{ {
switch (tok_last_type(&tok)) switch (tok_last_type(&tok))
{ {

View File

@ -83,7 +83,7 @@ static const wchar_t *tok_desc[] =
\return 0 if the system could not provide the memory needed, and 1 otherwise. \return 0 if the system could not provide the memory needed, and 1 otherwise.
*/ */
static int check_size(tokenizer *tok, size_t len) static int check_size(tokenizer_t *tok, size_t len)
{ {
if (tok->last_len <= len) if (tok->last_len <= len)
{ {
@ -103,7 +103,7 @@ static int check_size(tokenizer *tok, size_t len)
/** /**
Set the latest tokens string to be the specified error message Set the latest tokens string to be the specified error message
*/ */
static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_message) static void tok_call_error(tokenizer_t *tok, int error_type, const wchar_t *error_message)
{ {
tok->last_type = TOK_ERROR; tok->last_type = TOK_ERROR;
tok->error = error_type; tok->error = error_type;
@ -117,13 +117,13 @@ static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_
wcscpy(tok->last, error_message); wcscpy(tok->last, error_message);
} }
int tok_get_error(tokenizer *tok) int tok_get_error(tokenizer_t *tok)
{ {
return tok->error; return tok->error;
} }
void tok_init(tokenizer *tok, const wchar_t *b, int flags) tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last(NULL), last_type(0), last_len(0), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), last_quote(0), error(0), squash_errors(false), cached_lineno_offset(0), cached_lineno_count(0)
{ {
/* We can only generate error messages on the main thread due to wgettext() thread safety issues. */ /* We can only generate error messages on the main thread due to wgettext() thread safety issues. */
@ -132,33 +132,28 @@ void tok_init(tokenizer *tok, const wchar_t *b, int flags)
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
} }
CHECK(tok,);
memset(tok, 0, sizeof(tokenizer));
CHECK(b,); CHECK(b,);
tok->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED); this->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED);
tok->show_comments = !!(flags & TOK_SHOW_COMMENTS); this->show_comments = !!(flags & TOK_SHOW_COMMENTS);
tok->squash_errors = !!(flags & TOK_SQUASH_ERRORS); this->squash_errors = !!(flags & TOK_SQUASH_ERRORS);
tok->has_next=true;
tok->has_next = (*b != L'\0'); this->has_next = (*b != L'\0');
tok->orig_buff = tok->buff = b; this->orig_buff = this->buff = b;
tok->cached_lineno_offset = 0; this->cached_lineno_offset = 0;
tok->cached_lineno_count = 0; this->cached_lineno_count = 0;
tok_next(tok); tok_next(this);
} }
void tok_destroy(tokenizer *tok) void tok_destroy(tokenizer_t *tok)
{ {
CHECK(tok,); CHECK(tok,);
free(tok->last); free(tok->last);
} }
int tok_last_type(tokenizer *tok) int tok_last_type(tokenizer_t *tok)
{ {
CHECK(tok, TOK_ERROR); CHECK(tok, TOK_ERROR);
CHECK(tok->buff, TOK_ERROR); CHECK(tok->buff, TOK_ERROR);
@ -166,14 +161,14 @@ int tok_last_type(tokenizer *tok)
return tok->last_type; return tok->last_type;
} }
wchar_t *tok_last(tokenizer *tok) wchar_t *tok_last(tokenizer_t *tok)
{ {
CHECK(tok, 0); CHECK(tok, 0);
return tok->last; return tok->last;
} }
int tok_has_next(tokenizer *tok) int tok_has_next(tokenizer_t *tok)
{ {
/* /*
Return 1 on broken tokenizer Return 1 on broken tokenizer
@ -185,7 +180,7 @@ int tok_has_next(tokenizer *tok)
return tok->has_next; return tok->has_next;
} }
int tokenizer::line_number_of_character_at_offset(size_t offset) int tokenizer_t::line_number_of_character_at_offset(size_t offset)
{ {
// we want to return (one plus) the number of newlines at offsets less than the given offset // we want to return (one plus) the number of newlines at offsets less than the given offset
// cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset // cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset
@ -265,24 +260,28 @@ static int myal(wchar_t c)
/** /**
Read the next token as a string Read the next token as a string
*/ */
static void read_string(tokenizer *tok) static void read_string(tokenizer_t *tok)
{ {
const wchar_t *start; const wchar_t *start;
long len; long len;
int mode=0;
int do_loop=1; int do_loop=1;
int paran_count=0; int paran_count=0;
start = tok->buff; start = tok->buff;
bool is_first = true; bool is_first = true;
enum tok_mode_t {
mode_regular_text = 0, // regular text
mode_subshell = 1, // inside of subshell
mode_array_brackets = 2, // inside of array brackets
mode_array_brackets_and_subshell = 3 // inside of array brackets and subshell, like in '$foo[(ech'
} mode = mode_regular_text;
while (1) while (1)
{ {
if (!myal(*tok->buff)) if (!myal(*tok->buff))
{ {
// debug(1, L"%lc", *tok->buff );
if (*tok->buff == L'\\') if (*tok->buff == L'\\')
{ {
tok->buff++; tok->buff++;
@ -296,13 +295,13 @@ static void read_string(tokenizer *tok)
else else
{ {
/* Since we are about to increment tok->buff, decrement it first so the increment doesn't go past the end of the buffer. https://github.com/fish-shell/fish-shell/issues/389 */ /* Since we are about to increment tok->buff, decrement it first so the increment doesn't go past the end of the buffer. https://github.com/fish-shell/fish-shell/issues/389 */
do_loop = 0;
tok->buff--; tok->buff--;
do_loop = 0;
} }
} }
else if (*tok->buff == L'\n' && mode == 0) else if (*tok->buff == L'\n' && mode == mode_regular_text)
{ {
tok->buff--; tok->buff--;
do_loop = 0; do_loop = 0;
@ -312,33 +311,24 @@ static void read_string(tokenizer *tok)
tok->buff++; tok->buff++;
continue; continue;
} }
/*
The modes are as follows:
0: regular text
1: inside of subshell
2: inside of array brackets
3: inside of array brackets and subshell, like in '$foo[(ech'
*/
switch (mode) switch (mode)
{ {
case 0: case mode_regular_text:
{ {
switch (*tok->buff) switch (*tok->buff)
{ {
case L'(': case L'(':
{ {
paran_count=1; paran_count=1;
mode = 1; mode = mode_subshell;
break; break;
} }
case L'[': case L'[':
{ {
if (tok->buff != start) if (tok->buff != start)
mode=2; mode = mode_array_brackets;
break; break;
} }
@ -356,7 +346,7 @@ static void read_string(tokenizer *tok)
{ {
tok->buff += wcslen(tok->buff); tok->buff += wcslen(tok->buff);
if ((!tok->accept_unfinished)) if (! tok->accept_unfinished)
{ {
TOK_CALL_ERROR(tok, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR); TOK_CALL_ERROR(tok, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR);
return; return;
@ -369,7 +359,7 @@ static void read_string(tokenizer *tok)
default: default:
{ {
if (!tok_is_string_character(*(tok->buff), is_first)) if (! tok_is_string_character(*(tok->buff), is_first))
{ {
do_loop=0; do_loop=0;
} }
@ -378,8 +368,8 @@ static void read_string(tokenizer *tok)
break; break;
} }
case 3: case mode_array_brackets_and_subshell:
case 1: case mode_subshell:
switch (*tok->buff) switch (*tok->buff)
{ {
case L'\'': case L'\'':
@ -411,7 +401,7 @@ static void read_string(tokenizer *tok)
paran_count--; paran_count--;
if (paran_count == 0) if (paran_count == 0)
{ {
mode--; mode = (mode == mode_array_brackets_and_subshell ? mode_array_brackets : mode_regular_text);
} }
break; break;
case L'\0': case L'\0':
@ -419,16 +409,17 @@ static void read_string(tokenizer *tok)
break; break;
} }
break; break;
case 2:
case mode_array_brackets:
switch (*tok->buff) switch (*tok->buff)
{ {
case L'(': case L'(':
paran_count=1; paran_count=1;
mode = 3; mode = mode_array_brackets_and_subshell;
break; break;
case L']': case L']':
mode=0; mode = mode_regular_text;
break; break;
case L'\0': case L'\0':
@ -447,7 +438,7 @@ static void read_string(tokenizer *tok)
is_first = false; is_first = false;
} }
if ((!tok->accept_unfinished) && (mode!=0)) if ((!tok->accept_unfinished) && (mode != mode_regular_text))
{ {
TOK_CALL_ERROR(tok, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR); TOK_CALL_ERROR(tok, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR);
return; return;
@ -467,7 +458,7 @@ static void read_string(tokenizer *tok)
/** /**
Read the next token as a comment. Read the next token as a comment.
*/ */
static void read_comment(tokenizer *tok) static void read_comment(tokenizer_t *tok)
{ {
const wchar_t *start; const wchar_t *start;
@ -487,7 +478,7 @@ static void read_comment(tokenizer *tok)
/** /**
Read a FD redirection. Read a FD redirection.
*/ */
static void read_redirect(tokenizer *tok, int fd) static void read_redirect(tokenizer_t *tok, int fd)
{ {
int mode = -1; int mode = -1;
@ -552,7 +543,7 @@ static void read_redirect(tokenizer *tok, int fd)
} }
} }
wchar_t tok_last_quote(tokenizer *tok) wchar_t tok_last_quote(tokenizer_t *tok)
{ {
CHECK(tok, 0); CHECK(tok, 0);
@ -582,7 +573,7 @@ const wchar_t *tok_get_desc(int type)
} }
void tok_next(tokenizer *tok) void tok_next(tokenizer_t *tok)
{ {
CHECK(tok,); CHECK(tok,);
@ -705,20 +696,18 @@ void tok_next(tokenizer *tok)
} }
const wchar_t *tok_string(tokenizer *tok) const wchar_t *tok_string(tokenizer_t *tok)
{ {
return tok?tok->orig_buff:0; return tok?tok->orig_buff:0;
} }
wchar_t *tok_first(const wchar_t *str) wchar_t *tok_first(const wchar_t *str)
{ {
tokenizer t;
wchar_t *res=0; wchar_t *res=0;
CHECK(str, 0); CHECK(str, 0);
tok_init(&t, str, TOK_SQUASH_ERRORS); tokenizer_t t(str, TOK_SQUASH_ERRORS);
switch (tok_last_type(&t)) switch (tok_last_type(&t))
{ {
case TOK_STRING: case TOK_STRING:
@ -733,7 +722,7 @@ wchar_t *tok_first(const wchar_t *str)
return res; return res;
} }
int tok_get_pos(tokenizer *tok) int tok_get_pos(tokenizer_t *tok)
{ {
CHECK(tok, 0); CHECK(tok, 0);
@ -741,7 +730,7 @@ int tok_get_pos(tokenizer *tok)
} }
void tok_set_pos(tokenizer *tok, int pos) void tok_set_pos(tokenizer_t *tok, int pos)
{ {
CHECK(tok,); CHECK(tok,);

View File

@ -61,11 +61,12 @@ enum tokenizer_error
*/ */
#define TOK_SQUASH_ERRORS 4 #define TOK_SQUASH_ERRORS 4
typedef unsigned int tok_flags_t;
/** /**
The tokenizer struct. The tokenizer struct.
*/ */
struct tokenizer struct tokenizer_t
{ {
/** A pointer into the original string, showing where the next token begins */ /** A pointer into the original string, showing where the next token begins */
const wchar_t *buff; const wchar_t *buff;
@ -100,62 +101,60 @@ struct tokenizer
/** Return the line number of the character at the given offset */ /** Return the line number of the character at the given offset */
int line_number_of_character_at_offset(size_t offset); int line_number_of_character_at_offset(size_t offset);
/**
Constructor for a tokenizer. b is the string that is to be
tokenized. It is not copied, and should not be freed by the caller
until after the tokenizer is destroyed.
\param b The string to tokenize
\param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
to accept incomplete tokens, such as a subshell without a closing
parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
*/
tokenizer_t(const wchar_t *b, tok_flags_t flags);
}; };
/**
Initialize the tokenizer. b is the string that is to be
tokenized. It is not copied, and should not be freed by the caller
until after the tokenizer is destroyed.
\param tok The tokenizer to initialize
\param b The string to tokenize
\param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
to accept incomplete tokens, such as a subshell without a closing
parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
*/
void tok_init(tokenizer *tok, const wchar_t *b, int flags);
/** /**
Jump to the next token. Jump to the next token.
*/ */
void tok_next(tokenizer *tok); void tok_next(tokenizer_t *tok);
/** /**
Returns the type of the last token. Must be one of the values in the token_type enum. Returns the type of the last token. Must be one of the values in the token_type enum.
*/ */
int tok_last_type(tokenizer *tok); int tok_last_type(tokenizer_t *tok);
/** /**
Returns the last token string. The string should not be freed by the caller. Returns the last token string. The string should not be freed by the caller.
*/ */
wchar_t *tok_last(tokenizer *tok); wchar_t *tok_last(tokenizer_t *tok);
/** /**
Returns the type of quote from the last TOK_QSTRING Returns the type of quote from the last TOK_QSTRING
*/ */
wchar_t tok_last_quote(tokenizer *tok); wchar_t tok_last_quote(tokenizer_t *tok);
/** /**
Returns true as long as there are more tokens left Returns true as long as there are more tokens left
*/ */
int tok_has_next(tokenizer *tok); int tok_has_next(tokenizer_t *tok);
/** /**
Returns the position of the beginning of the current token in the original string Returns the position of the beginning of the current token in the original string
*/ */
int tok_get_pos(tokenizer *tok); int tok_get_pos(tokenizer_t *tok);
/** /**
Destroy the tokenizer and free associated memory Destroy the tokenizer and free associated memory
*/ */
void tok_destroy(tokenizer *tok); void tok_destroy(tokenizer_t *tok);
/** /**
Returns the original string to tokenizer Returns the original string to tokenizer
*/ */
const wchar_t *tok_string(tokenizer *tok); const wchar_t *tok_string(tokenizer_t *tok);
/** /**
@ -178,7 +177,7 @@ bool tok_is_string_character(wchar_t c, bool is_first);
/** /**
Move tokenizer position Move tokenizer position
*/ */
void tok_set_pos(tokenizer *tok, int pos); void tok_set_pos(tokenizer_t *tok, int pos);
/** /**
Returns a string description of the specified token type Returns a string description of the specified token type
@ -188,7 +187,7 @@ const wchar_t *tok_get_desc(int type);
/** /**
Get tokenizer error type. Should only be called if tok_last_type returns TOK_ERROR. Get tokenizer error type. Should only be called if tok_last_type returns TOK_ERROR.
*/ */
int tok_get_error(tokenizer *tok); int tok_get_error(tokenizer_t *tok);
#endif #endif