From f545fb2491d36e02dc6c4005ada86be8678bba78 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 21 Nov 2012 17:48:35 -0800 Subject: [PATCH] Work towards refactoring tokenizer to be a real object --- builtin_commandline.cpp | 6 +-- complete.cpp | 6 +-- fish_indent.cpp | 4 +- fish_tests.cpp | 13 ++--- highlight.cpp | 10 ++-- history.cpp | 6 +-- parse_util.cpp | 18 ++----- parser.cpp | 54 ++++++++------------ parser.h | 10 ++-- reader.cpp | 7 +-- tokenizer.cpp | 109 ++++++++++++++++++---------------------- tokenizer.h | 49 +++++++++--------- 12 files changed, 123 insertions(+), 169 deletions(-) diff --git a/builtin_commandline.cpp b/builtin_commandline.cpp index cb3ee9e3e..07dc2c969 100644 --- a/builtin_commandline.cpp +++ b/builtin_commandline.cpp @@ -143,7 +143,6 @@ static void write_part(const wchar_t *begin, int cut_at_cursor, int tokenize) { - tokenizer tok; wcstring out; wchar_t *buff; size_t pos; @@ -155,9 +154,8 @@ static void write_part(const wchar_t *begin, buff = wcsndup(begin, end-begin); // fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end ); out.clear(); - - for (tok_init(&tok, buff, TOK_ACCEPT_UNFINISHED); - tok_has_next(&tok); + tokenizer_t tok(buff, TOK_ACCEPT_UNFINISHED); + for (; tok_has_next(&tok); tok_next(&tok)) { if ((cut_at_cursor) && diff --git a/complete.cpp b/complete.cpp index ce0944854..c21beafab 100644 --- a/complete.cpp +++ b/complete.cpp @@ -1765,7 +1765,6 @@ void complete(const wcstring &cmd, std::vector &comps, complete_ty completer_t completer(cmd, type); const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end; - tokenizer tok; const wchar_t *current_token=0, *prev_token=0; wcstring current_command; int on_command=0; @@ -1807,9 +1806,8 @@ void complete(const wcstring &cmd, std::vector &comps, complete_ty int had_cmd=0; int end_loop=0; - - tok_init(&tok, buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - + + tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); while (tok_has_next(&tok) && !end_loop) { diff --git a/fish_indent.cpp b/fish_indent.cpp index dedf11f27..c50eeba65 100644 --- a/fish_indent.cpp +++ b/fish_indent.cpp @@ -84,7 +84,6 @@ static void insert_tabs(wcstring &out, int indent) */ static int indent(wcstring &out, const wcstring &in, int flags) { - tokenizer tok; int res=0; int is_command = 1; int indent = 0; @@ -92,8 +91,7 @@ static int indent(wcstring &out, const wcstring &in, int flags) int prev_type = 0; int prev_prev_type = 0; - tok_init(&tok, in.c_str(), TOK_SHOW_COMMENTS); - + tokenizer_t tok(in.c_str(), TOK_SHOW_COMMENTS); for (; tok_has_next(&tok); tok_next(&tok)) { int type = tok_last_type(&tok); diff --git a/fish_tests.cpp b/fish_tests.cpp index 3a1b382db..a2ac00c47 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -289,13 +289,12 @@ static void test_convert() */ static void test_tok() { - tokenizer t; say(L"Testing tokenizer"); say(L"Testing invalid input"); - tok_init(&t, 0, 0); + tokenizer_t t(NULL, 0); if (tok_last_type(&t) != TOK_ERROR) { @@ -326,14 +325,12 @@ static void test_tok() const int types[] = { TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END - } - ; - size_t i; + }; say(L"Test correct tokenization"); - - for (i=0, tok_init(&t, str, 0); i<(sizeof(types)/sizeof(int)); i++,tok_next(&t)) - { + + tokenizer_t t(str, 0); + for (size_t i=0; i < sizeof types / sizeof *types; i++, tok_next(&t)) { if (types[i] != tok_last_type(&t)) { err(L"Tokenization error:"); diff --git a/highlight.cpp b/highlight.cpp index 543f33aaf..77acd8dad 100644 --- a/highlight.cpp +++ b/highlight.cpp @@ -691,8 +691,8 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command int arg_pos = -1; bool had_cmd = false; - tokenizer tok; - for (tok_init(&tok, str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tok_has_next(&tok); tok_next(&tok)) + tokenizer_t tok(str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); + for (; tok_has_next(&tok); tok_next(&tok)) { int last_type = tok_last_type(&tok); @@ -955,10 +955,8 @@ static void tokenize(const wchar_t * const buff, std::vector &color, const std::fill(color.begin(), color.end(), -1); - tokenizer tok; - for (tok_init(&tok, buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS); - tok_has_next(&tok); - tok_next(&tok)) + tokenizer_t tok(buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS); + for (; tok_has_next(&tok); tok_next(&tok)) { int last_type = tok_last_type(&tok); diff --git a/history.cpp b/history.cpp index dd12eb95b..0a87e8b9e 100644 --- a/history.cpp +++ b/history.cpp @@ -1415,10 +1415,8 @@ void history_t::add_with_file_detection(const wcstring &str) ASSERT_IS_MAIN_THREAD(); path_list_t potential_paths; - tokenizer tokenizer; - for (tok_init(&tokenizer, str.c_str(), TOK_SQUASH_ERRORS); - tok_has_next(&tokenizer); - tok_next(&tokenizer)) + tokenizer_t tokenizer(str.c_str(), TOK_SQUASH_ERRORS); + for (; tok_has_next(&tokenizer); tok_next(&tokenizer)) { int type = tok_last_type(&tokenizer); if (type == TOK_STRING) diff --git a/parse_util.cpp b/parse_util.cpp index 711b22e2c..10072ec86 100644 --- a/parse_util.cpp +++ b/parse_util.cpp @@ -326,8 +326,6 @@ static void job_or_process_extent(const wchar_t *buff, wchar_t *buffcpy; int finished=0; - tokenizer tok; - CHECK(buff,); if (a) @@ -365,9 +363,8 @@ static void job_or_process_extent(const wchar_t *buff, DIE_MEM(); } - for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED); - tok_has_next(&tok) && !finished; - tok_next(&tok)) + tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED); + for (; tok_has_next(&tok) && !finished; tok_next(&tok)) { int tok_begin = tok_get_pos(&tok); @@ -440,8 +437,6 @@ void parse_util_token_extent(const wchar_t *buff, long pos; wchar_t *buffcpy; - tokenizer tok; - const wchar_t *a = NULL, *b = NULL, *pa = NULL, *pb = NULL; CHECK(buff,); @@ -474,9 +469,8 @@ void parse_util_token_extent(const wchar_t *buff, DIE_MEM(); } - for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - tok_has_next(&tok); - tok_next(&tok)) + tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); + for (; tok_has_next(&tok); tok_next(&tok)) { size_t tok_begin = tok_get_pos(&tok); size_t tok_end = tok_begin; @@ -711,9 +705,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_ wchar_t last_quote = '\0'; int unfinished; - tokenizer tok; - tok_init(&tok, cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); - + tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); for (; tok_has_next(&tok); tok_next(&tok)) { if (tok_get_pos(&tok) > pos) diff --git a/parser.cpp b/parser.cpp index 6713fddf0..8a1d29846 100644 --- a/parser.cpp +++ b/parser.cpp @@ -525,7 +525,6 @@ static int parser_is_pipe_forbidden(const wcstring &word) */ static const wchar_t *parser_find_end(const wchar_t * buff) { - tokenizer tok; int had_cmd=0; int count = 0; int error=0; @@ -533,9 +532,8 @@ static const wchar_t *parser_find_end(const wchar_t * buff) CHECK(buff, 0); - for (tok_init(&tok, buff, 0); - tok_has_next(&tok) && !error; - tok_next(&tok)) + tokenizer_t tok(buff, 0); + for (; tok_has_next(&tok) && !error; tok_next(&tok)) { int last_type = tok_last_type(&tok); switch (last_type) @@ -796,7 +794,6 @@ void parser_t::print_errors_stderr() int parser_t::eval_args(const wchar_t *line, std::vector &args) { - tokenizer tok; expand_flags_t eflags = 0; if (! show_errors) @@ -808,8 +805,8 @@ int parser_t::eval_args(const wchar_t *line, std::vector &args) eval_args may be called while evaulating another command, so we save the previous tokenizer and restore it on exit */ - tokenizer *previous_tokenizer=current_tokenizer; - int previous_pos=current_tokenizer_pos; + tokenizer_t * const previous_tokenizer = current_tokenizer; + const int previous_pos = current_tokenizer_pos; int do_loop=1; CHECK(line, 1); @@ -819,10 +816,10 @@ int parser_t::eval_args(const wchar_t *line, std::vector &args) if (this->parser_type == PARSER_TYPE_GENERAL) proc_push_interactive(0); + tokenizer_t tok(line, (show_errors ? 0 : TOK_SQUASH_ERRORS)); current_tokenizer = &tok; current_tokenizer_pos = 0; - tok_init(&tok, line, (show_errors ? 0 : TOK_SQUASH_ERRORS)); error_code=0; for (; do_loop && tok_has_next(&tok) ; tok_next(&tok)) @@ -1319,7 +1316,7 @@ job_t *parser_t::job_get_from_pid(int pid) */ void parser_t::parse_job_argument_list(process_t *p, job_t *j, - tokenizer *tok, + tokenizer_t *tok, std::vector &args, bool unskip) { @@ -1718,7 +1715,7 @@ f */ int parser_t::parse_job(process_t *p, job_t *j, - tokenizer *tok) + tokenizer_t *tok) { std::vector args; // The list that will become the argc array for the program int use_function = 1; // May functions be considered when checking what action this command represents @@ -2185,7 +2182,6 @@ int parser_t::parse_job(process_t *p, const wchar_t *end=parser_find_end(tok_string(tok) + current_tokenizer_pos); - tokenizer subtok; int make_sub_block = j->first_process != p; if (!end) @@ -2202,9 +2198,8 @@ int parser_t::parse_job(process_t *p, { int done=0; - for (tok_init(&subtok, end, 0); - !done && tok_has_next(&subtok); - tok_next(&subtok)) + tokenizer_t subtok(end, 0); + for (; ! done && tok_has_next(&subtok); tok_next(&subtok)) { switch (tok_last_type(&subtok)) @@ -2388,7 +2383,7 @@ static bool job_should_skip_elseif(const job_t *job, const block_t *current_bloc \param tok The tokenizer to read tokens from */ -void parser_t::eval_job(tokenizer *tok) +void parser_t::eval_job(tokenizer_t *tok) { ASSERT_IS_MAIN_THREAD(); job_t *j; @@ -2630,7 +2625,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type const wchar_t * const cmd = cmdStr.c_str(); size_t forbid_count; int code; - tokenizer *previous_tokenizer=current_tokenizer; + tokenizer_t *previous_tokenizer=current_tokenizer; block_t *start_current_block = current_block; /* Record the current chain so we can put it back later */ @@ -2676,8 +2671,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type this->push_block(new scope_block_t(block_type)); - current_tokenizer = new tokenizer; - tok_init(current_tokenizer, cmd, 0); + current_tokenizer = new tokenizer_t(cmd, 0); error_code = 0; @@ -2907,19 +2901,17 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *prefix) { - tokenizer tok; - tokenizer *previous_tokenizer = current_tokenizer; - int previous_pos = current_tokenizer_pos; + tokenizer_t *const previous_tokenizer = current_tokenizer; + const int previous_pos = current_tokenizer_pos; int do_loop = 1; int err = 0; CHECK(buff, 1); - current_tokenizer = &tok; - for (tok_init(&tok, buff, 0); - do_loop && tok_has_next(&tok); - tok_next(&tok)) + tokenizer_t tok(buff, 0); + current_tokenizer = &tok; + for (; do_loop && tok_has_next(&tok); tok_next(&tok)) { current_tokenizer_pos = tok_get_pos(&tok); switch (tok_last_type(&tok)) @@ -2970,7 +2962,7 @@ int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *pre tok_destroy(&tok); - current_tokenizer=previous_tokenizer; + current_tokenizer = previous_tokenizer; current_tokenizer_pos = previous_pos; error_code=0; @@ -2985,7 +2977,6 @@ int parser_t::test(const wchar_t * buff, { ASSERT_IS_MAIN_THREAD(); - tokenizer tok; /* Set to one if a command name has been given for the currently parsed process specification @@ -2994,8 +2985,8 @@ int parser_t::test(const wchar_t * buff, int err=0; int unfinished = 0; - tokenizer *previous_tokenizer=current_tokenizer; - int previous_pos=current_tokenizer_pos; + tokenizer_t * const previous_tokenizer=current_tokenizer; + const int previous_pos=current_tokenizer_pos; int block_pos[BLOCK_MAX_COUNT] = {}; block_type_t block_type[BLOCK_MAX_COUNT] = {}; @@ -3043,11 +3034,10 @@ int parser_t::test(const wchar_t * buff, } + tokenizer_t tok(buff, 0); current_tokenizer = &tok; - for (tok_init(&tok, buff, 0); - ; - tok_next(&tok)) + for (;; tok_next(&tok)) { current_tokenizer_pos = tok_get_pos(&tok); diff --git a/parser.h b/parser.h index e86539836..751182c35 100644 --- a/parser.h +++ b/parser.h @@ -295,7 +295,7 @@ struct profile_item_t wcstring cmd; }; -struct tokenizer; +struct tokenizer_t; class parser_t { @@ -316,7 +316,7 @@ private: wcstring err_buff; /** Pointer to the current tokenizer */ - tokenizer *current_tokenizer; + tokenizer_t *current_tokenizer; /** String for representing the current line */ wcstring lineinfo; @@ -344,10 +344,10 @@ private: parser_t(const parser_t&); parser_t& operator=(const parser_t&); - void parse_job_argument_list(process_t *p, job_t *j, tokenizer *tok, std::vector&, bool); - int parse_job(process_t *p, job_t *j, tokenizer *tok); + void parse_job_argument_list(process_t *p, job_t *j, tokenizer_t *tok, std::vector&, bool); + int parse_job(process_t *p, job_t *j, tokenizer_t *tok); void skipped_exec(job_t * j); - void eval_job(tokenizer *tok); + void eval_job(tokenizer_t *tok); int parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset); void print_errors(wcstring &target, const wchar_t *prefix); void print_errors_stderr(); diff --git a/reader.cpp b/reader.cpp index 5548dabfb..df39f3a29 100644 --- a/reader.cpp +++ b/reader.cpp @@ -1821,7 +1821,6 @@ static void handle_token_history(int forward, int reset) const wchar_t *str=0; long current_pos; - tokenizer tok; if (reset) { @@ -1895,10 +1894,8 @@ static void handle_token_history(int forward, int reset) { //debug( 3, L"new '%ls'", data->token_history_buff.c_str() ); - - for (tok_init(&tok, data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED); - tok_has_next(&tok); - tok_next(&tok)) + tokenizer_t tok(data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED); + for (; tok_has_next(&tok); tok_next(&tok)) { switch (tok_last_type(&tok)) { diff --git a/tokenizer.cpp b/tokenizer.cpp index e5f131f8e..1a59820e7 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -83,7 +83,7 @@ static const wchar_t *tok_desc[] = \return 0 if the system could not provide the memory needed, and 1 otherwise. */ -static int check_size(tokenizer *tok, size_t len) +static int check_size(tokenizer_t *tok, size_t len) { if (tok->last_len <= len) { @@ -103,7 +103,7 @@ static int check_size(tokenizer *tok, size_t len) /** Set the latest tokens string to be the specified error message */ -static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_message) +static void tok_call_error(tokenizer_t *tok, int error_type, const wchar_t *error_message) { tok->last_type = TOK_ERROR; tok->error = error_type; @@ -117,13 +117,13 @@ static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_ wcscpy(tok->last, error_message); } -int tok_get_error(tokenizer *tok) +int tok_get_error(tokenizer_t *tok) { return tok->error; } -void tok_init(tokenizer *tok, const wchar_t *b, int flags) +tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last(NULL), last_type(0), last_len(0), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), last_quote(0), error(0), squash_errors(false), cached_lineno_offset(0), cached_lineno_count(0) { /* We can only generate error messages on the main thread due to wgettext() thread safety issues. */ @@ -132,33 +132,28 @@ void tok_init(tokenizer *tok, const wchar_t *b, int flags) ASSERT_IS_MAIN_THREAD(); } - CHECK(tok,); - - memset(tok, 0, sizeof(tokenizer)); - CHECK(b,); - tok->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED); - tok->show_comments = !!(flags & TOK_SHOW_COMMENTS); - tok->squash_errors = !!(flags & TOK_SQUASH_ERRORS); - tok->has_next=true; + this->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED); + this->show_comments = !!(flags & TOK_SHOW_COMMENTS); + this->squash_errors = !!(flags & TOK_SQUASH_ERRORS); - tok->has_next = (*b != L'\0'); - tok->orig_buff = tok->buff = b; - tok->cached_lineno_offset = 0; - tok->cached_lineno_count = 0; - tok_next(tok); + this->has_next = (*b != L'\0'); + this->orig_buff = this->buff = b; + this->cached_lineno_offset = 0; + this->cached_lineno_count = 0; + tok_next(this); } -void tok_destroy(tokenizer *tok) +void tok_destroy(tokenizer_t *tok) { CHECK(tok,); free(tok->last); } -int tok_last_type(tokenizer *tok) +int tok_last_type(tokenizer_t *tok) { CHECK(tok, TOK_ERROR); CHECK(tok->buff, TOK_ERROR); @@ -166,14 +161,14 @@ int tok_last_type(tokenizer *tok) return tok->last_type; } -wchar_t *tok_last(tokenizer *tok) +wchar_t *tok_last(tokenizer_t *tok) { CHECK(tok, 0); return tok->last; } -int tok_has_next(tokenizer *tok) +int tok_has_next(tokenizer_t *tok) { /* Return 1 on broken tokenizer @@ -185,7 +180,7 @@ int tok_has_next(tokenizer *tok) return tok->has_next; } -int tokenizer::line_number_of_character_at_offset(size_t offset) +int tokenizer_t::line_number_of_character_at_offset(size_t offset) { // we want to return (one plus) the number of newlines at offsets less than the given offset // cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset @@ -265,24 +260,28 @@ static int myal(wchar_t c) /** Read the next token as a string */ -static void read_string(tokenizer *tok) +static void read_string(tokenizer_t *tok) { const wchar_t *start; long len; - int mode=0; int do_loop=1; int paran_count=0; start = tok->buff; bool is_first = true; + enum tok_mode_t { + mode_regular_text = 0, // regular text + mode_subshell = 1, // inside of subshell + mode_array_brackets = 2, // inside of array brackets + mode_array_brackets_and_subshell = 3 // inside of array brackets and subshell, like in '$foo[(ech' + } mode = mode_regular_text; + while (1) { if (!myal(*tok->buff)) { -// debug(1, L"%lc", *tok->buff ); - if (*tok->buff == L'\\') { tok->buff++; @@ -296,13 +295,13 @@ static void read_string(tokenizer *tok) else { /* Since we are about to increment tok->buff, decrement it first so the increment doesn't go past the end of the buffer. https://github.com/fish-shell/fish-shell/issues/389 */ - do_loop = 0; tok->buff--; + do_loop = 0; } } - else if (*tok->buff == L'\n' && mode == 0) + else if (*tok->buff == L'\n' && mode == mode_regular_text) { tok->buff--; do_loop = 0; @@ -312,33 +311,24 @@ static void read_string(tokenizer *tok) tok->buff++; continue; } - - - /* - The modes are as follows: - - 0: regular text - 1: inside of subshell - 2: inside of array brackets - 3: inside of array brackets and subshell, like in '$foo[(ech' - */ + switch (mode) { - case 0: + case mode_regular_text: { switch (*tok->buff) { case L'(': { paran_count=1; - mode = 1; + mode = mode_subshell; break; } case L'[': { if (tok->buff != start) - mode=2; + mode = mode_array_brackets; break; } @@ -356,7 +346,7 @@ static void read_string(tokenizer *tok) { tok->buff += wcslen(tok->buff); - if ((!tok->accept_unfinished)) + if (! tok->accept_unfinished) { TOK_CALL_ERROR(tok, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR); return; @@ -369,7 +359,7 @@ static void read_string(tokenizer *tok) default: { - if (!tok_is_string_character(*(tok->buff), is_first)) + if (! tok_is_string_character(*(tok->buff), is_first)) { do_loop=0; } @@ -378,8 +368,8 @@ static void read_string(tokenizer *tok) break; } - case 3: - case 1: + case mode_array_brackets_and_subshell: + case mode_subshell: switch (*tok->buff) { case L'\'': @@ -411,7 +401,7 @@ static void read_string(tokenizer *tok) paran_count--; if (paran_count == 0) { - mode--; + mode = (mode == mode_array_brackets_and_subshell ? mode_array_brackets : mode_regular_text); } break; case L'\0': @@ -419,16 +409,17 @@ static void read_string(tokenizer *tok) break; } break; - case 2: + + case mode_array_brackets: switch (*tok->buff) { case L'(': paran_count=1; - mode = 3; + mode = mode_array_brackets_and_subshell; break; case L']': - mode=0; + mode = mode_regular_text; break; case L'\0': @@ -447,7 +438,7 @@ static void read_string(tokenizer *tok) is_first = false; } - if ((!tok->accept_unfinished) && (mode!=0)) + if ((!tok->accept_unfinished) && (mode != mode_regular_text)) { TOK_CALL_ERROR(tok, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR); return; @@ -467,7 +458,7 @@ static void read_string(tokenizer *tok) /** Read the next token as a comment. */ -static void read_comment(tokenizer *tok) +static void read_comment(tokenizer_t *tok) { const wchar_t *start; @@ -487,7 +478,7 @@ static void read_comment(tokenizer *tok) /** Read a FD redirection. */ -static void read_redirect(tokenizer *tok, int fd) +static void read_redirect(tokenizer_t *tok, int fd) { int mode = -1; @@ -552,7 +543,7 @@ static void read_redirect(tokenizer *tok, int fd) } } -wchar_t tok_last_quote(tokenizer *tok) +wchar_t tok_last_quote(tokenizer_t *tok) { CHECK(tok, 0); @@ -582,7 +573,7 @@ const wchar_t *tok_get_desc(int type) } -void tok_next(tokenizer *tok) +void tok_next(tokenizer_t *tok) { CHECK(tok,); @@ -705,20 +696,18 @@ void tok_next(tokenizer *tok) } -const wchar_t *tok_string(tokenizer *tok) +const wchar_t *tok_string(tokenizer_t *tok) { return tok?tok->orig_buff:0; } wchar_t *tok_first(const wchar_t *str) { - tokenizer t; wchar_t *res=0; CHECK(str, 0); - tok_init(&t, str, TOK_SQUASH_ERRORS); - + tokenizer_t t(str, TOK_SQUASH_ERRORS); switch (tok_last_type(&t)) { case TOK_STRING: @@ -733,7 +722,7 @@ wchar_t *tok_first(const wchar_t *str) return res; } -int tok_get_pos(tokenizer *tok) +int tok_get_pos(tokenizer_t *tok) { CHECK(tok, 0); @@ -741,7 +730,7 @@ int tok_get_pos(tokenizer *tok) } -void tok_set_pos(tokenizer *tok, int pos) +void tok_set_pos(tokenizer_t *tok, int pos) { CHECK(tok,); diff --git a/tokenizer.h b/tokenizer.h index ae6b6ecc9..4d4deacca 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -61,11 +61,12 @@ enum tokenizer_error */ #define TOK_SQUASH_ERRORS 4 +typedef unsigned int tok_flags_t; /** The tokenizer struct. */ -struct tokenizer +struct tokenizer_t { /** A pointer into the original string, showing where the next token begins */ const wchar_t *buff; @@ -100,62 +101,60 @@ struct tokenizer /** Return the line number of the character at the given offset */ int line_number_of_character_at_offset(size_t offset); + /** + Constructor for a tokenizer. b is the string that is to be + tokenized. It is not copied, and should not be freed by the caller + until after the tokenizer is destroyed. + + \param b The string to tokenize + \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer + to accept incomplete tokens, such as a subshell without a closing + parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens + + */ + tokenizer_t(const wchar_t *b, tok_flags_t flags); }; -/** - Initialize the tokenizer. b is the string that is to be - tokenized. It is not copied, and should not be freed by the caller - until after the tokenizer is destroyed. - - \param tok The tokenizer to initialize - \param b The string to tokenize - \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer - to accept incomplete tokens, such as a subshell without a closing - parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens - -*/ -void tok_init(tokenizer *tok, const wchar_t *b, int flags); - /** Jump to the next token. */ -void tok_next(tokenizer *tok); +void tok_next(tokenizer_t *tok); /** Returns the type of the last token. Must be one of the values in the token_type enum. */ -int tok_last_type(tokenizer *tok); +int tok_last_type(tokenizer_t *tok); /** Returns the last token string. The string should not be freed by the caller. */ -wchar_t *tok_last(tokenizer *tok); +wchar_t *tok_last(tokenizer_t *tok); /** Returns the type of quote from the last TOK_QSTRING */ -wchar_t tok_last_quote(tokenizer *tok); +wchar_t tok_last_quote(tokenizer_t *tok); /** Returns true as long as there are more tokens left */ -int tok_has_next(tokenizer *tok); +int tok_has_next(tokenizer_t *tok); /** Returns the position of the beginning of the current token in the original string */ -int tok_get_pos(tokenizer *tok); +int tok_get_pos(tokenizer_t *tok); /** Destroy the tokenizer and free asociated memory */ -void tok_destroy(tokenizer *tok); +void tok_destroy(tokenizer_t *tok); /** Returns the original string to tokenizer */ -const wchar_t *tok_string(tokenizer *tok); +const wchar_t *tok_string(tokenizer_t *tok); /** @@ -178,7 +177,7 @@ bool tok_is_string_character(wchar_t c, bool is_first); /** Move tokenizer position */ -void tok_set_pos(tokenizer *tok, int pos); +void tok_set_pos(tokenizer_t *tok, int pos); /** Returns a string description of the specified token type @@ -188,7 +187,7 @@ const wchar_t *tok_get_desc(int type); /** Get tokenizer error type. Should only be called if tok_last_tope returns TOK_ERROR. */ -int tok_get_error(tokenizer *tok); +int tok_get_error(tokenizer_t *tok); #endif