Work towards refactoring tokenizer to be a real object

This commit is contained in:
ridiculousfish 2012-11-21 17:48:35 -08:00
parent e73be48d96
commit f545fb2491
12 changed files with 123 additions and 169 deletions

View File

@ -143,7 +143,6 @@ static void write_part(const wchar_t *begin,
int cut_at_cursor, int cut_at_cursor,
int tokenize) int tokenize)
{ {
tokenizer tok;
wcstring out; wcstring out;
wchar_t *buff; wchar_t *buff;
size_t pos; size_t pos;
@ -155,9 +154,8 @@ static void write_part(const wchar_t *begin,
buff = wcsndup(begin, end-begin); buff = wcsndup(begin, end-begin);
// fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end ); // fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end );
out.clear(); out.clear();
tokenizer_t tok(buff, TOK_ACCEPT_UNFINISHED);
for (tok_init(&tok, buff, TOK_ACCEPT_UNFINISHED); for (; tok_has_next(&tok);
tok_has_next(&tok);
tok_next(&tok)) tok_next(&tok))
{ {
if ((cut_at_cursor) && if ((cut_at_cursor) &&

View File

@ -1765,7 +1765,6 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty
completer_t completer(cmd, type); completer_t completer(cmd, type);
const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end; const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end;
tokenizer tok;
const wchar_t *current_token=0, *prev_token=0; const wchar_t *current_token=0, *prev_token=0;
wcstring current_command; wcstring current_command;
int on_command=0; int on_command=0;
@ -1807,9 +1806,8 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty
int had_cmd=0; int had_cmd=0;
int end_loop=0; int end_loop=0;
tok_init(&tok, buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
while (tok_has_next(&tok) && !end_loop) while (tok_has_next(&tok) && !end_loop)
{ {

View File

@ -84,7 +84,6 @@ static void insert_tabs(wcstring &out, int indent)
*/ */
static int indent(wcstring &out, const wcstring &in, int flags) static int indent(wcstring &out, const wcstring &in, int flags)
{ {
tokenizer tok;
int res=0; int res=0;
int is_command = 1; int is_command = 1;
int indent = 0; int indent = 0;
@ -92,8 +91,7 @@ static int indent(wcstring &out, const wcstring &in, int flags)
int prev_type = 0; int prev_type = 0;
int prev_prev_type = 0; int prev_prev_type = 0;
tok_init(&tok, in.c_str(), TOK_SHOW_COMMENTS); tokenizer_t tok(in.c_str(), TOK_SHOW_COMMENTS);
for (; tok_has_next(&tok); tok_next(&tok)) for (; tok_has_next(&tok); tok_next(&tok))
{ {
int type = tok_last_type(&tok); int type = tok_last_type(&tok);

View File

@ -289,13 +289,12 @@ static void test_convert()
*/ */
static void test_tok() static void test_tok()
{ {
tokenizer t;
say(L"Testing tokenizer"); say(L"Testing tokenizer");
say(L"Testing invalid input"); say(L"Testing invalid input");
tok_init(&t, 0, 0); tokenizer_t t(NULL, 0);
if (tok_last_type(&t) != TOK_ERROR) if (tok_last_type(&t) != TOK_ERROR)
{ {
@ -326,14 +325,12 @@ static void test_tok()
const int types[] = const int types[] =
{ {
TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END
} };
;
size_t i;
say(L"Test correct tokenization"); say(L"Test correct tokenization");
for (i=0, tok_init(&t, str, 0); i<(sizeof(types)/sizeof(int)); i++,tok_next(&t)) tokenizer_t t(str, 0);
{ for (size_t i=0; i < sizeof types / sizeof *types; i++, tok_next(&t)) {
if (types[i] != tok_last_type(&t)) if (types[i] != tok_last_type(&t))
{ {
err(L"Tokenization error:"); err(L"Tokenization error:");

View File

@ -691,8 +691,8 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command
int arg_pos = -1; int arg_pos = -1;
bool had_cmd = false; bool had_cmd = false;
tokenizer tok; tokenizer_t tok(str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
for (tok_init(&tok, str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tok_has_next(&tok); tok_next(&tok)) for (; tok_has_next(&tok); tok_next(&tok))
{ {
int last_type = tok_last_type(&tok); int last_type = tok_last_type(&tok);
@ -955,10 +955,8 @@ static void tokenize(const wchar_t * const buff, std::vector<int> &color, const
std::fill(color.begin(), color.end(), -1); std::fill(color.begin(), color.end(), -1);
tokenizer tok; tokenizer_t tok(buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
for (tok_init(&tok, buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS); for (; tok_has_next(&tok); tok_next(&tok))
tok_has_next(&tok);
tok_next(&tok))
{ {
int last_type = tok_last_type(&tok); int last_type = tok_last_type(&tok);

View File

@ -1415,10 +1415,8 @@ void history_t::add_with_file_detection(const wcstring &str)
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
path_list_t potential_paths; path_list_t potential_paths;
tokenizer tokenizer; tokenizer_t tokenizer(str.c_str(), TOK_SQUASH_ERRORS);
for (tok_init(&tokenizer, str.c_str(), TOK_SQUASH_ERRORS); for (; tok_has_next(&tokenizer); tok_next(&tokenizer))
tok_has_next(&tokenizer);
tok_next(&tokenizer))
{ {
int type = tok_last_type(&tokenizer); int type = tok_last_type(&tokenizer);
if (type == TOK_STRING) if (type == TOK_STRING)

View File

@ -326,8 +326,6 @@ static void job_or_process_extent(const wchar_t *buff,
wchar_t *buffcpy; wchar_t *buffcpy;
int finished=0; int finished=0;
tokenizer tok;
CHECK(buff,); CHECK(buff,);
if (a) if (a)
@ -365,9 +363,8 @@ static void job_or_process_extent(const wchar_t *buff,
DIE_MEM(); DIE_MEM();
} }
for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED); tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED);
tok_has_next(&tok) && !finished; for (; tok_has_next(&tok) && !finished; tok_next(&tok))
tok_next(&tok))
{ {
int tok_begin = tok_get_pos(&tok); int tok_begin = tok_get_pos(&tok);
@ -440,8 +437,6 @@ void parse_util_token_extent(const wchar_t *buff,
long pos; long pos;
wchar_t *buffcpy; wchar_t *buffcpy;
tokenizer tok;
const wchar_t *a = NULL, *b = NULL, *pa = NULL, *pb = NULL; const wchar_t *a = NULL, *b = NULL, *pa = NULL, *pb = NULL;
CHECK(buff,); CHECK(buff,);
@ -474,9 +469,8 @@ void parse_util_token_extent(const wchar_t *buff,
DIE_MEM(); DIE_MEM();
} }
for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
tok_has_next(&tok); for (; tok_has_next(&tok); tok_next(&tok))
tok_next(&tok))
{ {
size_t tok_begin = tok_get_pos(&tok); size_t tok_begin = tok_get_pos(&tok);
size_t tok_end = tok_begin; size_t tok_end = tok_begin;
@ -711,9 +705,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_
wchar_t last_quote = '\0'; wchar_t last_quote = '\0';
int unfinished; int unfinished;
tokenizer tok; tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
tok_init(&tok, cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
for (; tok_has_next(&tok); tok_next(&tok)) for (; tok_has_next(&tok); tok_next(&tok))
{ {
if (tok_get_pos(&tok) > pos) if (tok_get_pos(&tok) > pos)

View File

@ -525,7 +525,6 @@ static int parser_is_pipe_forbidden(const wcstring &word)
*/ */
static const wchar_t *parser_find_end(const wchar_t * buff) static const wchar_t *parser_find_end(const wchar_t * buff)
{ {
tokenizer tok;
int had_cmd=0; int had_cmd=0;
int count = 0; int count = 0;
int error=0; int error=0;
@ -533,9 +532,8 @@ static const wchar_t *parser_find_end(const wchar_t * buff)
CHECK(buff, 0); CHECK(buff, 0);
for (tok_init(&tok, buff, 0); tokenizer_t tok(buff, 0);
tok_has_next(&tok) && !error; for (; tok_has_next(&tok) && !error; tok_next(&tok))
tok_next(&tok))
{ {
int last_type = tok_last_type(&tok); int last_type = tok_last_type(&tok);
switch (last_type) switch (last_type)
@ -796,7 +794,6 @@ void parser_t::print_errors_stderr()
int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args) int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
{ {
tokenizer tok;
expand_flags_t eflags = 0; expand_flags_t eflags = 0;
if (! show_errors) if (! show_errors)
@ -808,8 +805,8 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
eval_args may be called while evaluating another command, so we eval_args may be called while evaluating another command, so we
save the previous tokenizer and restore it on exit save the previous tokenizer and restore it on exit
*/ */
tokenizer *previous_tokenizer=current_tokenizer; tokenizer_t * const previous_tokenizer = current_tokenizer;
int previous_pos=current_tokenizer_pos; const int previous_pos = current_tokenizer_pos;
int do_loop=1; int do_loop=1;
CHECK(line, 1); CHECK(line, 1);
@ -819,10 +816,10 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
if (this->parser_type == PARSER_TYPE_GENERAL) if (this->parser_type == PARSER_TYPE_GENERAL)
proc_push_interactive(0); proc_push_interactive(0);
tokenizer_t tok(line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
current_tokenizer = &tok; current_tokenizer = &tok;
current_tokenizer_pos = 0; current_tokenizer_pos = 0;
tok_init(&tok, line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
error_code=0; error_code=0;
for (; do_loop && tok_has_next(&tok) ; tok_next(&tok)) for (; do_loop && tok_has_next(&tok) ; tok_next(&tok))
@ -1319,7 +1316,7 @@ job_t *parser_t::job_get_from_pid(int pid)
*/ */
void parser_t::parse_job_argument_list(process_t *p, void parser_t::parse_job_argument_list(process_t *p,
job_t *j, job_t *j,
tokenizer *tok, tokenizer_t *tok,
std::vector<completion_t> &args, std::vector<completion_t> &args,
bool unskip) bool unskip)
{ {
@ -1718,7 +1715,7 @@ f
*/ */
int parser_t::parse_job(process_t *p, int parser_t::parse_job(process_t *p,
job_t *j, job_t *j,
tokenizer *tok) tokenizer_t *tok)
{ {
std::vector<completion_t> args; // The list that will become the argc array for the program std::vector<completion_t> args; // The list that will become the argc array for the program
int use_function = 1; // May functions be considered when checking what action this command represents int use_function = 1; // May functions be considered when checking what action this command represents
@ -2185,7 +2182,6 @@ int parser_t::parse_job(process_t *p,
const wchar_t *end=parser_find_end(tok_string(tok) + const wchar_t *end=parser_find_end(tok_string(tok) +
current_tokenizer_pos); current_tokenizer_pos);
tokenizer subtok;
int make_sub_block = j->first_process != p; int make_sub_block = j->first_process != p;
if (!end) if (!end)
@ -2202,9 +2198,8 @@ int parser_t::parse_job(process_t *p,
{ {
int done=0; int done=0;
for (tok_init(&subtok, end, 0); tokenizer_t subtok(end, 0);
!done && tok_has_next(&subtok); for (; ! done && tok_has_next(&subtok); tok_next(&subtok))
tok_next(&subtok))
{ {
switch (tok_last_type(&subtok)) switch (tok_last_type(&subtok))
@ -2388,7 +2383,7 @@ static bool job_should_skip_elseif(const job_t *job, const block_t *current_bloc
\param tok The tokenizer to read tokens from \param tok The tokenizer to read tokens from
*/ */
void parser_t::eval_job(tokenizer *tok) void parser_t::eval_job(tokenizer_t *tok)
{ {
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
job_t *j; job_t *j;
@ -2630,7 +2625,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type
const wchar_t * const cmd = cmdStr.c_str(); const wchar_t * const cmd = cmdStr.c_str();
size_t forbid_count; size_t forbid_count;
int code; int code;
tokenizer *previous_tokenizer=current_tokenizer; tokenizer_t *previous_tokenizer=current_tokenizer;
block_t *start_current_block = current_block; block_t *start_current_block = current_block;
/* Record the current chain so we can put it back later */ /* Record the current chain so we can put it back later */
@ -2676,8 +2671,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type
this->push_block(new scope_block_t(block_type)); this->push_block(new scope_block_t(block_type));
current_tokenizer = new tokenizer; current_tokenizer = new tokenizer_t(cmd, 0);
tok_init(current_tokenizer, cmd, 0);
error_code = 0; error_code = 0;
@ -2907,19 +2901,17 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha
int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *prefix) int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *prefix)
{ {
tokenizer tok; tokenizer_t *const previous_tokenizer = current_tokenizer;
tokenizer *previous_tokenizer = current_tokenizer; const int previous_pos = current_tokenizer_pos;
int previous_pos = current_tokenizer_pos;
int do_loop = 1; int do_loop = 1;
int err = 0; int err = 0;
CHECK(buff, 1); CHECK(buff, 1);
current_tokenizer = &tok;
for (tok_init(&tok, buff, 0); tokenizer_t tok(buff, 0);
do_loop && tok_has_next(&tok); current_tokenizer = &tok;
tok_next(&tok)) for (; do_loop && tok_has_next(&tok); tok_next(&tok))
{ {
current_tokenizer_pos = tok_get_pos(&tok); current_tokenizer_pos = tok_get_pos(&tok);
switch (tok_last_type(&tok)) switch (tok_last_type(&tok))
@ -2970,7 +2962,7 @@ int parser_t::test_args(const wchar_t * buff, wcstring *out, const wchar_t *pre
tok_destroy(&tok); tok_destroy(&tok);
current_tokenizer=previous_tokenizer; current_tokenizer = previous_tokenizer;
current_tokenizer_pos = previous_pos; current_tokenizer_pos = previous_pos;
error_code=0; error_code=0;
@ -2985,7 +2977,6 @@ int parser_t::test(const wchar_t * buff,
{ {
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
tokenizer tok;
/* /*
Set to one if a command name has been given for the currently Set to one if a command name has been given for the currently
parsed process specification parsed process specification
@ -2994,8 +2985,8 @@ int parser_t::test(const wchar_t * buff,
int err=0; int err=0;
int unfinished = 0; int unfinished = 0;
tokenizer *previous_tokenizer=current_tokenizer; tokenizer_t * const previous_tokenizer=current_tokenizer;
int previous_pos=current_tokenizer_pos; const int previous_pos=current_tokenizer_pos;
int block_pos[BLOCK_MAX_COUNT] = {}; int block_pos[BLOCK_MAX_COUNT] = {};
block_type_t block_type[BLOCK_MAX_COUNT] = {}; block_type_t block_type[BLOCK_MAX_COUNT] = {};
@ -3043,11 +3034,10 @@ int parser_t::test(const wchar_t * buff,
} }
tokenizer_t tok(buff, 0);
current_tokenizer = &tok; current_tokenizer = &tok;
for (tok_init(&tok, buff, 0); for (;; tok_next(&tok))
;
tok_next(&tok))
{ {
current_tokenizer_pos = tok_get_pos(&tok); current_tokenizer_pos = tok_get_pos(&tok);

View File

@ -295,7 +295,7 @@ struct profile_item_t
wcstring cmd; wcstring cmd;
}; };
struct tokenizer; struct tokenizer_t;
class parser_t class parser_t
{ {
@ -316,7 +316,7 @@ private:
wcstring err_buff; wcstring err_buff;
/** Pointer to the current tokenizer */ /** Pointer to the current tokenizer */
tokenizer *current_tokenizer; tokenizer_t *current_tokenizer;
/** String for representing the current line */ /** String for representing the current line */
wcstring lineinfo; wcstring lineinfo;
@ -344,10 +344,10 @@ private:
parser_t(const parser_t&); parser_t(const parser_t&);
parser_t& operator=(const parser_t&); parser_t& operator=(const parser_t&);
void parse_job_argument_list(process_t *p, job_t *j, tokenizer *tok, std::vector<completion_t>&, bool); void parse_job_argument_list(process_t *p, job_t *j, tokenizer_t *tok, std::vector<completion_t>&, bool);
int parse_job(process_t *p, job_t *j, tokenizer *tok); int parse_job(process_t *p, job_t *j, tokenizer_t *tok);
void skipped_exec(job_t * j); void skipped_exec(job_t * j);
void eval_job(tokenizer *tok); void eval_job(tokenizer_t *tok);
int parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset); int parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset);
void print_errors(wcstring &target, const wchar_t *prefix); void print_errors(wcstring &target, const wchar_t *prefix);
void print_errors_stderr(); void print_errors_stderr();

View File

@ -1821,7 +1821,6 @@ static void handle_token_history(int forward, int reset)
const wchar_t *str=0; const wchar_t *str=0;
long current_pos; long current_pos;
tokenizer tok;
if (reset) if (reset)
{ {
@ -1895,10 +1894,8 @@ static void handle_token_history(int forward, int reset)
{ {
//debug( 3, L"new '%ls'", data->token_history_buff.c_str() ); //debug( 3, L"new '%ls'", data->token_history_buff.c_str() );
tokenizer_t tok(data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
for (tok_init(&tok, data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED); for (; tok_has_next(&tok); tok_next(&tok))
tok_has_next(&tok);
tok_next(&tok))
{ {
switch (tok_last_type(&tok)) switch (tok_last_type(&tok))
{ {

View File

@ -83,7 +83,7 @@ static const wchar_t *tok_desc[] =
\return 0 if the system could not provide the memory needed, and 1 otherwise. \return 0 if the system could not provide the memory needed, and 1 otherwise.
*/ */
static int check_size(tokenizer *tok, size_t len) static int check_size(tokenizer_t *tok, size_t len)
{ {
if (tok->last_len <= len) if (tok->last_len <= len)
{ {
@ -103,7 +103,7 @@ static int check_size(tokenizer *tok, size_t len)
/** /**
Set the latest tokens string to be the specified error message Set the latest tokens string to be the specified error message
*/ */
static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_message) static void tok_call_error(tokenizer_t *tok, int error_type, const wchar_t *error_message)
{ {
tok->last_type = TOK_ERROR; tok->last_type = TOK_ERROR;
tok->error = error_type; tok->error = error_type;
@ -117,13 +117,13 @@ static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_
wcscpy(tok->last, error_message); wcscpy(tok->last, error_message);
} }
int tok_get_error(tokenizer *tok) int tok_get_error(tokenizer_t *tok)
{ {
return tok->error; return tok->error;
} }
void tok_init(tokenizer *tok, const wchar_t *b, int flags) tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last(NULL), last_type(0), last_len(0), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), last_quote(0), error(0), squash_errors(false), cached_lineno_offset(0), cached_lineno_count(0)
{ {
/* We can only generate error messages on the main thread due to wgettext() thread safety issues. */ /* We can only generate error messages on the main thread due to wgettext() thread safety issues. */
@ -132,33 +132,28 @@ void tok_init(tokenizer *tok, const wchar_t *b, int flags)
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
} }
CHECK(tok,);
memset(tok, 0, sizeof(tokenizer));
CHECK(b,); CHECK(b,);
tok->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED); this->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED);
tok->show_comments = !!(flags & TOK_SHOW_COMMENTS); this->show_comments = !!(flags & TOK_SHOW_COMMENTS);
tok->squash_errors = !!(flags & TOK_SQUASH_ERRORS); this->squash_errors = !!(flags & TOK_SQUASH_ERRORS);
tok->has_next=true;
tok->has_next = (*b != L'\0'); this->has_next = (*b != L'\0');
tok->orig_buff = tok->buff = b; this->orig_buff = this->buff = b;
tok->cached_lineno_offset = 0; this->cached_lineno_offset = 0;
tok->cached_lineno_count = 0; this->cached_lineno_count = 0;
tok_next(tok); tok_next(this);
} }
void tok_destroy(tokenizer *tok) void tok_destroy(tokenizer_t *tok)
{ {
CHECK(tok,); CHECK(tok,);
free(tok->last); free(tok->last);
} }
int tok_last_type(tokenizer *tok) int tok_last_type(tokenizer_t *tok)
{ {
CHECK(tok, TOK_ERROR); CHECK(tok, TOK_ERROR);
CHECK(tok->buff, TOK_ERROR); CHECK(tok->buff, TOK_ERROR);
@ -166,14 +161,14 @@ int tok_last_type(tokenizer *tok)
return tok->last_type; return tok->last_type;
} }
wchar_t *tok_last(tokenizer *tok) wchar_t *tok_last(tokenizer_t *tok)
{ {
CHECK(tok, 0); CHECK(tok, 0);
return tok->last; return tok->last;
} }
int tok_has_next(tokenizer *tok) int tok_has_next(tokenizer_t *tok)
{ {
/* /*
Return 1 on broken tokenizer Return 1 on broken tokenizer
@ -185,7 +180,7 @@ int tok_has_next(tokenizer *tok)
return tok->has_next; return tok->has_next;
} }
int tokenizer::line_number_of_character_at_offset(size_t offset) int tokenizer_t::line_number_of_character_at_offset(size_t offset)
{ {
// we want to return (one plus) the number of newlines at offsets less than the given offset // we want to return (one plus) the number of newlines at offsets less than the given offset
// cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset // cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset
@ -265,24 +260,28 @@ static int myal(wchar_t c)
/** /**
Read the next token as a string Read the next token as a string
*/ */
static void read_string(tokenizer *tok) static void read_string(tokenizer_t *tok)
{ {
const wchar_t *start; const wchar_t *start;
long len; long len;
int mode=0;
int do_loop=1; int do_loop=1;
int paran_count=0; int paran_count=0;
start = tok->buff; start = tok->buff;
bool is_first = true; bool is_first = true;
enum tok_mode_t {
mode_regular_text = 0, // regular text
mode_subshell = 1, // inside of subshell
mode_array_brackets = 2, // inside of array brackets
mode_array_brackets_and_subshell = 3 // inside of array brackets and subshell, like in '$foo[(ech'
} mode = mode_regular_text;
while (1) while (1)
{ {
if (!myal(*tok->buff)) if (!myal(*tok->buff))
{ {
// debug(1, L"%lc", *tok->buff );
if (*tok->buff == L'\\') if (*tok->buff == L'\\')
{ {
tok->buff++; tok->buff++;
@ -296,13 +295,13 @@ static void read_string(tokenizer *tok)
else else
{ {
/* Since we are about to increment tok->buff, decrement it first so the increment doesn't go past the end of the buffer. https://github.com/fish-shell/fish-shell/issues/389 */ /* Since we are about to increment tok->buff, decrement it first so the increment doesn't go past the end of the buffer. https://github.com/fish-shell/fish-shell/issues/389 */
do_loop = 0;
tok->buff--; tok->buff--;
do_loop = 0;
} }
} }
else if (*tok->buff == L'\n' && mode == 0) else if (*tok->buff == L'\n' && mode == mode_regular_text)
{ {
tok->buff--; tok->buff--;
do_loop = 0; do_loop = 0;
@ -312,33 +311,24 @@ static void read_string(tokenizer *tok)
tok->buff++; tok->buff++;
continue; continue;
} }
/*
The modes are as follows:
0: regular text
1: inside of subshell
2: inside of array brackets
3: inside of array brackets and subshell, like in '$foo[(ech'
*/
switch (mode) switch (mode)
{ {
case 0: case mode_regular_text:
{ {
switch (*tok->buff) switch (*tok->buff)
{ {
case L'(': case L'(':
{ {
paran_count=1; paran_count=1;
mode = 1; mode = mode_subshell;
break; break;
} }
case L'[': case L'[':
{ {
if (tok->buff != start) if (tok->buff != start)
mode=2; mode = mode_array_brackets;
break; break;
} }
@ -356,7 +346,7 @@ static void read_string(tokenizer *tok)
{ {
tok->buff += wcslen(tok->buff); tok->buff += wcslen(tok->buff);
if ((!tok->accept_unfinished)) if (! tok->accept_unfinished)
{ {
TOK_CALL_ERROR(tok, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR); TOK_CALL_ERROR(tok, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR);
return; return;
@ -369,7 +359,7 @@ static void read_string(tokenizer *tok)
default: default:
{ {
if (!tok_is_string_character(*(tok->buff), is_first)) if (! tok_is_string_character(*(tok->buff), is_first))
{ {
do_loop=0; do_loop=0;
} }
@ -378,8 +368,8 @@ static void read_string(tokenizer *tok)
break; break;
} }
case 3: case mode_array_brackets_and_subshell:
case 1: case mode_subshell:
switch (*tok->buff) switch (*tok->buff)
{ {
case L'\'': case L'\'':
@ -411,7 +401,7 @@ static void read_string(tokenizer *tok)
paran_count--; paran_count--;
if (paran_count == 0) if (paran_count == 0)
{ {
mode--; mode = (mode == mode_array_brackets_and_subshell ? mode_array_brackets : mode_regular_text);
} }
break; break;
case L'\0': case L'\0':
@ -419,16 +409,17 @@ static void read_string(tokenizer *tok)
break; break;
} }
break; break;
case 2:
case mode_array_brackets:
switch (*tok->buff) switch (*tok->buff)
{ {
case L'(': case L'(':
paran_count=1; paran_count=1;
mode = 3; mode = mode_array_brackets_and_subshell;
break; break;
case L']': case L']':
mode=0; mode = mode_regular_text;
break; break;
case L'\0': case L'\0':
@ -447,7 +438,7 @@ static void read_string(tokenizer *tok)
is_first = false; is_first = false;
} }
if ((!tok->accept_unfinished) && (mode!=0)) if ((!tok->accept_unfinished) && (mode != mode_regular_text))
{ {
TOK_CALL_ERROR(tok, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR); TOK_CALL_ERROR(tok, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR);
return; return;
@ -467,7 +458,7 @@ static void read_string(tokenizer *tok)
/** /**
Read the next token as a comment. Read the next token as a comment.
*/ */
static void read_comment(tokenizer *tok) static void read_comment(tokenizer_t *tok)
{ {
const wchar_t *start; const wchar_t *start;
@ -487,7 +478,7 @@ static void read_comment(tokenizer *tok)
/** /**
Read a FD redirection. Read a FD redirection.
*/ */
static void read_redirect(tokenizer *tok, int fd) static void read_redirect(tokenizer_t *tok, int fd)
{ {
int mode = -1; int mode = -1;
@ -552,7 +543,7 @@ static void read_redirect(tokenizer *tok, int fd)
} }
} }
wchar_t tok_last_quote(tokenizer *tok) wchar_t tok_last_quote(tokenizer_t *tok)
{ {
CHECK(tok, 0); CHECK(tok, 0);
@ -582,7 +573,7 @@ const wchar_t *tok_get_desc(int type)
} }
void tok_next(tokenizer *tok) void tok_next(tokenizer_t *tok)
{ {
CHECK(tok,); CHECK(tok,);
@ -705,20 +696,18 @@ void tok_next(tokenizer *tok)
} }
const wchar_t *tok_string(tokenizer *tok) const wchar_t *tok_string(tokenizer_t *tok)
{ {
return tok?tok->orig_buff:0; return tok?tok->orig_buff:0;
} }
wchar_t *tok_first(const wchar_t *str) wchar_t *tok_first(const wchar_t *str)
{ {
tokenizer t;
wchar_t *res=0; wchar_t *res=0;
CHECK(str, 0); CHECK(str, 0);
tok_init(&t, str, TOK_SQUASH_ERRORS); tokenizer_t t(str, TOK_SQUASH_ERRORS);
switch (tok_last_type(&t)) switch (tok_last_type(&t))
{ {
case TOK_STRING: case TOK_STRING:
@ -733,7 +722,7 @@ wchar_t *tok_first(const wchar_t *str)
return res; return res;
} }
int tok_get_pos(tokenizer *tok) int tok_get_pos(tokenizer_t *tok)
{ {
CHECK(tok, 0); CHECK(tok, 0);
@ -741,7 +730,7 @@ int tok_get_pos(tokenizer *tok)
} }
void tok_set_pos(tokenizer *tok, int pos) void tok_set_pos(tokenizer_t *tok, int pos)
{ {
CHECK(tok,); CHECK(tok,);

View File

@ -61,11 +61,12 @@ enum tokenizer_error
*/ */
#define TOK_SQUASH_ERRORS 4 #define TOK_SQUASH_ERRORS 4
typedef unsigned int tok_flags_t;
/** /**
The tokenizer struct. The tokenizer struct.
*/ */
struct tokenizer struct tokenizer_t
{ {
/** A pointer into the original string, showing where the next token begins */ /** A pointer into the original string, showing where the next token begins */
const wchar_t *buff; const wchar_t *buff;
@ -100,62 +101,60 @@ struct tokenizer
/** Return the line number of the character at the given offset */ /** Return the line number of the character at the given offset */
int line_number_of_character_at_offset(size_t offset); int line_number_of_character_at_offset(size_t offset);
/**
Constructor for a tokenizer. b is the string that is to be
tokenized. It is not copied, and should not be freed by the caller
until after the tokenizer is destroyed.
\param b The string to tokenize
\param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
to accept incomplete tokens, such as a subshell without a closing
parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
*/
tokenizer_t(const wchar_t *b, tok_flags_t flags);
}; };
/**
Initialize the tokenizer. b is the string that is to be
tokenized. It is not copied, and should not be freed by the caller
until after the tokenizer is destroyed.
\param tok The tokenizer to initialize
\param b The string to tokenize
\param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
to accept incomplete tokens, such as a subshell without a closing
parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
*/
void tok_init(tokenizer *tok, const wchar_t *b, int flags);
/** /**
Jump to the next token. Jump to the next token.
*/ */
void tok_next(tokenizer *tok); void tok_next(tokenizer_t *tok);
/** /**
Returns the type of the last token. Must be one of the values in the token_type enum. Returns the type of the last token. Must be one of the values in the token_type enum.
*/ */
int tok_last_type(tokenizer *tok); int tok_last_type(tokenizer_t *tok);
/** /**
Returns the last token string. The string should not be freed by the caller. Returns the last token string. The string should not be freed by the caller.
*/ */
wchar_t *tok_last(tokenizer *tok); wchar_t *tok_last(tokenizer_t *tok);
/** /**
Returns the type of quote from the last TOK_QSTRING Returns the type of quote from the last TOK_QSTRING
*/ */
wchar_t tok_last_quote(tokenizer *tok); wchar_t tok_last_quote(tokenizer_t *tok);
/** /**
Returns true as long as there are more tokens left Returns true as long as there are more tokens left
*/ */
int tok_has_next(tokenizer *tok); int tok_has_next(tokenizer_t *tok);
/** /**
Returns the position of the beginning of the current token in the original string Returns the position of the beginning of the current token in the original string
*/ */
int tok_get_pos(tokenizer *tok); int tok_get_pos(tokenizer_t *tok);
/** /**
Destroy the tokenizer and free associated memory Destroy the tokenizer and free associated memory
*/ */
void tok_destroy(tokenizer *tok); void tok_destroy(tokenizer_t *tok);
/** /**
Returns the original string to tokenizer Returns the original string to tokenizer
*/ */
const wchar_t *tok_string(tokenizer *tok); const wchar_t *tok_string(tokenizer_t *tok);
/** /**
@ -178,7 +177,7 @@ bool tok_is_string_character(wchar_t c, bool is_first);
/** /**
Move tokenizer position Move tokenizer position
*/ */
void tok_set_pos(tokenizer *tok, int pos); void tok_set_pos(tokenizer_t *tok, int pos);
/** /**
Returns a string description of the specified token type Returns a string description of the specified token type
@ -188,7 +187,7 @@ const wchar_t *tok_get_desc(int type);
/** /**
Get tokenizer error type. Should only be called if tok_last_type returns TOK_ERROR. Get tokenizer error type. Should only be called if tok_last_type returns TOK_ERROR.
*/ */
int tok_get_error(tokenizer *tok); int tok_get_error(tokenizer_t *tok);
#endif #endif