From f545fb2491d36e02dc6c4005ada86be8678bba78 Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Wed, 21 Nov 2012 17:48:35 -0800
Subject: [PATCH] Work towards refactoring tokenizer to be a real object

---
 builtin_commandline.cpp |   6 +--
 complete.cpp            |   6 +--
 fish_indent.cpp         |   4 +-
 fish_tests.cpp          |  13 ++---
 highlight.cpp           |  10 ++--
 history.cpp             |   6 +--
 parse_util.cpp          |  18 ++-----
 parser.cpp              |  54 ++++++++------------
 parser.h                |  10 ++--
 reader.cpp              |   7 +--
 tokenizer.cpp           | 109 ++++++++++++++++++----------------------
 tokenizer.h             |  49 +++++++++---------
 12 files changed, 123 insertions(+), 169 deletions(-)

diff --git a/builtin_commandline.cpp b/builtin_commandline.cpp
index cb3ee9e3e..07dc2c969 100644
--- a/builtin_commandline.cpp
+++ b/builtin_commandline.cpp
@@ -143,7 +143,6 @@ static void write_part(const wchar_t *begin,
                        int cut_at_cursor,
                        int tokenize)
 {
-    tokenizer tok;
     wcstring out;
     wchar_t *buff;
     size_t pos;
@@ -155,9 +154,8 @@ static void write_part(const wchar_t *begin,
         buff = wcsndup(begin, end-begin);
 //    fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end );
         out.clear();
-
-        for (tok_init(&tok, buff, TOK_ACCEPT_UNFINISHED);
-                tok_has_next(&tok);
+        tokenizer_t tok(buff, TOK_ACCEPT_UNFINISHED);
+        for (; tok_has_next(&tok);
                 tok_next(&tok))
         {
             if ((cut_at_cursor) &&
diff --git a/complete.cpp b/complete.cpp
index ce0944854..c21beafab 100644
--- a/complete.cpp
+++ b/complete.cpp
@@ -1765,7 +1765,6 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty
     completer_t completer(cmd, type);
 
     const wchar_t *tok_begin, *tok_end, *cmdsubst_begin, *cmdsubst_end, *prev_begin, *prev_end;
-    tokenizer tok;
     const wchar_t *current_token=0, *prev_token=0;
     wcstring current_command;
     int on_command=0;
@@ -1807,9 +1806,8 @@ void complete(const wcstring &cmd, std::vector<completion_t> &comps, complete_ty
 
         int had_cmd=0;
         int end_loop=0;
-
-        tok_init(&tok, buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
-
+        
+        tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
         while (tok_has_next(&tok) && !end_loop)
         {
 
diff --git a/fish_indent.cpp b/fish_indent.cpp
index dedf11f27..c50eeba65 100644
--- a/fish_indent.cpp
+++ b/fish_indent.cpp
@@ -84,7 +84,6 @@ static void insert_tabs(wcstring &out, int indent)
  */
 static int indent(wcstring &out, const wcstring &in, int flags)
 {
-    tokenizer tok;
     int res=0;
     int is_command = 1;
     int indent = 0;
@@ -92,8 +91,7 @@ static int indent(wcstring &out, const wcstring &in, int flags)
     int prev_type = 0;
     int prev_prev_type = 0;
 
-    tok_init(&tok, in.c_str(), TOK_SHOW_COMMENTS);
-
+    tokenizer_t tok(in.c_str(), TOK_SHOW_COMMENTS);
     for (; tok_has_next(&tok); tok_next(&tok))
     {
         int type = tok_last_type(&tok);
diff --git a/fish_tests.cpp b/fish_tests.cpp
index 3a1b382db..a2ac00c47 100644
--- a/fish_tests.cpp
+++ b/fish_tests.cpp
@@ -289,13 +289,12 @@ static void test_convert()
 */
 static void test_tok()
 {
-    tokenizer t;
 
     say(L"Testing tokenizer");
 
 
     say(L"Testing invalid input");
-    tok_init(&t, 0, 0);
+    tokenizer_t t(NULL, 0);
 
     if (tok_last_type(&t) != TOK_ERROR)
     {
@@ -326,14 +325,12 @@ static void test_tok()
         const int types[] =
         {
             TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END
-        }
-        ;
-        size_t i;
+        };
 
         say(L"Test correct tokenization");
-
-        for (i=0, tok_init(&t, str, 0); i<(sizeof(types)/sizeof(int)); i++,tok_next(&t))
-        {
+        
+        tokenizer_t t(str, 0);
+        for (size_t i=0; i < sizeof types / sizeof *types; i++, tok_next(&t)) {
             if (types[i] != tok_last_type(&t))
             {
                 err(L"Tokenization error:");
diff --git a/highlight.cpp b/highlight.cpp
index 543f33aaf..77acd8dad 100644
--- a/highlight.cpp
+++ b/highlight.cpp
@@ -691,8 +691,8 @@ static bool autosuggest_parse_command(const wcstring &str, wcstring *out_command
     int arg_pos = -1;
 
     bool had_cmd = false;
-    tokenizer tok;
-    for (tok_init(&tok, str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS); tok_has_next(&tok); tok_next(&tok))
+    tokenizer_t tok(str.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
+    for (; tok_has_next(&tok); tok_next(&tok))
     {
         int last_type = tok_last_type(&tok);
 
@@ -955,10 +955,8 @@ static void tokenize(const wchar_t * const buff, std::vector<int> &color, const
 
     std::fill(color.begin(), color.end(), -1);
 
-    tokenizer tok;
-    for (tok_init(&tok, buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
-            tok_has_next(&tok);
-            tok_next(&tok))
+    tokenizer_t tok(buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
+    for (; tok_has_next(&tok); tok_next(&tok))
     {
         int last_type = tok_last_type(&tok);
 
diff --git a/history.cpp b/history.cpp
index dd12eb95b..0a87e8b9e 100644
--- a/history.cpp
+++ b/history.cpp
@@ -1415,10 +1415,8 @@ void history_t::add_with_file_detection(const wcstring &str)
     ASSERT_IS_MAIN_THREAD();
     path_list_t potential_paths;
 
-    tokenizer tokenizer;
-    for (tok_init(&tokenizer, str.c_str(), TOK_SQUASH_ERRORS);
-            tok_has_next(&tokenizer);
-            tok_next(&tokenizer))
+    tokenizer_t tokenizer(str.c_str(), TOK_SQUASH_ERRORS);
+    for (; tok_has_next(&tokenizer); tok_next(&tokenizer))
     {
         int type = tok_last_type(&tokenizer);
         if (type == TOK_STRING)
diff --git a/parse_util.cpp b/parse_util.cpp
index 711b22e2c..10072ec86 100644
--- a/parse_util.cpp
+++ b/parse_util.cpp
@@ -326,8 +326,6 @@ static void job_or_process_extent(const wchar_t *buff,
     wchar_t *buffcpy;
     int finished=0;
 
-    tokenizer tok;
-
     CHECK(buff,);
 
     if (a)
@@ -365,9 +363,8 @@ static void job_or_process_extent(const wchar_t *buff,
         DIE_MEM();
     }
 
-    for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED);
-            tok_has_next(&tok) && !finished;
-            tok_next(&tok))
+    tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED);
+    for (; tok_has_next(&tok) && !finished; tok_next(&tok))
     {
         int tok_begin = tok_get_pos(&tok);
 
@@ -440,8 +437,6 @@ void parse_util_token_extent(const wchar_t *buff,
     long pos;
     wchar_t *buffcpy;
 
-    tokenizer tok;
-
     const wchar_t *a = NULL, *b = NULL, *pa = NULL, *pb = NULL;
 
     CHECK(buff,);
@@ -474,9 +469,8 @@ void parse_util_token_extent(const wchar_t *buff,
         DIE_MEM();
     }
 
-    for (tok_init(&tok, buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
-            tok_has_next(&tok);
-            tok_next(&tok))
+    tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
+    for (; tok_has_next(&tok); tok_next(&tok))
     {
         size_t tok_begin = tok_get_pos(&tok);
         size_t tok_end = tok_begin;
@@ -711,9 +705,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_
     wchar_t last_quote = '\0';
     int unfinished;
 
-    tokenizer tok;
-    tok_init(&tok, cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
-
+    tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
     for (; tok_has_next(&tok); tok_next(&tok))
     {
         if (tok_get_pos(&tok) > pos)
diff --git a/parser.cpp b/parser.cpp
index 6713fddf0..8a1d29846 100644
--- a/parser.cpp
+++ b/parser.cpp
@@ -525,7 +525,6 @@ static int parser_is_pipe_forbidden(const wcstring &word)
 */
 static const wchar_t *parser_find_end(const wchar_t * buff)
 {
-    tokenizer tok;
     int had_cmd=0;
     int count = 0;
     int error=0;
@@ -533,9 +532,8 @@ static const wchar_t *parser_find_end(const wchar_t * buff)
 
     CHECK(buff, 0);
 
-    for (tok_init(&tok, buff, 0);
-            tok_has_next(&tok) && !error;
-            tok_next(&tok))
+    tokenizer_t tok(buff, 0);
+    for (; tok_has_next(&tok) && !error; tok_next(&tok))
     {
         int last_type = tok_last_type(&tok);
         switch (last_type)
@@ -796,7 +794,6 @@ void parser_t::print_errors_stderr()
 
 int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
 {
-    tokenizer tok;
 
     expand_flags_t eflags = 0;
     if (! show_errors)
@@ -808,8 +805,8 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
       eval_args may be called while evaulating another command, so we
       save the previous tokenizer and restore it on exit
     */
-    tokenizer *previous_tokenizer=current_tokenizer;
-    int previous_pos=current_tokenizer_pos;
+    tokenizer_t * const previous_tokenizer = current_tokenizer;
+    const int previous_pos = current_tokenizer_pos;
     int do_loop=1;
 
     CHECK(line, 1);
@@ -819,10 +816,10 @@ int parser_t::eval_args(const wchar_t *line, std::vector<completion_t> &args)
     if (this->parser_type == PARSER_TYPE_GENERAL)
         proc_push_interactive(0);
 
+    tokenizer_t tok(line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
     current_tokenizer = &tok;
     current_tokenizer_pos = 0;
 
-    tok_init(&tok, line, (show_errors ? 0 : TOK_SQUASH_ERRORS));
     error_code=0;
 
     for (; do_loop && tok_has_next(&tok) ; tok_next(&tok))
@@ -1319,7 +1316,7 @@ job_t *parser_t::job_get_from_pid(int pid)
 */
 void parser_t::parse_job_argument_list(process_t *p,
                                        job_t *j,
-                                       tokenizer *tok,
+                                       tokenizer_t *tok,
                                        std::vector<completion_t> &args,
                                        bool unskip)
 {
@@ -1718,7 +1715,7 @@ f
 */
 int parser_t::parse_job(process_t *p,
                         job_t *j,
-                        tokenizer *tok)
+                        tokenizer_t *tok)
 {
     std::vector<completion_t> args; // The list that will become the argc array for the program
     int use_function = 1;   // May functions be considered when checking what action this command represents
@@ -2185,7 +2182,6 @@ int parser_t::parse_job(process_t *p,
 
         const wchar_t *end=parser_find_end(tok_string(tok) +
                                            current_tokenizer_pos);
-        tokenizer subtok;
         int make_sub_block = j->first_process != p;
 
         if (!end)
@@ -2202,9 +2198,8 @@ int parser_t::parse_job(process_t *p,
             {
                 int done=0;
 
-                for (tok_init(&subtok, end, 0);
-                        !done && tok_has_next(&subtok);
-                        tok_next(&subtok))
+                tokenizer_t subtok(end, 0);
+                for (; ! done && tok_has_next(&subtok); tok_next(&subtok))
                 {
 
                     switch (tok_last_type(&subtok))
@@ -2388,7 +2383,7 @@ static bool job_should_skip_elseif(const job_t *job, const block_t *current_bloc
    \param tok The tokenizer to read tokens from
 */
 
-void parser_t::eval_job(tokenizer *tok)
+void parser_t::eval_job(tokenizer_t *tok)
 {
     ASSERT_IS_MAIN_THREAD();
     job_t *j;
@@ -2630,7 +2625,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type
     const wchar_t * const cmd = cmdStr.c_str();
     size_t forbid_count;
     int code;
-    tokenizer *previous_tokenizer=current_tokenizer;
+    tokenizer_t *previous_tokenizer=current_tokenizer;
     block_t *start_current_block = current_block;
 
     /* Record the current chain so we can put it back later */
@@ -2676,8 +2671,7 @@ int parser_t::eval(const wcstring &cmdStr, const io_chain_t &io, enum block_type
 
     this->push_block(new scope_block_t(block_type));
 
-    current_tokenizer = new tokenizer;
-    tok_init(current_tokenizer, cmd, 0);
+    current_tokenizer = new tokenizer_t(cmd, 0);
 
     error_code = 0;
 
@@ -2907,19 +2901,17 @@ int parser_t::parser_test_argument(const wchar_t *arg, wcstring *out, const wcha
 
 int parser_t::test_args(const  wchar_t * buff, wcstring *out, const wchar_t *prefix)
 {
-    tokenizer tok;
-    tokenizer *previous_tokenizer = current_tokenizer;
-    int previous_pos = current_tokenizer_pos;
+    tokenizer_t *const previous_tokenizer = current_tokenizer;
+    const int previous_pos = current_tokenizer_pos;
     int do_loop = 1;
     int err = 0;
 
     CHECK(buff, 1);
 
-    current_tokenizer = &tok;
 
-    for (tok_init(&tok, buff, 0);
-            do_loop && tok_has_next(&tok);
-            tok_next(&tok))
+    tokenizer_t tok(buff, 0);
+    current_tokenizer = &tok;
+    for (; do_loop && tok_has_next(&tok); tok_next(&tok))
     {
         current_tokenizer_pos = tok_get_pos(&tok);
         switch (tok_last_type(&tok))
@@ -2970,7 +2962,7 @@ int parser_t::test_args(const  wchar_t * buff, wcstring *out, const wchar_t *pre
 
     tok_destroy(&tok);
 
-    current_tokenizer=previous_tokenizer;
+    current_tokenizer = previous_tokenizer;
     current_tokenizer_pos = previous_pos;
 
     error_code=0;
@@ -2985,7 +2977,6 @@ int parser_t::test(const  wchar_t * buff,
 {
     ASSERT_IS_MAIN_THREAD();
 
-    tokenizer tok;
     /*
        Set to one if a command name has been given for the currently
        parsed process specification
@@ -2994,8 +2985,8 @@ int parser_t::test(const  wchar_t * buff,
     int err=0;
     int unfinished = 0;
 
-    tokenizer *previous_tokenizer=current_tokenizer;
-    int previous_pos=current_tokenizer_pos;
+    tokenizer_t * const previous_tokenizer=current_tokenizer;
+    const int previous_pos=current_tokenizer_pos;
 
     int block_pos[BLOCK_MAX_COUNT] = {};
     block_type_t block_type[BLOCK_MAX_COUNT] = {};
@@ -3043,11 +3034,10 @@ int parser_t::test(const  wchar_t * buff,
 
     }
 
+    tokenizer_t tok(buff, 0);
     current_tokenizer = &tok;
 
-    for (tok_init(&tok, buff, 0);
-            ;
-            tok_next(&tok))
+    for (;; tok_next(&tok))
     {
         current_tokenizer_pos = tok_get_pos(&tok);
 
diff --git a/parser.h b/parser.h
index e86539836..751182c35 100644
--- a/parser.h
+++ b/parser.h
@@ -295,7 +295,7 @@ struct profile_item_t
     wcstring cmd;
 };
 
-struct tokenizer;
+struct tokenizer_t;
 
 class parser_t
 {
@@ -316,7 +316,7 @@ private:
     wcstring err_buff;
 
     /** Pointer to the current tokenizer */
-    tokenizer *current_tokenizer;
+    tokenizer_t *current_tokenizer;
 
     /** String for representing the current line */
     wcstring lineinfo;
@@ -344,10 +344,10 @@ private:
     parser_t(const parser_t&);
     parser_t& operator=(const parser_t&);
 
-    void parse_job_argument_list(process_t *p, job_t *j, tokenizer *tok, std::vector<completion_t>&, bool);
-    int parse_job(process_t *p, job_t *j, tokenizer *tok);
+    void parse_job_argument_list(process_t *p, job_t *j, tokenizer_t *tok, std::vector<completion_t>&, bool);
+    int parse_job(process_t *p, job_t *j, tokenizer_t *tok);
     void skipped_exec(job_t * j);
-    void eval_job(tokenizer *tok);
+    void eval_job(tokenizer_t *tok);
     int parser_test_argument(const wchar_t *arg, wcstring *out, const wchar_t *prefix, int offset);
     void print_errors(wcstring &target, const wchar_t *prefix);
     void print_errors_stderr();
diff --git a/reader.cpp b/reader.cpp
index 5548dabfb..df39f3a29 100644
--- a/reader.cpp
+++ b/reader.cpp
@@ -1821,7 +1821,6 @@ static void handle_token_history(int forward, int reset)
 
     const wchar_t *str=0;
     long current_pos;
-    tokenizer tok;
 
     if (reset)
     {
@@ -1895,10 +1894,8 @@ static void handle_token_history(int forward, int reset)
         {
 
             //debug( 3, L"new '%ls'", data->token_history_buff.c_str() );
-
-            for (tok_init(&tok, data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
-                    tok_has_next(&tok);
-                    tok_next(&tok))
+            tokenizer_t tok(data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
+            for (; tok_has_next(&tok); tok_next(&tok))
             {
                 switch (tok_last_type(&tok))
                 {
diff --git a/tokenizer.cpp b/tokenizer.cpp
index e5f131f8e..1a59820e7 100644
--- a/tokenizer.cpp
+++ b/tokenizer.cpp
@@ -83,7 +83,7 @@ static const wchar_t *tok_desc[] =
 
    \return 0 if the system could not provide the memory needed, and 1 otherwise.
 */
-static int check_size(tokenizer *tok, size_t len)
+static int check_size(tokenizer_t *tok, size_t len)
 {
     if (tok->last_len <= len)
     {
@@ -103,7 +103,7 @@ static int check_size(tokenizer *tok, size_t len)
 /**
    Set the latest tokens string to be the specified error message
 */
-static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_message)
+static void tok_call_error(tokenizer_t *tok, int error_type, const wchar_t *error_message)
 {
     tok->last_type = TOK_ERROR;
     tok->error = error_type;
@@ -117,13 +117,13 @@ static void tok_call_error(tokenizer *tok, int error_type, const wchar_t *error_
     wcscpy(tok->last, error_message);
 }
 
-int tok_get_error(tokenizer *tok)
+int tok_get_error(tokenizer_t *tok)
 {
     return tok->error;
 }
 
 
-void tok_init(tokenizer *tok, const wchar_t *b, int flags)
+tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last(NULL), last_type(0), last_len(0), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), last_quote(0), error(0), squash_errors(false), cached_lineno_offset(0), cached_lineno_count(0)
 {
 
     /* We can only generate error messages on the main thread due to wgettext() thread safety issues. */
@@ -132,33 +132,28 @@ void tok_init(tokenizer *tok, const wchar_t *b, int flags)
         ASSERT_IS_MAIN_THREAD();
     }
 
-    CHECK(tok,);
-
-    memset(tok, 0, sizeof(tokenizer));
-
     CHECK(b,);
 
 
-    tok->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED);
-    tok->show_comments = !!(flags & TOK_SHOW_COMMENTS);
-    tok->squash_errors = !!(flags & TOK_SQUASH_ERRORS);
-    tok->has_next=true;
+    this->accept_unfinished = !!(flags & TOK_ACCEPT_UNFINISHED);
+    this->show_comments = !!(flags & TOK_SHOW_COMMENTS);
+    this->squash_errors = !!(flags & TOK_SQUASH_ERRORS);
 
-    tok->has_next = (*b != L'\0');
-    tok->orig_buff = tok->buff = b;
-    tok->cached_lineno_offset = 0;
-    tok->cached_lineno_count = 0;
-    tok_next(tok);
+    this->has_next = (*b != L'\0');
+    this->orig_buff = this->buff = b;
+    this->cached_lineno_offset = 0;
+    this->cached_lineno_count = 0;
+    tok_next(this);
 }
 
-void tok_destroy(tokenizer *tok)
+void tok_destroy(tokenizer_t *tok)
 {
     CHECK(tok,);
 
     free(tok->last);
 }
 
-int tok_last_type(tokenizer *tok)
+int tok_last_type(tokenizer_t *tok)
 {
     CHECK(tok, TOK_ERROR);
     CHECK(tok->buff, TOK_ERROR);
@@ -166,14 +161,14 @@ int tok_last_type(tokenizer *tok)
     return tok->last_type;
 }
 
-wchar_t *tok_last(tokenizer *tok)
+wchar_t *tok_last(tokenizer_t *tok)
 {
     CHECK(tok, 0);
 
     return tok->last;
 }
 
-int tok_has_next(tokenizer *tok)
+int tok_has_next(tokenizer_t *tok)
 {
     /*
       Return 1 on broken tokenizer
@@ -185,7 +180,7 @@ int tok_has_next(tokenizer *tok)
     return   tok->has_next;
 }
 
-int tokenizer::line_number_of_character_at_offset(size_t offset)
+int tokenizer_t::line_number_of_character_at_offset(size_t offset)
 {
     // we want to return (one plus) the number of newlines at offsets less than the given offset
     // cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset
@@ -265,24 +260,28 @@ static int myal(wchar_t c)
 /**
    Read the next token as a string
 */
-static void read_string(tokenizer *tok)
+static void read_string(tokenizer_t *tok)
 {
     const wchar_t *start;
     long len;
-    int mode=0;
     int do_loop=1;
     int paran_count=0;
 
     start = tok->buff;
     bool is_first = true;
 
+    enum tok_mode_t {
+        mode_regular_text = 0, // regular text
+        mode_subshell = 1, // inside of subshell
+        mode_array_brackets = 2, // inside of array brackets
+        mode_array_brackets_and_subshell = 3 // inside of array brackets and subshell, like in '$foo[(ech'
+    } mode = mode_regular_text;
+
     while (1)
     {
 
         if (!myal(*tok->buff))
         {
-//      debug(1, L"%lc", *tok->buff );
-
             if (*tok->buff == L'\\')
             {
                 tok->buff++;
@@ -296,13 +295,13 @@ static void read_string(tokenizer *tok)
                     else
                     {
                         /* Since we are about to increment tok->buff, decrement it first so the increment doesn't go past the end of the buffer. https://github.com/fish-shell/fish-shell/issues/389 */
-                        do_loop = 0;
                         tok->buff--;
+                        do_loop = 0;
                     }
 
 
                 }
-                else if (*tok->buff == L'\n' && mode == 0)
+                else if (*tok->buff == L'\n' && mode == mode_regular_text)
                 {
                     tok->buff--;
                     do_loop = 0;
@@ -312,33 +311,24 @@ static void read_string(tokenizer *tok)
                 tok->buff++;
                 continue;
             }
-
-
-            /*
-              The modes are as follows:
-
-              0: regular text
-              1: inside of subshell
-              2: inside of array brackets
-              3: inside of array brackets and subshell, like in '$foo[(ech'
-            */
+            
             switch (mode)
             {
-                case 0:
+                case mode_regular_text:
                 {
                     switch (*tok->buff)
                     {
                         case L'(':
                         {
                             paran_count=1;
-                            mode = 1;
+                            mode = mode_subshell;
                             break;
                         }
 
                         case L'[':
                         {
                             if (tok->buff != start)
-                                mode=2;
+                                mode = mode_array_brackets;
                             break;
                         }
 
@@ -356,7 +346,7 @@ static void read_string(tokenizer *tok)
                             {
                                 tok->buff += wcslen(tok->buff);
 
-                                if ((!tok->accept_unfinished))
+                                if (! tok->accept_unfinished)
                                 {
                                     TOK_CALL_ERROR(tok, TOK_UNTERMINATED_QUOTE, QUOTE_ERROR);
                                     return;
@@ -369,7 +359,7 @@ static void read_string(tokenizer *tok)
 
                         default:
                         {
-                            if (!tok_is_string_character(*(tok->buff), is_first))
+                            if (! tok_is_string_character(*(tok->buff), is_first))
                             {
                                 do_loop=0;
                             }
@@ -378,8 +368,8 @@ static void read_string(tokenizer *tok)
                     break;
                 }
 
-                case 3:
-                case 1:
+                case mode_array_brackets_and_subshell:
+                case mode_subshell:
                     switch (*tok->buff)
                     {
                         case L'\'':
@@ -411,7 +401,7 @@ static void read_string(tokenizer *tok)
                             paran_count--;
                             if (paran_count == 0)
                             {
-                                mode--;
+                                mode = (mode == mode_array_brackets_and_subshell ? mode_array_brackets : mode_regular_text);
                             }
                             break;
                         case L'\0':
@@ -419,16 +409,17 @@ static void read_string(tokenizer *tok)
                             break;
                     }
                     break;
-                case 2:
+                    
+                case mode_array_brackets:
                     switch (*tok->buff)
                     {
                         case L'(':
                             paran_count=1;
-                            mode = 3;
+                            mode = mode_array_brackets_and_subshell;
                             break;
 
                         case L']':
-                            mode=0;
+                            mode = mode_regular_text;
                             break;
 
                         case L'\0':
@@ -447,7 +438,7 @@ static void read_string(tokenizer *tok)
         is_first = false;
     }
 
-    if ((!tok->accept_unfinished) && (mode!=0))
+    if ((!tok->accept_unfinished) && (mode != mode_regular_text))
     {
         TOK_CALL_ERROR(tok, TOK_UNTERMINATED_SUBSHELL, PARAN_ERROR);
         return;
@@ -467,7 +458,7 @@ static void read_string(tokenizer *tok)
 /**
    Read the next token as a comment.
 */
-static void read_comment(tokenizer *tok)
+static void read_comment(tokenizer_t *tok)
 {
     const wchar_t *start;
 
@@ -487,7 +478,7 @@ static void read_comment(tokenizer *tok)
 /**
    Read a FD redirection.
 */
-static void read_redirect(tokenizer *tok, int fd)
+static void read_redirect(tokenizer_t *tok, int fd)
 {
     int mode = -1;
 
@@ -552,7 +543,7 @@ static void read_redirect(tokenizer *tok, int fd)
     }
 }
 
-wchar_t tok_last_quote(tokenizer *tok)
+wchar_t tok_last_quote(tokenizer_t *tok)
 {
     CHECK(tok, 0);
 
@@ -582,7 +573,7 @@ const wchar_t *tok_get_desc(int type)
 }
 
 
-void tok_next(tokenizer *tok)
+void tok_next(tokenizer_t *tok)
 {
 
     CHECK(tok,);
@@ -705,20 +696,18 @@ void tok_next(tokenizer *tok)
 
 }
 
-const wchar_t *tok_string(tokenizer *tok)
+const wchar_t *tok_string(tokenizer_t *tok)
 {
     return tok?tok->orig_buff:0;
 }
 
 wchar_t *tok_first(const wchar_t *str)
 {
-    tokenizer t;
     wchar_t *res=0;
 
     CHECK(str, 0);
 
-    tok_init(&t, str, TOK_SQUASH_ERRORS);
-
+    tokenizer_t t(str, TOK_SQUASH_ERRORS);
     switch (tok_last_type(&t))
     {
         case TOK_STRING:
@@ -733,7 +722,7 @@ wchar_t *tok_first(const wchar_t *str)
     return res;
 }
 
-int tok_get_pos(tokenizer *tok)
+int tok_get_pos(tokenizer_t *tok)
 {
     CHECK(tok, 0);
 
@@ -741,7 +730,7 @@ int tok_get_pos(tokenizer *tok)
 }
 
 
-void tok_set_pos(tokenizer *tok, int pos)
+void tok_set_pos(tokenizer_t *tok, int pos)
 {
     CHECK(tok,);
 
diff --git a/tokenizer.h b/tokenizer.h
index ae6b6ecc9..4d4deacca 100644
--- a/tokenizer.h
+++ b/tokenizer.h
@@ -61,11 +61,12 @@ enum tokenizer_error
 */
 #define TOK_SQUASH_ERRORS 4
 
+typedef unsigned int tok_flags_t;
 
 /**
    The tokenizer struct.
 */
-struct tokenizer
+struct tokenizer_t
 {
     /** A pointer into the original string, showing where the next token begins */
     const wchar_t *buff;
@@ -100,62 +101,60 @@ struct tokenizer
     /** Return the line number of the character at the given offset */
     int line_number_of_character_at_offset(size_t offset);
 
+    /**
+      Constructor for a tokenizer. b is the string that is to be
+      tokenized. It is not copied, and must not be freed by the caller
+      until after the tokenizer is destroyed (see tok_destroy()).
+
+      \param b The string to tokenize
+      \param flags Bitwise OR of TOK_* flags. Setting TOK_ACCEPT_UNFINISHED causes the tokenizer
+      to accept incomplete tokens, such as a subshell without a closing
+      parenthesis, as valid tokens. Setting TOK_SHOW_COMMENTS returns comments as tokens.
+      TOK_SQUASH_ERRORS is also accepted; see its definition above for details.
+    */
+    tokenizer_t(const wchar_t *b, tok_flags_t flags);
 };
 
-/**
-  Initialize the tokenizer. b is the string that is to be
-  tokenized. It is not copied, and should not be freed by the caller
-  until after the tokenizer is destroyed.
-
-  \param tok The tokenizer to initialize
-  \param b The string to tokenize
-  \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
-  to accept incomplete tokens, such as a subshell without a closing
-  parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
-
-*/
-void tok_init(tokenizer *tok, const wchar_t *b, int flags);
-
 /**
   Jump to the next token.
 */
-void tok_next(tokenizer *tok);
+void tok_next(tokenizer_t *tok);
 
 /**
   Returns the type of the last token. Must be one of the values in the token_type enum.
 */
-int tok_last_type(tokenizer *tok);
+int tok_last_type(tokenizer_t *tok);
 
 /**
   Returns the last token string. The string should not be freed by the caller.
 */
-wchar_t *tok_last(tokenizer *tok);
+wchar_t *tok_last(tokenizer_t *tok);
 
 /**
   Returns the type of quote from the last TOK_QSTRING
 */
-wchar_t tok_last_quote(tokenizer *tok);
+wchar_t tok_last_quote(tokenizer_t *tok);
 
 /**
   Returns true as long as there are more tokens left
 */
-int tok_has_next(tokenizer *tok);
+int tok_has_next(tokenizer_t *tok);
 
 /**
   Returns the position of the beginning of the current token in the original string
 */
-int tok_get_pos(tokenizer *tok);
+int tok_get_pos(tokenizer_t *tok);
 
 /**
    Destroy the tokenizer and free asociated memory
 */
-void tok_destroy(tokenizer *tok);
+void tok_destroy(tokenizer_t *tok);
 
 
 /**
    Returns the original string to tokenizer
  */
-const wchar_t *tok_string(tokenizer *tok);
+const wchar_t *tok_string(tokenizer_t *tok);
 
 
 /**
@@ -178,7 +177,7 @@ bool tok_is_string_character(wchar_t c, bool is_first);
 /**
    Move tokenizer position
 */
-void tok_set_pos(tokenizer *tok, int pos);
+void tok_set_pos(tokenizer_t *tok, int pos);
 
 /**
    Returns a string description of the specified token type
@@ -188,7 +187,7 @@ const wchar_t *tok_get_desc(int type);
 /**
    Get tokenizer error type. Should only be called if tok_last_tope returns TOK_ERROR.
 */
-int tok_get_error(tokenizer *tok);
+int tok_get_error(tokenizer_t *tok);
 
 
 #endif