Fix to restore an optimization from parse_util_get_line_from_offset in a more thread-safe way

2025-03-23 11:15:14 +08:00 · 2012-08-04 17:44:14 -07:00 · 2012-08-04 17:44:14 -07:00 · 682353f9cc
commit 682353f9cc
parent 25c6671a87
5 changed files with 68 additions and 21 deletions
--- a/function.cpp
+++ b/function.cpp
@@ -192,7 +192,7 @@ void function_add( const function_data_t &data, const parser_t &parser )
    
    /* Create and store a new function */
    const wchar_t *filename = reader_current_filename();
-    int def_offset = parse_util_lineno( parser.get_buffer(), parser.current_block->tok_pos )-1;
+    int def_offset = parser.line_number_of_character_at_offset(parser.current_block->tok_pos) - 1;
    const function_map_t::value_type new_pair(data.name, function_info_t(data, filename, def_offset, is_autoload));
    loaded_functions.insert(new_pair);
 	
--- a/parser.cpp
+++ b/parser.cpp
@@ -1009,25 +1009,14 @@ const wchar_t *parser_t::is_function() const

 int parser_t::get_lineno() const
 {
-	const wchar_t *whole_str;
-	const wchar_t *function_name;
-
 	int lineno;
-	
-/*	static const wchar_t *prev_str = 0;
-  static int i=0;
-  static int lineno=1;
-*/
-	if( !current_tokenizer )
+    
+    if( ! current_tokenizer || ! tok_string( current_tokenizer ))
 		return -1;
-	
-	whole_str = tok_string( current_tokenizer );
-
-	if( !whole_str )
-		return -1;
-	
-	lineno = parse_util_lineno( whole_str, current_tokenizer_pos );
-
+        
+    lineno = current_tokenizer->line_number_of_character_at_offset(current_tokenizer_pos);
+    
+    const wchar_t *function_name;
 	if( (function_name = is_function()) )
 	{
 		lineno += function_get_definition_offset( function_name );
@@ -1036,6 +1025,16 @@ int parser_t::get_lineno() const
 	return lineno;
 }

+int parser_t::line_number_of_character_at_offset(size_t idx) const
+{
+    if( ! current_tokenizer)
+        return -1;
+    
+    int result = current_tokenizer->line_number_of_character_at_offset(idx);
+    // Disabled debug cross-check; the result should equal parse_util_lineno() over the tokenizer's string: assert(result == parse_util_lineno(tok_string( current_tokenizer ), idx));
+    return result;
+}
+
 const wchar_t *parser_t::current_filename() const
 {
    /* We query a global array for the current file name, so it only makes sense to ask this on the principal parser. */
@@ -2536,7 +2535,7 @@ int parser_t::eval( const wcstring &cmdStr, io_data_t *io, enum block_type_t blo

 	this->push_block( block_type );

-	current_tokenizer = (tokenizer *)malloc( sizeof(tokenizer));
+	current_tokenizer = new tokenizer;
 	tok_init( current_tokenizer, cmd, 0 );

 	error_code = 0;
@@ -2588,7 +2587,7 @@ int parser_t::eval( const wcstring &cmdStr, io_data_t *io, enum block_type_t blo
 	this->print_errors_stderr();

 	tok_destroy( current_tokenizer );
-	free( current_tokenizer );
+	delete current_tokenizer;

    while (forbidden_function.size() > forbid_count)
 		parser_t::allow_function();
--- a/parser.h
+++ b/parser.h
@@ -392,6 +392,9 @@ class parser_t {

    /** Returns the current line number */
    int get_lineno() const;
+    
+    /** Returns the line number for the character at the given index */
+    int line_number_of_character_at_offset(size_t idx) const;

    /** Returns the current position in the latest string of the tokenizer. */
    int get_pos() const;
--- a/tokenizer.cpp
+++ b/tokenizer.cpp
@@ -145,6 +145,8 @@ void tok_init( tokenizer *tok, const wchar_t *b, int flags )

 	tok->has_next = (*b != L'\0');
 	tok->orig_buff = tok->buff = b;
+    tok->cached_lineno_offset = 0;
+    tok->cached_lineno_count = 0;
 	tok_next( tok );
 }

@@ -182,6 +184,42 @@ int tok_has_next( tokenizer *tok )
 	return 	tok->has_next;
 }

+int tokenizer::line_number_of_character_at_offset(size_t offset)
+{
+    // Returns the 1-based line number of the character at offset: one plus the number of newlines at indexes less than offset (or 0 if there is no buffer)
+    // Invariant: cached_lineno_count is the number of newlines at indexes less than cached_lineno_offset, so only the gap between the cached and requested offsets is scanned
+    const wchar_t *str = orig_buff;
+	if (! str)
+		return 0;
+    
+    // Offset 0 is always on line 1; answer directly and leave the cache untouched
+    if (offset == 0)
+        return 1;
+    
+    size_t i;
+    if (offset > cached_lineno_offset)
+    {
+        for ( i = cached_lineno_offset; str[i] && i<offset; i++)
+        {
+            /* Moving forward: add one for every newline in the range [cached_lineno_offset, offset), stopping early at the string terminator */
+            if( str[i] == L'\n' )
+                cached_lineno_count++;
+        }
+        cached_lineno_offset = i; // store i, not offset: the scan stops at the terminator when offset is beyond the length of the string
+    }
+    else if (offset < cached_lineno_offset)
+    {
+        /* Moving backward: subtract one for every newline in the range [offset, cached_lineno_offset); no terminator check, this range lies below the cached offset */
+        for (i = offset; i < cached_lineno_offset; i++)
+        {
+            if (str[i] == L'\n')
+                cached_lineno_count--;
+        }
+        cached_lineno_offset = offset;
+    }
+	return cached_lineno_count + 1;
+}
+
 /**
   Tests if this character can be a part of a string. The redirect ^ is allowed unless it's the first character.
 */
--- a/tokenizer.h
+++ b/tokenizer.h
@@ -90,9 +90,16 @@ struct tokenizer
 	wchar_t last_quote;
 	/** Last error */
 	int error;
-    
    /* Whether we are squashing errors */
    bool squash_errors;
+
+    /* Cached line number information */
+    size_t cached_lineno_offset;
+    int cached_lineno_count;
+
+    /** Return the line number of the character at the given offset */
+    int line_number_of_character_at_offset(size_t offset);
+
 };

 /**