fish-shell/src/parser_keywords.cpp
Mahmoud Al-Qudsi bc66921ac9 Optimize keyword detection
The data stored in these containers is small enough that it is worth
creating distinct sets for each lookup.

In a microbenchmark of these changes, the single-lookup version of the
function with lookups gated on the length of input (bypassed entirely if
the input is longer than the longest key in the container) provided a
1.5x-3.5x speedup over the previous implementation.

Additionally, as the collections are static and their contents are never
modified after startup, it makes no sense to continuously calculate the
location of and allocate an iterator for the `!= foo.end()` comparison;
the end iterator is now statically cached.

I'm not expecting massive speed gains out of this change, but the parser
does perform enough of these to make it worth optimizing in this way.
2019-04-03 20:53:29 -05:00

90 lines
3.2 KiB
C++

// Functions having to do with parser keywords, like testing if a function is a block command.
#include "config.h" // IWYU pragma: keep
#include <string>
#include <unordered_set>
#include "common.h"
#include "fallback.h" // IWYU pragma: keep
#include "parser_keywords.h"
// Hash set of wide strings; the container type used for every keyword lookup in this file.
using string_set_t = std::unordered_set<wcstring>;
// Keywords tested by parser_keywords_skip_arguments(). They are also merged into the lookup sets
// built by parser_keywords_is_subcommand() and parser_keywords_is_reserved().
static const wcstring skip_keywords[] {
L"else",
L"begin",
};
// Keywords merged (together with skip_keywords) into the set searched by
// parser_keywords_is_subcommand(); they are also part of the reserved-word set.
static const wcstring subcommand_keywords[] {
L"command", L"builtin", L"while", L"exec",
L"if", L"and", L"or", L"not"
};
// Keywords searched directly by parser_keywords_is_block(), which is why this list is stored as a
// set rather than a plain array. It is also copied into the reserved-word set.
static const string_set_t block_keywords = {
L"for", L"while", L"if",
L"function", L"switch", L"begin"
};
// Extra words that parser_keywords_is_reserved() treats as reserved, in addition to the skip,
// subcommand and block keyword lists.
static const wcstring reserved_keywords[] = {
L"end", L"case", L"else", L"return",
L"continue", L"break", L"argparse", L"read",
L"set", L"status", L"test", L"["
};
// The lists above are purposely implemented separately from the logic below, so that future
// maintainers may assume the contents of the list based off their names, and not off what the
// functions below require them to contain.
static size_t list_max_length(const string_set_t &list) {
size_t result = 0;
for (const auto &w: list) {
if (w.length() > result) {
result = w.length();
}
}
return result;
}
bool parser_keywords_skip_arguments(const wcstring &cmd) {
return cmd == skip_keywords[0] || cmd == skip_keywords[1];
}
bool parser_keywords_is_subcommand(const wcstring &cmd) {
const static string_set_t search_list = ([](){
string_set_t results;
results.insert(std::begin(subcommand_keywords), std::end(subcommand_keywords));
results.insert(std::begin(skip_keywords), std::end(skip_keywords));
return results;
})();
const static auto max_len = list_max_length(search_list);
const static auto not_found = search_list.end();
// Everything above is executed only at startup, this is the actual optimized search routine:
return cmd.length() <= max_len && search_list.find(cmd) != not_found;
}
bool parser_keywords_is_block(const wcstring &word) {
const static auto max_len = list_max_length(block_keywords);
const static auto not_found = block_keywords.end();
// Everything above is executed only at startup, this is the actual optimized search routine:
return word.length() <= max_len && block_keywords.find(word) != not_found;
}
bool parser_keywords_is_reserved(const wcstring &word) {
const static string_set_t search_list = ([](){
string_set_t results;
results.insert(std::begin(subcommand_keywords), std::end(subcommand_keywords));
results.insert(std::begin(skip_keywords), std::end(skip_keywords));
results.insert(std::begin(block_keywords), std::end(block_keywords));
results.insert(std::begin(reserved_keywords), std::end(reserved_keywords));
return results;
})();
const static auto max_len = list_max_length(search_list);
const static auto not_found = search_list.end();
// Everything above is executed only at startup, this is the actual optimized search routine:
return word.length() <= max_len && search_list.find(word) != not_found;
}