Using XXHash64 for all wcstring unordered_map/set hashing

Since we are including XXHash32/64 anyway for the wchar_t* hashing,
we might as well use it.

Use arch-specific hash size and xxhash for all wcstring hashing

Instead of using XXHash64 for all platforms, use the 32-bit version
when running on 32-bit platforms where XXHash64 is significantly slower
than XXHash32 (and the additional precision will not be used).

Additionally, manually specify wcstring_hash as hashing method for
non-const wcstring unordered_set/map instances (the const varieties
don't have an in-library hash and so already use our xxhash-based
specialization when calling std::hash<const wcstring>).
This commit is contained in:
Mahmoud Al-Qudsi 2017-08-19 13:47:16 -05:00
parent d9f901f36d
commit d54fbddb11
14 changed files with 35 additions and 34 deletions

View File

@ -55,7 +55,7 @@ class autoload_t : public lru_cache_t<autoload_t, autoload_function_t> {
wcstring_list_t last_path_tokenized;
/// A table containing all the files that are currently being loaded.
/// This is here to help prevent recursion.
std::unordered_set<wcstring> is_loading_set;
std::unordered_set<wcstring, wcstring_hash> is_loading_set;
// Function invoked when a command is removed
typedef void (*command_removed_function_t)(const wcstring &);
const command_removed_function_t command_removed;

View File

@ -66,7 +66,7 @@ class argparse_cmd_opts_t {
wcstring_list_t raw_exclusive_flags;
wcstring_list_t argv;
std::unordered_map<wchar_t, option_spec_t *> options;
std::unordered_map<wcstring, wchar_t> long_to_short_flag;
std::unordered_map<wcstring, wchar_t, wcstring_hash> long_to_short_flag;
std::vector<std::vector<wchar_t>> exclusive_flag_sets;
~argparse_cmd_opts_t() {

View File

@ -836,15 +836,22 @@ enum {
// Custom hash function used by unordered_map/unordered_set when key is const
#ifndef CONST_WCSTRING_HASH
#define CONST_WCSTRING_HASH 1
#include "xxhash32.h"
#include "xxhash64.h"
/// Hash the buffer at \p t, passing \p size through as the input length and using a seed of 0.
/// XXHash32 is selected when pointers and ints have the same size, XXHash64 otherwise, and the
/// result is returned as a size_t.
inline size_t xxhash(const void *t, size_t size) {
#if __SIZEOF_POINTER__ == __SIZEOF_INT__
return XXHash32::hash(t, size, 0);
#else
return XXHash64::hash(t, size, 0);
// NOTE(review): no #endif is visible between the #else branch and the closing brace below --
// confirm the conditional is terminated where intended.
}
/// Hash functor for wcstring keys of unordered_map/unordered_set, backed by xxhash().
struct wcstring_hash {
    /// \return the xxhash of all characters stored in \p w.
    size_t operator()(const wcstring &w) const {
        // xxhash() takes a length in bytes, whereas size() counts wchar_t elements. Scale the
        // count so the whole string is hashed rather than only its first size() bytes.
        return xxhash(w.c_str(), w.size() * sizeof(wchar_t));
    }
};
namespace std {
/// Hash specialization for const wcstring keys; it defers to std::hash<wcstring>.
template <>
struct hash<const wcstring> {
    /// \return the value std::hash<wcstring> produces for \p w.
    std::size_t operator()(const wcstring &w) const {
        // std::hash<wcstring> takes its argument by const reference, so w can be passed
        // directly instead of being copied into a temporary first.
        return std::hash<wcstring>()(w);
    }
};
/// Hash specialization for const wcstring keys, backed by xxhash().
template <>
struct hash<const wcstring> {
    /// \return the xxhash of all characters stored in \p w.
    std::size_t operator()(const wcstring &w) const {
        // xxhash() takes a length in bytes, whereas size() counts wchar_t elements. Scale the
        // count so the whole string is hashed rather than only its first size() bytes.
        return xxhash(w.c_str(), w.size() * sizeof(wchar_t));
    }
};
}
#endif
#endif

View File

@ -162,7 +162,7 @@ namespace std {
/// Hash specialization for completion_entry_t: the hash is derived from the entry's cmd alone.
template<>
struct hash<completion_entry_t> {
size_t operator()(const completion_entry_t &c) const {
std::hash<wcstring> hasher;
wcstring_hash hasher;
// c.cmd is converted to a wcstring before being handed to the hasher.
return hasher((wcstring) c.cmd);
}
};
@ -297,7 +297,7 @@ class completer_t {
/// Table of completions conditions that have already been tested and the corresponding test
/// results.
typedef std::unordered_map<wcstring, bool> condition_cache_t;
typedef std::unordered_map<wcstring, bool, wcstring_hash> condition_cache_t;
condition_cache_t condition_cache;
enum complete_type_t { COMPLETE_DEFAULT, COMPLETE_AUTOSUGGEST };
@ -600,7 +600,7 @@ void completer_t::complete_cmd_desc(const wcstring &str) {
wcstring lookup_cmd(L"__fish_describe_command ");
lookup_cmd.append(escape_string(cmd_start, 1));
std::unordered_map<wcstring, wcstring> lookup;
std::unordered_map<wcstring, wcstring, wcstring_hash> lookup;
// First locate a list of possible descriptions using a single call to apropos or a direct
// search if we know the location of the whatis database. This can take some time on slower
@ -1557,7 +1557,7 @@ wcstring complete_print() {
/// Completion "wrapper" support. The map goes from wrapping-command to wrapped-command-list.
static std::mutex wrapper_lock;
typedef std::unordered_map<wcstring, wcstring_list_t> wrapper_map_t;
typedef std::unordered_map<wcstring, wcstring_list_t, wcstring_hash> wrapper_map_t;
static wrapper_map_t &wrap_map() {
ASSERT_IS_LOCKED(wrapper_lock);
// A pointer is a little more efficient than an object as a static because we can elide the
@ -1614,7 +1614,7 @@ wcstring_list_t complete_get_wrap_chain(const wcstring &command) {
const wrapper_map_t &wraps = wrap_map();
wcstring_list_t result;
std::unordered_set<wcstring> visited; // set of visited commands
std::unordered_set<wcstring, wcstring_hash> visited; // set of visited commands
wcstring_list_t to_visit(1, command); // stack of remaining-to-visit commands
wcstring target;

View File

@ -55,7 +55,6 @@
#include "sanity.h"
#include "screen.h"
#include "wutil.h" // IWYU pragma: keep
#include "xxhash64.h"
#define DEFAULT_TERM1 "ansi"
#define DEFAULT_TERM2 "dumb"
@ -330,9 +329,7 @@ struct const_string_set_comparer {
namespace std {
/// Hash specialization for NUL-terminated wide strings: the hash is computed from the data p
/// points at (with a length taken from wcslen) rather than from the pointer value itself.
template<>
struct hash<const wchar_t *> {
size_t operator()(const wchar_t *p) const {
return XXHash64::hash(p, wcslen(p), 0);
}
// NOTE(review): wcslen() returns a count of wide characters; if the hash routine expects a
// length in bytes, only part of the string is hashed -- confirm.
size_t operator()(const wchar_t *p) const { return xxhash(p, wcslen(p)); }
};
template <>
struct equal_to<const wchar_t *> {

View File

@ -34,7 +34,7 @@ class env_universal_t {
// Keys that have been modified, and need to be written. A value here that is not present in
// vars indicates a deleted value.
std::unordered_set<wcstring> modified;
std::unordered_set<wcstring, wcstring_hash> modified;
// Path that we save to. If empty, use the default.
const wcstring explicit_vars_path;

View File

@ -67,7 +67,7 @@ static const wchar_t *const highlight_var[] = {L"fish_color_normal",
/// Returns:
/// false: the filesystem is not case insensitive
/// true: the file system is case insensitive
typedef std::unordered_map<wcstring, bool> case_sensitivity_cache_t;
typedef std::unordered_map<wcstring, bool, wcstring_hash> case_sensitivity_cache_t;
bool fs_is_case_insensitive(const wcstring &path, int fd,
case_sensitivity_cache_t &case_sensitivity_cache) {
bool result = false;
@ -146,7 +146,7 @@ bool is_potential_path(const wcstring &potential_path_fragment, const wcstring_l
// Don't test the same path multiple times, which can happen if the path is absolute and the
// CDPATH contains multiple entries.
std::unordered_set<wcstring> checked_paths;
std::unordered_set<wcstring, wcstring_hash> checked_paths;
// Keep a cache of which paths / filesystems are case sensitive.
case_sensitivity_cache_t case_sensitivity_cache;

View File

@ -139,7 +139,7 @@ class history_t {
uint32_t disable_automatic_save_counter;
// Deleted item contents.
std::unordered_set<wcstring> deleted_items;
std::unordered_set<wcstring, wcstring_hash> deleted_items;
// The mmaped region for the history file.
const char *mmap_start;

View File

@ -45,7 +45,7 @@ class lru_cache_t {
// Store a copy of v. v is a const reference, so it can only be copied, never moved from.
explicit lru_node_t(const CONTENTS &v) : value(v) {}
};
typedef typename std::unordered_map<wcstring, lru_node_t>::iterator node_iter_t;
typedef typename std::unordered_map<wcstring, lru_node_t, wcstring_hash>::iterator node_iter_t;
// Max node count. This may be (transiently) exceeded by add_node_without_eviction, which is
// used from background threads.
@ -54,7 +54,7 @@ class lru_cache_t {
// All of our nodes
// Note that our linked list contains pointers to these nodes in the map
// We are dependent on the pointer-stability guarantees of std::unordered_map: inserting or
// rehashing never moves existing elements (although it may invalidate iterators)
std::unordered_map<wcstring, lru_node_t> node_map;
std::unordered_map<wcstring, lru_node_t, wcstring_hash> node_map;
// Head of the linked list
// The list is circular!

View File

@ -267,7 +267,7 @@ static void mangle_1_completion_description(wcstring *str) {
static void join_completions(comp_info_list_t *comps) {
// A map from description to index in the completion list of the element with that description.
// The indexes are stored +1.
std::unordered_map<wcstring, size_t> desc_table;
std::unordered_map<wcstring, size_t, wcstring_hash> desc_table;
// Note that we mutate the completion list as we go, so the size changes.
for (size_t i = 0; i < comps->size(); i++) {

View File

@ -203,7 +203,7 @@ size_t escape_code_length(const wchar_t *code);
class cached_esc_sequences_t {
private:
// Cached escape sequences we've already detected in the prompt and similar strings.
std::unordered_set<wcstring> cache;
std::unordered_set<wcstring, wcstring_hash> cache;
// The escape sequence lengths we've cached. My original implementation used min and max
// length variables. The cache was then iterated over using a loop like this:
// `for (size_t l = min; l <= max; l++)`.

View File

@ -439,7 +439,7 @@ class wildcard_expander_t {
// The working directory to resolve paths against
const wcstring working_directory;
// The set of items we have resolved, used to efficiently avoid duplication.
std::unordered_set<wcstring> completion_set;
std::unordered_set<wcstring, wcstring_hash> completion_set;
// The set of file IDs we have visited, used to avoid symlink loops.
std::unordered_set<file_id_t> visited_files;
// Flags controlling expansion.

View File

@ -38,7 +38,7 @@ const file_id_t kInvalidFileID = {(dev_t)-1LL, (ino_t)-1LL, (uint64_t)-1LL, -1,
#endif
/// Map used as cache by wgettext.
static owning_lock<std::unordered_map<wcstring, wcstring>> wgettext_map;
static owning_lock<std::unordered_map<wcstring, wcstring, wcstring_hash>> wgettext_map;
bool wreaddir_resolving(DIR *dir, const wcstring &dir_path, wcstring &out_name, bool *out_is_dir) {
struct dirent d;

View File

@ -145,13 +145,10 @@ struct file_id_t {
#ifndef HASH_FILE_ID
#define HASH_FILE_ID 1
#include "xxhash64.h"
namespace std {
/// Hash specialization for file_id_t, computed over the sizeof(f) bytes of the object itself.
template<>
struct hash<file_id_t> {
size_t operator()(const file_id_t &f) const {
return XXHash64::hash(&f, sizeof(f), 0);
}
// NOTE(review): hashing every byte of the object would include any padding inside file_id_t --
// confirm the struct has none, or that it is always fully initialised.
size_t operator()(const file_id_t &f) const { return xxhash(&f, sizeof(f)); }
};
}
#endif