mirror of
https://github.com/trapexit/mergerfs.git
synced 2025-01-22 07:59:30 +08:00
change from fasthash64 to wyhash
wyhash has very good performance for small keys (our use case) and works on platforms with alignment concerns. A user had an issue where fasthash64 led to misaligned reads and signal exceptions. wyhash does not have such issues and is faster.
This commit is contained in:
parent
58a8f8326d
commit
0371b047f9
|
@ -1,84 +0,0 @@
|
|||
/* The MIT License
|
||||
|
||||
Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
||||
files (the "Software"), to deal in the Software without
|
||||
restriction, including without limitation the rights to use, copy,
|
||||
modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "fasthash.h"

#include <string.h>
|
||||
|
||||
// Compression function for Merkle-Damgard construction.
|
||||
// This function is generated using the framework provided.
|
||||
/* Scramble a 64-bit word: xor-shift, multiply, xor-shift. */
static inline uint64_t
mix(uint64_t h)
{
  h ^= h >> 23;
  h *= 0x2127599bf4325c37ULL;
  h ^= h >> 47;
  return h;
}

/**
 * fasthash64 - 64-bit implementation of fasthash
 * @buf:  data buffer (no alignment requirement)
 * @len:  data size in bytes
 * @seed: the seed
 *
 * Full 8-byte words are loaded with memcpy() so that the function is safe
 * for buffers that are not 8-byte aligned; dereferencing a casted
 * `const uint64_t *` on such buffers is undefined behavior and traps on
 * strict-alignment platforms. Words are still interpreted in native byte
 * order, so results match the previous implementation wherever it worked.
 */
uint64_t
fasthash64(const void *buf,
           size_t      len,
           uint64_t    seed)
{
  const uint64_t m = 0x880355f21e6d1965ULL;
  const unsigned char *pos = (const unsigned char *)buf;
  const unsigned char *end = pos + (len & ~(size_t)7);
  uint64_t h = seed ^ (len * m);
  uint64_t v;

  /* Consume the input one whole 64-bit word at a time. */
  for (; pos != end; pos += 8) {
    memcpy(&v, pos, sizeof(v));
    h ^= mix(v);
    h *= m;
  }

  /* Fold the 0..7 trailing bytes into one last word. */
  v = 0;
  switch (len & 7) {
  case 7: v ^= (uint64_t)pos[6] << 48; /* fallthrough */
  case 6: v ^= (uint64_t)pos[5] << 40; /* fallthrough */
  case 5: v ^= (uint64_t)pos[4] << 32; /* fallthrough */
  case 4: v ^= (uint64_t)pos[3] << 24; /* fallthrough */
  case 3: v ^= (uint64_t)pos[2] << 16; /* fallthrough */
  case 2: v ^= (uint64_t)pos[1] << 8;  /* fallthrough */
  case 1: v ^= (uint64_t)pos[0];
    h ^= mix(v);
    h *= m;
    break;
  default:
    /* No trailing bytes. */
    break;
  }

  return mix(h);
}
|
||||
|
||||
/*
 * 32-bit variant of fasthash: hashes with fasthash64() and folds the
 * 64-bit result down to 32 bits by subtracting the upper half from the
 * full value, so both halves contribute to the result.
 */
uint32_t
fasthash32(const void *buf,
           size_t      len,
           uint32_t    seed)
{
  const uint64_t wide = fasthash64(buf, len, seed);

  return (uint32_t)(wide - (wide >> 32));
}
|
|
@ -1,56 +0,0 @@
|
|||
/* The MIT License
|
||||
|
||||
Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
||||
files (the "Software"), to deal in the Software without
|
||||
restriction, including without limitation the rights to use, copy,
|
||||
modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _FASTHASH_H
|
||||
#define _FASTHASH_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* fasthash32 - 32-bit implementation of fasthash
|
||||
* @buf: data buffer
|
||||
* @len: data size
|
||||
* @seed: the seed
|
||||
*/
|
||||
uint32_t fasthash32(const void *buf, size_t len, uint32_t seed);
|
||||
|
||||
/**
|
||||
* fasthash64 - 64-bit implementation of fasthash
|
||||
* @buf: data buffer
|
||||
* @len: data size
|
||||
* @seed: the seed
|
||||
*/
|
||||
uint64_t fasthash64(const void *buf, size_t len, uint64_t seed);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -18,8 +18,8 @@
|
|||
|
||||
#include "ef.hpp"
|
||||
#include "errno.hpp"
|
||||
#include "fasthash.h"
|
||||
#include "fs_inode.hpp"
|
||||
#include "wyhash.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
|
@ -61,9 +61,10 @@ path_hash(const char *fusepath_,
|
|||
const dev_t dev_,
|
||||
const ino_t ino_)
|
||||
{
|
||||
return fasthash64(fusepath_,
|
||||
fusepath_len_,
|
||||
fs::inode::MAGIC);
|
||||
return wyhash(fusepath_,
|
||||
fusepath_len_,
|
||||
fs::inode::MAGIC,
|
||||
_wyp);
|
||||
}
|
||||
|
||||
static
|
||||
|
@ -98,9 +99,10 @@ devino_hash(const char *fusepath_,
|
|||
buf[0] = dev_;
|
||||
buf[1] = ino_;
|
||||
|
||||
return fasthash64((void*)&buf[0],
|
||||
sizeof(buf),
|
||||
fs::inode::MAGIC);
|
||||
return wyhash((void*)&buf[0],
|
||||
sizeof(buf),
|
||||
fs::inode::MAGIC,
|
||||
_wyp);
|
||||
}
|
||||
|
||||
static
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "khash.h"
|
||||
#include "fasthash.h"
|
||||
#include "wyhash.h"
|
||||
|
||||
KHASH_SET_INIT_INT64(hashset);
|
||||
|
||||
|
@ -45,7 +45,7 @@ public:
|
|||
uint64_t h;
|
||||
khint_t key;
|
||||
|
||||
h = fasthash64(str_,len_,0x7472617065786974);
|
||||
h = wyhash(str_,len_,0x7472617065786974,_wyp);
|
||||
|
||||
key = kh_put(hashset,_set,h,&rv);
|
||||
if(rv == 0)
|
||||
|
|
140
src/wyhash.h
Normal file
140
src/wyhash.h
Normal file
|
@ -0,0 +1,140 @@
|
|||
//Author: Wang Yi <godspeed_china@yeah.net>
|
||||
#ifndef wyhash_final_version
|
||||
#define wyhash_final_version
|
||||
//defines that change behavior
|
||||
#ifndef WYHASH_CONDOM
|
||||
#define WYHASH_CONDOM 0 //0,1,2
|
||||
#endif
|
||||
#define WYHASH_32BIT_MUM 0 //faster on 32 bit system
|
||||
//includes
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#if defined(_MSC_VER) && defined(_M_X64)
|
||||
#include <intrin.h>
|
||||
#pragma intrinsic(_umul128)
|
||||
#endif
|
||||
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
|
||||
#define _likely_(x) __builtin_expect(x,1)
|
||||
#define _unlikely_(x) __builtin_expect(x,0)
|
||||
#else
|
||||
#define _likely_(x) (x)
|
||||
#define _unlikely_(x) (x)
|
||||
#endif
|
||||
//mum function
|
||||
/* Swap the upper and lower 32-bit halves of x (rotate by 32 bits). */
static inline uint64_t _wyrot(uint64_t x) {
  const uint64_t high_moved_low = x >> 32;
  const uint64_t low_moved_high = x << 32;
  return high_moved_low | low_moved_high;
}
|
||||
/*
 * Multiply *A by *B and write the result back through both pointers.
 *
 * Except in the WYHASH_32BIT_MUM variant, *A receives the low 64 bits of
 * the 128-bit product and *B the high 64 bits. The WYHASH_32BIT_MUM variant
 * instead combines the four 32x32 partial products with xor and a 32-bit
 * rotate. When WYHASH_CONDOM > 1 the results are xor-ed into the existing
 * values rather than overwriting them.
 */
static inline void _wymum(uint64_t *A, uint64_t *B){
#if(WYHASH_32BIT_MUM)
  const uint64_t a_high = *A >> 32;
  const uint64_t b_high = *B >> 32;
  const uint64_t hh = a_high * b_high;
  const uint64_t hl = a_high * (unsigned)*B;
  const uint64_t lh = (unsigned)*A * b_high;
  const uint64_t ll = (uint64_t)(unsigned)*A * (unsigned)*B;
  #if(WYHASH_CONDOM>1)
  *A ^= _wyrot(hl) ^ hh;
  *B ^= _wyrot(lh) ^ ll;
  #else
  *A = _wyrot(hl) ^ hh;
  *B = _wyrot(lh) ^ ll;
  #endif
#elif defined(__SIZEOF_INT128__)
  /* Native 128-bit arithmetic is available. */
  __uint128_t product = *A;
  product *= *B;
  #if(WYHASH_CONDOM>1)
  *A ^= (uint64_t)product;
  *B ^= (uint64_t)(product >> 64);
  #else
  *A = (uint64_t)product;
  *B = (uint64_t)(product >> 64);
  #endif
#elif defined(_MSC_VER) && defined(_M_X64)
  /* MSVC x64 intrinsic: returns the low half, stores the high half. */
  #if(WYHASH_CONDOM>1)
  uint64_t low_half, high_half;
  low_half = _umul128(*A, *B, &high_half);
  *A ^= low_half;
  *B ^= high_half;
  #else
  *A = _umul128(*A, *B, B);
  #endif
#else
  /* Portable schoolbook multiply on 32-bit halves with carry tracking. */
  uint64_t a_high = *A >> 32;
  uint64_t b_high = *B >> 32;
  uint64_t a_low = (uint32_t)*A;
  uint64_t b_low = (uint32_t)*B;
  uint64_t top = a_high * b_high;
  uint64_t mid0 = a_high * b_low;
  uint64_t mid1 = b_high * a_low;
  uint64_t bottom = a_low * b_low;
  uint64_t partial = bottom + (mid0 << 32);
  uint64_t carry = partial < bottom;
  uint64_t low_half = partial + (mid1 << 32);
  uint64_t high_half;
  carry += low_half < partial;
  high_half = top + (mid0 >> 32) + (mid1 >> 32) + carry;
  #if(WYHASH_CONDOM>1)
  *A ^= low_half;
  *B ^= high_half;
  #else
  *A = low_half;
  *B = high_half;
  #endif
#endif
}
|
||||
/* Multiply A by B via _wymum() and xor the two result words together. */
static inline uint64_t _wymix(uint64_t A, uint64_t B){
  _wymum(&A, &B);
  return A ^ B;
}
|
||||
//read functions
|
||||
#ifndef WYHASH_LITTLE_ENDIAN
|
||||
#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
#define WYHASH_LITTLE_ENDIAN 1
|
||||
#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
|
||||
#define WYHASH_LITTLE_ENDIAN 0
|
||||
#endif
|
||||
#endif
|
||||
/*
 * Unaligned-safe loads of 8 and 4 bytes, always interpreted as
 * little-endian values.
 *
 * - Known little-endian target: plain memcpy().
 * - Known big-endian target with GNU-style or MSVC builtins: memcpy() plus
 *   a byte swap.
 * - Anything else (byte order not detected, or no byte-swap builtin):
 *   assemble the value byte by byte, which is correct on every platform.
 *   Previously an undetected byte order silently took the byte-swapping
 *   path, and an unknown compiler on a big-endian target had no readers
 *   defined at all.
 */
#if defined(WYHASH_LITTLE_ENDIAN) && (WYHASH_LITTLE_ENDIAN)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v;}
#elif defined(WYHASH_LITTLE_ENDIAN) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__))
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
#elif defined(WYHASH_LITTLE_ENDIAN) && defined(_MSC_VER)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
#else
static inline uint64_t _wyr4(const uint8_t *p) {
  return ((uint64_t)p[0]) | (((uint64_t)p[1])<<8) | (((uint64_t)p[2])<<16) | (((uint64_t)p[3])<<24);
}
static inline uint64_t _wyr8(const uint8_t *p) { return _wyr4(p) | (_wyr4(p+4)<<32); }
#endif
|
||||
/*
 * Pack a 1..3 byte key into one word: the first byte in bits 16-23, the
 * byte at index k/2 in bits 8-15, and the last byte (index k-1) in bits
 * 0-7. k must be non-zero because p[k-1] is read.
 */
static inline uint64_t _wyr3(const uint8_t *p, unsigned k) {
  const uint64_t first = p[0];
  const uint64_t middle = p[k>>1];
  const uint64_t last = p[k-1];
  return (first<<16) | (middle<<8) | last;
}
|
||||
//wyhash function
|
||||
/*
 * Hash the final i (0..16) bytes of the key starting at p and fold in the
 * total key length.
 *
 * p      - first of the remaining bytes
 * len    - total key length, mixed into the result
 * seed   - running seed
 * secret - secret table; only secret[1] is used here
 * i      - number of remaining bytes, expected to be <= 16
 *
 * In the WYHASH_CONDOM == 0 mode the previous code loaded 8 bytes at p and
 * at p+i-8 even when fewer than 8 bytes remained, reading outside the key,
 * and shifted by 64 bits when i == 0, which is undefined. The two words are
 * now built from the i in-bounds bytes only. For 1 <= i <= 16 the result is
 * bit-for-bit the same as before, because the out-of-range bytes were always
 * shifted out; for i == 0 both words are defined as zero.
 */
static inline uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
  uint64_t a, b;
#if(WYHASH_CONDOM>0)
  if(_likely_(i<=8)){
    if(_likely_(i>=4)){ a=_wyr4(p); b=_wyr4(p+i-4); }
    else if (_likely_(i)){ a=_wyr3(p,i); b=0; }
    else a=b=0;
  }
  else{ a=_wyr8(p); b=_wyr8(p+i-8); }
#else
  if(_likely_(i>=8)){
    /* Two possibly overlapping 8-byte loads, both inside the key. */
    a=_wyr8(p); b=_wyr8(p+i-8);
  }
  else{
    /* b: the i bytes as a little-endian value; a: the same bytes moved to
       the top of the word. Equals (_wyr8(p)<<s, _wyr8(p+i-8)>>s) with
       s=(8-i)*8, without touching memory outside [p, p+i). */
    b=0;
    for(uint64_t k=0;k<i;k++) b|=((uint64_t)p[k])<<(k<<3);
    a=i ? b<<((8-i)<<3) : 0;
  }
#endif
  return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed));
}
|
||||
|
||||
/*
 * Consume the remaining i bytes at p: while more than 16 bytes remain, mix
 * one 16-byte chunk into the seed and advance; the last 0..16 bytes are
 * handed to _wyfinish16(). len is the total key length and is passed
 * through unchanged.
 */
static inline uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
  while(_unlikely_(i>16)){
    seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed);
    p+=16;
    i-=16;
  }
  return _wyfinish16(p,len,seed,secret,i);
}
|
||||
|
||||
/*
 * Hash len bytes starting at key.
 *
 * key    - input bytes
 * len    - number of input bytes
 * seed   - caller-chosen seed; xor-ed with secret[0] before use
 * secret - table of at least 5 words (secret[0]..secret[4] are read)
 *
 * Inputs longer than 64 bytes are consumed in 64-byte blocks using two
 * independent accumulators that are xor-ed together afterwards; whatever
 * remains (1..64 bytes, or the whole key when len <= 64) is handled by
 * _wyfinish().
 */
static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){
  const uint8_t *cursor=(const uint8_t *)key;
  uint64_t remaining=len;

  seed^=secret[0];
  if(_unlikely_(remaining>64)){
    uint64_t lane=seed;
    do{
      const uint64_t m0=_wymix(_wyr8(cursor)^secret[1],_wyr8(cursor+8)^seed);
      const uint64_t m1=_wymix(_wyr8(cursor+16)^secret[2],_wyr8(cursor+24)^seed);
      const uint64_t m2=_wymix(_wyr8(cursor+32)^secret[3],_wyr8(cursor+40)^lane);
      const uint64_t m3=_wymix(_wyr8(cursor+48)^secret[4],_wyr8(cursor+56)^lane);
      seed=m0^m1;
      lane=m2^m3;
      cursor+=64;
      remaining-=64;
    }while(remaining>64);
    seed^=lane;
  }
  return _wyfinish(cursor,len,seed,secret,remaining);
}
|
||||
//utility functions
|
||||
/*
 * Default secret table for wyhash(). Declared static so that including this
 * header from several C translation units does not produce multiple
 * definitions of the same external object.
 */
static const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full};
|
||||
/*
 * Hash a pair of 64-bit values: both are xor-ed with the first two default
 * secrets, multiplied with _wymum(), then xor-ed with the secrets again and
 * mixed once more.
 */
static inline uint64_t wyhash64(uint64_t A, uint64_t B){
  A^=_wyp[0];
  B^=_wyp[1];
  _wymum(&A,&B);
  A^=_wyp[0];
  B^=_wyp[1];
  return _wymix(A,B);
}
|
||||
/*
 * Pseudo-random generator step: advance *seed by _wyp[0] and return a mix
 * of the new state with itself xor-ed by _wyp[1]. *seed is updated in place.
 */
static inline uint64_t wyrand(uint64_t *seed){
  const uint64_t state=(*seed+=_wyp[0]);
  return _wymix(state,state^_wyp[1]);
}
|
||||
/* Map r to a double in [0, 1): the top 52 bits of r scaled by 2^-52. */
static inline double wy2u01(uint64_t r){
  const double scale=1.0/(1ull<<52);
  const uint64_t top_bits=r>>12;
  return top_bits*scale;
}
|
||||
/*
 * Sum three 21-bit fields of r (bits 0-20, 21-41 and 42-62), scale the sum
 * by 2^-20 and subtract 3.0, giving a value in [-3, 3).
 */
static inline double wy2gau(uint64_t r){
  const double scale=1.0/(1ull<<20);
  const uint64_t field_mask=0x1fffff;
  const uint64_t low=r&field_mask;
  const uint64_t mid=(r>>21)&field_mask;
  const uint64_t high=(r>>42)&field_mask;
  return (low+mid+high)*scale-3.0;
}
|
||||
/*
 * Count the set bits of x. Uses the compiler builtin where one is known to
 * exist and a portable loop otherwise, so callers always get a real count.
 */
static inline unsigned _wy_popcount64(uint64_t x){
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
  return (unsigned)__builtin_popcountll(x);
#elif defined(_MSC_VER) && defined(_M_X64)
  return (unsigned)_mm_popcnt_u64(x);
#else
  unsigned bits=0;
  for(;x;x&=x-1) bits++;
  return bits;
#endif
}

/*
 * Fill secret[0..4] with five words derived from seed.
 *
 * Each word is assembled from eight bytes drawn from the table c via
 * wyrand(). A candidate is rejected and regenerated unless it is odd,
 * differs from every previously accepted word in exactly 32 bit positions,
 * and has no odd divisor below 2^32.
 *
 * NOTE: the divisor search runs up to about 2^31 iterations per surviving
 * candidate, so this function is very slow.
 *
 * The Hamming-distance check used to be selected by #if/#elif with no
 * fallback; on other compilers the loop body silently became
 * `if(!ok)continue;` and the check disappeared. It now goes through
 * _wy_popcount64() and is braced explicitly.
 */
static inline void make_secret(uint64_t seed, uint64_t *secret){
  static const uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
  for(size_t i=0;i<5;i++){
    uint8_t ok;
    do{
      ok=1; secret[i]=0;
      /* Build the candidate one byte at a time from the table. */
      for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<<j;
      /* Must be odd. */
      if(secret[i]%2==0){ ok=0; continue; }
      /* Must differ from each earlier secret in exactly 32 bits. */
      for(size_t j=0;j<i;j++){
        if(_wy_popcount64(secret[j]^secret[i])!=32){ ok=0; break; }
      }
      if(!ok)continue;
      /* Must have no odd divisor below 2^32. */
      for(uint64_t j=3;j<0x100000000ull;j+=2) if(secret[i]%j==0){ ok=0; break; }
    }while(!ok);
  }
}
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user