From a336fb62ff418141307eb59b593e0aa582609b2d Mon Sep 17 00:00:00 2001 From: Dale Weiler Date: Mon, 26 Nov 2012 00:21:16 +0000 Subject: [PATCH] fast optimized murmur hash with crc seeding, literally zero collision, and roughly only 52 instructions --- util.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 92 insertions(+), 10 deletions(-) diff --git a/util.c b/util.c index 6853393..49db707 100644 --- a/util.c +++ b/util.c @@ -525,21 +525,103 @@ typedef struct hash_node_t { struct hash_node_t *next; /* next node (linked list) */ } hash_node_t; - -size_t util_hthash(hash_table_t *ht, const char *key) { - uint64_t val = 0; - size_t len = strlen(key); - size_t itr = 0; +/* + * x86 and x86_64 optimized murmur hash functions for the hashtable + * we have individual implementations for optimal performance. + * + * Forced inlined as we wrap these up in the actual utility function + * below. These should be autovectorized by gcc. + */ +#ifdef __x86_64__ +GMQCC_INLINE uint32_t util_hthashfunc(hash_table_t *ht, const char *key, register size_t seed) { + const uint64_t mix = 0xC6A4A7935BD1E995; + const int rot = 47; + size_t size = strlen(key); + uint64_t hash = seed ^ (size - mix); + uint64_t alias = 0; + const uint64_t *beg = (const uint64_t*)key; + const uint64_t *end = beg + (size / 8); + const unsigned char *final = NULL; + + while (beg != end) { + alias = *beg++; + + alias *= mix; + alias ^= alias >> rot; + alias *= mix; + + hash ^= alias; + hash *= mix; + } + + final = (const unsigned char *)beg; + + switch (size & 7) { + case 7: hash ^= (uint64_t)(final[6]) << 48; + case 6: hash ^= (uint64_t)(final[5]) << 40; + case 5: hash ^= (uint64_t)(final[4]) << 32; + case 4: hash ^= (uint64_t)(final[3]) << 24; + case 3: hash ^= (uint64_t)(final[2]) << 16; + case 2: hash ^= (uint64_t)(final[1]) << 8; + case 1: hash ^= (uint64_t)(final[0]); + hash *= mix; + } + + hash ^= hash >> rot; + hash *= mix; + hash ^= hash >> rot; + + return (uint32_t)(hash % ht->size); +} +#else +GMQCC_INLINE uint32_t util_hthashfunc(hash_table_t *ht, const char *key, register size_t seed) { + const uint32_t mix = 0x5BD1E995; + const uint32_t rot = 24; + size_t size = strlen(key); + uint32_t hash = seed ^ size; + uint32_t alias = 0; + const unsigned char *data = (const unsigned char*)ket; - while (val < ((uint64_t)~1) /* MAX SIZE OF UINT64 */ && itr < len) { - val = val << 8; - val += key[itr]; + while (size >= 4) { + alias = *(uint32_t*)data; + + alias *= mix; + alias ^= alias >> rot; + alias *= mix; + + hash *= mix; + hash ^= alias; - itr++; + data += 4; + size -= 4; } - return val % ht->size; + switch (size) { + case 3: hash ^= data[2] << 16; + case 2: hash ^= data[1] << 8; + case 1: hash ^= data[0]; + hash *= mix; + } + + hash ^= hash >> 13; + hash *= mix; + hash ^= hash >> 15; + + return hash % ht->size; +} +#endif + +/* we use the crc table as seeds for the murmur hash :P */ +size_t util_hthash(hash_table_t *ht, const char *key) { + static size_t seed = 0; + register size_t hash = util_hthashfunc(ht, key, util_crc32_table[seed]); + + /* reset seed */ + if (seed >= sizeof(util_crc32_table) / sizeof(*util_crc32_table)) + seed = 0; + + return hash; } hash_node_t *_util_htnewpair(const char *key, void *value) { -- 2.39.2