2 * Copyright (C) 2012, 2013, 2014
5 * Permission is hereby granted, free of charge, to any person obtaining a copy of
6 * this software and associated documentation files (the "Software"), to deal in
7 * the Software without restriction, including without limitation the rights to
8 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is furnished to do
10 * so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in all
13 * copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/* 32-bit rotate-left, selected per compiler/architecture: MSVC's _rotl
 * intrinsic, inline x86 `roll` on GNU i386/amd64, or a portable shift-or
 * fallback. Callers in this file only rotate by 13 and 15 (see the block
 * mix and result finalization), so the fallback's UB cases (Y == 0 or
 * Y >= 32) are never hit here. */
# define HASH_ROTL32(X, Y) _rotl((X), (Y))
#elif defined (__GNUC__) && (defined(__i386__) || defined(__amd64__))
/* The "c" constraint pins the rotate count in CL, as `roll` requires.
 * NOTE(review): `volatile` on x presumably guards against the asm being
 * optimized away — confirm intent; "+r"(x) already marks it read-write. */
static GMQCC_FORCEINLINE uint32_t hash_rotl32(volatile uint32_t x, int8_t r) {
__asm__ __volatile__ ("roll %1,%0" : "+r"(x) : "c"(r));
# define HASH_ROTL32(X, Y) hash_rotl32((volatile uint32_t)(X), (Y))
/* Portable fallback for unknown compilers/architectures. */
# define HASH_ROTL32(X, Y) (((X) << (Y)) | ((X) >> (32 - (Y))))
40 * This is a version of the Murmur3 hashing function optimized for various
41 * compilers/architectures. It uses the traditional Murmur2 mix staging
42 * but fixes the mix staging inner loops.
44 * Murmur 2 contains an inner loop such as:
57 * The two u32s that form the key are the same value for x
58 * this premix stage will perform the same results for both values. Unrolled
69 * This appears to be fine, except what happens when m == 1? well x
* cancels out entirely, leaving just:
75 * So all keys hash to the same value, but how often does m == 1?
* well, it turns out testing x for all possible values yields only
77 * 172,013,942 unique results instead of 2^32. So nearly ~4.6 bits
78 * are cancelled out on average!
80 * This means we have a 14.5% higher chance of collision. This is where
81 * Murmur3 comes in to save the day.
83 static GMQCC_FORCEINLINE uint32_t hash_murmur_mix32(uint32_t hash) {
* These constants were calculated with SMHasher to determine the best
* case scenario for Murmur3:
* http://code.google.com/p/smhasher/
#define HASH_MURMUR_MASK1 0xCC9E2D51 /* c1: first per-block multiply constant */
#define HASH_MURMUR_MASK2 0x1B873593 /* c2: second per-block multiply constant */
#define HASH_MURMUR_SEED 0x9747B28C /* fixed seed used by hash_murmur() */
101 #if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE
102 # define HASH_MURMUR_SAFEREAD(PTR) (*((uint32_t*)(PTR)))
103 #elif PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_BIG
104 # if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR >= 3))
105 # define HASH_MURMUR_SAFEREAD(PTR) (__builtin_bswap32(*((uint32_t*)(PTR))))
108 /* Process individual bytes at this point since the endianess isn't known. */
109 #ifndef HASH_MURMUR_SAFEREAD
110 # define HASH_MURMUR_SAFEREAD(PTR) ((PTR)[0] | (PTR)[1] << 8 | (PTR)[2] << 16 | (PTR)[3] << 24)
/*
 * Murmur3 per-block mix: scramble the 32-bit block K (K *= c1, rotl 15,
 * K *= c2) and fold it into the running hash H (rotl 13, H*5 + 0xE6546B64).
 * NOTE(review): some continuation lines (the statement wrapper and the
 * fold of K into H) are not visible in this view.
 */
#define HASH_MURMUR_BLOCK(H, K) \
K *= HASH_MURMUR_MASK1; \
K = HASH_ROTL32(K, 15); \
K *= HASH_MURMUR_MASK2; \
H = HASH_ROTL32(H, 13); \
H = H * 5 + 0xE6546B64; \
/*
 * Feed COUNT leading bytes from PTR one at a time into the carry word C:
 * each new byte enters at the top (C = C >> 8 | byte << 24), and once a
 * full 32-bit word has accumulated it is mixed into H via
 * HASH_MURMUR_BLOCK. N presumably counts the bytes currently buffered in
 * C — its update lines are not visible in this view; confirm upstream.
 */
#define HASH_MURMUR_BYTES(COUNT, H, C, N, PTR, LENGTH) \
C = C >> 8 | *PTR++ << 24; \
HASH_MURMUR_BLOCK(H, C); \
/*
 * Flush the trailing (LEN % 4) bytes through HASH_MURMUR_BYTES, then save
 * the state for hash_murmur_result(): *Z packs the leftover carry bytes
 * (high 24 bits) together with the buffered-byte count N (low byte).
 */
#define HASH_MURMUR_TAIL(P, Z, H, C, N, PTR, LEN) \
HASH_MURMUR_BYTES(LEN, H, C, N, PTR, LEN); \
*Z = ((C) & ~0xFF) | (N); \
#if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE
/*
 * Bulk Murmur3 mixing loop (little-endian build): stream `length` bytes
 * of `key` into the running hash *ph1, leaving any trailing partial word
 * in *carry for hash_murmur_result() to finish.
 * NOTE(review): declarations of h1/c/n/end are on lines not visible in
 * this view — presumably initialized from *ph1 and *carry.
 */
static GMQCC_FORCEINLINE void hash_murmur_process(uint32_t *ph1, uint32_t *carry, const void *key, int length) {
const uint8_t *ptr = (uint8_t*)key;
/* Bytes needed to top up a previously buffered partial word. */
int it = (4 - n) & 3;
if (it && it <= length)
HASH_MURMUR_BYTES(it, h1, c, n, ptr, length);
/* Main loop: mix all remaining whole 32-bit words. */
end = ptr + length/4*4;
for (; ptr < end; ptr += 4) {
uint32_t k1 = HASH_MURMUR_SAFEREAD(ptr);
HASH_MURMUR_BLOCK(h1, k1);
/* Park the trailing bytes and buffered-byte count in *carry. */
HASH_MURMUR_TAIL(ph1, carry, h1, c, n, ptr, length);
/*
 * Alignment/endianness-aware variant of the mixing loop (the non-little-
 * endian build). Same contract as the version above: fold `length` bytes
 * of `key` into *ph1 and leave trailing bytes in *carry.
 * NOTE(review): declarations of h1/c/n/k1/end and the surrounding
 * preprocessor branch are on lines not visible in this view.
 */
static GMQCC_FORCEINLINE void hash_murmur_process(uint32_t *ph1, uint32_t *carry, const void *key, int length) {
const uint8_t *ptr = (uint8_t*)key;
/* Bytes until `ptr` reaches 4-byte alignment. */
int it = -(long)ptr & 3;
if (it && it <= length)
HASH_MURMUR_BYTES(it, h1, c, n, ptr, length);
end = ptr + length / 4 * 4;
/* Aligned fast path: whole words straight from memory. */
for (; ptr < end; ptr += 4) {
k1 = HASH_MURMUR_SAFEREAD(ptr);
HASH_MURMUR_BLOCK(h1, k1);
/* Misaligned path: NEXT stitches each word from two adjacent aligned
 * reads, shifting by RIGHT/LEFT bits. (Continuation lines elided.) */
# define NEXT(N, RIGHT, LEFT) \
for (; ptr < end; ptr += 4) { \
c = HASH_MURMUR_SAFEREAD(ptr); \
HASH_MURMUR_BLOCK(h1, k1); \
HASH_MURMUR_TAIL(ph1, carry, h1, c, n, ptr, length);
/*
 * Murmur3 finalization: if bytes were left buffered (n != 0, recovered
 * from the low byte of `carry` — decl not visible in this view), scramble
 * them like a normal block and fold them in, then avalanche the result
 * with hash_murmur_mix32(). `length` participates in the (elided) final
 * xor so strings of different lengths diverge.
 */
static GMQCC_FORCEINLINE uint32_t hash_murmur_result(uint32_t hash, uint32_t carry, size_t length) {
if (GMQCC_LIKELY(n)) {
/* Shift the pending bytes down from the top of the carry word. */
k1 = carry >> (4 - n) * 8;
k1 *= HASH_MURMUR_MASK1;
k1 = HASH_ROTL32(k1, 15);
k1 *= HASH_MURMUR_MASK2;
/* Final avalanche. */
hash = hash_murmur_mix32(hash);
/*
 * Hash `length` bytes at `key` with Murmur3 under the fixed seed
 * HASH_MURMUR_SEED. NOTE(review): the declaration of `carry`
 * (presumably `uint32_t carry = 0;`) is not visible in this view.
 */
static GMQCC_FORCEINLINE uint32_t hash_murmur(const void *GMQCC_RESTRICT key, size_t length) {
uint32_t hash = HASH_MURMUR_SEED;
hash_murmur_process(&hash, &carry, key, length);
return hash_murmur_result(hash, carry, length);
/*
 * Public entry point: hash a NUL-terminated C string.
 * Computes the length with an inlined word-at-a-time strlen, then feeds
 * the bytes to the Murmur3 core. NOTE(review): the declarations of
 * s/a/w and the preprocessor guards selecting between the optimized scan
 * and the plain strlen() fallback are not visible in this view.
 */
size_t hash(const char *key) {
/* Walk byte-by-byte until `s` is word-aligned (returns early if the
 * terminator is found during alignment). */
for (; (uintptr_t)s % sizeof(size_t); s++)
return hash_murmur((const void *)key, s-a);
/* Word-at-a-time scan: with ones = (size_t)-1/UCHAR_MAX = 0x0101..01 and
 * highs = ones * 0x80 = 0x8080..80, the expression
 * (*w - ones) & ~*w & highs is nonzero iff some byte of *w is zero
 * (the classic "haszero" bit trick). */
for (w = (const size_t*)s; !((*w-(size_t)-1/UCHAR_MAX) & ~*w & ((size_t)-1/UCHAR_MAX) * (UCHAR_MAX / 2 + 1)); w++);
/* Locate the exact terminator inside the word that contains it. */
for (s = (const char *)w; *s; s++);
return hash_murmur((const void *)key, s-a);
/* Fallback path for builds without the optimized scan. */
return hash_murmur((const void *)key, strlen(key));
/* The Murmur macros are implementation details of the code above; scrub
 * them so they cannot leak into the rest of the file. */
#undef HASH_MURMUR_MASK1
#undef HASH_MURMUR_MASK2
#undef HASH_MURMUR_SEED
#undef HASH_MURMUR_SAFEREAD
#undef HASH_MURMUR_BLOCK
#undef HASH_MURMUR_BYTES
#undef HASH_MURMUR_TAIL