/*
 * Copyright (C) 2012, 2013, 2014
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "gmqcc.h"

#if defined(_MSC_VER)
# define HASH_ROTL32(X, Y) _rotl((X), (Y))
#elif defined (__GNUC__) && (defined(__i386__) || defined(__amd64__))
static GMQCC_FORCEINLINE uint32_t hash_rotl32(volatile uint32_t x, int8_t r) {
    __asm__ __volatile__ ("roll %1,%0" : "+r"(x) : "c"(r));
    return x;
}
# define HASH_ROTL32(X, Y) hash_rotl32((volatile uint32_t)(X), (Y))
#else
# define HASH_ROTL32(X, Y) (((X) << (Y)) | ((X) >> (32 - (Y))))
#endif
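
/*
 * Sanity example: all three HASH_ROTL32 variants agree for nonzero shifts,
 * e.g. HASH_ROTL32(0x80000001, 1) == 0x00000003.
 */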

/*
 * This is a version of the Murmur3 hashing function optimized for various
 * compilers/architectures. It uses the traditional Murmur2 mix staging
 * but fixes the mix staging inner loops.
 *
 * Murmur 2 contains an inner loop such as:
 * while (l >= 4) {
 *      u32 k = *(u32*)d;
 *      k *= m;
 *      k ^= k >> r;
 *      k *= m;
 *
 *      h *= m;
 *      h ^= k;
 *
 *      d += 4;
 *      l -= 4;
 * }
 *
 * When the two u32s that form a key are the same value x, this premix stage
 * produces the same result for both of them. Unrolled, that is just:
 *  x *= m;
 *  x ^= x >> r;
 *  x *= m;
 *
 *  h *= m;
 *  h ^= x;
 *  h *= m;
 *  h ^= x;
 *
 * This appears to be fine, except what happens when m == 1? x then
 * cancels out entirely, leaving just:
 *  x ^= x >> r;
 *  h ^= x;
 *  h ^= x;
 *
 * So all keys of this form hash to the same value when m == 1, but how often
 * does such cancellation happen? It turns out that testing x over all
 * possible values yields only 172,013,942 unique results instead of 2^32,
 * so nearly ~4.6 bits are cancelled out on average!
 *
 * This means we have a 14.5% higher chance of collision. This is where
 * Murmur3 comes in to save the day.
 */
static GMQCC_FORCEINLINE uint32_t hash_murmur_mix32(uint32_t hash) {
    hash ^= hash >> 16;
    hash *= 0x85EBCA6B;
    hash ^= hash >> 13;
    hash *= 0xC2B2AE35;
    hash ^= hash >> 16;
    return hash;
}
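
/*
 * Why the Murmur3 staging resists the cancellation shown above: in the block
 * mix below, every XOR of key material is separated from the next by a rotate
 * and a multiply-add, so two identical contributions no longer cancel:
 *
 *  h ^= x;
 *  h  = rotl32(h, 13);
 *  h  = h * 5 + 0xE6546B64;
 *  h ^= x;   (does not undo the first XOR)
 */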

/*
 * These constants were calculated with SMHasher to determine the best
 * case scenario for Murmur3:
 * http://code.google.com/p/smhasher/
 */
#define HASH_MURMUR_MASK1 0xCC9E2D51
#define HASH_MURMUR_MASK2 0x1B873593
#define HASH_MURMUR_SEED  0x9747B28C

#if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE
# define HASH_MURMUR_SAFEREAD(PTR) (*((uint32_t*)(PTR)))
#elif PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_BIG
# if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#   define HASH_MURMUR_SAFEREAD(PTR) (__builtin_bswap32(*((uint32_t*)(PTR))))
# endif
#endif
/* Process individual bytes at this point since the endianness isn't known. */
#ifndef HASH_MURMUR_SAFEREAD
# define HASH_MURMUR_SAFEREAD(PTR) ((PTR)[0] | (PTR)[1] << 8 | (PTR)[2] << 16 | (PTR)[3] << 24)
#endif
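
/*
 * Example: for the bytes { 0x01, 0x02, 0x03, 0x04 } every variant of
 * HASH_MURMUR_SAFEREAD above yields 0x04030201, i.e. the little-endian
 * interpretation, regardless of host byte order.
 */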

#define HASH_MURMUR_BLOCK(H, K)          \
    do {                                 \
        K *= HASH_MURMUR_MASK1;          \
        K  = HASH_ROTL32(K, 15);         \
        K *= HASH_MURMUR_MASK2;          \
        H ^= K;                          \
        H  = HASH_ROTL32(H, 13);         \
        H  = H * 5 + 0xE6546B64;         \
    } while (0)

#define HASH_MURMUR_BYTES(COUNT, H, C, N, PTR, LENGTH) \
    do {                                 \
        int i = COUNT;                   \
        while (i--) {                    \
            C = C >> 8 | *PTR++ << 24;   \
            N++;                         \
            LENGTH--;                    \
            if (N == 4) {                \
                HASH_MURMUR_BLOCK(H, C); \
                N = 0;                   \
            }                            \
        }                                \
    } while (0)

#define HASH_MURMUR_TAIL(P, Z, H, C, N, PTR, LEN)   \
    do {                                            \
        LEN -= LEN/4*4;                             \
        HASH_MURMUR_BYTES(LEN, H, C, N, PTR, LEN);  \
        *P = H;                                     \
        *Z = ((C) & ~0xFF) | (N);                   \
    } while (0)
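
/*
 * Worked example of the carry encoding (starting from an empty carry):
 * after buffering the bytes 0xAA then 0xBB, C == 0xBBAA0000 and N == 2,
 * so the tail stores the carry word 0xBBAA0002 with the pending count in
 * the low byte. The finalizer later recovers the pending bytes as
 * carry >> (4 - 2) * 8 == 0x0000BBAA.
 */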

#if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE
static GMQCC_FORCEINLINE void hash_murmur_process(uint32_t *ph1, uint32_t *carry, const void *key, int length) {
    uint32_t h1 = *ph1;
    uint32_t c  = *carry;

    const uint8_t *ptr = (uint8_t*)key;
    const uint8_t *end;

    /* Extract the carry count from the low 2 bits of the carry value. */
    int n  = c & 3;
    int it = (4 - n) & 3;
    if (it && it <= length)
        HASH_MURMUR_BYTES(it, h1, c, n, ptr, length);

    end = ptr + length/4*4;
    for (; ptr < end; ptr += 4) {
        uint32_t k1 = HASH_MURMUR_SAFEREAD(ptr);
        HASH_MURMUR_BLOCK(h1, k1);
    }

    HASH_MURMUR_TAIL(ph1, carry, h1, c, n, ptr, length);
}
#else
static GMQCC_FORCEINLINE void hash_murmur_process(uint32_t *ph1, uint32_t *carry, const void *key, int length) {
    uint32_t k1;
    uint32_t h1 = *ph1;
    uint32_t c  = *carry;

    const uint8_t *ptr = (uint8_t*)key;
    const uint8_t *end;

    int n  = c & 3;
    /* Consume bytes until the read pointer is word aligned. */
    int it = -(long)ptr & 3;
    if (it && it <= length)
        HASH_MURMUR_BYTES(it, h1, c, n, ptr, length);

    end = ptr + length / 4 * 4;
    /* Specialize the aligned block loop for each possible carry count. */
    switch (n) {
        case 0:
            for (; ptr < end; ptr += 4) {
                k1 = HASH_MURMUR_SAFEREAD(ptr);
                HASH_MURMUR_BLOCK(h1, k1);
            }
            break;

# define NEXT(N, RIGHT, LEFT)                  \
        case N:                                \
            for (; ptr < end; ptr += 4) {      \
                k1  = c >> RIGHT;              \
                c   = HASH_MURMUR_SAFEREAD(ptr); \
                k1 |= c << LEFT;               \
                HASH_MURMUR_BLOCK(h1, k1);     \
            }                                  \
            break
        NEXT(1, 24, 8);
        NEXT(2, 16, 16);
        NEXT(3, 8, 24);
# undef NEXT
    }

    HASH_MURMUR_TAIL(ph1, carry, h1, c, n, ptr, length);
}
#endif

static GMQCC_FORCEINLINE uint32_t hash_murmur_result(uint32_t hash, uint32_t carry, size_t length) {
    uint32_t k1;
    int n = carry & 3;
    if (GMQCC_LIKELY(n)) {
        k1  = carry >> (4 - n) * 8;
        k1 *= HASH_MURMUR_MASK1;
        k1  = HASH_ROTL32(k1, 15);
        k1 *= HASH_MURMUR_MASK2;
        hash ^= k1;
    }
    hash ^= length;
    hash = hash_murmur_mix32(hash);
    return hash;
}

static GMQCC_FORCEINLINE uint32_t hash_murmur(const void *GMQCC_RESTRICT key, size_t length) {
    uint32_t hash  = HASH_MURMUR_SEED;
    uint32_t carry = 0;
    hash_murmur_process(&hash, &carry, key, length);
    return hash_murmur_result(hash, carry, length);
}
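
/*
 * Usage sketch (illustrative): hashing in one shot and incrementally
 * produce the same value for the same byte sequence.
 *
 *  uint32_t one = hash_murmur("foobar", 6);
 *
 *  uint32_t hash  = HASH_MURMUR_SEED;
 *  uint32_t carry = 0;
 *  hash_murmur_process(&hash, &carry, "foo", 3);
 *  hash_murmur_process(&hash, &carry, "bar", 3);
 *  uint32_t two = hash_murmur_result(hash, carry, 6);
 *
 *  assert(one == two);
 */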

/*
 * The following hash function implements its own strlen to avoid using libc's,
 * which isn't always slow, but isn't always the fastest either.
 *
 * The main thing to note about this strlen, which is otherwise confusing to
 * grasp at first, is that it intentionally depends on undefined behavior.
 *
 * The first step of the strlen is to ensure alignment before checking words;
 * without this step we risk crossing a page boundary with the word check, and
 * that would cause a crash.
 *
 * The second step of the strlen contains the intentional undefined behavior.
 * When accessing a word of any size, the first byte of that word is accessible
 * if and only if the whole word is accessible, because words are aligned; this
 * follows from the fact that the word size / alignment always divides the page
 * size. One could argue that an architecture exists where the size of size_t
 * and its alignment differ; if that were the case, the alignment is always
 * assumed to be the size of the type (size_t), so it's still safe in that
 * regard.
 *
 * In other words, an aligned 2^n load cannot cross a page boundary unless
 * n > log2(PAGE_SIZE). There are no known architectures which support such
 * a load wider than PAGE_SIZE.
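 *
 * For example, with 4 KiB pages, an aligned 4-byte load at 0x2FFC reads
 * bytes 0x2FFC..0x2FFF and stays inside the page ending at 0x2FFF, while
 * an unaligned 4-byte load at 0x2FFE would also touch 0x3000, which may
 * be an unmapped page.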
 *
 * Valgrind and address sanitizer may choke on this because they're strictly
 * trying to find bugs; it's a false positive to assume this is a bug when it's
 * intentional. To prevent these false positives, both tools need to be taught
 * about the intentional behavior. For address sanitizer this can be done with
 * a compiler attribute, effectively preventing the function from being
 * instrumented. Valgrind requires a little more work, as there is no way to
 * outright prevent a function from being instrumented; instead we can mark
 * + sizeof(size_t) bytes ahead of each byte we're reading as defined while we
 * calculate the length of the string, then mark that additional
 * + sizeof(size_t) on the end inaccessible after the length has been
 * calculated.
 *
 * If the compiler doesn't have the attribute to disable address sanitizer
 * instrumentation, we fall back to using libc's strlen instead. This isn't the
 * best solution, but on Windows we can always assume this method, because
 * neither address sanitizer nor valgrind exist there.
 */

/* Some compilers expose this */
#if defined(__has_feature)
# if __has_feature(address_sanitizer)
#   define ASAN_DISABLE __attribute__((no_sanitize_address))
#   define HAS_ASAN_DISABLE
# endif
#endif

/* If they don't, try to find by version where the attribute was introduced. */
#if defined(__GNUC__) && __GNUC__ >= 4
# define ASAN_DISABLE __attribute__((no_sanitize_address))
# define HAS_ASAN_DISABLE
#elif defined(__clang__) && __clang_major__ >= 3
# define ASAN_DISABLE __attribute__((no_sanitize_address))
# define HAS_ASAN_DISABLE
/* On Windows asan doesn't exist */
#elif defined(_WIN32)
# define ASAN_DISABLE /* nothing */
# define HAS_ASAN_DISABLE
#endif

#ifndef HAS_ASAN_DISABLE
# include <string.h> /* the fallback path uses libc strlen */
#endif

#ifdef VALGRIND
# include <valgrind/valgrind.h>
# include <valgrind/memcheck.h>
#else
# define VALGRIND_MAKE_MEM_DEFINED(PTR, REDZONE_SIZE)
# define VALGRIND_MAKE_MEM_NOACCESS(PTR, REDZONE_SIZE)
#endif

#ifdef HAS_ASAN_DISABLE
#define STRLEN_ALIGN (sizeof(size_t))
#define STRLEN_ONES ((size_t)-1/UCHAR_MAX)
#define STRLEN_HIGHS (STRLEN_ONES * (UCHAR_MAX/2+1))
#define STRLEN_HASZERO(X) (((X)-STRLEN_ONES) & ~(X) & STRLEN_HIGHS)
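
/*
 * Worked example with a 32-bit size_t (STRLEN_ONES == 0x01010101,
 * STRLEN_HIGHS == 0x80808080): for X == 0x61616161 ("aaaa") the test
 * yields 0, while for X == 0x61006161 (an embedded NUL byte) it yields
 * 0x00800000, flagging the word so the byte loop below can pin down
 * the terminator.
 */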

ASAN_DISABLE size_t hash(const char *key) {
    const char   *s = key;
    const char   *a = key;
    const size_t *w;

    /* Align the read pointer one byte at a time. */
    for (; (uintptr_t)s % STRLEN_ALIGN; s++) {
        if (!*s)
            return hash_murmur((const void *)key, s-a);
    }

    VALGRIND_MAKE_MEM_DEFINED(s, STRLEN_ALIGN);
    for (w = (const void *)s; !STRLEN_HASZERO(*w); w++) {
        /* Make the next word (w + 1, i.e. STRLEN_ALIGN bytes ahead) legal to access */
        VALGRIND_MAKE_MEM_DEFINED(w + 1, STRLEN_ALIGN);
    }

    /* Pin down the exact NUL byte within the flagged word. */
    for (s = (const void *)w; *s; s++);

    /* It's not legal to access this area anymore */
    VALGRIND_MAKE_MEM_NOACCESS(s + 1, STRLEN_ALIGN);

    return hash_murmur((const void *)key, s-a);
}
#else
/* No way to disable asan instrumenting; use strlen. */
size_t hash(const char *key) {
    return hash_murmur((const void *)key, strlen(key));
}
#endif /*! HAS_ASAN_DISABLE */