2 Copyright (C) 2001-2006, William Joseph.
5 This file is part of GtkRadiant.
7 GtkRadiant is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 GtkRadiant is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GtkRadiant; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #if !defined( INCLUDED_CONTAINER_HASHFUNC_H )
23 #define INCLUDED_CONTAINER_HASHFUNC_H
#include <cstdint>
#include <cstring>

#include "string/string.h"
#include "container/array.h"
/// \brief Unsigned 4-byte quantity used by every hash routine in this file.
/// Fixed at exactly 32 bits: `unsigned long` is 8 bytes on LP64 targets, while mix() is
/// written for 32-bit operands (mix2() exists only for the 8-byte case).
typedef std::uint32_t ub4;
/// \brief Unsigned 1-byte quantity.
typedef unsigned char ub1;
31 inline ub1 ub1_as_ub1_nocase( ub1 byte ){
32 return std::tolower( byte );
35 inline ub4 ub1x4_as_ub4_nocase( const ub1 bytes[4] ){
37 reinterpret_cast<ub1*>( &result )[0] = ub1_as_ub1_nocase( bytes[0] );
38 reinterpret_cast<ub1*>( &result )[1] = ub1_as_ub1_nocase( bytes[1] );
39 reinterpret_cast<ub1*>( &result )[2] = ub1_as_ub1_nocase( bytes[2] );
40 reinterpret_cast<ub1*>( &result )[3] = ub1_as_ub1_nocase( bytes[3] );
44 class ub1_default_traits
47 static ub1 as_ub1( ub1 byte ){
52 class ub1_nocase_traits
55 static ub1 as_ub1( ub1 byte ){
56 return ub1_as_ub1_nocase( byte );
60 class ub1x4_default_traits
63 static ub4 as_ub4( const ub1 bytes[4] ){
64 return *reinterpret_cast<const ub4*>( bytes );
68 class ub1x4_nocase_traits
71 static ub4 as_ub4( const ub1 bytes[4] ){
72 return ub1x4_as_ub4_nocase( bytes );
76 class ub4_default_traits
79 static ub4 as_ub4( ub4 i ){
84 class ub4_nocase_traits
87 static ub4 as_ub4( ub4 i ){
88 return ub1x4_as_ub4_nocase( reinterpret_cast<const ub1*>( &i ) );
93 // By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
94 // code any way you wish, private, educational, or commercial. It's free.
/* Number of slots in a table addressed by an n-bit hash. */
#define hashsize( n ) ( static_cast<ub4>( 1 ) << ( n ) )
/* Mask that keeps the low n bits of a hash value. */
#define hashmask( n ) ( hashsize( n ) - 1 )
100 --------------------------------------------------------------------
101 mix -- mix 3 32-bit values reversibly.
102 For every delta with one or two bit set, and the deltas of all three
103 high bits or all three low bits, whether the original value of a,b,c
104 is almost all zero or is uniformly distributed,
105 * If mix() is run forward or backward, at least 32 bits in a,b,c
106 have at least 1/4 probability of changing.
107 * If mix() is run forward, every bit of c will change between 1/3 and
108 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
mix() was built out of 36 single-cycle latency instructions in a
structure that could support 2x parallelism, like so:
118 Unfortunately, superscalar Pentiums and Sparcs can't take advantage
119 of that parallelism. They've also turned some of those single-cycle
120 latency instructions into multi-cycle latency instructions. Still,
121 this is the fastest good hash I could find. There were about 2^^68
122 to choose from. I only looked at a billion or so.
123 --------------------------------------------------------------------
/* Mixes three 32-bit values in place. a, b and c must be modifiable lvalues.
   The order of the nine steps is significant and must not be changed. */
#define mix( a,b,c ) \
	{ \
		( a ) -= ( b ); ( a ) -= ( c ); ( a ) ^= ( ( c ) >> 13 ); \
		( b ) -= ( c ); ( b ) -= ( a ); ( b ) ^= ( ( a ) << 8 ); \
		( c ) -= ( a ); ( c ) -= ( b ); ( c ) ^= ( ( b ) >> 13 ); \
		( a ) -= ( b ); ( a ) -= ( c ); ( a ) ^= ( ( c ) >> 12 ); \
		( b ) -= ( c ); ( b ) -= ( a ); ( b ) ^= ( ( a ) << 16 ); \
		( c ) -= ( a ); ( c ) -= ( b ); ( c ) ^= ( ( b ) >> 5 ); \
		( a ) -= ( b ); ( a ) -= ( c ); ( a ) ^= ( ( c ) >> 3 ); \
		( b ) -= ( c ); ( b ) -= ( a ); ( b ) ^= ( ( a ) << 10 ); \
		( c ) -= ( a ); ( c ) -= ( b ); ( c ) ^= ( ( b ) >> 15 ); \
	}
/* Same mixing as mix(), but slower: the explicit 0xffffffff masks keep the result
   confined to 32 bits on systems where ub4 might be 8 bytes wide.
   a, b and c must be modifiable lvalues. */
#define mix2( a,b,c ) \
	{ \
		( a ) -= ( b ); ( a ) -= ( c ); ( a ) ^= ( ( c ) >> 13 ); \
		( b ) -= ( c ); ( b ) -= ( a ); ( b ) ^= ( ( a ) << 8 ); \
		( c ) -= ( a ); ( c ) -= ( b ); ( c ) ^= ( ( ( b ) & 0xffffffff ) >> 13 ); \
		( a ) -= ( b ); ( a ) -= ( c ); ( a ) ^= ( ( ( c ) & 0xffffffff ) >> 12 ); \
		( b ) -= ( c ); ( b ) -= ( a ); ( b ) = ( ( b ) ^ ( ( a ) << 16 ) ) & 0xffffffff; \
		( c ) -= ( a ); ( c ) -= ( b ); ( c ) = ( ( c ) ^ ( ( b ) >> 5 ) ) & 0xffffffff; \
		( a ) -= ( b ); ( a ) -= ( c ); ( a ) = ( ( a ) ^ ( ( c ) >> 3 ) ) & 0xffffffff; \
		( b ) -= ( c ); ( b ) -= ( a ); ( b ) = ( ( b ) ^ ( ( a ) << 10 ) ) & 0xffffffff; \
		( c ) -= ( a ); ( c ) -= ( b ); ( c ) = ( ( c ) ^ ( ( b ) >> 15 ) ) & 0xffffffff; \
	}
153 --------------------------------------------------------------------
154 hash() -- hash a variable-length key into a 32-bit value
155 k : the key (the unaligned variable-length array of bytes)
156 len : the length of the key, counting by bytes
157 level : can be any 4-byte value
158 Returns a 32-bit value. Every bit of the key affects every bit of
159 the return value. Every 1-bit and 2-bit delta achieves avalanche.
160 About 36+6len instructions.
162 The best hash table sizes are powers of 2. There is no need to do
163 mod a prime (mod is sooo slow!). If you need less than 32 bits,
164 use a bitmask. For example, if you need only 10 bits, do
165 h = (h & hashmask(10));
166 In which case, the hash table should have hashsize(10) elements.
168 If you are hashing n strings (ub1 **)k, do it like this:
169 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
See http://burtleburtle.net/bob/hash/evahash.html
172 Use for hash table lookup, or anything where one collision in 2^32 is
173 acceptable. Do NOT use for cryptographic purposes.
174 --------------------------------------------------------------------
177 template<typename UB1Traits, typename UB4x1Traits>
179 const ub1 *k, /* the key */
180 ub4 length, /* the length of the key */
181 ub4 initval, /* the previous hash, or an arbitrary value */
182 const UB1Traits& ub1traits,
183 const UB4x1Traits& ub4x1traits
187 /* Set up the internal state */
189 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
190 c = initval; /* the previous hash value */
192 /*---------------------------------------- handle most of the key */
195 a += ( k[0] + ( ( ub4 ) UB1Traits::as_ub1( k[1] ) << 8 ) + ( ( ub4 ) UB1Traits::as_ub1( k[2] ) << 16 ) + ( ( ub4 ) UB1Traits::as_ub1( k[3] ) << 24 ) );
196 b += ( k[4] + ( ( ub4 ) UB1Traits::as_ub1( k[5] ) << 8 ) + ( ( ub4 ) UB1Traits::as_ub1( k[6] ) << 16 ) + ( ( ub4 ) UB1Traits::as_ub1( k[7] ) << 24 ) );
197 c += ( k[8] + ( ( ub4 ) UB1Traits::as_ub1( k[9] ) << 8 ) + ( ( ub4 ) UB1Traits::as_ub1( k[10] ) << 16 ) + ( ( ub4 ) UB1Traits::as_ub1( k[11] ) << 24 ) );
202 /*------------------------------------- handle the last 11 bytes */
204 switch ( len ) /* all the case statements fall through */
206 #if defined(__GNUC__) && __GNUC__ < 7
207 case 11: c += ( ( ub4 ) UB1Traits::as_ub1( k[10] ) << 24 ); /* fall through */
208 case 10: c += ( ( ub4 ) UB1Traits::as_ub1( k[9] ) << 16 ); /* fall through */
209 case 9: c += ( ( ub4 ) UB1Traits::as_ub1( k[8] ) << 8 ); /* fall through */
210 /* the first byte of c is reserved for the length */
211 case 8: b += ( ( ub4 ) UB1Traits::as_ub1( k[7] ) << 24 ); /* fall through */
212 case 7: b += ( ( ub4 ) UB1Traits::as_ub1( k[6] ) << 16 ); /* fall through */
213 case 6: b += ( ( ub4 ) UB1Traits::as_ub1( k[5] ) << 8 ); /* fall through */
214 case 5: b += UB1Traits::as_ub1( k[4] ); /* fall through */
215 case 4: a += ( ( ub4 ) UB1Traits::as_ub1( k[3] ) << 24 ); /* fall through */
216 case 3: a += ( ( ub4 ) UB1Traits::as_ub1( k[2] ) << 16 ); /* fall through */
217 case 2: a += ( ( ub4 ) UB1Traits::as_ub1( k[1] ) << 8 ); /* fall through */
218 case 1: a += UB1Traits::as_ub1( k[0] );
220 case 11: c += ( ( ub4 ) UB1Traits::as_ub1( k[10] ) << 24 ); __attribute((fallthrough));
221 case 10: c += ( ( ub4 ) UB1Traits::as_ub1( k[9] ) << 16 ); __attribute((fallthrough));
222 case 9: c += ( ( ub4 ) UB1Traits::as_ub1( k[8] ) << 8 ); __attribute((fallthrough));
223 /* the first byte of c is reserved for the length */
224 case 8: b += ( ( ub4 ) UB1Traits::as_ub1( k[7] ) << 24 ); __attribute((fallthrough));
225 case 7: b += ( ( ub4 ) UB1Traits::as_ub1( k[6] ) << 16 ); __attribute((fallthrough));
226 case 6: b += ( ( ub4 ) UB1Traits::as_ub1( k[5] ) << 8 ); __attribute((fallthrough));
227 case 5: b += UB1Traits::as_ub1( k[4] ); __attribute((fallthrough));
228 case 4: a += ( ( ub4 ) UB1Traits::as_ub1( k[3] ) << 24 ); __attribute((fallthrough));
229 case 3: a += ( ( ub4 ) UB1Traits::as_ub1( k[2] ) << 16 ); __attribute((fallthrough));
230 case 2: a += ( ( ub4 ) UB1Traits::as_ub1( k[1] ) << 8 ); __attribute((fallthrough));
231 case 1: a += UB1Traits::as_ub1( k[0] );
233 /* case 0: nothing left to add */
236 /*-------------------------------------------- report the result */
241 --------------------------------------------------------------------
242 This works on all machines. hash2() is identical to hash() on
243 little-endian machines, except that the length has to be measured
in ub4s instead of bytes. It is much faster than hash(). It requires
246 -- that the key be an array of ub4's, and
247 -- that all your machines have the same endianness, and
248 -- that the length be the number of ub4's in the key
249 --------------------------------------------------------------------
251 template<typename UB4Traits>
253 const ub4 *k, /* the key */
254 ub4 length, /* the length of the key, in ub4s */
255 ub4 initval, /* the previous hash, or an arbitrary value */
256 const UB4Traits& ub4traits
260 /* Set up the internal state */
262 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
263 c = initval; /* the previous hash value */
265 /*---------------------------------------- handle most of the key */
268 a += UB4Traits::as_ub4( k[0] );
269 b += UB4Traits::as_ub4( k[1] );
270 c += UB4Traits::as_ub4( k[2] );
275 /*-------------------------------------- handle the last 2 ub4's */
277 switch ( len ) /* all the case statements fall through */
279 /* c is reserved for the length */
280 case 2: b += UB4Traits::as_ub4( k[1] );
281 case 1: a += UB4Traits::as_ub4( k[0] );
282 /* case 0: nothing left to add */
285 /*-------------------------------------------- report the result */
291 inline hash_t hash_ub1( const ub1* key, std::size_t len, hash_t previous = 0 ){
292 return hash( key, ub4( len ), previous, ub1_default_traits(), ub1x4_default_traits() );
295 inline hash_t hash_ub1_nocase( const ub1* key, std::size_t len, hash_t previous = 0 ){
296 return hash( key, ub4( len ), previous, ub1_nocase_traits(), ub1x4_nocase_traits() );
299 template<typename UB4Traits>
300 inline hash_t hash_ub4( const ub4* key, std::size_t len, const UB4Traits& traits, hash_t previous = 0 ){
301 return hash2( key,ub4( len ), previous, traits );
304 inline ub4 hash_combine( ub4 left, ub4 right ){
305 return hash_ub1( reinterpret_cast<const ub1*>( &left ), 4, right );
308 template<typename POD>
309 inline hash_t pod_hash( const POD& pod ){
310 return hash_ub1( reinterpret_cast<const ub1*>( &pod ), sizeof( POD ) );
313 inline hash_t string_hash( const char* string, hash_t previous = 0 ){
314 return hash_ub1( reinterpret_cast<const ub1*>( string ), string_length( string ), previous );
317 inline hash_t string_hash_nocase( const char* string, hash_t previous = 0 ){
318 return hash_ub1_nocase( reinterpret_cast<const ub1*>( string ), string_length( string ), previous );
323 typedef hash_t hash_type;
324 hash_type operator()( const char* string ) const {
325 return string_hash( string );
331 typedef hash_t hash_type;
332 hash_type operator()( const CopiedString& string ) const {
333 return string_hash( string.c_str() );
337 struct HashStringNoCase
339 typedef hash_t hash_type;
340 hash_type operator()( const CopiedString& string ) const {
341 return string_hash_nocase( string.c_str() );
345 /// \brief Length of a string in ub4.
346 /// "wibble" (6) gives 2,
347 /// "and" (3) gives 1,
348 /// "bleh" (4) gives 2
349 inline std::size_t string_length_ub4( const char* string ){
350 return ( ( string_length( string ) >> 2 ) + 1 ) << 2;
353 /// \brief Hashable key type that stores a string as an array of ub4 - making hashing faster.
354 /// Also caches the 32-bit result of the hash to speed up comparison of keys.
355 template<typename UB4Traits = ub4_default_traits>
361 void copy( const HashKey& other ){
362 std::copy( other.m_key.begin(), other.m_key.end(), m_key.begin() );
363 m_hash = other.m_hash;
365 void copy( const char* string ){
366 strncpy( reinterpret_cast<char*>( m_key.data() ), string, m_key.size() );
367 for ( Array<ub4>::iterator i = m_key.begin(); i != m_key.end(); ++i )
369 *i = UB4Traits::as_ub4( *i );
371 m_hash = hash_ub4( m_key.data(), m_key.size(), ub4_default_traits() );
373 bool equal( const HashKey& other ) const {
374 return m_hash == other.m_hash && m_key.size() == other.m_key.size()
375 && std::equal( m_key.begin(), m_key.end(), other.m_key.begin() );
379 HashKey( const HashKey& other ) : m_key( other.m_key.size() ){
382 HashKey( const char* string ) : m_key( string_length_ub4( string ) ){
385 HashKey& operator=( const char* string ){
386 m_key.resize( string_length_ub4( string ) );
390 bool operator==( const HashKey& other ) const {
391 return equal( other );
393 bool operator!=( const HashKey& other ) const {
394 return !equal( other );
396 hash_t hash() const {
400 const char* c_str() const {
401 return reinterpret_cast<const char*>( m_key.data() );
406 /// \brief Hash function to use with HashKey.
409 typedef hash_t hash_type;
410 hash_type operator()( const HashKey<ub4_default_traits>& key ) const {