util.c

   1 /*
   2  * Copyright (C) 2012, 2013
   3  *     Dale Weiler
   4  *     Wolfgang Bumiller
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   7  * this software and associated documentation files (the "Software"), to deal in
   8  * the Software without restriction, including without limitation the rights to
   9  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10  * of the Software, and to permit persons to whom the Software is furnished to do
  11  * so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in all
  14  * copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24 #include <stdarg.h>
  25 #include <errno.h>
  26 #include "gmqcc.h"
  27
  28 /* TODO: remove globals ... */
  29 static uint64_t mem_ab = 0;
  30 static uint64_t mem_db = 0;
  31 static uint64_t mem_at = 0;
  32 static uint64_t mem_dt = 0;
  33 static uint64_t mem_pk = 0;
  34 static uint64_t mem_hw = 0;
  35
  36 struct memblock_t {
  37     const char  *file;
  38     unsigned int line;
  39     size_t       byte;
  40     struct memblock_t *next;
  41     struct memblock_t *prev;
  42 };
  43
  44 #define PEAK_MEM             \
  45     do {                     \
  46         if (mem_hw > mem_pk) \
  47             mem_pk = mem_hw; \
  48     } while (0)
  49
  50 static struct memblock_t *mem_start = NULL;
  51
  52 void *util_memory_a(size_t byte, unsigned int line, const char *file) {
  53     struct memblock_t *info = (struct memblock_t*)malloc(sizeof(struct memblock_t) + byte);
  54     void              *data = (void*)(info+1);
  55     if (!info) return NULL;
  56     info->line = line;
  57     info->byte = byte;
  58     info->file = file;
  59     info->prev = NULL;
  60     info->next = mem_start;
  61     if (mem_start)
  62         mem_start->prev = info;
  63     mem_start = info;
  64
  65     mem_at++;
  66     mem_ab += info->byte;
  67     mem_hw += info->byte;
  68
  69     PEAK_MEM;
  70
  71     return data;
  72 }
  73
  74 void util_memory_d(void *ptrn) {
  75     struct memblock_t *info = NULL;
  76
  77     if (!ptrn) return;
  78     info = ((struct memblock_t*)ptrn - 1);
  79
  80     mem_db += info->byte;
  81     mem_hw -= info->byte;
  82     mem_dt++;
  83
  84     if (info->prev)
  85         info->prev->next = info->next;
  86     if (info->next)
  87         info->next->prev = info->prev;
  88     if (info == mem_start)
  89         mem_start = info->next;
  90
  91     free(info);
  92 }
  93
  94 void *util_memory_r(void *ptrn, size_t byte, unsigned int line, const char *file) {
  95     struct memblock_t *oldinfo = NULL;
  96
  97     struct memblock_t *newinfo;
  98
  99     if (!ptrn)
 100         return util_memory_a(byte, line, file);
 101     if (!byte) {
 102         util_memory_d(ptrn);
 103         return NULL;
 104     }
 105
 106     oldinfo = ((struct memblock_t*)ptrn - 1);
 107     newinfo = ((struct memblock_t*)malloc(sizeof(struct memblock_t) + byte));
 108
 109     /* new data */
 110     if (!newinfo) {
 111         util_memory_d(oldinfo+1);
 112         return NULL;
 113     }
 114
 115     /* copy old */
 116     memcpy(newinfo+1, oldinfo+1, oldinfo->byte);
 117
 118     /* free old */
 119     if (oldinfo->prev)
 120         oldinfo->prev->next = oldinfo->next;
 121     if (oldinfo->next)
 122         oldinfo->next->prev = oldinfo->prev;
 123     if (oldinfo == mem_start)
 124         mem_start = oldinfo->next;
 125
 126     /* fill info */
 127     newinfo->line = line;
 128     newinfo->byte = byte;
 129     newinfo->file = file;
 130     newinfo->prev = NULL;
 131     newinfo->next = mem_start;
 132     if (mem_start)
 133         mem_start->prev = newinfo;
 134     mem_start = newinfo;
 135
 136     mem_ab -= oldinfo->byte;
 137     mem_hw -= oldinfo->byte;
 138     mem_ab += newinfo->byte;
 139     mem_hw += newinfo->byte;
 140
 141     PEAK_MEM;
 142
 143     free(oldinfo);
 144
 145     return newinfo+1;
 146 }
 147
 148 static void util_dumpmem(struct memblock_t *memory, uint16_t cols) {
 149     uint32_t i, j;
 150     for (i = 0; i < memory->byte + ((memory->byte % cols) ? (cols - memory->byte % cols) : 0); i++) {
 151         if (i % cols == 0)    con_out("    0x%06X: ", i);
 152         if (i < memory->byte) con_out("%02X "   , 0xFF & ((char*)(memory + 1))[i]);
 153         else                  con_out("    ");
 154
 155         if ((uint16_t)(i % cols) == (cols - 1)) {
 156             for (j = i - (cols - 1); j <= i; j++) {
 157                 con_out("%c",
 158                     (j >= memory->byte)
 159                         ? ' '
 160                         : (isprint(((char*)(memory + 1))[j]))
 161                             ? 0xFF & ((char*)(memory + 1)) [j]
 162                             : '.'
 163                 );
 164             }
 165             con_out("\n");
 166         }
 167     }
 168 }
 169
 170 /*
 171  * The following is a VERY tight, efficent, hashtable for integer
 172  * values and keys, and for nothing more. We could make our existing
 173  * hashtable support type-genericness through a void * pointer but,
 174  * ideally that would make things more complicated. We also don't need
 175  * that much of a bloat for something as basic as this.
 176  */
 177 typedef struct {
 178     size_t key;
 179     size_t value;
 180 } size_entry_t;
 181 #define ST_SIZE 1024
 182
 183 typedef size_entry_t **size_table_t;
 184
 185 size_table_t util_st_new() {
 186     return (size_table_t)memset(
 187         mem_a(sizeof(size_entry_t*) * ST_SIZE),
 188         0, ST_SIZE * sizeof(size_entry_t*)
 189     );
 190 }
 191 void util_st_del(size_table_t table) {
 192     size_t i = 0;
 193     for (; i < ST_SIZE; i++) if(table[i]) mem_d(table[i]);
 194     mem_d(table);
 195 }
 196 size_entry_t *util_st_get(size_table_t table, size_t key) {
 197     size_t hash = (key % ST_SIZE);
 198     while (table[hash] && table[hash]->key != key)
 199         hash = (hash + 1) % ST_SIZE;
 200     return table[hash];
 201 }
 202 void util_st_put(size_table_t table, size_t key, size_t value) {
 203     size_t hash = (key % ST_SIZE);
 204     while (table[hash] && table[hash]->key != key)
 205         hash = (hash + 1) % ST_SIZE;
 206     table[hash]        = (size_entry_t*)mem_a(sizeof(size_entry_t));
 207     table[hash]->key   = key;
 208     table[hash]->value = value;
 209 }
 210
 211 static uint64_t      strdups      = 0;
 212 static uint64_t      vectors      = 0;
 213 static uint64_t      vector_sizes = 0;
 214 static uint64_t      hashtables   = 0;
 215 static size_table_t  vector_usage = NULL;
 216
 217 void util_meminfo() {
 218     struct memblock_t *info;
 219
 220     if (OPTS_OPTION_BOOL(OPTION_DEBUG)) {
 221         for (info = mem_start; info; info = info->next) {
 222             con_out("lost: %u (bytes) at %s:%u\n",
 223                 info->byte,
 224                 info->file,
 225                 info->line);
 226
 227             util_dumpmem(info, OPTS_OPTION_U16(OPTION_MEMDUMPCOLS));
 228         }
 229     }
 230
 231     if (OPTS_OPTION_BOOL(OPTION_DEBUG) ||
 232         OPTS_OPTION_BOOL(OPTION_MEMCHK)) {
 233         con_out("Memory information:\n\
 234             Total allocations:   %llu\n\
 235             Total deallocations: %llu\n\
 236             Total allocated:     %f (MB)\n\
 237             Total deallocated:   %f (MB)\n\
 238             Total peak memory:   %f (MB)\n\
 239             Total leaked memory: %f (MB) in %llu allocations\n",
 240                 mem_at,
 241                 mem_dt,
 242                 (float)(mem_ab)           / 1048576.0f,
 243                 (float)(mem_db)           / 1048576.0f,
 244                 (float)(mem_pk)           / 1048576.0f,
 245                 (float)(mem_ab -  mem_db) / 1048576.0f,
 246
 247                 /* could be more clever */
 248                 (mem_at -  mem_dt)
 249         );
 250     }
 251
 252     if (OPTS_OPTION_BOOL(OPTION_STATISTICS) ||
 253         OPTS_OPTION_BOOL(OPTION_MEMCHK)) {
 254         size_t i=0;
 255         size_t e=1;
 256
 257         con_out("Additional Statistics:\n\
 258             Total vectors allocated:    %llu\n\
 259             Total string duplicates:    %llu\n\
 260             Total hashtables allocated: %llu\n\
 261             Total unique vector sizes:  %llu\n",
 262             vectors,
 263             strdups,
 264             hashtables,
 265             vector_sizes
 266         );
 267
 268         for (; i < ST_SIZE; i++) {
 269             size_entry_t *entry;
 270
 271             if (!(entry = vector_usage[i]))
 272                 continue;
 273
 274             con_out("               %u| # of %3u (bytes) vectors: %u\n",
 275                 (unsigned)e,
 276                 (unsigned)entry->key,
 277                 (unsigned)entry->value
 278             );
 279             e++;
 280         }
 281     }
 282
 283     if (vector_usage)
 284         util_st_del(vector_usage);
 285 }
 286
 287 /*
 288  * Some string utility functions, because strdup uses malloc, and we want
 289  * to track all memory (without replacing malloc).
 290  */
 291 char *_util_Estrdup(const char *s, const char *file, size_t line) {
 292     size_t  len = 0;
 293     char   *ptr = NULL;
 294
 295     /* in case of -DNOTRACK */
 296     (void)file;
 297     (void)line;
 298
 299     if (!s)
 300         return NULL;
 301
 302     if ((len = strlen(s)) && (ptr = (char*)mem_af(len+1, line, file))) {
 303         memcpy(ptr, s, len);
 304         ptr[len] = '\0';
 305     }
 306     strdups++;
 307     return ptr;
 308 }
 309
 310 char *_util_Estrdup_empty(const char *s, const char *file, size_t line) {
 311     size_t  len = 0;
 312     char   *ptr = NULL;
 313
 314     /* in case of -DNOTRACK */
 315     (void)file;
 316     (void)line;
 317
 318     if (!s)
 319         return NULL;
 320
 321     len = strlen(s);
 322     if ((ptr = (char*)mem_af(len+1, line, file))) {
 323         memcpy(ptr, s, len);
 324         ptr[len] = '\0';
 325     }
 326     strdups++;
 327     return ptr;
 328 }
 329
 330 void util_debug(const char *area, const char *ms, ...) {
 331     va_list  va;
 332     if (!OPTS_OPTION_BOOL(OPTION_DEBUG))
 333         return;
 334
 335     if (!strcmp(area, "MEM") && !OPTS_OPTION_BOOL(OPTION_MEMCHK))
 336         return;
 337
 338     va_start(va, ms);
 339     con_out ("[%s] ", area);
 340     con_vout(ms, va);
 341     va_end  (va);
 342 }
 343
 344 /*
 345  * only required if big endian .. otherwise no need to swap
 346  * data.
 347  */
 348 #if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_BIG
 349     static GMQCC_INLINE void util_swap16(uint16_t *d, size_t l) {
 350         while (l--) {
 351             d[l] = (d[l] << 8) | (d[l] >> 8);
 352         }
 353     }
 354
 355     static GMQCC_INLINE void util_swap32(uint32_t *d, size_t l) {
 356         while (l--) {
 357             uint32_t v;
 358             v = ((d[l] << 8) & 0xFF00FF00) | ((d[l] >> 8) & 0x00FF00FF);
 359             d[l] = (v << 16) | (v >> 16);
 360         }
 361     }
 362
 363     /* Some strange system doesn't like constants that big, AND doesn't recognize an ULL suffix
 364      * so let's go the safe way
 365      */
 366     static GMQCC_INLINE void util_swap64(uint32_t *d, size_t l) {
 367         /*
 368         while (l--) {
 369             uint64_t v;
 370             v = ((d[l] << 8) & 0xFF00FF00FF00FF00) | ((d[l] >> 8) & 0x00FF00FF00FF00FF);
 371             v = ((v << 16) & 0xFFFF0000FFFF0000) | ((v >> 16) & 0x0000FFFF0000FFFF);
 372             d[l] = (v << 32) | (v >> 32);
 373         }
 374         */
 375         size_t i;
 376         for (i = 0; i < l; i += 2) {
 377             uint32_t v1 = d[i];
 378             d[i] = d[i+1];
 379             d[i+1] = v1;
 380             util_swap32(d+i, 2);
 381         }
 382     }
 383 #endif
 384
 385 void util_endianswap(void *_data, size_t length, unsigned int typesize) {
 386 #   if PLATFORM_BYTE_ORDER == -1 /* runtime check */
 387     if (*((char*)&typesize))
 388         return;
 389 #else
 390     /* prevent unused warnings */
 391     (void) _data;
 392     (void) length;
 393     (void) typesize;
 394
 395 #   if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE
 396         return;
 397 #   else
 398         switch (typesize) {
 399             case 1: return;
 400             case 2:
 401                 util_swap16((uint16_t*)_data, length>>1);
 402                 return;
 403             case 4:
 404                 util_swap32((uint32_t*)_data, length>>2);
 405                 return;
 406             case 8:
 407                 util_swap64((uint32_t*)_data, length>>3);
 408                 return;
 409
 410             default: exit(EXIT_FAILURE); /* please blow the fuck up! */
 411         }
 412 #   endif
 413 #endif
 414 }
 415
 416 /*
 417  * CRC algorithms vary in the width of the polynomial, the value of said polynomial,
 418  * the initial value used for the register, weather the bits of each byte are reflected
 419  * before being processed, weather the algorithm itself feeds input bytes through the
 420  * register or XORs them with a byte from one end and then straight into the table, as
 421  * well as (but not limited to the idea of reflected versions) where the final register
 422  * value becomes reversed, and finally weather the value itself is used to XOR the final
 423  * register value.  AS such you can already imagine how painfully annoying CRCs are,
 424  * of course we stand to target Quake, which expects it's certian set of rules for proper
 425  * calculation of a CRC.
 426  *
 427  * In most traditional CRC algorithms on uses a reflected table driven method where a value
 428  * or register is reflected if it's bits are swapped around it's center.  For example:
 429  * take the bits 0101 is the 4-bit reflection of 1010, and respectfully 0011 would be the
 430  * reflection of 1100. Quake however expects a NON-Reflected CRC on the output, but still
 431  * requires a final XOR on the values (0xFFFF and 0x0000) this is a standard CCITT CRC-16
 432  * which I respectfully as a programmer don't agree with.
 433  *
 434  * So now you know what we target, and why we target it, despite how unsettling it may seem
 435  * but those are what Quake seems to request.
 436  */
 437
 438 static const uint16_t util_crc16_table[] = {
 439     0x0000,     0x1021,     0x2042,     0x3063,     0x4084,     0x50A5,
 440     0x60C6,     0x70E7,     0x8108,     0x9129,     0xA14A,     0xB16B,
 441     0xC18C,     0xD1AD,     0xE1CE,     0xF1EF,     0x1231,     0x0210,
 442     0x3273,     0x2252,     0x52B5,     0x4294,     0x72F7,     0x62D6,
 443     0x9339,     0x8318,     0xB37B,     0xA35A,     0xD3BD,     0xC39C,
 444     0xF3FF,     0xE3DE,     0x2462,     0x3443,     0x0420,     0x1401,
 445     0x64E6,     0x74C7,     0x44A4,     0x5485,     0xA56A,     0xB54B,
 446     0x8528,     0x9509,     0xE5EE,     0xF5CF,     0xC5AC,     0xD58D,
 447     0x3653,     0x2672,     0x1611,     0x0630,     0x76D7,     0x66F6,
 448     0x5695,     0x46B4,     0xB75B,     0xA77A,     0x9719,     0x8738,
 449     0xF7DF,     0xE7FE,     0xD79D,     0xC7BC,     0x48C4,     0x58E5,
 450     0x6886,     0x78A7,     0x0840,     0x1861,     0x2802,     0x3823,
 451     0xC9CC,     0xD9ED,     0xE98E,     0xF9AF,     0x8948,     0x9969,
 452     0xA90A,     0xB92B,     0x5AF5,     0x4AD4,     0x7AB7,     0x6A96,
 453     0x1A71,     0x0A50,     0x3A33,     0x2A12,     0xDBFD,     0xCBDC,
 454     0xFBBF,     0xEB9E,     0x9B79,     0x8B58,     0xBB3B,     0xAB1A,
 455     0x6CA6,     0x7C87,     0x4CE4,     0x5CC5,     0x2C22,     0x3C03,
 456     0x0C60,     0x1C41,     0xEDAE,     0xFD8F,     0xCDEC,     0xDDCD,
 457     0xAD2A,     0xBD0B,     0x8D68,     0x9D49,     0x7E97,     0x6EB6,
 458     0x5ED5,     0x4EF4,     0x3E13,     0x2E32,     0x1E51,     0x0E70,
 459     0xFF9F,     0xEFBE,     0xDFDD,     0xCFFC,     0xBF1B,     0xAF3A,
 460     0x9F59,     0x8F78,     0x9188,     0x81A9,     0xB1CA,     0xA1EB,
 461     0xD10C,     0xC12D,     0xF14E,     0xE16F,     0x1080,     0x00A1,
 462     0x30C2,     0x20E3,     0x5004,     0x4025,     0x7046,     0x6067,
 463     0x83B9,     0x9398,     0xA3FB,     0xB3DA,     0xC33D,     0xD31C,
 464     0xE37F,     0xF35E,     0x02B1,     0x1290,     0x22F3,     0x32D2,
 465     0x4235,     0x5214,     0x6277,     0x7256,     0xB5EA,     0xA5CB,
 466     0x95A8,     0x8589,     0xF56E,     0xE54F,     0xD52C,     0xC50D,
 467     0x34E2,     0x24C3,     0x14A0,     0x0481,     0x7466,     0x6447,
 468     0x5424,     0x4405,     0xA7DB,     0xB7FA,     0x8799,     0x97B8,
 469     0xE75F,     0xF77E,     0xC71D,     0xD73C,     0x26D3,     0x36F2,
 470     0x0691,     0x16B0,     0x6657,     0x7676,     0x4615,     0x5634,
 471     0xD94C,     0xC96D,     0xF90E,     0xE92F,     0x99C8,     0x89E9,
 472     0xB98A,     0xA9AB,     0x5844,     0x4865,     0x7806,     0x6827,
 473     0x18C0,     0x08E1,     0x3882,     0x28A3,     0xCB7D,     0xDB5C,
 474     0xEB3F,     0xFB1E,     0x8BF9,     0x9BD8,     0xABBB,     0xBB9A,
 475     0x4A75,     0x5A54,     0x6A37,     0x7A16,     0x0AF1,     0x1AD0,
 476     0x2AB3,     0x3A92,     0xFD2E,     0xED0F,     0xDD6C,     0xCD4D,
 477     0xBDAA,     0xAD8B,     0x9DE8,     0x8DC9,     0x7C26,     0x6C07,
 478     0x5C64,     0x4C45,     0x3CA2,     0x2C83,     0x1CE0,     0x0CC1,
 479     0xEF1F,     0xFF3E,     0xCF5D,     0xDF7C,     0xAF9B,     0xBFBA,
 480     0x8FD9,     0x9FF8,     0x6E17,     0x7E36,     0x4E55,     0x5E74,
 481     0x2E93,     0x3EB2,     0x0ED1,     0x1EF0
 482 };
 483
 484 /* Non - Reflected */
 485 uint16_t util_crc16(uint16_t current, const char *k, size_t len) {
 486     register uint16_t h = current;
 487     for (; len; --len, ++k)
 488         h = util_crc16_table[(h>>8)^((unsigned char)*k)]^(h<<8);
 489     return h;
 490 }
 491 /* Reflective Varation (for reference) */
 492 #if 0
 493 uint16_t util_crc16(const char *k, int len, const short clamp) {
 494     register uint16_t h= (uint16_t)0xFFFFFFFF;
 495     for (; len; --len, ++k)
 496         h = util_crc16_table[(h^((unsigned char)*k))&0xFF]^(h>>8);
 497     return (~h)%clamp;
 498 }
 499 #endif
 500
 501 size_t util_strtocmd(const char *in, char *out, size_t outsz) {
 502     size_t sz = 1;
 503     for (; *in && sz < outsz; ++in, ++out, ++sz)
 504         *out = (*in == '-') ? '_' : (isalpha(*in) && !isupper(*in)) ? *in + 'A' - 'a': *in;
 505     *out = 0;
 506     return sz-1;
 507 }
 508
 509 size_t util_strtononcmd(const char *in, char *out, size_t outsz) {
 510     size_t sz = 1;
 511     for (; *in && sz < outsz; ++in, ++out, ++sz)
 512         *out = (*in == '_') ? '-' : (isalpha(*in) && isupper(*in)) ? *in + 'a' - 'A' : *in;
 513     *out = 0;
 514     return sz-1;
 515 }
 516
 517 /* TODO: rewrite ... when I redo the ve cleanup */
 518 void _util_vec_grow(void **a, size_t i, size_t s) {
 519     vector_t     *d = vec_meta(*a);
 520     size_t        m = 0;
 521     size_entry_t *e = NULL;
 522     void         *p = NULL;
 523
 524     if (*a) {
 525         m = 2 * d->allocated + i;
 526         p = mem_r(d, s * m + sizeof(vector_t));
 527     } else {
 528         m = i + 1;
 529         p = mem_a(s * m + sizeof(vector_t));
 530         ((vector_t*)p)->used = 0;
 531         vectors++;
 532     }
 533
 534     if (!vector_usage)
 535         vector_usage = util_st_new();
 536
 537     if ((e = util_st_get(vector_usage, s))) {
 538         e->value ++;
 539     } else {
 540         util_st_put(vector_usage, s, 1); /* start off with 1 */
 541         vector_sizes++;
 542     }
 543
 544     *a = (vector_t*)p + 1;
 545     vec_meta(*a)->allocated = m;
 546 }
 547
 548 /*
 549  * Hash table for generic data, based on dynamic memory allocations
 550  * all around.  This is the internal interface, please look for
 551  * EXPOSED INTERFACE comment below
 552  */
 553 typedef struct hash_node_t {
 554     char               *key;   /* the key for this node in table */
 555     void               *value; /* pointer to the data as void*   */
 556     struct hash_node_t *next;  /* next node (linked list)        */
 557 } hash_node_t;
 558
 559 GMQCC_INLINE size_t util_hthash(hash_table_t *ht, const char *key) {
 560     const uint32_t       mix   = 0x5BD1E995;
 561     const uint32_t       rot   = 24;
 562     size_t               size  = strlen(key);
 563     uint32_t             hash  = 0x1EF0 /* LICRC TAB */  ^ size;
 564     uint32_t             alias = 0;
 565     const unsigned char *data  = (const unsigned char*)key;
 566
 567     while (size >= 4) {
 568         alias  = (data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24));
 569         alias *= mix;
 570         alias ^= alias >> rot;
 571         alias *= mix;
 572
 573         hash  *= mix;
 574         hash  ^= alias;
 575
 576         data += 4;
 577         size -= 4;
 578     }
 579
 580     switch (size) {
 581         case 3: hash ^= data[2] << 16;
 582         case 2: hash ^= data[1] << 8;
 583         case 1: hash ^= data[0];
 584                 hash *= mix;
 585     }
 586
 587     hash ^= hash >> 13;
 588     hash *= mix;
 589     hash ^= hash >> 15;
 590
 591     return (size_t) (hash % ht->size);
 592 }
 593
 594 static hash_node_t *_util_htnewpair(const char *key, void *value) {
 595     hash_node_t *node;
 596     if (!(node = (hash_node_t*)mem_a(sizeof(hash_node_t))))
 597         return NULL;
 598
 599     if (!(node->key = util_strdupe(key))) {
 600         mem_d(node);
 601         return NULL;
 602     }
 603
 604     node->value = value;
 605     node->next  = NULL;
 606
 607     return node;
 608 }
 609
 610 /*
 611  * EXPOSED INTERFACE for the hashtable implementation
 612  * util_htnew(size)                             -- to make a new hashtable
 613  * util_htset(table, key, value, sizeof(value)) -- to set something in the table
 614  * util_htget(table, key)                       -- to get something from the table
 615  * util_htdel(table)                            -- to delete the table
 616  */
 617 hash_table_t *util_htnew(size_t size) {
 618     hash_table_t *hashtable = NULL;
 619     if (size < 1)
 620         return NULL;
 621
 622     if (!(hashtable = (hash_table_t*)mem_a(sizeof(hash_table_t))))
 623         return NULL;
 624
 625     if (!(hashtable->table = (hash_node_t**)mem_a(sizeof(hash_node_t*) * size))) {
 626         mem_d(hashtable);
 627         return NULL;
 628     }
 629
 630     hashtable->size = size;
 631     memset(hashtable->table, 0, sizeof(hash_node_t*) * size);
 632
 633     hashtables++;
 634     return hashtable;
 635 }
 636
 637 void util_htseth(hash_table_t *ht, const char *key, size_t bin, void *value) {
 638     hash_node_t *newnode = NULL;
 639     hash_node_t *next    = NULL;
 640     hash_node_t *last    = NULL;
 641
 642     next = ht->table[bin];
 643
 644     while (next && next->key && strcmp(key, next->key) > 0)
 645         last = next, next = next->next;
 646
 647     /* already in table, do a replace */
 648     if (next && next->key && strcmp(key, next->key) == 0) {
 649         next->value = value;
 650     } else {
 651         /* not found, grow a pair man :P */
 652         newnode = _util_htnewpair(key, value);
 653         if (next == ht->table[bin]) {
 654             newnode->next  = next;
 655             ht->table[bin] = newnode;
 656         } else if (!next) {
 657             last->next = newnode;
 658         } else {
 659             newnode->next = next;
 660             last->next = newnode;
 661         }
 662     }
 663 }
 664
 665 void util_htset(hash_table_t *ht, const char *key, void *value) {
 666     util_htseth(ht, key, util_hthash(ht, key), value);
 667 }
 668
 669 void *util_htgeth(hash_table_t *ht, const char *key, size_t bin) {
 670     hash_node_t *pair = ht->table[bin];
 671
 672     while (pair && pair->key && strcmp(key, pair->key) > 0)
 673         pair = pair->next;
 674
 675     if (!pair || !pair->key || strcmp(key, pair->key) != 0)
 676         return NULL;
 677
 678     return pair->value;
 679 }
 680
 681 void *util_htget(hash_table_t *ht, const char *key) {
 682     return util_htgeth(ht, key, util_hthash(ht, key));
 683 }
 684
 685 void *code_util_str_htgeth(hash_table_t *ht, const char *key, size_t bin) {
 686     hash_node_t *pair;
 687     size_t len, keylen;
 688     int cmp;
 689
 690     keylen = strlen(key);
 691
 692     pair = ht->table[bin];
 693     while (pair && pair->key) {
 694         len = strlen(pair->key);
 695         if (len < keylen) {
 696             pair = pair->next;
 697             continue;
 698         }
 699         if (keylen == len) {
 700             cmp = strcmp(key, pair->key);
 701             if (cmp == 0)
 702                 return pair->value;
 703             if (cmp < 0)
 704                 return NULL;
 705             pair = pair->next;
 706             continue;
 707         }
 708         cmp = strcmp(key, pair->key + len - keylen);
 709         if (cmp == 0) {
 710             uintptr_t up = (uintptr_t)pair->value;
 711             up += len - keylen;
 712             return (void*)up;
 713         }
 714         pair = pair->next;
 715     }
 716     return NULL;
 717 }
 718
 719 /*
 720  * Free all allocated data in a hashtable, this is quite the amount
 721  * of work.
 722  */
 723 void util_htrem(hash_table_t *ht, void (*callback)(void *data)) {
 724     size_t i = 0;
 725     for (; i < ht->size; i++) {
 726         hash_node_t *n = ht->table[i];
 727         hash_node_t *p;
 728
 729         /* free in list */
 730         while (n) {
 731             if (n->key)
 732                 mem_d(n->key);
 733             if (callback)
 734                 callback(n->value);
 735             p = n;
 736             n = n->next;
 737             mem_d(p);
 738         }
 739
 740     }
 741     /* free table */
 742     mem_d(ht->table);
 743     mem_d(ht);
 744 }
 745
 746 void util_htrmh(hash_table_t *ht, const char *key, size_t bin, void (*cb)(void*)) {
 747     hash_node_t **pair = &ht->table[bin];
 748     hash_node_t *tmp;
 749
 750     while (*pair && (*pair)->key && strcmp(key, (*pair)->key) > 0)
 751         pair = &(*pair)->next;
 752
 753     tmp = *pair;
 754     if (!tmp || !tmp->key || strcmp(key, tmp->key) != 0)
 755         return;
 756
 757     if (cb)
 758         (*cb)(tmp->value);
 759
 760     *pair = tmp->next;
 761     mem_d(tmp->key);
 762     mem_d(tmp);
 763 }
 764
 765 void util_htrm(hash_table_t *ht, const char *key, void (*cb)(void*)) {
 766     util_htrmh(ht, key, util_hthash(ht, key), cb);
 767 }
 768
 769 void util_htdel(hash_table_t *ht) {
 770     util_htrem(ht, NULL);
 771 }
 772
 773 /*
 774  * Portable implementation of vasprintf/asprintf. Assumes vsnprintf
 775  * exists, otherwise compiler error.
 776  *
 777  * TODO: fix for MSVC ....
 778  */
 779 int util_vasprintf(char **dat, const char *fmt, va_list args) {
 780     int   ret;
 781     int   len;
 782     char *tmp = NULL;
 783
 784     /*
 785      * For visuals tido _vsnprintf doesn't tell you the length of a
 786      * formatted string if it overflows. However there is a MSVC
 787      * intrinsic (which is documented wrong) called _vcsprintf which
 788      * will return the required amount to allocate.
 789      */
 790     #ifdef _MSC_VER
 791         if ((len = _vscprintf(fmt, args)) < 0) {
 792             *dat = NULL;
 793             return -1;
 794         }
 795
 796         tmp = (char*)mem_a(len + 1);
 797         if ((ret = _vsnprintf_s(tmp, len+1, len+1, fmt, args)) != len) {
 798             mem_d(tmp);
 799             *dat = NULL;
 800             return -1;
 801         }
 802         *dat = tmp;
 803         return len;
 804     #else
 805         /*
 806          * For everything else we have a decent conformint vsnprintf that
 807          * returns the number of bytes needed.  We give it a try though on
 808          * a short buffer, since efficently speaking, it could be nice to
 809          * above a second vsnprintf call.
 810          */
 811         char    buf[128];
 812         va_list cpy;
 813         va_copy(cpy, args);
 814         len = vsnprintf(buf, sizeof(buf), fmt, cpy);
 815         va_end (cpy);
 816
 817         if (len < (int)sizeof(buf)) {
 818             *dat = util_strdup(buf);
 819             return len;
 820         }
 821
 822         /* not large enough ... */
 823         tmp = (char*)mem_a(len + 1);
 824         if ((ret = vsnprintf(tmp, len + 1, fmt, args)) != len) {
 825             mem_d(tmp);
 826             *dat = NULL;
 827             return -1;
 828         }
 829
 830         *dat = tmp;
 831         return len;
 832     #endif
 833 }
 834 int util_asprintf(char **ret, const char *fmt, ...) {
 835     va_list  args;
 836     int      read;
 837     va_start(args, fmt);
 838     read = util_vasprintf(ret, fmt, args);
 839     va_end  (args);
 840
 841     return read;
 842 }
 843
 844 /*
 845  * These are various re-implementations (wrapping the real ones) of
 846  * string functions that MSVC consideres unsafe. We wrap these up and
 847  * use the safe varations on MSVC.
 848  */
 849 #ifdef _MSC_VER
 850     static char **util_strerror_allocated() {
 851         static char **data = NULL;
 852         return data;
 853     }
 854
 855     static void util_strerror_cleanup(void) {
 856         size_t i;
 857         char  **data = util_strerror_allocated();
 858         for (i = 0; i < vec_size(data); i++)
 859             mem_d(data[i]);
 860         vec_free(data);
 861     }
 862
 863     const char *util_strerror(int num) {
 864         char         *allocated = NULL;
 865         static bool   install   = false;
 866         static size_t tries     = 0;
 867         char        **vector    = util_strerror_allocated();
 868
 869         /* try installing cleanup handler */
 870         while (!install) {
 871             if (tries == 32)
 872                 return "(unknown)";
 873
 874             install = !atexit(&util_strerror_cleanup);
 875             tries ++;
 876         }
 877
 878         allocated = (char*)mem_a(4096); /* A page must be enough */
 879         strerror_s(allocated, 4096, num);
 880
 881         vec_push(vector, allocated);
 882         return (const char *)allocated;
 883     }
 884
 885     int util_snprintf(char *src, size_t bytes, const char *format, ...) {
 886         int      rt;
 887         va_list  va;
 888         va_start(va, format);
 889
 890         rt = vsprintf_s(src, bytes, format, va);
 891         va_end  (va);
 892
 893         return rt;
 894     }
 895
 896     char *util_strcat(char *dest, const char *src) {
 897         strcat_s(dest, strlen(src), src);
 898         return dest;
 899     }
 900
 901     char *util_strncpy(char *dest, const char *src, size_t num) {
 902         strncpy_s(dest, num, src, num);
 903         return dest;
 904     }
 905 #else
 906     const char *util_strerror(int num) {
 907         return strerror(num);
 908     }
 909
 910     int util_snprintf(char *src, size_t bytes, const char *format, ...) {
 911         int      rt;
 912         va_list  va;
 913         va_start(va, format);
 914         rt = vsnprintf(src, bytes, format, va);
 915         va_end  (va);
 916
 917         return rt;
 918     }
 919
 920     char *util_strcat(char *dest, const char *src) {
 921         return strcat(dest, src);
 922     }
 923
 924     char *util_strncpy(char *dest, const char *src, size_t num) {
 925         return strncpy(dest, src, num);
 926     }
 927
 928 #endif /*! _MSC_VER */
 929
 930 /*
 931  * Implementation of the Mersenne twister PRNG (pseudo random numer
 932  * generator).  Implementation of MT19937.  Has a period of 2^19937-1
 933  * which is a Mersenne Prime (hence the name).
 934  *
 935  * Implemented from specification and original paper:
 936  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/ARTICLES/mt.pdf
 937  *
 938  * This code is placed in the public domain by me personally
 939  * (Dale Weiler, a.k.a graphitemaster).
 940  */
 941
 942 #define MT_SIZE    624
 943 #define MT_PERIOD  397
 944 #define MT_SPACE   (MT_SIZE - MT_PERIOD)
 945
 946 static uint32_t mt_state[MT_SIZE];
 947 static size_t   mt_index = 0;
 948
 949 static GMQCC_INLINE void mt_generate() {
 950     /*
 951      * The loop has been unrolled here: the original paper and implemenation
 952      * Called for the following code:
 953      * for (register unsigned i = 0; i < MT_SIZE; ++i) {
 954      *     register uint32_t load;
 955      *     load  = (0x80000000 & mt_state[i])                 // most  significant 32nd bit
 956      *     load |= (0x7FFFFFFF & mt_state[(i + 1) % MT_SIZE]) // least significant 31nd bit
 957      *
 958      *     mt_state[i] = mt_state[(i + MT_PERIOD) % MT_SIZE] ^ (load >> 1);
 959      *
 960      *     if (load & 1) mt_state[i] ^= 0x9908B0DF;
 961      * }
 962      *
 963      * This essentially is a waste: we have two modulus operations, and
 964      * a branch that is executed every iteration from [0, MT_SIZE).
 965      *
 966      * Please see: http://www.quadibloc.com/crypto/co4814.htm for more
 967      * information on how this clever trick works.
 968      */
 969     static const uint32_t matrix[2] = {
 970         0x00000000,
 971         0x9908B0Df
 972     };
 973     /*
 974      * This register gives up a little more speed by instructing the compiler
 975      * to force these into CPU registers (they're counters for indexing mt_state
 976      * which we can force the compiler to generate prefetch instructions for)
 977      */
 978     register uint32_t y;
 979     register uint32_t i;
 980
 981     /*
 982      * Said loop has been unrolled for MT_SPACE (226 iterations), opposed
 983      * to [0, MT_SIZE)  (634 iterations).
 984      */
 985     for (i = 0; i < MT_SPACE; ++i) {
 986         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFF & mt_state[i + 1]);
 987         mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
 988
 989         i ++; /* loop unroll */
 990
 991         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFF & mt_state[i + 1]);
 992         mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
 993     }
 994
 995     /*
 996      * collapsing the walls unrolled (evenly dividing 396 [632-227 = 396
 997      * = 2*2*3*3*11])
 998      */
 999     i = MT_SPACE;
1000     while (i < MT_SIZE - 1) {
1001         /*
1002          * We expand this 11 times .. manually, no macros are required
1003          * here. This all fits in the CPU cache.
1004          */
1005         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1006         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1007         ++i;
1008         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1009         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1010         ++i;
1011         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1012         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1013         ++i;
1014         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1015         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1016         ++i;
1017         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1018         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1019         ++i;
1020         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1021         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1022         ++i;
1023         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1024         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1025         ++i;
1026         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1027         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1028         ++i;
1029         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1030         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1031         ++i;
1032         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1033         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1034         ++i;
1035         y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1036         mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1037         ++i;
1038     }
1039
1040     /* i = mt_state[623] */
1041     y                     = (0x80000000 & mt_state[MT_SIZE - 1]) | (0x7FFFFFFF & mt_state[MT_SIZE - 1]);
1042     mt_state[MT_SIZE - 1] = mt_state[MT_PERIOD - 1] ^ (y >> 1) ^ matrix[y & 1];
1043 }
1044
1045 void util_seed(uint32_t value) {
1046     /*
1047      * We seed the mt_state with a LCG (linear congruential generator)
1048      * We're operating exactly on exactly m=32, so there is no need to
1049      * use modulus.
1050      *
1051      * The multipler of choice is 0x6C07865, also knows as the Borosh-
1052      * Niederreiter multipler used for modulus 2^32.  More can be read
1053      * about this in Knuth's TAOCP Volume 2, page 106.
1054      *
1055      * If you don't own TAOCP something is wrong with you :-) .. so I
1056      * also provided a link to the original paper by Borosh and
1057      * Niederreiter.  It's called "Optional Multipliers for PRNG by The
1058      * Linear Congruential Method" (1983).
1059      * http://en.wikipedia.org/wiki/Linear_congruential_generator
1060      *
1061      * From said page, it says the following:
1062      * "A common Mersenne twister implementation, interestingly enough
1063      *  used an LCG to generate seed data."
1064      *
1065      * Remarks:
1066      * The data we're operating on is 32-bits for the mt_state array, so
1067      * there is no masking required with 0xFFFFFFFF
1068      */
1069     register size_t i;
1070
1071     mt_state[0] = value;
1072     for (i = 1; i < MT_SIZE; ++i)
1073         mt_state[i] = 0x6C078965 * (mt_state[i - 1] ^ mt_state[i - 1] >> 30) + i;
1074 }
1075
1076 uint32_t util_rand() {
1077     register uint32_t y;
1078
1079     /*
1080      * This is inlined with any sane compiler (I checked)
1081      * for some reason though, SubC seems to be generating invalid
1082      * code when it inlines this.
1083      */
1084     if (!mt_index)
1085         mt_generate();
1086
1087     y = mt_state[mt_index];
1088
1089     /* Standard tempering */
1090     y ^= y >> 11;              /* +7 */
1091     y ^= y << 7  & 0x9D2C5680; /* +4 */
1092     y ^= y << 15 & 0xEFC60000; /* -4 */
1093     y ^= y >> 18;              /* -7 */
1094
1095     if(++mt_index == MT_SIZE)
1096          mt_index = 0;
1097
1098     return y;
1099 }