2 * Copyright (C) 2012, 2013
6 * Permission is hereby granted, free of charge, to any person obtaining a copy of
7 * this software and associated documentation files (the "Software"), to deal in
8 * the Software without restriction, including without limitation the rights to
9 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
10 * of the Software, and to permit persons to whom the Software is furnished to do
11 * so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 /* TODO: remove globals ... */
29 static uint64_t mem_ab = 0;
30 static uint64_t mem_db = 0;
31 static uint64_t mem_at = 0;
32 static uint64_t mem_dt = 0;
33 static uint64_t mem_pk = 0;
34 static uint64_t mem_hw = 0;
40 struct memblock_t *next;
41 struct memblock_t *prev;
46 if (mem_hw > mem_pk) \
50 static struct memblock_t *mem_start = NULL;
52 void *util_memory_a(size_t byte, unsigned int line, const char *file) {
53 struct memblock_t *info = (struct memblock_t*)malloc(sizeof(struct memblock_t) + byte);
54 void *data = (void*)(info+1);
55 if (!info) return NULL;
60 info->next = mem_start;
62 mem_start->prev = info;
74 void util_memory_d(void *ptrn) {
75 struct memblock_t *info = NULL;
78 info = ((struct memblock_t*)ptrn - 1);
85 info->prev->next = info->next;
87 info->next->prev = info->prev;
88 if (info == mem_start)
89 mem_start = info->next;
94 void *util_memory_r(void *ptrn, size_t byte, unsigned int line, const char *file) {
95 struct memblock_t *oldinfo = NULL;
97 struct memblock_t *newinfo;
100 return util_memory_a(byte, line, file);
106 oldinfo = ((struct memblock_t*)ptrn - 1);
107 newinfo = ((struct memblock_t*)malloc(sizeof(struct memblock_t) + byte));
111 util_memory_d(oldinfo+1);
116 memcpy(newinfo+1, oldinfo+1, oldinfo->byte);
120 oldinfo->prev->next = oldinfo->next;
122 oldinfo->next->prev = oldinfo->prev;
123 if (oldinfo == mem_start)
124 mem_start = oldinfo->next;
127 newinfo->line = line;
128 newinfo->byte = byte;
129 newinfo->file = file;
130 newinfo->prev = NULL;
131 newinfo->next = mem_start;
133 mem_start->prev = newinfo;
136 mem_ab -= oldinfo->byte;
137 mem_hw -= oldinfo->byte;
138 mem_ab += newinfo->byte;
139 mem_hw += newinfo->byte;
148 static void util_dumpmem(struct memblock_t *memory, uint16_t cols) {
150 for (i = 0; i < memory->byte + ((memory->byte % cols) ? (cols - memory->byte % cols) : 0); i++) {
151 if (i % cols == 0) con_out(" 0x%06X: ", i);
152 if (i < memory->byte) con_out("%02X " , 0xFF & ((char*)(memory + 1))[i]);
155 if ((uint16_t)(i % cols) == (cols - 1)) {
156 for (j = i - (cols - 1); j <= i; j++) {
160 : (isprint(((char*)(memory + 1))[j]))
161 ? 0xFF & ((char*)(memory + 1)) [j]
171 * The following is a VERY tight, efficent, hashtable for integer
172 * values and keys, and for nothing more. We could make our existing
173 * hashtable support type-genericness through a void * pointer but,
174 * ideally that would make things more complicated. We also don't need
175 * that much of a bloat for something as basic as this.
183 typedef size_entry_t **size_table_t;
185 size_table_t util_st_new() {
186 return (size_table_t)memset(
187 mem_a(sizeof(size_entry_t*) * ST_SIZE),
188 0, ST_SIZE * sizeof(size_entry_t*)
191 void util_st_del(size_table_t table) {
193 for (; i < ST_SIZE; i++) if(table[i]) mem_d(table[i]);
196 size_entry_t *util_st_get(size_table_t table, size_t key) {
197 size_t hash = (key % ST_SIZE);
198 while (table[hash] && table[hash]->key != key)
199 hash = (hash + 1) % ST_SIZE;
202 void util_st_put(size_table_t table, size_t key, size_t value) {
203 size_t hash = (key % ST_SIZE);
204 while (table[hash] && table[hash]->key != key)
205 hash = (hash + 1) % ST_SIZE;
206 table[hash] = (size_entry_t*)mem_a(sizeof(size_entry_t));
207 table[hash]->key = key;
208 table[hash]->value = value;
211 static uint64_t strdups = 0;
212 static uint64_t vectors = 0;
213 static uint64_t vector_sizes = 0;
214 static uint64_t hashtables = 0;
215 static size_table_t vector_usage = NULL;
217 void util_meminfo() {
218 struct memblock_t *info;
220 if (OPTS_OPTION_BOOL(OPTION_DEBUG)) {
221 for (info = mem_start; info; info = info->next) {
222 con_out("lost: %u (bytes) at %s:%u\n",
227 util_dumpmem(info, OPTS_OPTION_U16(OPTION_MEMDUMPCOLS));
231 if (OPTS_OPTION_BOOL(OPTION_DEBUG) ||
232 OPTS_OPTION_BOOL(OPTION_MEMCHK)) {
233 con_out("Memory information:\n\
234 Total allocations: %llu\n\
235 Total deallocations: %llu\n\
236 Total allocated: %f (MB)\n\
237 Total deallocated: %f (MB)\n\
238 Total peak memory: %f (MB)\n\
239 Total leaked memory: %f (MB) in %llu allocations\n",
242 (float)(mem_ab) / 1048576.0f,
243 (float)(mem_db) / 1048576.0f,
244 (float)(mem_pk) / 1048576.0f,
245 (float)(mem_ab - mem_db) / 1048576.0f,
247 /* could be more clever */
252 if (OPTS_OPTION_BOOL(OPTION_STATISTICS) ||
253 OPTS_OPTION_BOOL(OPTION_MEMCHK)) {
257 con_out("Additional Statistics:\n\
258 Total vectors allocated: %llu\n\
259 Total string duplicates: %llu\n\
260 Total hashtables allocated: %llu\n\
261 Total unique vector sizes: %llu\n",
268 for (; i < ST_SIZE; i++) {
271 if (!(entry = vector_usage[i]))
274 con_out(" %u| # of %3u (bytes) vectors: %u\n",
276 (unsigned)entry->key,
277 (unsigned)entry->value
284 util_st_del(vector_usage);
288 * Some string utility functions, because strdup uses malloc, and we want
289 * to track all memory (without replacing malloc).
291 char *_util_Estrdup(const char *s, const char *file, size_t line) {
295 /* in case of -DNOTRACK */
302 if ((len = strlen(s)) && (ptr = (char*)mem_af(len+1, line, file))) {
310 char *_util_Estrdup_empty(const char *s, const char *file, size_t line) {
314 /* in case of -DNOTRACK */
322 if ((ptr = (char*)mem_af(len+1, line, file))) {
330 void util_debug(const char *area, const char *ms, ...) {
332 if (!OPTS_OPTION_BOOL(OPTION_DEBUG))
335 if (!strcmp(area, "MEM") && !OPTS_OPTION_BOOL(OPTION_MEMCHK))
339 con_out ("[%s] ", area);
345 * only required if big endian .. otherwise no need to swap
348 #if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_BIG
349 static GMQCC_INLINE void util_swap16(uint16_t *d, size_t l) {
351 d[l] = (d[l] << 8) | (d[l] >> 8);
355 static GMQCC_INLINE void util_swap32(uint32_t *d, size_t l) {
358 v = ((d[l] << 8) & 0xFF00FF00) | ((d[l] >> 8) & 0x00FF00FF);
359 d[l] = (v << 16) | (v >> 16);
363 /* Some strange system doesn't like constants that big, AND doesn't recognize an ULL suffix
364 * so let's go the safe way
366 static GMQCC_INLINE void util_swap64(uint32_t *d, size_t l) {
370 v = ((d[l] << 8) & 0xFF00FF00FF00FF00) | ((d[l] >> 8) & 0x00FF00FF00FF00FF);
371 v = ((v << 16) & 0xFFFF0000FFFF0000) | ((v >> 16) & 0x0000FFFF0000FFFF);
372 d[l] = (v << 32) | (v >> 32);
376 for (i = 0; i < l; i += 2) {
385 void util_endianswap(void *_data, size_t length, unsigned int typesize) {
386 # if PLATFORM_BYTE_ORDER == -1 /* runtime check */
387 if (*((char*)&typesize))
390 /* prevent unused warnings */
395 # if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE
401 util_swap16((uint16_t*)_data, length>>1);
404 util_swap32((uint32_t*)_data, length>>2);
407 util_swap64((uint32_t*)_data, length>>3);
410 default: exit(EXIT_FAILURE); /* please blow the fuck up! */
417 * CRC algorithms vary in the width of the polynomial, the value of said polynomial,
418 * the initial value used for the register, weather the bits of each byte are reflected
419 * before being processed, weather the algorithm itself feeds input bytes through the
420 * register or XORs them with a byte from one end and then straight into the table, as
421 * well as (but not limited to the idea of reflected versions) where the final register
422 * value becomes reversed, and finally weather the value itself is used to XOR the final
423 * register value. AS such you can already imagine how painfully annoying CRCs are,
424 * of course we stand to target Quake, which expects it's certian set of rules for proper
425 * calculation of a CRC.
427 * In most traditional CRC algorithms on uses a reflected table driven method where a value
428 * or register is reflected if it's bits are swapped around it's center. For example:
429 * take the bits 0101 is the 4-bit reflection of 1010, and respectfully 0011 would be the
430 * reflection of 1100. Quake however expects a NON-Reflected CRC on the output, but still
431 * requires a final XOR on the values (0xFFFF and 0x0000) this is a standard CCITT CRC-16
432 * which I respectfully as a programmer don't agree with.
434 * So now you know what we target, and why we target it, despite how unsettling it may seem
435 * but those are what Quake seems to request.
438 static const uint16_t util_crc16_table[] = {
439 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5,
440 0x60C6, 0x70E7, 0x8108, 0x9129, 0xA14A, 0xB16B,
441 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, 0x1231, 0x0210,
442 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
443 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C,
444 0xF3FF, 0xE3DE, 0x2462, 0x3443, 0x0420, 0x1401,
445 0x64E6, 0x74C7, 0x44A4, 0x5485, 0xA56A, 0xB54B,
446 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
447 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6,
448 0x5695, 0x46B4, 0xB75B, 0xA77A, 0x9719, 0x8738,
449 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, 0x48C4, 0x58E5,
450 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
451 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969,
452 0xA90A, 0xB92B, 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96,
453 0x1A71, 0x0A50, 0x3A33, 0x2A12, 0xDBFD, 0xCBDC,
454 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
455 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03,
456 0x0C60, 0x1C41, 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD,
457 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, 0x7E97, 0x6EB6,
458 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
459 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A,
460 0x9F59, 0x8F78, 0x9188, 0x81A9, 0xB1CA, 0xA1EB,
461 0xD10C, 0xC12D, 0xF14E, 0xE16F, 0x1080, 0x00A1,
462 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
463 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C,
464 0xE37F, 0xF35E, 0x02B1, 0x1290, 0x22F3, 0x32D2,
465 0x4235, 0x5214, 0x6277, 0x7256, 0xB5EA, 0xA5CB,
466 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
467 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447,
468 0x5424, 0x4405, 0xA7DB, 0xB7FA, 0x8799, 0x97B8,
469 0xE75F, 0xF77E, 0xC71D, 0xD73C, 0x26D3, 0x36F2,
470 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
471 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9,
472 0xB98A, 0xA9AB, 0x5844, 0x4865, 0x7806, 0x6827,
473 0x18C0, 0x08E1, 0x3882, 0x28A3, 0xCB7D, 0xDB5C,
474 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
475 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0,
476 0x2AB3, 0x3A92, 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D,
477 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, 0x7C26, 0x6C07,
478 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
479 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA,
480 0x8FD9, 0x9FF8, 0x6E17, 0x7E36, 0x4E55, 0x5E74,
481 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
484 /* Non - Reflected */
485 uint16_t util_crc16(uint16_t current, const char *k, size_t len) {
486 register uint16_t h = current;
487 for (; len; --len, ++k)
488 h = util_crc16_table[(h>>8)^((unsigned char)*k)]^(h<<8);
491 /* Reflective Varation (for reference) */
493 uint16_t util_crc16(const char *k, int len, const short clamp) {
494 register uint16_t h= (uint16_t)0xFFFFFFFF;
495 for (; len; --len, ++k)
496 h = util_crc16_table[(h^((unsigned char)*k))&0xFF]^(h>>8);
501 size_t util_strtocmd(const char *in, char *out, size_t outsz) {
503 for (; *in && sz < outsz; ++in, ++out, ++sz)
504 *out = (*in == '-') ? '_' : (isalpha(*in) && !isupper(*in)) ? *in + 'A' - 'a': *in;
509 size_t util_strtononcmd(const char *in, char *out, size_t outsz) {
511 for (; *in && sz < outsz; ++in, ++out, ++sz)
512 *out = (*in == '_') ? '-' : (isalpha(*in) && isupper(*in)) ? *in + 'a' - 'A' : *in;
517 /* TODO: rewrite ... when I redo the ve cleanup */
518 void _util_vec_grow(void **a, size_t i, size_t s) {
519 vector_t *d = vec_meta(*a);
521 size_entry_t *e = NULL;
525 m = 2 * d->allocated + i;
526 p = mem_r(d, s * m + sizeof(vector_t));
529 p = mem_a(s * m + sizeof(vector_t));
530 ((vector_t*)p)->used = 0;
535 vector_usage = util_st_new();
537 if ((e = util_st_get(vector_usage, s))) {
540 util_st_put(vector_usage, s, 1); /* start off with 1 */
544 *a = (vector_t*)p + 1;
545 vec_meta(*a)->allocated = m;
549 * Hash table for generic data, based on dynamic memory allocations
550 * all around. This is the internal interface, please look for
551 * EXPOSED INTERFACE comment below
553 typedef struct hash_node_t {
554 char *key; /* the key for this node in table */
555 void *value; /* pointer to the data as void* */
556 struct hash_node_t *next; /* next node (linked list) */
559 GMQCC_INLINE size_t util_hthash(hash_table_t *ht, const char *key) {
560 const uint32_t mix = 0x5BD1E995;
561 const uint32_t rot = 24;
562 size_t size = strlen(key);
563 uint32_t hash = 0x1EF0 /* LICRC TAB */ ^ size;
565 const unsigned char *data = (const unsigned char*)key;
568 alias = (data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24));
570 alias ^= alias >> rot;
581 case 3: hash ^= data[2] << 16;
582 case 2: hash ^= data[1] << 8;
583 case 1: hash ^= data[0];
591 return (size_t) (hash % ht->size);
594 static hash_node_t *_util_htnewpair(const char *key, void *value) {
596 if (!(node = (hash_node_t*)mem_a(sizeof(hash_node_t))))
599 if (!(node->key = util_strdupe(key))) {
611 * EXPOSED INTERFACE for the hashtable implementation
612 * util_htnew(size) -- to make a new hashtable
613 * util_htset(table, key, value, sizeof(value)) -- to set something in the table
614 * util_htget(table, key) -- to get something from the table
615 * util_htdel(table) -- to delete the table
617 hash_table_t *util_htnew(size_t size) {
618 hash_table_t *hashtable = NULL;
622 if (!(hashtable = (hash_table_t*)mem_a(sizeof(hash_table_t))))
625 if (!(hashtable->table = (hash_node_t**)mem_a(sizeof(hash_node_t*) * size))) {
630 hashtable->size = size;
631 memset(hashtable->table, 0, sizeof(hash_node_t*) * size);
637 void util_htseth(hash_table_t *ht, const char *key, size_t bin, void *value) {
638 hash_node_t *newnode = NULL;
639 hash_node_t *next = NULL;
640 hash_node_t *last = NULL;
642 next = ht->table[bin];
644 while (next && next->key && strcmp(key, next->key) > 0)
645 last = next, next = next->next;
647 /* already in table, do a replace */
648 if (next && next->key && strcmp(key, next->key) == 0) {
651 /* not found, grow a pair man :P */
652 newnode = _util_htnewpair(key, value);
653 if (next == ht->table[bin]) {
654 newnode->next = next;
655 ht->table[bin] = newnode;
657 last->next = newnode;
659 newnode->next = next;
660 last->next = newnode;
665 void util_htset(hash_table_t *ht, const char *key, void *value) {
666 util_htseth(ht, key, util_hthash(ht, key), value);
669 void *util_htgeth(hash_table_t *ht, const char *key, size_t bin) {
670 hash_node_t *pair = ht->table[bin];
672 while (pair && pair->key && strcmp(key, pair->key) > 0)
675 if (!pair || !pair->key || strcmp(key, pair->key) != 0)
681 void *util_htget(hash_table_t *ht, const char *key) {
682 return util_htgeth(ht, key, util_hthash(ht, key));
685 void *code_util_str_htgeth(hash_table_t *ht, const char *key, size_t bin) {
690 keylen = strlen(key);
692 pair = ht->table[bin];
693 while (pair && pair->key) {
694 len = strlen(pair->key);
700 cmp = strcmp(key, pair->key);
708 cmp = strcmp(key, pair->key + len - keylen);
710 uintptr_t up = (uintptr_t)pair->value;
720 * Free all allocated data in a hashtable, this is quite the amount
723 void util_htrem(hash_table_t *ht, void (*callback)(void *data)) {
725 for (; i < ht->size; i++) {
726 hash_node_t *n = ht->table[i];
746 void util_htrmh(hash_table_t *ht, const char *key, size_t bin, void (*cb)(void*)) {
747 hash_node_t **pair = &ht->table[bin];
750 while (*pair && (*pair)->key && strcmp(key, (*pair)->key) > 0)
751 pair = &(*pair)->next;
754 if (!tmp || !tmp->key || strcmp(key, tmp->key) != 0)
765 void util_htrm(hash_table_t *ht, const char *key, void (*cb)(void*)) {
766 util_htrmh(ht, key, util_hthash(ht, key), cb);
769 void util_htdel(hash_table_t *ht) {
770 util_htrem(ht, NULL);
774 * Portable implementation of vasprintf/asprintf. Assumes vsnprintf
775 * exists, otherwise compiler error.
777 * TODO: fix for MSVC ....
779 int util_vasprintf(char **dat, const char *fmt, va_list args) {
785 * For visuals tido _vsnprintf doesn't tell you the length of a
786 * formatted string if it overflows. However there is a MSVC
787 * intrinsic (which is documented wrong) called _vcsprintf which
788 * will return the required amount to allocate.
791 if ((len = _vscprintf(fmt, args)) < 0) {
796 tmp = (char*)mem_a(len + 1);
797 if ((ret = _vsnprintf_s(tmp, len+1, len+1, fmt, args)) != len) {
806 * For everything else we have a decent conformint vsnprintf that
807 * returns the number of bytes needed. We give it a try though on
808 * a short buffer, since efficently speaking, it could be nice to
809 * above a second vsnprintf call.
814 len = vsnprintf(buf, sizeof(buf), fmt, cpy);
817 if (len < (int)sizeof(buf)) {
818 *dat = util_strdup(buf);
822 /* not large enough ... */
823 tmp = (char*)mem_a(len + 1);
824 if ((ret = vsnprintf(tmp, len + 1, fmt, args)) != len) {
834 int util_asprintf(char **ret, const char *fmt, ...) {
838 read = util_vasprintf(ret, fmt, args);
845 * These are various re-implementations (wrapping the real ones) of
846 * string functions that MSVC consideres unsafe. We wrap these up and
847 * use the safe varations on MSVC.
850 static char **util_strerror_allocated() {
851 static char **data = NULL;
855 static void util_strerror_cleanup(void) {
857 char **data = util_strerror_allocated();
858 for (i = 0; i < vec_size(data); i++)
863 const char *util_strerror(int num) {
864 char *allocated = NULL;
865 static bool install = false;
866 static size_t tries = 0;
867 char **vector = util_strerror_allocated();
869 /* try installing cleanup handler */
874 install = !atexit(&util_strerror_cleanup);
878 allocated = (char*)mem_a(4096); /* A page must be enough */
879 strerror_s(allocated, 4096, num);
881 vec_push(vector, allocated);
882 return (const char *)allocated;
885 int util_snprintf(char *src, size_t bytes, const char *format, ...) {
888 va_start(va, format);
890 rt = vsprintf_s(src, bytes, format, va);
896 char *util_strcat(char *dest, const char *src) {
897 strcat_s(dest, strlen(src), src);
901 char *util_strncpy(char *dest, const char *src, size_t num) {
902 strncpy_s(dest, num, src, num);
906 const char *util_strerror(int num) {
907 return strerror(num);
910 int util_snprintf(char *src, size_t bytes, const char *format, ...) {
913 va_start(va, format);
914 rt = vsnprintf(src, bytes, format, va);
920 char *util_strcat(char *dest, const char *src) {
921 return strcat(dest, src);
924 char *util_strncpy(char *dest, const char *src, size_t num) {
925 return strncpy(dest, src, num);
928 #endif /*! _MSC_VER */
931 * Implementation of the Mersenne twister PRNG (pseudo random numer
932 * generator). Implementation of MT19937. Has a period of 2^19937-1
933 * which is a Mersenne Prime (hence the name).
935 * Implemented from specification and original paper:
936 * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/ARTICLES/mt.pdf
938 * This code is placed in the public domain by me personally
939 * (Dale Weiler, a.k.a graphitemaster).
943 #define MT_PERIOD 397
944 #define MT_SPACE (MT_SIZE - MT_PERIOD)
946 static uint32_t mt_state[MT_SIZE];
947 static size_t mt_index = 0;
949 static GMQCC_INLINE void mt_generate() {
951 * The loop has been unrolled here: the original paper and implemenation
952 * Called for the following code:
953 * for (register unsigned i = 0; i < MT_SIZE; ++i) {
954 * register uint32_t load;
955 * load = (0x80000000 & mt_state[i]) // most significant 32nd bit
956 * load |= (0x7FFFFFFF & mt_state[(i + 1) % MT_SIZE]) // least significant 31nd bit
958 * mt_state[i] = mt_state[(i + MT_PERIOD) % MT_SIZE] ^ (load >> 1);
960 * if (load & 1) mt_state[i] ^= 0x9908B0DF;
963 * This essentially is a waste: we have two modulus operations, and
964 * a branch that is executed every iteration from [0, MT_SIZE).
966 * Please see: http://www.quadibloc.com/crypto/co4814.htm for more
967 * information on how this clever trick works.
969 static const uint32_t matrix[2] = {
974 * This register gives up a little more speed by instructing the compiler
975 * to force these into CPU registers (they're counters for indexing mt_state
976 * which we can force the compiler to generate prefetch instructions for)
982 * Said loop has been unrolled for MT_SPACE (226 iterations), opposed
983 * to [0, MT_SIZE) (634 iterations).
985 for (i = 0; i < MT_SPACE; ++i) {
986 y = (0x80000000 & mt_state[i]) | (0x7FFFFFF & mt_state[i + 1]);
987 mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
989 i ++; /* loop unroll */
991 y = (0x80000000 & mt_state[i]) | (0x7FFFFFF & mt_state[i + 1]);
992 mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
996 * collapsing the walls unrolled (evenly dividing 396 [632-227 = 396
1000 while (i < MT_SIZE - 1) {
1002 * We expand this 11 times .. manually, no macros are required
1003 * here. This all fits in the CPU cache.
1005 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1006 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1008 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1009 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1011 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1012 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1014 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1015 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1017 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1018 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1020 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1021 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1023 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1024 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1026 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1027 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1029 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1030 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1032 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1033 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1035 y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
1036 mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
1040 /* i = mt_state[623] */
1041 y = (0x80000000 & mt_state[MT_SIZE - 1]) | (0x7FFFFFFF & mt_state[MT_SIZE - 1]);
1042 mt_state[MT_SIZE - 1] = mt_state[MT_PERIOD - 1] ^ (y >> 1) ^ matrix[y & 1];
1045 void util_seed(uint32_t value) {
1047 * We seed the mt_state with a LCG (linear congruential generator)
1048 * We're operating exactly on exactly m=32, so there is no need to
1051 * The multipler of choice is 0x6C07865, also knows as the Borosh-
1052 * Niederreiter multipler used for modulus 2^32. More can be read
1053 * about this in Knuth's TAOCP Volume 2, page 106.
1055 * If you don't own TAOCP something is wrong with you :-) .. so I
1056 * also provided a link to the original paper by Borosh and
1057 * Niederreiter. It's called "Optional Multipliers for PRNG by The
1058 * Linear Congruential Method" (1983).
1059 * http://en.wikipedia.org/wiki/Linear_congruential_generator
1061 * From said page, it says the following:
1062 * "A common Mersenne twister implementation, interestingly enough
1063 * used an LCG to generate seed data."
1066 * The data we're operating on is 32-bits for the mt_state array, so
1067 * there is no masking required with 0xFFFFFFFF
1071 mt_state[0] = value;
1072 for (i = 1; i < MT_SIZE; ++i)
1073 mt_state[i] = 0x6C078965 * (mt_state[i - 1] ^ mt_state[i - 1] >> 30) + i;
1076 uint32_t util_rand() {
1077 register uint32_t y;
1080 * This is inlined with any sane compiler (I checked)
1081 * for some reason though, SubC seems to be generating invalid
1082 * code when it inlines this.
1087 y = mt_state[mt_index];
1089 /* Standard tempering */
1090 y ^= y >> 11; /* +7 */
1091 y ^= y << 7 & 0x9D2C5680; /* +4 */
1092 y ^= y << 15 & 0xEFC60000; /* -4 */
1093 y ^= y >> 18; /* -7 */
1095 if(++mt_index == MT_SIZE)