+
+/*
+ * A basic implementation of a hash-set. Unlike a hashtable, a hash
+ * set doesn't maintain key-value pairs. It simply maintains a key
+ * that can be set, removed, and checked for.
+ *
+ * See EXPOSED interface comment below
+ */
+/*
+ * Constants for the open-addressing probe sequence used below:
+ * PRIME0 multiplies the item's value to choose the starting slot,
+ * PRIME1 is the step added (then masked) to move to the next slot.
+ */
+#define GMQCC_HASHSET_PRIME0 0x0049
+#define GMQCC_HASHSET_PRIME1 0x1391
+
+/*
+ * Insert `item` into `set` without resizing the table.
+ *
+ * The item's pointer value itself is what gets stored. A slot holding 0
+ * has never been used and a slot holding 1 was vacated by util_hsrem, so
+ * items whose value is 0 or 1 cannot be represented and are rejected.
+ *
+ * Returns:
+ *    1  the item was inserted
+ *    0  the item was already present
+ *   -1  the item is 0 or 1, or no free slot could be found
+ */
+static int util_hsput(hash_set_t *set, void *item) {
+    size_t hash   = (size_t)item; /* shouldn't drop the bits */
+    size_t iter;
+    size_t probes;
+    size_t spot   = 0;
+    int    found  = 0;
+
+    /* 0 and 1 are the "empty" and "removed" markers */
+    if (hash < 2)
+        return -1;
+
+    iter = set->mask & (GMQCC_HASHSET_PRIME0 * hash);
+
+    /*
+     * The step is odd and the capacity is a power of two, so mask + 1
+     * probes visit every slot once; bounding the loop keeps it finite
+     * even if no empty slot is left.
+     */
+    for (probes = 0; probes <= set->mask; probes++) {
+        size_t cur = set->items[iter];
+
+        if (cur == hash)
+            return 0;
+
+        if (cur < 2) {
+            /* remember the first reusable slot on the probe path */
+            if (!found) {
+                spot  = iter;
+                found = 1;
+            }
+            /*
+             * Only a never-used slot ends the chain; a removed slot may
+             * still be followed by the item we are looking for.
+             */
+            if (cur == 0)
+                break;
+        }
+
+        iter = set->mask & (iter + GMQCC_HASHSET_PRIME1);
+    }
+
+    if (!found)
+        return -1;
+
+    set->total ++;
+    set->items[spot] = hash;
+
+    return 1;
+}
+
+/*
+ * Grow the table when it is at least 85% full.
+ *
+ * The capacity is doubled, a cleared table is allocated and every live
+ * entry of the old table is re-inserted with util_hsput. Removed-entry
+ * markers are not carried over. If the larger table cannot be
+ * allocated (or its size would overflow) the set is left untouched.
+ */
+static void util_hsupdate(hash_set_t *set) {
+    size_t *old;
+    size_t *fresh;
+    size_t  end;
+    size_t  itr;
+    size_t  cap;
+
+    /* time to rehash? */
+    if (set->total < (size_t)((double)set->capacity * 0.85))
+        return;
+
+    /* refuse to grow past what a size_t shift / byte count can express */
+    if (set->bits + 1 >= sizeof(size_t) * 8)
+        return;
+
+    cap = (size_t)1 << (set->bits + 1);
+    if (cap > ((size_t)-1) / sizeof(size_t))
+        return;
+
+    fresh = (size_t*)mem_a(cap * sizeof(size_t));
+    if (!fresh)
+        return;
+
+    /*
+     * 0 marks an unused slot; nothing visible here shows mem_a clearing
+     * the memory, so do it explicitly.
+     */
+    for (itr = 0; itr < cap; itr++)
+        fresh[itr] = 0;
+
+    old = set->items;
+    end = set->capacity;
+
+    set->bits ++;
+    set->capacity = cap;
+    set->mask     = cap - 1;
+    set->items    = fresh;
+    set->total    = 0;
+
+    /* re-insert live entries only (values 0 and 1 are markers) */
+    for (itr = 0; itr < end; itr++) {
+        if (old[itr] >> 1)
+            util_hsput(set, (void*)old[itr]);
+    }
+
+    mem_d(old);
+}
+
+/*
+ * EXPOSED interface: all of these functions are exposed to the outside
+ * for use. The stuff above is static because it's the "internal" mechanics
+ * for synchronizing the set for updating, and putting data into the set.
+ */
+/*
+ * Add `item` to `set`, then give the table a chance to grow.
+ * The value returned is whatever util_hsput reported for the insert.
+ */
+int util_hsadd(hash_set_t *set, void *item) {
+    const int result = util_hsput(set, item);
+
+    util_hsupdate(set);
+    return result;
+}
+
+/* remove item in set */
+/*
+ * Remove `item` from `set`.
+ *
+ * The slot is overwritten with the marker value 1 rather than 0 so that
+ * probe chains running through it stay intact. Items whose value is 0
+ * or 1 are never stored, so they are reported as not found instead of
+ * being matched against a marker.
+ *
+ * Returns 1 if the item was present and removed, 0 otherwise.
+ */
+int util_hsrem(hash_set_t *set, void *item) {
+    size_t hash = (size_t)item;
+    size_t iter;
+    size_t probes;
+
+    if (hash < 2)
+        return 0;
+
+    iter = set->mask & (GMQCC_HASHSET_PRIME0 * hash);
+
+    /* at most mask + 1 probes: stays finite even with no empty slot left */
+    for (probes = 0; probes <= set->mask && set->items[iter]; probes++) {
+        if (set->items[iter] == hash) {
+            set->items[iter] = 1;
+            set->total --;
+
+            return 1;
+        }
+        iter = set->mask & (iter + GMQCC_HASHSET_PRIME1);
+    }
+
+    return 0;
+}
+
+/* check if item is set */
+/*
+ * Check whether `item` is in `set`.
+ *
+ * Items whose value is 0 or 1 are never stored (those values mark
+ * unused and removed slots), so they are always reported as absent.
+ *
+ * Returns 1 if the item is present, 0 otherwise.
+ */
+int util_hshas(hash_set_t *set, void *item) {
+    size_t hash = (size_t)item;
+    size_t iter;
+    size_t probes;
+
+    if (hash < 2)
+        return 0;
+
+    iter = set->mask & (GMQCC_HASHSET_PRIME0 * hash);
+
+    /* at most mask + 1 probes: stays finite even with no empty slot left */
+    for (probes = 0; probes <= set->mask && set->items[iter]; probes++) {
+        if (set->items[iter] == hash)
+            return 1;
+
+        iter = set->mask & (iter + GMQCC_HASHSET_PRIME1);
+    }
+
+    return 0;
+}
+
+/*
+ * Allocate an empty hash set with 2^3 = 8 slots.
+ *
+ * Every slot is cleared to 0, the value the lookup routines treat as
+ * "never used". Returns NULL if either allocation fails; otherwise the
+ * caller releases the set with util_hsdel.
+ */
+hash_set_t *util_hsnew(void) {
+    hash_set_t *set;
+    size_t      idx;
+
+    if (!(set = (hash_set_t*)mem_a(sizeof(hash_set_t))))
+        return NULL;
+
+    set->bits     = 3;
+    set->total    = 0;
+    set->capacity = (size_t)1 << set->bits;
+    set->mask     = set->capacity - 1;
+    set->items    = (size_t*)mem_a(set->capacity * sizeof(size_t));
+
+    if (!set->items) {
+        util_hsdel(set);
+        return NULL;
+    }
+
+    /*
+     * Nothing visible here shows mem_a clearing the memory, so mark
+     * every slot as unused explicitly.
+     */
+    for (idx = 0; idx < set->capacity; idx++)
+        set->items[idx] = 0;
+
+    return set;
+}
+
+/*
+ * Release a hash set: its slot array (when one is attached) and then
+ * the set structure itself. Passing NULL does nothing.
+ */
+void util_hsdel(hash_set_t *set) {
+    if (set) {
+        if (set->items) {
+            mem_d(set->items);
+        }
+        mem_d(set);
+    }
+}
+/* the probe constants are only meant for the hash-set code above */
+#undef GMQCC_HASHSET_PRIME0
+#undef GMQCC_HASHSET_PRIME1
+
+
+/*
+ * Portable implementation of vasprintf/asprintf. Assumes vsnprintf
+ * exists, otherwise compiler error.
+ *
+ * TODO: fix for MSVC ....
+ */
+/*
+ * Format `fmt`/`args` into a newly allocated string stored in *dat.
+ *
+ * Returns the length of the formatted string (excluding the
+ * terminator) on success. On a formatting or allocation failure it
+ * returns -1 and *dat is set to NULL. The string is obtained from
+ * mem_a / util_strdup; the caller is responsible for releasing it.
+ */
+int util_vasprintf(char **dat, const char *fmt, va_list args) {
+    int   ret;
+    int   len;
+    char *tmp = NULL;
+
+    /*
+     * For Visual Studio, _vsnprintf doesn't tell you the length of a
+     * formatted string if it overflows. However there is a MSVC
+     * function called _vscprintf which returns the number of
+     * characters required, so the buffer can be sized up front.
+     */
+    #ifdef _MSC_VER
+    if ((len = _vscprintf(fmt, args)) < 0) {
+        *dat = NULL;
+        return -1;
+    }
+
+    tmp = (char*)mem_a(len + 1);
+    if (!tmp) {
+        *dat = NULL;
+        return -1;
+    }
+
+    if ((ret = _vsnprintf(tmp, len+1, fmt, args)) != len) {
+        mem_d(tmp);
+        *dat = NULL;
+        return -1;
+    }
+    *dat = tmp;
+    return len;
+    #else
+    /*
+     * For everything else we have a decent conforming vsnprintf that
+     * returns the number of bytes needed. We give it a try on a short
+     * buffer first, since for short strings that avoids a second
+     * vsnprintf call.
+     */
+    char    buf[128];
+    va_list cpy;
+
+    /* format from a copy so `args` is still usable for the second pass */
+    va_copy(cpy, args);
+    len = vsnprintf(buf, sizeof(buf), fmt, cpy);
+    va_end (cpy);
+
+    /* a negative result is an error: buf holds nothing worth copying */
+    if (len < 0) {
+        *dat = NULL;
+        return -1;
+    }
+
+    if (len < (int)sizeof(buf)) {
+        *dat = util_strdup(buf);
+        /* presumably util_strdup yields NULL when it cannot allocate */
+        return (*dat) ? len : -1;
+    }
+
+    /* not large enough ... */
+    tmp = (char*)mem_a(len + 1);
+    if (!tmp) {
+        *dat = NULL;
+        return -1;
+    }
+
+    if ((ret = vsnprintf(tmp, len + 1, fmt, args)) != len) {
+        mem_d(tmp);
+        *dat = NULL;
+        return -1;
+    }
+
+    *dat = tmp;
+    return len;
+    #endif
+}
+/*
+ * Variadic front end for util_vasprintf: packs the trailing arguments
+ * into a va_list, forwards them, and hands back util_vasprintf's result.
+ */
+int util_asprintf(char **ret, const char *fmt, ...) {
+    int     written;
+    va_list ap;
+
+    va_start(ap, fmt);
+    written = util_vasprintf(ret, fmt, ap);
+    va_end(ap);
+
+    return written;
+}
+
+/*
+ * Implementation of the Mersenne twister PRNG (pseudo random number
+ * generator). Implementation of MT19937. Has a period of 2^19937-1
+ * which is a Mersenne Prime (hence the name).
+ *
+ * Implemented from specification and original paper:
+ * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/ARTICLES/mt.pdf
+ *
+ * This code is placed in the public domain by me personally
+ * (Dale Weiler, a.k.a graphitemaster).
+ */
+
+/*
+ * MT_SIZE   - number of 32-bit words in the generator state.
+ * MT_PERIOD - index distance between a state word and the word it is
+ *             combined with in mt_generate (this is an offset, not the
+ *             period of the generator).
+ * MT_SPACE  - MT_SIZE - MT_PERIOD, i.e. 227.
+ */
+#define MT_SIZE 624
+#define MT_PERIOD 397
+#define MT_SPACE (MT_SIZE - MT_PERIOD)
+
+/* generator state, and the index of the next state word util_rand reads */
+static uint32_t mt_state[MT_SIZE];
+static size_t mt_index = 0;
+
+/*
+ * Regenerate all MT_SIZE words of mt_state in place.
+ *
+ * The textbook formulation is a single loop:
+ *  for (i = 0; i < MT_SIZE; ++i) {
+ *      load  = (0x80000000 & mt_state[i]);                 // top bit
+ *      load |= (0x7FFFFFFF & mt_state[(i + 1) % MT_SIZE]); // low 31 bits
+ *
+ *      mt_state[i] = mt_state[(i + MT_PERIOD) % MT_SIZE] ^ (load >> 1);
+ *
+ *      if (load & 1) mt_state[i] ^= 0x9908B0DF;
+ *  }
+ *
+ * That costs two modulus operations and a branch per word. Here the
+ * range is split where the indices would wrap, so no modulus is needed,
+ * and the branch is replaced by a two-entry lookup table.
+ *
+ * Please see: http://www.quadibloc.com/crypto/co4814.htm for more
+ * information on how this works.
+ *
+ * Note: earlier revisions of this routine masked with 0x7FFFFFF in the
+ * first range, read one word past the end of mt_state, and took the low
+ * bits of the last word from itself instead of from mt_state[0]. The
+ * sequence produced now follows the recurrence shown above.
+ */
+static GMQCC_INLINE void mt_generate(void) {
+    static const uint32_t matrix[2] = {
+        0x00000000,
+        0x9908B0DF
+    };
+    uint32_t y;
+    size_t   i;
+
+    /*
+     * Words [0, MT_SPACE): the partner word lies MT_PERIOD ahead and is
+     * still inside the array (at most index MT_SIZE - 1).
+     */
+    for (i = 0; i < MT_SPACE; ++i) {
+        y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
+    }
+
+    /*
+     * Words [MT_SPACE, MT_SIZE - 1): i + MT_PERIOD would wrap, which is
+     * the same as stepping MT_SPACE words back.
+     */
+    for (; i < MT_SIZE - 1; ++i) {
+        y = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+    }
+
+    /* last word: its "next" neighbour wraps around to mt_state[0] */
+    y = (0x80000000 & mt_state[MT_SIZE - 1]) | (0x7FFFFFFF & mt_state[0]);
+    mt_state[MT_SIZE - 1] = mt_state[MT_PERIOD - 1] ^ (y >> 1) ^ matrix[y & 1];
+}
+
+/*
+ * Seed the generator with `value`.
+ *
+ * mt_state is filled with an LCG (linear congruential generator)
+ * started from `value`, and mt_index is reset so that the next
+ * util_rand call regenerates the state before handing out any number.
+ * Without the reset, re-seeding in the middle of a block would return
+ * the raw seed words for the remainder of that block.
+ */
+void util_seed(uint32_t value) {
+    /*
+     * We're operating on exactly m=32 bits, so there is no need to use
+     * a modulus.
+     *
+     * The multiplier of choice is 0x6C078965, also known as the Borosh-
+     * Niederreiter multiplier used for modulus 2^32. More can be read
+     * about this in Knuth's TAOCP Volume 2, page 106, and in the paper
+     * by Borosh and Niederreiter, "Optimal Multipliers for Pseudo-Random
+     * Number Generation by the Linear Congruential Method" (1983).
+     * http://en.wikipedia.org/wiki/Linear_congruential_generator
+     *
+     * Remarks:
+     *  The data we're operating on is 32-bits for the mt_state array, so
+     *  there is no masking required with 0xFFFFFFFF
+     */
+    size_t i;
+
+    mt_state[0] = value;
+    for (i = 1; i < MT_SIZE; ++i) {
+        uint32_t prev = mt_state[i - 1];
+
+        mt_state[i] = (uint32_t)0x6C078965 * (prev ^ (prev >> 30)) + (uint32_t)i;
+    }
+
+    /* force mt_generate to run on the next util_rand call */
+    mt_index = 0;
+}
+
+/*
+ * Return the next 32-bit pseudo random number.
+ *
+ * Whenever the read index is at 0 the whole state is regenerated first.
+ * The state word at the index is then tempered and returned, and the
+ * index advances, wrapping back to 0 after MT_SIZE words.
+ */
+uint32_t util_rand() {
+    uint32_t out;
+
+    if (mt_index == 0)
+        mt_generate();
+
+    out = mt_state[mt_index];
+
+    /* advance, wrapping at the end of the state block */
+    mt_index++;
+    if (mt_index == MT_SIZE)
+        mt_index = 0;
+
+    /* standard tempering: shift/mask steps applied in this fixed order */
+    out ^= (out >> 11);
+    out ^= (out << 7)  & 0x9D2C5680;
+    out ^= (out << 15) & 0xEFC60000;
+    out ^= (out >> 18);
+
+    return out;
+}