From fa155f8a42a95111099b8c97a6617cb81317ea6c Mon Sep 17 00:00:00 2001
From: Dale Weiler <killfieldengine@gmail.com>
Date: Mon, 24 Dec 2012 01:43:27 +0000
Subject: [PATCH] Added my awesome MT19937 PRNG, and use it instead of stdlib's
 rand()/srand() .. which are implementation specific .. and simply unsafe (for
 example one of the compilers at work simply has its standard library
 implementation of rand() return 0 always (which is perfectly conformant)).

---
 ftepp.c |  14 ++---
 gmqcc.h |   3 +
 main.c  |   3 +
 util.c  | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 184 insertions(+), 7 deletions(-)

diff --git a/ftepp.c b/ftepp.c
index e1ee582..ab51bd9 100644
--- a/ftepp.c
+++ b/ftepp.c
@@ -77,8 +77,8 @@ typedef struct {
  * Implement the predef subsystem now.  We can do this safely with the
  * help of lexer contexts.
  */  
-static int ftepp_predef_countval = 0;
-static int ftepp_predef_randval  = 0;
+static uint32_t ftepp_predef_countval = 0;
+static uint32_t ftepp_predef_randval  = 0;
 
 /* __LINE__ */
 char *ftepp_predef_line(lex_file *context) {
@@ -98,7 +98,7 @@ char *ftepp_predef_file(lex_file *context) {
 /* __COUNTER_LAST__ */
 char *ftepp_predef_counterlast(lex_file *context) {
     char   *value = (char*)mem_a(128);
-    sprintf(value, "%d", ftepp_predef_countval);
+    sprintf(value, "%u", ftepp_predef_countval);
 
     (void)context;
     return value;
@@ -107,7 +107,7 @@ char *ftepp_predef_counterlast(lex_file *context) {
 char *ftepp_predef_counter(lex_file *context) {
     char   *value = (char*)mem_a(128);
     ftepp_predef_countval ++;
-    sprintf(value, "%d", ftepp_predef_countval);
+    sprintf(value, "%u", ftepp_predef_countval);
     (void)context;
 
     return value;
@@ -115,8 +115,8 @@ char *ftepp_predef_counter(lex_file *context) {
 /* __RANDOM__ */
 char *ftepp_predef_random(lex_file *context) {
     char  *value = (char*)mem_a(128);
-    ftepp_predef_randval = rand() % 0xFFFF; /* short int */
-    sprintf(value, "%d", ftepp_predef_randval);
+    ftepp_predef_randval = (util_rand() % 0xFF) + 1;
+    sprintf(value, "%u", ftepp_predef_randval);
 
     (void)context;
     return value;
@@ -124,7 +124,7 @@ char *ftepp_predef_random(lex_file *context) {
 /* __RANDOM_LAST__ */
 char *ftepp_predef_randomlast(lex_file *context) {
     char   *value = (char*)mem_a(128);
-    sprintf(value, "%d", ftepp_predef_randval);
+    sprintf(value, "%u", ftepp_predef_randval);
 
     (void)context;
     return value;
diff --git a/gmqcc.h b/gmqcc.h
index 9e8af45..65161c2 100644
--- a/gmqcc.h
+++ b/gmqcc.h
@@ -260,6 +260,9 @@ size_t util_strtononcmd (const char *, char *, size_t);
 
 uint16_t util_crc16(uint16_t crc, const char *data, size_t len);
 
+void     util_seed(uint32_t);
+uint32_t util_rand();
+
 #ifdef NOTRACK
 #    define mem_a(x)    malloc (x)
 #    define mem_d(x)    free   ((void*)x)
diff --git a/main.c b/main.c
index aa1e3a0..a52e704 100644
--- a/main.c
+++ b/main.c
@@ -23,6 +23,7 @@
  */
 #include "gmqcc.h"
 #include "lexer.h"
+#include <time.h>
 
 /* TODO: cleanup this whole file .. it's a fuckign mess */
 
@@ -505,6 +506,8 @@ int main(int argc, char **argv) {
     con_init ();
     opts_init("progs.dat", COMPILER_GMQCC, (1024 << 3));
 
+    util_seed(time(0));
+
     if (!options_parse(argc, argv)) {
         return usage();
     }
diff --git a/util.c b/util.c
index f4dbfc4..26803c8 100644
--- a/util.c
+++ b/util.c
@@ -538,3 +538,174 @@ void util_htdel(hash_table_t *ht) {
     mem_d(ht->table);
     mem_d(ht);
 }
+
+/*
+ * Implementation of the Mersenne twister PRNG (pseudo random number
+ * generator).  Implementation of MT19937.  Has a period of 2^19937-1
+ * which is a Mersenne Prime (hence the name).
+ *
+ * Implemented from specification and original paper:
+ * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/ARTICLES/mt.pdf
+ *
+ * This code is placed in the public domain by me personally
+ * (Dale Weiler, a.k.a graphitemaster).
+ */
+
+#define MT_SIZE    624                    /* number of 32-bit words in mt_state */
+#define MT_PERIOD  397                    /* index offset of the partner word used by the twist; NOT the generator's period */
+#define MT_SPACE   (MT_SIZE - MT_PERIOD)  /* 227: count of leading words whose partner index does not wrap */
+
+static uint32_t mt_state[MT_SIZE];        /* state words: seeded by util_seed(), refreshed by mt_generate() */
+static size_t   mt_index = 0;             /* next word util_rand() reads; 0 makes it call mt_generate() first */
+
+static GMQCC_INLINE void mt_generate(void) {
+    /*
+     * Regenerates all MT_SIZE words of mt_state in place (the MT19937
+     * "twist").  The textbook form of this step is:
+     *
+     * for (i = 0; i < MT_SIZE; ++i) {
+     *     uint32_t load;
+     *     load  = (0x80000000 & mt_state[i]);                 // most significant bit
+     *     load |= (0x7FFFFFFF & mt_state[(i + 1) % MT_SIZE]); // low 31 bits of next word
+     *
+     *     mt_state[i] = mt_state[(i + MT_PERIOD) % MT_SIZE] ^ (load >> 1);
+     *
+     *     if (load & 1) mt_state[i] ^= 0x9908B0DF;
+     * }
+     *
+     * That costs two modulus operations and a branch per word.  Below, the
+     * range is split where the indices would wrap so no modulus is needed,
+     * and the branch becomes a lookup in matrix[].  See
+     * http://www.quadibloc.com/crypto/co4814.htm for background.
+     */
+
+    /* matrix[y & 1] is 0 or the twist constant; replaces the conditional XOR. */
+    static const uint32_t matrix[2] = {
+        0x00000000,
+        0x9908B0DF
+    };
+
+    register uint32_t y;
+    register uint32_t i;
+
+    /*
+     * Words [0, MT_SPACE): the partner word i + MT_PERIOD is still in
+     * range.  MT_SPACE is odd (227), so the two-way unrolled loop stops at
+     * MT_SPACE - 1 and the last word of this range is done on its own;
+     * otherwise the second step would read mt_state[MT_SIZE].
+     */
+    for (i = 0; i < MT_SPACE - 1; ++i) {
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
+
+        i ++; /* loop unroll */
+
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
+    }
+
+    /* i == MT_SPACE - 1 here: the odd word left over by the unrolled loop. */
+    y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+    mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
+
+    /*
+     * Words [MT_SPACE, MT_SIZE - 1): the partner index wraps, so it is
+     * i - MT_SPACE.  396 words (623 - 227 = 2*2*3*3*11), unrolled 11 times.
+     */
+    i = MT_SPACE;
+    while (i < MT_SIZE - 1) {
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+        ++i;
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+        ++i;
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+        ++i;
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+        ++i;
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+        ++i;
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+        ++i;
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+        ++i;
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+        ++i;
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+        ++i;
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+        ++i;
+        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
+        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
+        ++i;
+    }
+
+    /* Word MT_SIZE - 1: its successor wraps around to mt_state[0]. */
+    y                     = (0x80000000 & mt_state[MT_SIZE - 1]) | (0x7FFFFFFF & mt_state[0]);
+    mt_state[MT_SIZE - 1] = mt_state[MT_PERIOD - 1] ^ (y >> 1) ^ matrix[y & 1];
+}
+
+void util_seed(uint32_t value) {
+    /*
+     * Fill mt_state from a single 32-bit seed with the recurrence
+     *     s[i] = 0x6C078965 * (s[i-1] ^ (s[i-1] >> 30)) + i
+     * Each result is stored into a uint32_t, i.e. reduced modulo 2^32,
+     * so no explicit masking with 0xFFFFFFFF is needed.
+     *
+     * The multiplier 0x6C078965 (1812433253) is the Borosh-Niederreiter
+     * multiplier for modulus 2^32; see Knuth's TAOCP Volume 2, page 106,
+     * and the original paper by Borosh and Niederreiter, "Optimal
+     * Multipliers for Pseudo-Random Number Generation by the Linear
+     * Congruential Method" (1983).
+     * http://en.wikipedia.org/wiki/Linear_congruential_generator
+     *
+     * From said page, it says the following:
+     * "A common Mersenne twister implementation, interestingly enough
+     *  used an LCG to generate seed data."
+     */
+    register size_t i;
+
+    mt_state[0] = value;
+    for (i = 1; i < MT_SIZE; ++i)
+        mt_state[i] = 0x6C078965 * (mt_state[i - 1] ^ (mt_state[i - 1] >> 30)) + (uint32_t)i;
+
+    /*
+     * Restart at word 0 so the next util_rand() regenerates the state
+     * instead of returning raw seed words after a mid-stream re-seed.
+     */
+    mt_index = 0;
+}
+
+uint32_t util_rand() {
+    register uint32_t word;
+
+    /*
+     * A fresh batch of state words is produced whenever the read
+     * position is back at the start of mt_state.  Per the original
+     * author's note, sane compilers inline mt_generate() here, while
+     * SubC seems to generate invalid code when it inlines it.
+     */
+    if (mt_index == 0)
+        mt_generate();
+
+    /* Take the next word and advance, wrapping at the end of mt_state. */
+    word     = mt_state[mt_index];
+    mt_index = (mt_index + 1 == MT_SIZE) ? 0 : mt_index + 1;
+
+    /* Standard tempering */
+    word ^= (word >> 11);
+    word ^= (word << 7)  & 0x9D2C5680;
+    word ^= (word << 15) & 0xEFC60000;
+    word ^= (word >> 18);
+
+    return word;
+}
-- 
2.39.2