2 * Copyright (C) 2012, 2013
6 * Permission is hereby granted, free of charge, to any person obtaining a copy of
7 * this software and associated documentation files (the "Software"), to deal in
8 * the Software without restriction, including without limitation the rights to
9 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
10 * of the Software, and to permit persons to whom the Software is furnished to do
11 * so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * Initially this was handled with a table in the gmqcc.h header, but
31 * much to my surprise the contents of the table were duplicated for
32 * each translation unit, causing all these strings to be duplicated
33 * for every .c file it was included into. This method cuts back on
34 * it. This is a 'utility' table because the executor also depends
35 * on this for disassembled bytecode.
37 const char *util_instr_str[VINSTR_END] = {
38 "DONE", "MUL_F", "MUL_V", "MUL_FV",
39 "MUL_VF", "DIV_F", "ADD_F", "ADD_V",
40 "SUB_F", "SUB_V", "EQ_F", "EQ_V",
41 "EQ_S", "EQ_E", "EQ_FNC", "NE_F",
42 "NE_V", "NE_S", "NE_E", "NE_FNC",
43 "LE", "GE", "LT", "GT",
44 "LOAD_F", "LOAD_V", "LOAD_S", "LOAD_ENT",
45 "LOAD_FLD", "LOAD_FNC", "ADDRESS", "STORE_F",
46 "STORE_V", "STORE_S", "STORE_ENT", "STORE_FLD",
47 "STORE_FNC", "STOREP_F", "STOREP_V", "STOREP_S",
48 "STOREP_ENT", "STOREP_FLD", "STOREP_FNC", "RETURN",
49 "NOT_F", "NOT_V", "NOT_S", "NOT_ENT",
50 "NOT_FNC", "IF", "IFNOT", "CALL0",
51 "CALL1", "CALL2", "CALL3", "CALL4",
52 "CALL5", "CALL6", "CALL7", "CALL8",
53 "STATE", "GOTO", "AND", "OR",
57 void util_debug(const char *area, const char *ms, ...) {
59 if (!OPTS_OPTION_BOOL(OPTION_DEBUG))
62 if (!strcmp(area, "MEM") && !OPTS_OPTION_BOOL(OPTION_MEMCHK))
66 con_out ("[%s] ", area);
72 * only required if big endian .. otherwise no need to swap
75 #if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_BIG
76 static GMQCC_INLINE void util_swap16(uint16_t *d, size_t l) {
78 d[l] = (d[l] << 8) | (d[l] >> 8);
82 static GMQCC_INLINE void util_swap32(uint32_t *d, size_t l) {
85 v = ((d[l] << 8) & 0xFF00FF00) | ((d[l] >> 8) & 0x00FF00FF);
86 d[l] = (v << 16) | (v >> 16);
90 /* Some strange system doesn't like constants that big, AND doesn't recognize an ULL suffix
91 * so let's go the safe way
93 static GMQCC_INLINE void util_swap64(uint32_t *d, size_t l) {
97 v = ((d[l] << 8) & 0xFF00FF00FF00FF00) | ((d[l] >> 8) & 0x00FF00FF00FF00FF);
98 v = ((v << 16) & 0xFFFF0000FFFF0000) | ((v >> 16) & 0x0000FFFF0000FFFF);
99 d[l] = (v << 32) | (v >> 32);
103 for (i = 0; i < l; i += 2) {
112 void util_endianswap(void *_data, size_t length, unsigned int typesize) {
113 # if PLATFORM_BYTE_ORDER == -1 /* runtime check */
114 if (*((char*)&typesize))
117 /* prevent unused warnings */
122 # if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE
128 util_swap16((uint16_t*)_data, length>>1);
131 util_swap32((uint32_t*)_data, length>>2);
134 util_swap64((uint32_t*)_data, length>>3);
137 default: exit(EXIT_FAILURE); /* please blow the fuck up! */
144 * CRC algorithms vary in the width of the polynomial, the value of said polynomial,
145 * the initial value used for the register, whether the bits of each byte are reflected
146 * before being processed, whether the algorithm itself feeds input bytes through the
147 * register or XORs them with a byte from one end and then straight into the table, as
148 * well as (but not limited to the idea of reflected versions) where the final register
149 * value becomes reversed, and finally whether the value itself is used to XOR the final
150 * register value. As such you can already imagine how painfully annoying CRCs are;
151 * of course we stand to target Quake, which expects its certain set of rules for proper
152 * calculation of a CRC.
154 * In most traditional CRC algorithms one uses a reflected table driven method where a value
155 * or register is reflected if its bits are swapped around its center. For example:
156 * the bits 0101 are the 4-bit reflection of 1010, and respectively 0011 would be the
157 * reflection of 1100. Quake however expects a NON-Reflected CRC on the output, but still
158 * requires a final XOR on the values (0xFFFF and 0x0000); this is a standard CCITT CRC-16
159 * which I respectfully as a programmer don't agree with.
161 * So now you know what we target, and why we target it, despite how unsettling it may seem
162 * but those are what Quake seems to request.
165 static const uint16_t util_crc16_table[] = {
166 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5,
167 0x60C6, 0x70E7, 0x8108, 0x9129, 0xA14A, 0xB16B,
168 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, 0x1231, 0x0210,
169 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
170 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C,
171 0xF3FF, 0xE3DE, 0x2462, 0x3443, 0x0420, 0x1401,
172 0x64E6, 0x74C7, 0x44A4, 0x5485, 0xA56A, 0xB54B,
173 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
174 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6,
175 0x5695, 0x46B4, 0xB75B, 0xA77A, 0x9719, 0x8738,
176 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, 0x48C4, 0x58E5,
177 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
178 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969,
179 0xA90A, 0xB92B, 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96,
180 0x1A71, 0x0A50, 0x3A33, 0x2A12, 0xDBFD, 0xCBDC,
181 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
182 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03,
183 0x0C60, 0x1C41, 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD,
184 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, 0x7E97, 0x6EB6,
185 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
186 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A,
187 0x9F59, 0x8F78, 0x9188, 0x81A9, 0xB1CA, 0xA1EB,
188 0xD10C, 0xC12D, 0xF14E, 0xE16F, 0x1080, 0x00A1,
189 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
190 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C,
191 0xE37F, 0xF35E, 0x02B1, 0x1290, 0x22F3, 0x32D2,
192 0x4235, 0x5214, 0x6277, 0x7256, 0xB5EA, 0xA5CB,
193 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
194 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447,
195 0x5424, 0x4405, 0xA7DB, 0xB7FA, 0x8799, 0x97B8,
196 0xE75F, 0xF77E, 0xC71D, 0xD73C, 0x26D3, 0x36F2,
197 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
198 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9,
199 0xB98A, 0xA9AB, 0x5844, 0x4865, 0x7806, 0x6827,
200 0x18C0, 0x08E1, 0x3882, 0x28A3, 0xCB7D, 0xDB5C,
201 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
202 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0,
203 0x2AB3, 0x3A92, 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D,
204 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, 0x7C26, 0x6C07,
205 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
206 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA,
207 0x8FD9, 0x9FF8, 0x6E17, 0x7E36, 0x4E55, 0x5E74,
208 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
211 /* Non - Reflected */
212 uint16_t util_crc16(uint16_t current, const char *k, size_t len) {
213 register uint16_t h = current;
214 for (; len; --len, ++k)
215 h = util_crc16_table[(h>>8)^((unsigned char)*k)]^(h<<8);
218 /* Reflective Variation (for reference) */
220 uint16_t util_crc16(const char *k, int len, const short clamp) {
221 register uint16_t h= (uint16_t)0xFFFFFFFF;
222 for (; len; --len, ++k)
223 h = util_crc16_table[(h^((unsigned char)*k))&0xFF]^(h>>8);
228 size_t util_strtocmd(const char *in, char *out, size_t outsz) {
230 for (; *in && sz < outsz; ++in, ++out, ++sz)
231 *out = (*in == '-') ? '_' : (util_isalpha(*in) && !util_isupper(*in)) ? *in + 'A' - 'a': *in;
236 size_t util_strtononcmd(const char *in, char *out, size_t outsz) {
238 for (; *in && sz < outsz; ++in, ++out, ++sz)
239 *out = (*in == '_') ? '-' : (util_isalpha(*in) && util_isupper(*in)) ? *in + 'a' - 'A' : *in;
245 * Portable implementation of vasprintf/asprintf. Assumes vsnprintf
246 * exists, otherwise compiler error.
248 * TODO: fix for MSVC ....
250 int util_vasprintf(char **dat, const char *fmt, va_list args) {
256 * For Visual Studio, _vsnprintf doesn't tell you the length of a
257 * formatted string if it overflows. However there is a MSVC
258 * intrinsic (which is documented wrong) called _vscprintf which
259 * will return the required amount to allocate.
262 if ((len = _vscprintf(fmt, args)) < 0) {
267 tmp = (char*)mem_a(len + 1);
268 if ((ret = _vsnprintf_s(tmp, len+1, len+1, fmt, args)) != len) {
277 * For everything else we have a decent conforming vsnprintf that
278 * returns the number of bytes needed. We give it a try though on
279 * a short buffer, since efficiently speaking, it could be nice to
280 * avoid a second vsnprintf call.
285 len = vsnprintf(buf, sizeof(buf), fmt, cpy);
288 if (len < (int)sizeof(buf)) {
289 *dat = util_strdup(buf);
293 /* not large enough ... */
294 tmp = (char*)mem_a(len + 1);
295 if ((ret = vsnprintf(tmp, len + 1, fmt, args)) != len) {
305 int util_asprintf(char **ret, const char *fmt, ...) {
309 read = util_vasprintf(ret, fmt, args);
316 * These are various re-implementations (wrapping the real ones) of
317 * string functions that MSVC considers unsafe. We wrap these up and
318 * use the safe variations on MSVC.
321 static char **util_strerror_allocated() {
322 static char **data = NULL;
326 static void util_strerror_cleanup(void) {
328 char **data = util_strerror_allocated();
329 for (i = 0; i < vec_size(data); i++)
334 const char *util_strerror(int num) {
335 char *allocated = NULL;
336 static bool install = false;
337 static size_t tries = 0;
338 char **vector = util_strerror_allocated();
340 /* try installing cleanup handler */
345 install = !atexit(&util_strerror_cleanup);
349 allocated = (char*)mem_a(4096); /* A page must be enough */
350 strerror_s(allocated, 4096, num);
352 vec_push(vector, allocated);
353 return (const char *)allocated;
356 int util_snprintf(char *src, size_t bytes, const char *format, ...) {
359 va_start(va, format);
361 rt = vsprintf_s(src, bytes, format, va);
367 char *util_strcat(char *dest, const char *src) {
368 strcat_s(dest, strlen(src), src);
372 char *util_strncpy(char *dest, const char *src, size_t num) {
373 strncpy_s(dest, num, src, num);
377 const char *util_strerror(int num) {
378 return strerror(num);
381 int util_snprintf(char *src, size_t bytes, const char *format, ...) {
384 va_start(va, format);
385 rt = vsnprintf(src, bytes, format, va);
391 char *util_strcat(char *dest, const char *src) {
392 return strcat(dest, src);
395 char *util_strncpy(char *dest, const char *src, size_t num) {
396 return strncpy(dest, src, num);
399 #endif /*! _MSC_VER */
402 * Implementation of the Mersenne twister PRNG (pseudo random number
403 * generator). Implementation of MT19937. Has a period of 2^19937-1
404 * which is a Mersenne Prime (hence the name).
406 * Implemented from specification and original paper:
407 * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/ARTICLES/mt.pdf
409 * This code is placed in the public domain by me personally
410 * (Dale Weiler, a.k.a graphitemaster).
#define MT_SIZE    624
#define MT_PERIOD  397
#define MT_SPACE   (MT_SIZE - MT_PERIOD)

static uint32_t mt_state[MT_SIZE];
static size_t   mt_index = 0;

/*
 * Regenerate every word of mt_state in place (the MT19937 "twist").
 *
 * The original paper and implementation call for the following code:
 *  for (i = 0; i < MT_SIZE; ++i) {
 *      uint32_t load;
 *      load  = (0x80000000 & mt_state[i]);                  // most significant bit
 *      load |= (0x7FFFFFFF & mt_state[(i + 1) % MT_SIZE]);  // least significant 31 bits
 *
 *      mt_state[i] = mt_state[(i + MT_PERIOD) % MT_SIZE] ^ (load >> 1);
 *
 *      if (load & 1) mt_state[i] ^= 0x9908B0DF;
 *  }
 *
 * This essentially is a waste: we have two modulus operations, and
 * a branch that is executed every iteration from [0, MT_SIZE).
 *
 * Please see: http://www.quadibloc.com/crypto/co4814.htm for more
 * information on how this clever trick works.
 *
 * The version below produces exactly the same state as the loop above,
 * but splits the index range into three parts so that neither modulus
 * is ever needed, and replaces the branch with a two entry lookup table.
 */
static void mt_generate(void) {
    /* matrix[load & 1] is what gets XORed in: nothing, or the twist constant */
    static const uint32_t matrix[2] = {
        0x00000000,
        0x9908B0DF
    };

    uint32_t y;
    size_t   i;

    /*
     * [0, MT_SPACE), 227 words: i + 1 and i + MT_PERIOD are both still
     * inside the array, so no wrap-around is required.
     */
    for (i = 0; i < MT_SPACE; ++i) {
        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
        mt_state[i] = mt_state[i + MT_PERIOD] ^ (y >> 1) ^ matrix[y & 1];
    }

    /*
     * [MT_SPACE, MT_SIZE - 1), 396 words (623 - 227 = 396): here
     * (i + MT_PERIOD) % MT_SIZE equals i - MT_SPACE, which refers to a
     * word that has already been regenerated above.
     */
    for (; i < MT_SIZE - 1; ++i) {
        y           = (0x80000000 & mt_state[i]) | (0x7FFFFFFF & mt_state[i + 1]);
        mt_state[i] = mt_state[i - MT_SPACE] ^ (y >> 1) ^ matrix[y & 1];
    }

    /*
     * i = MT_SIZE - 1 (mt_state[623]): the "next" word wraps around to
     * mt_state[0], and (i + MT_PERIOD) % MT_SIZE is MT_PERIOD - 1.
     */
    y                     = (0x80000000 & mt_state[MT_SIZE - 1]) | (0x7FFFFFFF & mt_state[0]);
    mt_state[MT_SIZE - 1] = mt_state[MT_PERIOD - 1] ^ (y >> 1) ^ matrix[y & 1];
}
516 void util_seed(uint32_t value) {
518 * We seed the mt_state with a LCG (linear congruential generator)
519 * We're operating on exactly m=32, so there is no need to
522 * The multiplier of choice is 0x6C078965, also known as the Borosh-
523 * Niederreiter multiplier used for modulus 2^32. More can be read
524 * about this in Knuth's TAOCP Volume 2, page 106.
526 * If you don't own TAOCP something is wrong with you :-) .. so I
527 * also provided a link to the original paper by Borosh and
528 * Niederreiter. It's called "Optimal Multipliers for PRNG by The
529 * Linear Congruential Method" (1983).
530 * http://en.wikipedia.org/wiki/Linear_congruential_generator
532 * From said page, it says the following:
533 * "A common Mersenne twister implementation, interestingly enough
534 * used an LCG to generate seed data."
537 * The data we're operating on is 32-bits for the mt_state array, so
538 * there is no masking required with 0xFFFFFFFF
543 for (i = 1; i < MT_SIZE; ++i)
544 mt_state[i] = 0x6C078965 * (mt_state[i - 1] ^ mt_state[i - 1] >> 30) + i;
547 uint32_t util_rand() {
551 * This is inlined with any sane compiler (I checked)
552 * for some reason though, SubC seems to be generating invalid
553 * code when it inlines this.
558 y = mt_state[mt_index];
560 /* Standard tempering */
561 y ^= y >> 11; /* +7 */
562 y ^= y << 7 & 0x9D2C5680; /* +4 */
563 y ^= y << 15 & 0xEFC60000; /* -4 */
564 y ^= y >> 18; /* -7 */
566 if(++mt_index == MT_SIZE)