Fix murmur hash seeding

[xonotic/gmqcc.git] / util.c
diff --git a/util.c b/util.c

index 02e4e6c1986a89852c7ec6a19f7458a133a94d4e..d1eb5b43754052448cb655dcfa57ae82ba72aa6c 100644 (file)
--- a/util.c
+++ b/util.c
@@ -1,6 +1,7 @@
  /*
   * Copyright (C) 2012
   *     Dale Weiler
+ *     Wolfgang Bumiller
   *
   * Permission is hereby granted, free of charge, to any person obtaining a copy of
   * this software and associated documentation files (the "Software"), to deal in
@@ -24,46 +25,126 @@
  #include <errno.h>
  #include "gmqcc.h"
  
-unsigned long long mem_ab = 0;
-unsigned long long mem_db = 0;
-unsigned long long mem_at = 0;
-unsigned long long mem_dt = 0;
+uint64_t mem_ab = 0;
+uint64_t mem_db = 0;
+uint64_t mem_at = 0;
+uint64_t mem_dt = 0;
  
  struct memblock_t {
      const char  *file;
      unsigned int line;
-    unsigned int byte;
+    size_t       byte;
+    struct memblock_t *next;
+    struct memblock_t *prev;
  };
  
-void *util_memory_a(unsigned int byte, unsigned int line, const char *file) {
+static struct memblock_t *mem_start = NULL;
+
+void *util_memory_a(size_t byte, unsigned int line, const char *file) {
      struct memblock_t *info = malloc(sizeof(struct memblock_t) + byte);
-    void              *data =(void*)((uintptr_t)info+sizeof(struct memblock_t));
-    if (!data) return NULL;
+    void              *data = (void*)(info+1);
+    if (!info) return NULL;
      info->line = line;
      info->byte = byte;
      info->file = file;
+    info->prev = NULL;
+    info->next = mem_start;
+    if (mem_start)
+        mem_start->prev = info;
+    mem_start = info;
  
-    util_debug("MEM", "allocation: % 8u (bytes) address 0x%08X @ %s:%u\n", byte, data, file, line);
+    util_debug("MEM", "allocation:   % 8u (bytes) address 0x%08X @ %s:%u\n", byte, data, file, line);
      mem_at++;
      mem_ab += info->byte;
+
      return data;
  }
  
  void util_memory_d(void *ptrn, unsigned int line, const char *file) {
+    struct memblock_t *info = NULL;
+
      if (!ptrn) return;
-    void              *data = (void*)((uintptr_t)ptrn-sizeof(struct memblock_t));
-    struct memblock_t *info = (struct memblock_t*)data;
+    info = ((struct memblock_t*)ptrn - 1);
  
-    util_debug("MEM", "released:   % 8u (bytes) address 0x%08X @ %s:%u\n", info->byte, data, file, line);
+    util_debug("MEM", "released:     % 8u (bytes) address 0x%08X @ %s:%u\n", info->byte, ptrn, file, line);
      mem_db += info->byte;
      mem_dt++;
-    free(data);
+
+    if (info->prev)
+        info->prev->next = info->next;
+    if (info->next)
+        info->next->prev = info->prev;
+    if (info == mem_start)
+        mem_start = info->next;
+
+    free(info);
+}
+
+void *util_memory_r(void *ptrn, size_t byte, unsigned int line, const char *file) {
+    struct memblock_t *oldinfo = NULL;
+
+    struct memblock_t *newinfo;
+
+    if (!ptrn)
+        return util_memory_a(byte, line, file);
+    if (!byte) {
+        util_memory_d(ptrn, line, file);
+        return NULL;
+    }
+
+    oldinfo = ((struct memblock_t*)ptrn - 1);
+    newinfo = ((struct memblock_t*)malloc(sizeof(struct memblock_t) + byte));
+
+    util_debug("MEM", "reallocation: % 8u -> %u (bytes) address 0x%08X -> 0x%08X @ %s:%u\n", oldinfo->byte, byte, ptrn, (void*)(newinfo+1), file, line);
+
+    /* new data */
+    if (!newinfo) {
+        util_memory_d(oldinfo+1, line, file);
+        return NULL;
+    }
+
+    /* copy old */
+    memcpy(newinfo+1, oldinfo+1, oldinfo->byte);
+
+    /* free old */
+    if (oldinfo->prev)
+        oldinfo->prev->next = oldinfo->next;
+    if (oldinfo->next)
+        oldinfo->next->prev = oldinfo->prev;
+    if (oldinfo == mem_start)
+        mem_start = oldinfo->next;
+
+    /* fill info */
+    newinfo->line = line;
+    newinfo->byte = byte;
+    newinfo->file = file;
+    newinfo->prev = NULL;
+    newinfo->next = mem_start;
+    if (mem_start)
+        mem_start->prev = newinfo;
+    mem_start = newinfo;
+
+    mem_ab -= oldinfo->byte;
+    mem_ab += newinfo->byte;
+
+    free(oldinfo);
+
+    return newinfo+1;
  }
  
  void util_meminfo() {
-    if (!opts_memchk)
+    struct memblock_t *info;
+
+    if (!opts.memchk)
          return;
  
+    for (info = mem_start; info; info = info->next) {
+        util_debug("MEM", "lost:       % 8u (bytes) at %s:%u\n",
+            info->byte,
+            info->file,
+            info->line);
+    }
+
      util_debug("MEM", "Memory information:\n\
          Total allocations:   %llu\n\
          Total deallocations: %llu\n\
@@ -100,9 +181,9 @@ char *util_strdup(const char *s) {
   * as well.  This function shouldn't be used to create a
   * char array that is later freed (it uses pointer arith)
   */
-char *util_strrq(char *s) {
-    char *dst = s;
-    char *src = s;
+char *util_strrq(const char *s) {
+    char *dst = (char*)s;
+    char *src = (char*)s;
      char  chr;
      while ((chr = *src++) != '\0') {
          if (chr == '\\') {
@@ -118,31 +199,62 @@ char *util_strrq(char *s) {
  }
  
  /*
- * Remove newline from a string (if it exists).  This is
- * done pointer wise instead of strlen(), and an array
- * access.
+ * Chops a substring from an existing string by creating a
+ * copy of it and null terminating it at the required position.
   */
-char *util_strrnl(char *src) {
-    if (!src) return NULL;
-    char   *cpy = src;
-    while (*cpy && *cpy != '\n')
-        cpy++;
-
-    *cpy = '\0';
-    return src;
+char *util_strchp(const char *s, const char *e) {
+    const char *c = NULL;
+    if (!s || !e)
+        return NULL;
+
+    c = s;
+    while (c != e)
+        c++;
+
+    return util_strdup(s);
+}
+
+/*
+ * Returns true if string is all uppercase, otherwise
+ * it returns false.
+ */
+bool util_strupper(const char *str) {
+    while (*str) {
+        if(!isupper(*str))
+            return false;
+        str++;
+    }
+    return true;
+}
+
+/*
+ * Returns true if string is all digits, otherwise
+ * it returns false.
+ */
+bool util_strdigit(const char *str) {
+    while (*str) {
+        if(!isdigit(*str))
+            return false;
+        str++;
+    }
+    return true;
+}
+
+bool util_strncmpexact(const char *src, const char *ned, size_t len) {
+    return (!strncmp(src, ned, len) && !src[len]);
  }
  
  void util_debug(const char *area, const char *ms, ...) {
-    if (!opts_debug)
+    va_list  va;
+    if (!opts.debug)
+        return;
+
+    if (!strcmp(area, "MEM") && !opts.memchk)
          return;
  
-    va_list  va;
      va_start(va, ms);
-    fprintf (stdout, "DEBUG: ");
-    fputc   ('[',  stdout);
-    fprintf(stdout, "%s", area);
-    fputs   ("] ", stdout);
-    vfprintf(stdout, ms, va);
+    con_out ("[%s] ", area);
+    con_vout(ms, va);
      va_end  (va);
  }
  
@@ -159,8 +271,8 @@ void util_endianswap(void *m, int s, int l) {
      if(*((char *)&s))
          return;
  
-    for(; w < l; w++) {
-        for(;  i < s << 1; i++) {
+    for(; w < (size_t)l; w++) {
+        for(;  i < (size_t)(s << 1); i++) {
              unsigned char *p = (unsigned char *)m+w*s;
              unsigned char  t = p[i];
              p[i]             = p[s-i-1];
@@ -169,6 +281,28 @@ void util_endianswap(void *m, int s, int l) {
      }
  }
  
+/*
+ * CRC algorithms vary in the width of the polynomial, the value of said polynomial,
+ * the initial value used for the register, weather the bits of each byte are reflected
+ * before being processed, weather the algorithm itself feeds input bytes through the
+ * register or XORs them with a byte from one end and then straight into the table, as
+ * well as (but not limited to the idea of reflected versions) where the final register
+ * value becomes reversed, and finally weather the value itself is used to XOR the final
+ * register value.  AS such you can already imagine how painfully annoying CRCs are,
+ * of course we stand to target Quake, which expects it's certian set of rules for proper
+ * calculation of a CRC.
+ *
+ * In most traditional CRC algorithms on uses a reflected table driven method where a value
+ * or register is reflected if it's bits are swapped around it's center.  For example:
+ * take the bits 0101 is the 4-bit reflection of 1010, and respectfully 0011 would be the
+ * reflection of 1100. Quakle however expects a NON-Reflected CRC on the output, but still
+ * requires a final XOR on the values (0xFFFF and 0x0000) this is a standard CCITT CRC-16
+ * which I respectfully as a programmer don't agree with.
+ *
+ * So now you know what we target, and why we target it, despite how unsettling it may seem
+ * but those are what Quake seems to request.
+ */
+
  /*
   * This is an implementation of CRC32 & CRC16. The polynomials have been
   * offline computed for faster generation at the cost of larger code size.
@@ -266,21 +400,33 @@ static const uint16_t util_crc16_table[] = {
      0x8FD9,     0x9FF8,     0x6E17,     0x7E36,     0x4E55,     0x5E74,
      0x2E93,     0x3EB2,     0x0ED1,     0x1EF0
  };
-    
+
  /*
   * Implements a CRC function for X worth bits using (uint[X]_t)
   * as type. and util_crc[X]_table.
+
+ * Quake expects a non-reflective CRC.
   */
  #define CRC(X) \
+uint##X##_t util_crc##X(uint##X##_t current, const char *k, size_t len) {  \
+    register uint##X##_t h= current;                                  \
+    for (; len; --len, ++k)                                           \
+        h = util_crc##X##_table[(h>>8)^((unsigned char)*k)]^(h<<8);   \
+    return h;                                                         \
+}
+CRC(32)
+CRC(16)
+#undef CRC
+/*
+#define CRC(X) \
  uint##X##_t util_crc##X(const char *k, int len, const short clamp) {  \
      register uint##X##_t h= (uint##X##_t)0xFFFFFFFF;                  \
      for (; len; --len, ++k)                                           \
          h = util_crc##X##_table[(h^((unsigned char)*k))&0xFF]^(h>>8); \
      return (~h)%clamp;                                                \
  }
-CRC(32)
-CRC(16)
-#undef CRC
+*/
+
  
  /*
   * Implements libc getline for systems that don't have it, which is
@@ -294,7 +440,7 @@ int util_getline(char **lineptr, size_t *n, FILE *stream) {
      if (!lineptr || !n || !stream)
          return -1;
      if (!*lineptr) {
-        if (!(*lineptr = mem_a((*n=64))))
+        if (!(*lineptr = (char*)mem_a((*n=64))))
              return -1;
      }
  
@@ -305,16 +451,10 @@ int util_getline(char **lineptr, size_t *n, FILE *stream) {
          int c = getc(stream);
  
          if (chr < 2) {
-            char *tmp = mem_a((*n+=(*n>16)?*n:64));
-            if  (!tmp)
-                return -1;
-
+            *n += (*n > 16) ? *n : 64;
              chr = *n + *lineptr - pos;
-            strcpy(tmp,*lineptr);
-            if (!(*lineptr = tmp)) {
-                mem_d (tmp);
+            if (!(*lineptr = (char*)mem_r(*lineptr,*n)))
                  return -1;
-            }
              pos = *n - chr + *lineptr;
          }
  
@@ -335,3 +475,266 @@ int util_getline(char **lineptr, size_t *n, FILE *stream) {
      *pos = '\0';
      return (ret = pos - *lineptr);
  }
+
+size_t util_strtocmd(const char *in, char *out, size_t outsz) {
+    size_t sz = 1;
+    for (; *in && sz < outsz; ++in, ++out, ++sz)
+        *out = (*in == '-') ? '_' : (isalpha(*in) && !isupper(*in)) ? *in + 'A' - 'a': *in;
+    *out = 0;
+    return sz-1;
+}
+
+size_t util_strtononcmd(const char *in, char *out, size_t outsz) {
+    size_t sz = 1;
+    for (; *in && sz < outsz; ++in, ++out, ++sz)
+        *out = (*in == '_') ? '-' : (isalpha(*in) && isupper(*in)) ? *in + 'a' - 'A' : *in;
+    *out = 0;
+    return sz-1;
+}
+
+
+FILE *util_fopen(const char *filename, const char *mode)
+{
+#ifdef _MSC_VER
+    FILE *out;
+    if (fopen_s(&out, filename, mode) != 0)
+        return NULL;
+    return out;
+#else
+    return fopen(filename, mode);
+#endif
+}
+
+void _util_vec_grow(void **a, size_t i, size_t s) {
+    size_t m = *a ? 2*_vec_beg(*a)+i : i+1;
+    void  *p = mem_r((*a ? _vec_raw(*a) : NULL), s * m + sizeof(size_t)*2);
+    if (!*a)
+        ((size_t*)p)[1] = 0;
+    *a = (void*)((size_t*)p + 2);
+    _vec_beg(*a) = m;
+}
+
+/*
+ * Hash table for generic data, based on dynamic memory allocations
+ * all around.  This is the internal interface, please look for
+ * EXPOSED INTERFACE comment below
+ */
+typedef struct hash_node_t {
+    char               *key;   /* the key for this node in table */
+    void               *value; /* pointer to the data as void*   */
+    struct hash_node_t *next;  /* next node (linked list)        */
+} hash_node_t;
+
+/*
+ * x86 and x86_64 optimized murmur hash functions for the hashtable
+ * we have individual implementations for optimal performance.
+ *
+ * Forced inlined as we wrap these up in the actual utility function
+ * below.  These should be autovectorized by gcc.
+ */
+#ifdef __x86_64__
+static GMQCC_INLINE uint32_t util_hthashfunc(hash_table_t *ht, const char *key, size_t seed) {
+    const uint64_t       mix   = 0xC6A4A7935BD1E995UL;
+    const int            rot   = 47;
+    size_t               size  = strlen(key);
+    uint64_t             hash  = seed ^ (size - mix);
+    uint64_t             alias = 0;
+    const uint64_t      *beg   = (const uint64_t*)key;
+    const uint64_t      *end   = beg + (size / 8);
+    const unsigned char *final = NULL;
+
+    while (beg != end) {
+        alias = *beg++;
+
+        alias *= mix;
+        alias ^= alias >> rot;
+        alias *= mix;
+
+        hash  ^= alias;
+        hash  *= mix;
+    }
+
+    final = (const unsigned char *)beg;
+
+    switch (size & 7) {
+        case 7: hash ^= (uint64_t)(final[6]) << 48;
+        case 6: hash ^= (uint64_t)(final[5]) << 40;
+        case 5: hash ^= (uint64_t)(final[4]) << 32;
+        case 4: hash ^= (uint64_t)(final[3]) << 24;
+        case 3: hash ^= (uint64_t)(final[2]) << 16;
+        case 2: hash ^= (uint64_t)(final[1]) << 8;
+        case 1: hash ^= (uint64_t)(final[0]);
+                hash *= mix;
+    }
+
+    hash ^= hash >> rot;
+    hash *= mix;
+    hash ^= hash >> rot;
+
+    return (uint32_t)(hash % ht->size);
+}
+
+#else
+static GMQCC_INLINE uint32_t util_hthashfunc(hash_table_t *ht, const char *key, size_t seed) {
+    const uint32_t       mix   = 0x5BD1E995;
+    const uint32_t       rot   = 24;
+    size_t               size  = strlen(key);
+    uint32_t             hash  = seed ^ size;
+    uint32_t             alias = 0;
+    const unsigned char *data  = (const unsigned char*)key;
+
+    while (size >= 4) {
+        alias = *(uint32_t*)data;
+
+        alias *= mix;
+        alias ^= alias >> rot;
+        alias *= mix;
+
+        hash  *= mix;
+        hash  ^= alias;
+
+        data += 4;
+        size -= 4;
+    }
+
+    switch (size) {
+        case 3: hash ^= data[2] << 16;
+        case 2: hash ^= data[1] << 8;
+        case 1: hash ^= data[0];
+                hash *= mix;
+    }
+
+    hash ^= hash >> 13;
+    hash *= mix;
+    hash ^= hash >> 15;
+
+    return hash % ht->size;
+}
+#endif
+
+/* we use the crc table as seeds for the murmur hash :P */
+size_t util_hthash(hash_table_t *ht, const char *key) {
+    static   size_t seed = 0;
+    register size_t hash = util_hthashfunc(ht, key, util_crc32_table[seed++]);
+
+    /* reset seed */
+    if (seed >= sizeof(util_crc32_table) / sizeof(*util_crc32_table))
+        seed  = 0;
+
+    return hash;
+}
+
+hash_node_t *_util_htnewpair(const char *key, void *value) {
+    hash_node_t *node;
+    if (!(node = mem_a(sizeof(hash_node_t))))
+        return NULL;
+
+    if (!(node->key = util_strdup(key))) {
+        mem_d(node);
+        return NULL;
+    }
+
+    node->value = value;
+    node->next  = NULL;
+
+    return node;
+}
+
+/*
+ * EXPOSED INTERFACE for the hashtable implementation
+ * util_htnew(size)                             -- to make a new hashtable
+ * util_htset(table, key, value, sizeof(value)) -- to set something in the table
+ * util_htget(table, key)                       -- to get something from the table
+ * util_htdel(table)                            -- to delete the table
+ */
+hash_table_t *util_htnew(size_t size) {
+    hash_table_t *hashtable = NULL;
+    if (size < 1)
+        return NULL;
+
+    if (!(hashtable = mem_a(sizeof(hash_table_t))))
+        return NULL;
+
+    if (!(hashtable->table = mem_a(sizeof(hash_node_t*) * size))) {
+        mem_d(hashtable);
+        return NULL;
+    }
+
+    hashtable->size = size;
+    memset(hashtable->table, 0, sizeof(hash_node_t*) * size);
+
+    return hashtable;
+}
+
+void util_htseth(hash_table_t *ht, const char *key, size_t bin, void *value) {
+    hash_node_t *newnode = NULL;
+    hash_node_t *next    = NULL;
+    hash_node_t *last    = NULL;
+
+    next = ht->table[bin];
+
+    while (next && next->key && strcmp(key, next->key) > 0)
+        last = next, next = next->next;
+
+    /* already in table, do a replace */
+    if (next && next->key && strcmp(key, next->key) == 0) {
+        next->value = value;
+    } else {
+        /* not found, grow a pair man :P */
+        newnode = _util_htnewpair(key, value);
+        if (next == ht->table[bin]) {
+            newnode->next  = next;
+            ht->table[bin] = newnode;
+        } else if (!next) {
+            last->next = newnode;
+        } else {
+            newnode->next = next;
+            last->next = newnode;
+        }
+    }
+}
+
+void util_htset(hash_table_t *ht, const char *key, void *value) {
+    util_htseth(ht, key, util_hthash(ht, key), value);
+}
+
+void *util_htgeth(hash_table_t *ht, const char *key, size_t bin) {
+    hash_node_t *pair = ht->table[bin];
+
+    while (pair && pair->key && strcmp(key, pair->key) > 0)
+        pair = pair->next;
+
+    if (!pair || !pair->key || strcmp(key, pair->key) != 0)
+        return NULL;
+
+    return pair->value;
+}
+
+void *util_htget(hash_table_t *ht, const char *key) {
+    return util_htgeth(ht, key, util_hthash(ht, key));
+}
+
+/*
+ * Free all allocated data in a hashtable, this is quite the amount
+ * of work.
+ */
+void util_htdel(hash_table_t *ht) {
+    size_t i = 0;
+    for (; i < ht->size; i++) {
+        hash_node_t *n = ht->table[i];
+        hash_node_t *p;
+
+        /* free in list */
+        while (n) {
+            if (n->key)
+                mem_d(n->key);
+            p = n;
+            n = n->next;
+            mem_d(p);
+        }
+
+    }
+    /* free table */
+    mem_d(ht->table);
+    mem_d(ht);
+}