+/*
+ * Copyright (C) 2012, 2013
+ * Dale Weiler
+ * Wolfgang Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+
#include "gmqcc.h"
+/*
+ * For the valgrind integration of our allocator. This allows us to have
+ * more `accurate` valgrind output for our allocator, and also secures the
+ * possible underflows (where one could obtain access to the redzone that
+ * represents info about that allocation).
+ */
+#ifndef NVALGRIND
+# include <valgrind/valgrind.h>
+# include <valgrind/memcheck.h>
+#else
+# define VALGRIND_MALLOCLIKE_BLOCK(PTR, ALLOC_SIZE, REDZONE_SIZE, ZEROED)
+# define VALGRIND_FREELIKE_BLOCK(PTR, REDZONE_SIZE)
+# define VALGRIND_MAKE_MEM_DEFINED(PTR, REDZONE_SIZE)
+# define VALGRIND_MAKE_MEM_NOACCESS(PTR, REDZONE_SIZE)
+#endif
+
/*
 * GMQCC performs tons of allocations, constructions, and craziness
 * all around. When trying to optimize systems, or just get fancy
const char *file;
size_t line;
size_t size;
+ const char *expr;
struct stat_mem_block_s *next;
struct stat_mem_block_s *prev;
} stat_mem_block_t;
static uint64_t stat_mem_deallocated_total = 0;
static uint64_t stat_mem_high = 0;
static uint64_t stat_mem_peak = 0;
+static uint64_t stat_mem_strdups = 0;
static uint64_t stat_used_strdups = 0;
static uint64_t stat_used_vectors = 0;
static uint64_t stat_used_hashtables = 0;
static stat_size_table_t stat_size_hashtables = NULL;
static stat_mem_block_t *stat_mem_block_root = NULL;
-
/*
 * A tiny size_t key-value hashtable for tracking vector and hashtable
 * sizes. We can use it for other things too, if we need to. This is
 * very TIGHT, and efficient in terms of space though.
*/
-static stat_size_table_t stat_size_new() {
+static stat_size_table_t stat_size_new(void) {
return (stat_size_table_t)memset(
mem_a(sizeof(stat_size_entry_t*) * ST_SIZE),
0, ST_SIZE * sizeof(stat_size_entry_t*)
size_t hash = (key % ST_SIZE);
while (table[hash] && table[hash]->key != key)
hash = (hash + 1) % ST_SIZE;
- table[hash] = (stat_size_entry_t*)mem_a(sizeof(stat_size_entry_t));
- table[hash]->key = key;
+ table[hash] = (stat_size_entry_t*)mem_a(sizeof(stat_size_entry_t));
+ table[hash]->key = key;
table[hash]->value = value;
}
* information as a header, returns the memory + 1 past it, can be
* retrieved again with - 1. Where type is stat_mem_block_t*.
*/
-void *stat_mem_allocate(size_t size, size_t line, const char *file) {
+/*
+ * Allocates a tracked block of `size` bytes. The tracking header
+ * (stat_mem_block_t) records the call site (file/line) and originating
+ * expression, and lives immediately before the returned pointer.
+ * Returns NULL on OOM.
+ */
+void *stat_mem_allocate(size_t size, size_t line, const char *file, const char *expr) {
     stat_mem_block_t *info = (stat_mem_block_t*)malloc(sizeof(stat_mem_block_t) + size);
     void *data = (void*)(info + 1);
-
-    if(!info)
+
+    if(GMQCC_UNLIKELY(!info))
         return NULL;
-
+
     info->line = line;
     info->size = size;
     info->file = file;
+    info->expr = expr;
     info->prev = NULL;
     info->next = stat_mem_block_root;
-
-    if (stat_mem_block_root)
+
+    /* likely: the root is NULL only for the very first allocation */
+    if (GMQCC_LIKELY(stat_mem_block_root != NULL)) {
+        VALGRIND_MAKE_MEM_DEFINED(stat_mem_block_root, sizeof(stat_mem_block_t));
         stat_mem_block_root->prev = info;
-
+        VALGRIND_MAKE_MEM_NOACCESS(stat_mem_block_root, sizeof(stat_mem_block_t));
+    }
+
     stat_mem_block_root = info;
     stat_mem_allocated += size;
     stat_mem_high += size;
     stat_mem_allocated_total ++;
-
+
     if (stat_mem_high > stat_mem_peak)
         stat_mem_peak = stat_mem_high;
+    /* the header itself becomes the redzone for this "custom" allocation */
+    VALGRIND_MALLOCLIKE_BLOCK(data, size, sizeof(stat_mem_block_t), 0);
     return data;
 }
+/*
+ * Releases a block obtained from stat_mem_allocate/stat_mem_reallocate,
+ * unlinking its tracking header from the global list and updating the
+ * allocation counters. A NULL ptr is a no-op (free() semantics).
+ */
 void stat_mem_deallocate(void *ptr) {
     stat_mem_block_t *info = NULL;
-
-    if (!ptr)
+
+    if (GMQCC_UNLIKELY(!ptr))
         return;
-
+
     info = ((stat_mem_block_t*)ptr - 1);
-
+
+    /*
+     * we need access to the redzone that represents the info block
+     * so lets do that.
+     */
+    VALGRIND_MAKE_MEM_DEFINED(info, sizeof(stat_mem_block_t));
+
     stat_mem_deallocated += info->size;
     stat_mem_high -= info->size;
     stat_mem_deallocated_total ++;
-
-    if (info->prev) info->prev->next = info->next;
-    if (info->next) info->next->prev = info->prev;
-
+
+    if (info->prev) {
+        /* just need access for a short period */
+        VALGRIND_MAKE_MEM_DEFINED(info->prev, sizeof(stat_mem_block_t));
+        info->prev->next = info->next;
+        /* don't need access anymore */
+        VALGRIND_MAKE_MEM_NOACCESS(info->prev, sizeof(stat_mem_block_t));
+    }
+    if (info->next) {
+        /* just need access for a short period */
+        VALGRIND_MAKE_MEM_DEFINED(info->next, sizeof(stat_mem_block_t));
+        info->next->prev = info->prev;
+        /* don't need access anymore */
+        VALGRIND_MAKE_MEM_NOACCESS(info->next, sizeof(stat_mem_block_t));
+    }
+
     /* move ahead */
     if (info == stat_mem_block_root)
         stat_mem_block_root = info->next;
-
+
     free(info);
+    /*
+     * NOTE(review): `info` is annotated NOACCESS after free(); the
+     * address is only passed to a valgrind client request, but the
+     * conventional order is to annotate before releasing — confirm
+     * against the valgrind memcheck docs.
+     */
+    VALGRIND_MAKE_MEM_NOACCESS(info, sizeof(stat_mem_block_t));
+    VALGRIND_FREELIKE_BLOCK(ptr, sizeof(stat_mem_block_t));
 }
-void *stat_mem_reallocate(void *ptr, size_t size, size_t line, const char *file) {
+/*
+ * Reallocates a tracked block: a NULL ptr acts like an allocation and
+ * a zero size acts like a deallocation (glibc realloc semantics).
+ * Returns NULL on failure; the old block is released in that case.
+ */
+void *stat_mem_reallocate(void *ptr, size_t size, size_t line, const char *file, const char *expr) {
     stat_mem_block_t *oldinfo = NULL;
     stat_mem_block_t *newinfo;
-
-    if (!ptr)
-        return stat_mem_allocate(size, line, file);
-
-    /* stay consistent with glic */
-    if (!size) {
+
+    if (GMQCC_UNLIKELY(!ptr))
+        return stat_mem_allocate(size, line, file, expr);
+
+    /* stay consistent with glibc */
+    if (GMQCC_UNLIKELY(!size)) {
         stat_mem_deallocate(ptr);
         return NULL;
     }
-
+
     oldinfo = ((stat_mem_block_t*)ptr - 1);
     newinfo = ((stat_mem_block_t*)malloc(sizeof(stat_mem_block_t) + size));
-
-    if (!newinfo) {
+
+    if (GMQCC_UNLIKELY(!newinfo)) {
         stat_mem_deallocate(ptr);
         return NULL;
     }
-
-    memcpy(newinfo+1, oldinfo+1, oldinfo->size);
-
-    if (oldinfo->prev) oldinfo->prev->next = oldinfo->next;
-    if (oldinfo->next) oldinfo->next->prev = oldinfo->prev;
-
+
+    VALGRIND_MALLOCLIKE_BLOCK(newinfo + 1, size, sizeof(stat_mem_block_t), 0);
+
+    /* we need access to the old info redzone */
+    VALGRIND_MAKE_MEM_DEFINED(oldinfo, sizeof(stat_mem_block_t));
+
+    /*
+     * only copy as much as the smaller of the two blocks can hold;
+     * copying the full oldinfo->size into a smaller new block would
+     * overflow the new allocation when shrinking.
+     */
+    memcpy(newinfo+1, oldinfo+1, (oldinfo->size < size) ? oldinfo->size : size);
+
+    if (oldinfo->prev) {
+        /* just need access for a short period */
+        VALGRIND_MAKE_MEM_DEFINED(oldinfo->prev, sizeof(stat_mem_block_t));
+        oldinfo->prev->next = oldinfo->next;
+        /* don't need access anymore */
+        VALGRIND_MAKE_MEM_NOACCESS(oldinfo->prev, sizeof(stat_mem_block_t));
+    }
+
+    if (oldinfo->next) {
+        /* just need access for a short period */
+        VALGRIND_MAKE_MEM_DEFINED(oldinfo->next, sizeof(stat_mem_block_t));
+        oldinfo->next->prev = oldinfo->prev;
+        /* don't need access anymore */
+        VALGRIND_MAKE_MEM_NOACCESS(oldinfo->next, sizeof(stat_mem_block_t));
+    }
+
     /* move ahead */
     if (oldinfo == stat_mem_block_root)
         stat_mem_block_root = oldinfo->next;
-
+
+    /* we need access to the redzone for the newinfo block */
+    VALGRIND_MAKE_MEM_DEFINED(newinfo, sizeof(stat_mem_block_t));
+
     newinfo->line = line;
     newinfo->size = size;
     newinfo->file = file;
+    newinfo->expr = expr;
     newinfo->prev = NULL;
     newinfo->next = stat_mem_block_root;
-
-    if (stat_mem_block_root)
+
+    /*
+     * likely since the only time there is no root is when it's
+     * being initialized first.
+     */
+    if (GMQCC_LIKELY(stat_mem_block_root != NULL)) {
+        /* we need access to the root */
+        VALGRIND_MAKE_MEM_DEFINED(stat_mem_block_root, sizeof(stat_mem_block_t));
         stat_mem_block_root->prev = newinfo;
-
+        /* kill access */
+        VALGRIND_MAKE_MEM_NOACCESS(stat_mem_block_root, sizeof(stat_mem_block_t));
+    }
+
     stat_mem_block_root = newinfo;
     stat_mem_allocated -= oldinfo->size;
     stat_mem_high -= oldinfo->size;
     stat_mem_allocated += newinfo->size;
     stat_mem_high += newinfo->size;
-
+
+    /*
+     * we're finished with the redzones, lets kill the access
+     * to them.
+     */
+    VALGRIND_MAKE_MEM_NOACCESS(newinfo, sizeof(stat_mem_block_t));
+    VALGRIND_MAKE_MEM_NOACCESS(oldinfo, sizeof(stat_mem_block_t));
+
     if (stat_mem_high > stat_mem_peak)
         stat_mem_peak = stat_mem_high;
-
+
     free(oldinfo);
-
+    VALGRIND_FREELIKE_BLOCK(ptr, sizeof(stat_mem_block_t));
     return newinfo + 1;
 }
char *stat_mem_strdup(const char *src, size_t line, const char *file, bool empty) {
size_t len = 0;
char *ptr = NULL;
-
+
if (!src)
return NULL;
-
+
len = strlen(src);
- if (((!empty) ? len : true) && (ptr = (char*)stat_mem_allocate(len + 1, line, file))) {
+ if (((!empty) ? len : true) && (ptr = (char*)stat_mem_allocate(len + 1, line, file, "strdup"))) {
memcpy(ptr, src, len);
ptr[len] = '\0';
}
-
+
stat_used_strdups ++;
+ stat_mem_strdups += len;
return ptr;
}
size_t m = 0;
stat_size_entry_t *e = NULL;
void *p = NULL;
-
+
if (*a) {
m = 2 * d->allocated + i;
p = mem_r(d, s * m + sizeof(vector_t));
((vector_t*)p)->used = 0;
stat_used_vectors++;
}
-
+
if (!stat_size_vectors)
stat_size_vectors = stat_size_new();
struct hash_node_t *next; /* next node (linked list) */
} hash_node_t;
+/*
+ * This is a patched version of the Murmur2 hashing function to use
+ * a proper pre-mix and post-mix setup. In fact this is Murmur3 for
+ * the most part, just reinvented.
+ *
+ * Murmur 2 contains an inner loop such as:
+ * while (l >= 4) {
+ * u32 k = *(u32*)d;
+ * k *= m;
+ * k ^= k >> r;
+ * k *= m;
+ *
+ * h *= m;
+ * h ^= k;
+ * d += 4;
+ * l -= 4;
+ * }
+ *
+ * The two u32s that form the key are the same value x (pulled from data)
+ * this premix stage will perform the same results for both values. Unrolled
+ * this produces just:
+ * x *= m;
+ * x ^= x >> r;
+ * x *= m;
+ *
+ * h *= m;
+ * h ^= x;
+ * h *= m;
+ * h ^= x;
+ *
+ * This appears to be fine, except what happens when m == 1? well x
+ * cancels out entirely, leaving just:
+ * x ^= x >> r;
+ * h ^= x;
+ * h ^= x;
+ *
+ * So all keys hash to the same value, but how often does m == 1?
+ * well, it turns out testing x for all possible values yields only
+ * 172,013,942 unique results instead of 2^32. So nearly ~4.6 bits
+ * are cancelled out on average!
+ *
+ * This means we have a 14.5% (rounded) chance of colliding more, which
+ * results in another bucket/chain for the hashtable.
+ *
+ * We fix it by upgrading the pre- and post-mix systems to align with murmur
+ * hash 3.
+ */
+#if 1
+#define GMQCC_ROTL32(X, R) (((X) << (R)) | ((X) >> (32 - (R))))
+/*
+ * Murmur3-style string hash (see the comment above for why Murmur2's
+ * mixing was replaced). Hashes `key` and reduces it modulo the table
+ * size.
+ */
+GMQCC_INLINE size_t util_hthash(hash_table_t *ht, const char *key) {
+    const unsigned char *data   = (const unsigned char *)key;
+    const size_t         len    = strlen(key);
+    const size_t         block  = len / 4;
+    const uint32_t       mask1  = 0xCC9E2D51;
+    const uint32_t       mask2  = 0x1B873593;
+    const uint32_t      *blocks = (const uint32_t*)(data + block * 4);
+    const unsigned char *tail   = (const unsigned char *)(data + block * 4);
+
+    ptrdiff_t i;
+    uint32_t  k;
+    uint32_t  h = 0x1EF0 ^ len;
+
+    /*
+     * body: the usual Murmur negative-indexing trick off the tail
+     * pointer. The index must be a signed type (ptrdiff_t); using an
+     * unsigned index and relying on wraparound is undefined pointer
+     * arithmetic.
+     */
+    for (i = -((ptrdiff_t)block); i; i++) {
+        k  = blocks[i];
+        k *= mask1;
+        k  = GMQCC_ROTL32(k, 15);
+        k *= mask2;
+        h ^= k;
+        h  = GMQCC_ROTL32(h, 13);
+        h  = h * 5 + 0xE6546B64;
+    }
+
+    /* tail: 0-3 trailing bytes, cases deliberately fall through */
+    k = 0;
+    switch (len & 3) {
+        case 3:
+            k ^= tail[2] << 16;
+            /* fallthrough */
+        case 2:
+            k ^= tail[1] << 8;
+            /* fallthrough */
+        case 1:
+            k ^= tail[0];
+            k *= mask1;
+            k  = GMQCC_ROTL32(k, 15);
+            k *= mask2;
+            h ^= k;
+    }
+
+    /* finalization mix: force all bits of the hash to avalanche */
+    h ^= len;
+    h ^= h >> 16;
+    h *= 0x85EBCA6B;
+    h ^= h >> 13;
+    h *= 0xC2B2AE35;
+    h ^= h >> 16;
+
+    return (size_t) (h % ht->size);
+}
+#undef GMQCC_ROTL32
+#else
+/* We keep the old for reference */
GMQCC_INLINE size_t util_hthash(hash_table_t *ht, const char *key) {
const uint32_t mix = 0x5BD1E995;
const uint32_t rot = 24;
return (size_t) (hash % ht->size);
}
+#endif
static hash_node_t *_util_htnewpair(const char *key, void *value) {
hash_node_t *node;
hash_table_t *util_htnew(size_t size) {
hash_table_t *hashtable = NULL;
stat_size_entry_t *find = NULL;
-
+
if (size < 1)
return NULL;
-
+
if (!stat_size_hashtables)
stat_size_hashtables = stat_size_new();
mem_d(hashtable);
return NULL;
}
-
+
if ((find = stat_size_get(stat_size_hashtables, size)))
find->value++;
else {
- stat_used_hashtables++;
+ stat_type_hashtables++;
stat_size_put(stat_size_hashtables, size, 1);
}
hashtable->size = size;
memset(hashtable->table, 0, sizeof(hash_node_t*) * size);
- stat_type_hashtables++;
+ stat_used_hashtables++;
return hashtable;
}
return util_htgeth(ht, key, util_hthash(ht, key));
}
+void *code_util_str_htgeth(hash_table_t *ht, const char *key, size_t bin);
void *code_util_str_htgeth(hash_table_t *ht, const char *key, size_t bin) {
hash_node_t *pair;
size_t len, keylen;
*/
void util_htrem(hash_table_t *ht, void (*callback)(void *data)) {
size_t i = 0;
- for (; i < ht->size; i++) {
+
+ for (; i < ht->size; ++i) {
hash_node_t *n = ht->table[i];
hash_node_t *p;
if (callback)
callback(n->value);
p = n;
- n = n->next;
+ n = p->next;
mem_d(p);
}
* The following functions below implement printing / dumping of statistical
* information.
*/
-static void stat_dump_mem_contents(stat_mem_block_t *memory, uint16_t cols) {
- uint32_t i, j;
- for (i = 0; i < memory->size + ((memory->size % cols) ? (cols - memory->size % cols) : 0); i++) {
- if (i % cols == 0) con_out(" 0x%06X: ", i);
- if (i < memory->size) con_out("%02X " , 0xFF & ((unsigned char*)(memory + 1))[i]);
- else con_out(" ");
-
- if ((uint16_t)(i % cols) == (cols - 1)) {
- for (j = i - (cols - 1); j <= i; j++) {
- con_out("%c",
- (j >= memory->size)
- ? ' '
- : (isprint(((unsigned char*)(memory + 1))[j]))
- ? 0xFF & ((unsigned char*)(memory + 1)) [j]
- : '.'
- );
- }
- con_out("\n");
+static void stat_dump_mem_contents(stat_mem_block_t *block, uint16_t cols) {
+ unsigned char *buffer = mem_a(cols);
+ unsigned char *memory = (unsigned char *)(block + 1);
+ size_t i;
+
+ for (i = 0; i < block->size; i++) {
+ if (!(i % 16)) {
+ if (i != 0)
+ con_out(" %s\n", buffer);
+ con_out(" 0x%08X: ", i);
}
+
+ con_out(" %02X", memory[i]);
+
+ buffer[i % cols] = ((memory[i] < 0x20) || (memory[i] > 0x7E))
+ ? '.'
+ : memory[i];
+
+ buffer[(i % cols) + 1] = '\0';
}
+
+ while ((i % cols) != 0) {
+ con_out(" ");
+ i++;
+ }
+
+ con_out(" %s\n", buffer);
+ mem_d(buffer);
}
-static void stat_dump_mem_leaks() {
+static void stat_dump_mem_leaks(void) {
stat_mem_block_t *info;
+ /* we need access to the root for this */
+ VALGRIND_MAKE_MEM_DEFINED(stat_mem_block_root, sizeof(stat_mem_block_t));
for (info = stat_mem_block_root; info; info = info->next) {
- con_out("lost: %u (bytes) at %s:%u\n",
+ /* we need access to the block */
+ VALGRIND_MAKE_MEM_DEFINED(info, sizeof(stat_mem_block_t));
+ con_out("lost: %u (bytes) at %s:%u from expression `%s`\n",
info->size,
info->file,
- info->line
+ info->line,
+ info->expr
);
-
+
stat_dump_mem_contents(info, OPTS_OPTION_U16(OPTION_MEMDUMPCOLS));
+
+ /*
+ * we're finished with the access, the redzone should be marked
+ * inaccesible so that invalid read/writes that could 'step-into'
+ * those redzones will show up as invalid read/writes in valgrind.
+ */
+ VALGRIND_MAKE_MEM_NOACCESS(info, sizeof(stat_mem_block_t));
}
+ VALGRIND_MAKE_MEM_NOACCESS(stat_mem_block_root, sizeof(stat_mem_block_t));
}
-static void stat_dump_mem_info() {
- con_out("Memory information:\n\
+static void stat_dump_mem_info(void) {
+ con_out("Memory Information:\n\
Total allocations: %llu\n\
Total deallocations: %llu\n\
Total allocated: %f (MB)\n\
static void stat_dump_stats_table(stat_size_table_t table, const char *string, uint64_t *size) {
size_t i,j;
-
+
if (!table)
return;
-
- for (i = 0, j = 0; i < ST_SIZE; i++) {
+
+ for (i = 0, j = 1; i < ST_SIZE; i++) {
stat_size_entry_t *entry;
if (!(entry = table[i]))
con_out(string, (unsigned)j, (unsigned)entry->key, (unsigned)entry->value);
j++;
-
+
if (size)
*size += entry->key * entry->value;
}
}
void stat_info() {
- if (OPTS_OPTION_BOOL(OPTION_DEBUG))
- stat_dump_mem_leaks();
-
- if (OPTS_OPTION_BOOL(OPTION_DEBUG) ||
- OPTS_OPTION_BOOL(OPTION_MEMCHK))
- stat_dump_mem_info();
-
if (OPTS_OPTION_BOOL(OPTION_MEMCHK) ||
OPTS_OPTION_BOOL(OPTION_STATISTICS)) {
uint64_t mem = 0;
-
- con_out("\nAdditional Statistics:\n\
- Total vectors allocated: %llu\n\
- Total string duplicates: %llu\n\
- Total hashtables allocated: %llu\n\
- Total unique vector sizes: %llu\n",
+
+ con_out("Memory Statistics:\n\
+ Total vectors allocated: %llu\n\
+ Total string duplicates: %llu\n\
+ Total string duplicate memory: %f (MB)\n\
+ Total hashtables allocated: %llu\n\
+ Total unique vector sizes: %llu\n",
stat_used_vectors,
stat_used_strdups,
+ (float)(stat_mem_strdups) / 1048576.0f,
stat_used_hashtables,
stat_type_vectors
);
-
+
stat_dump_stats_table (
stat_size_vectors,
- " %2u| # of %4u byte vectors: %u\n",
+ " %2u| # of %5u byte vectors: %u\n",
&mem
);
-
+
con_out (
" Total unique hashtable sizes: %llu\n",
stat_type_hashtables
);
-
+
stat_dump_stats_table (
stat_size_hashtables,
- " %2u| # of %4u element hashtables: %u\n",
+ " %2u| # of %5u element hashtables: %u\n",
NULL
);
-
+
con_out (
- " Total vector memory: %f (MB)\n",
+ " Total vector memory: %f (MB)\n\n",
(float)(mem) / 1048576.0f
);
}
stat_size_del(stat_size_vectors);
if (stat_size_hashtables)
stat_size_del(stat_size_hashtables);
+
+ if (OPTS_OPTION_BOOL(OPTION_DEBUG) ||
+ OPTS_OPTION_BOOL(OPTION_MEMCHK))
+ stat_dump_mem_info();
+
+ if (OPTS_OPTION_BOOL(OPTION_DEBUG))
+ stat_dump_mem_leaks();
}
#undef ST_SIZE