X-Git-Url: https://git.xonotic.org/?p=xonotic%2Fgmqcc.git;a=blobdiff_plain;f=stat.c;h=ffb5b227f26283b41add9cb27aefee081fa94c8c;hp=721361fdb01114e8f51c631deb0e721c878f52c1;hb=89893f9a2468575e78eef64e8f6a2628034e9c56;hpb=3e75750ad6893e1af78474cc085cff64bb7c98c8 diff --git a/stat.c b/stat.c index 721361f..ffb5b22 100644 --- a/stat.c +++ b/stat.c @@ -24,10 +24,25 @@ #include #include -#include #include "gmqcc.h" +/* + * For the valgrind integration of our allocator. This allows us to have + * more `accurate` valgrind output for our allocator, and also secures the + * possible underflows (where one could obtain access to the redzone that + * represents info about that allocation). + */ +#ifndef NVALGRIND +# include +# include +#else +# define VALGRIND_MALLOCLIKE_BLOCK(PTR, ALLOC_SIZE, REDZONE_SIZE, ZEROED) +# define VALGRIND_FREELIKE_BLOCK(PTR, REDZONE_SIZE) +# define VALGRIND_MAKE_MEM_DEFINED(PTR, REDZONE_SIZE) +# define VALGRIND_MAKE_MEM_NOACCESS(PTR, REDZONE_SIZE) +#endif + /* * GMQCC performs tons of allocations, constructions, and crazyness * all around. When trying to optimizes systems, or just get fancy @@ -56,6 +71,7 @@ static uint64_t stat_mem_allocated_total = 0; static uint64_t stat_mem_deallocated_total = 0; static uint64_t stat_mem_high = 0; static uint64_t stat_mem_peak = 0; +static uint64_t stat_mem_strdups = 0; static uint64_t stat_used_strdups = 0; static uint64_t stat_used_vectors = 0; static uint64_t stat_used_hashtables = 0; @@ -93,8 +109,8 @@ static void stat_size_put(stat_size_table_t table, size_t key, size_t value) { size_t hash = (key % ST_SIZE); while (table[hash] && table[hash]->key != key) hash = (hash + 1) % ST_SIZE; - table[hash] = (stat_size_entry_t*)mem_a(sizeof(stat_size_entry_t)); - table[hash]->key = key; + table[hash] = (stat_size_entry_t*)mem_a(sizeof(stat_size_entry_t)); + table[hash]->key = key; table[hash]->value = value; } @@ -107,7 +123,7 @@ void *stat_mem_allocate(size_t size, size_t line, const char *file) { stat_mem_block_t *info = (stat_mem_block_t*)malloc(sizeof(stat_mem_block_t) + size); void *data = (void*)(info + 1); - if(!info) + if(GMQCC_UNLIKELY(!info)) return NULL; info->line = line; @@ -116,8 +132,12 @@ void *stat_mem_allocate(size_t size, size_t line, const char *file) { info->prev = NULL; info->next = stat_mem_block_root; - if (stat_mem_block_root) + /* likely since it only happens once */ + if (GMQCC_LIKELY(stat_mem_block_root != NULL)) { + VALGRIND_MAKE_MEM_DEFINED(stat_mem_block_root, sizeof(stat_mem_block_t)); stat_mem_block_root->prev = info; + VALGRIND_MAKE_MEM_NOACCESS(stat_mem_block_root, sizeof(stat_mem_block_t)); + } stat_mem_block_root = info; stat_mem_allocated += size; @@ -127,40 +147,61 @@ void *stat_mem_allocate(size_t size, size_t line, const char *file) { if (stat_mem_high > stat_mem_peak) stat_mem_peak = stat_mem_high; + VALGRIND_MALLOCLIKE_BLOCK(data, size, sizeof(stat_mem_block_t), 0); return data; } void stat_mem_deallocate(void *ptr) { stat_mem_block_t *info = NULL; - if (!ptr) + if (GMQCC_UNLIKELY(!ptr)) return; info = ((stat_mem_block_t*)ptr - 1); + /* + * we need access to the redzone that represents the info block + * so lets do that. + */ + VALGRIND_MAKE_MEM_DEFINED(info, sizeof(stat_mem_block_t)); + stat_mem_deallocated += info->size; stat_mem_high -= info->size; stat_mem_deallocated_total ++; - if (info->prev) info->prev->next = info->next; - if (info->next) info->next->prev = info->prev; + if (info->prev) { + /* just need access for a short period */ + VALGRIND_MAKE_MEM_DEFINED(info->prev, sizeof(stat_mem_block_t)); + info->prev->next = info->next; + /* don't need access anymore */ + VALGRIND_MAKE_MEM_NOACCESS(info->prev, sizeof(stat_mem_block_t)); + } + if (info->next) { + /* just need access for a short period */ + VALGRIND_MAKE_MEM_DEFINED(info->next, sizeof(stat_mem_block_t)); + info->next->prev = info->prev; + /* don't need access anymore */ + VALGRIND_MAKE_MEM_NOACCESS(info->next, sizeof(stat_mem_block_t)); + } /* move ahead */ if (info == stat_mem_block_root) stat_mem_block_root = info->next; free(info); + VALGRIND_MAKE_MEM_NOACCESS(info, sizeof(stat_mem_block_t)); + VALGRIND_FREELIKE_BLOCK(ptr, sizeof(stat_mem_block_t)); } void *stat_mem_reallocate(void *ptr, size_t size, size_t line, const char *file) { stat_mem_block_t *oldinfo = NULL; stat_mem_block_t *newinfo; - if (!ptr) + if (GMQCC_UNLIKELY(!ptr)) return stat_mem_allocate(size, line, file); - /* stay consistent with glic */ - if (!size) { + /* stay consistent with glibc */ + if (GMQCC_UNLIKELY(!size)) { stat_mem_deallocate(ptr); return NULL; } @@ -168,28 +209,58 @@ void *stat_mem_reallocate(void *ptr, size_t size, size_t line, const char *file) oldinfo = ((stat_mem_block_t*)ptr - 1); newinfo = ((stat_mem_block_t*)malloc(sizeof(stat_mem_block_t) + size)); - if (!newinfo) { + if (GMQCC_UNLIKELY(!newinfo)) { stat_mem_deallocate(ptr); return NULL; } + VALGRIND_MALLOCLIKE_BLOCK(newinfo + 1, size, sizeof(stat_mem_block_t), 0); + + /* we need access to the old info redzone */ + VALGRIND_MAKE_MEM_DEFINED(oldinfo, sizeof(stat_mem_block_t)); + memcpy(newinfo+1, oldinfo+1, oldinfo->size); - if (oldinfo->prev) oldinfo->prev->next = oldinfo->next; - if (oldinfo->next) oldinfo->next->prev = oldinfo->prev; + if (oldinfo->prev) { + /* just need access for a short period */ + VALGRIND_MAKE_MEM_DEFINED(oldinfo->prev, sizeof(stat_mem_block_t)); + oldinfo->prev->next = oldinfo->next; + /* don't need access anymore */ + VALGRIND_MAKE_MEM_NOACCESS(oldinfo->prev, sizeof(stat_mem_block_t)); + } + + if (oldinfo->next) { + /* just need access for a short period */ + VALGRIND_MAKE_MEM_DEFINED(oldinfo->next, sizeof(stat_mem_block_t)); + oldinfo->next->prev = oldinfo->prev; + /* don't need access anymore */ + VALGRIND_MAKE_MEM_NOACCESS(oldinfo->next, sizeof(stat_mem_block_t)); + } /* move ahead */ if (oldinfo == stat_mem_block_root) stat_mem_block_root = oldinfo->next; + /* we need access to the redzone for the newinfo block */ + VALGRIND_MAKE_MEM_DEFINED(newinfo, sizeof(stat_mem_block_t)); + newinfo->line = line; newinfo->size = size; newinfo->file = file; newinfo->prev = NULL; newinfo->next = stat_mem_block_root; - if (stat_mem_block_root) + /* + * likely since the only time there is no root is when it's + * being initialized first. + */ + if (GMQCC_LIKELY(stat_mem_block_root != NULL)) { + /* we need access to the root */ + VALGRIND_MAKE_MEM_DEFINED(stat_mem_block_root, sizeof(stat_mem_block_t)); stat_mem_block_root->prev = newinfo; + /* kill access */ + VALGRIND_MAKE_MEM_NOACCESS(stat_mem_block_root, sizeof(stat_mem_block_t)); + } stat_mem_block_root = newinfo; stat_mem_allocated -= oldinfo->size; @@ -197,11 +268,18 @@ void *stat_mem_reallocate(void *ptr, size_t size, size_t line, const char *file) stat_mem_allocated += newinfo->size; stat_mem_high += newinfo->size; + /* + * we're finished with the redzones, lets kill the access + * to them. + */ + VALGRIND_MAKE_MEM_NOACCESS(newinfo, sizeof(stat_mem_block_t)); + VALGRIND_MAKE_MEM_NOACCESS(oldinfo, sizeof(stat_mem_block_t)); + if (stat_mem_high > stat_mem_peak) stat_mem_peak = stat_mem_high; free(oldinfo); - + VALGRIND_FREELIKE_BLOCK(ptr, sizeof(stat_mem_block_t)); return newinfo + 1; } @@ -224,6 +302,7 @@ char *stat_mem_strdup(const char *src, size_t line, const char *file, bool empty } stat_used_strdups ++; + stat_mem_strdups += len; return ptr; } @@ -271,6 +350,104 @@ typedef struct hash_node_t { struct hash_node_t *next; /* next node (linked list) */ } hash_node_t; +/* + * This is a patched version of the Murmur2 hashing function to use + * a proper pre-mix and post-mix setup. Infact this is Murmur3 for + * the most part just reinvented. + * + * Murmur 2 contains an inner loop such as: + * while (l >= 4) { + * u32 k = *(u32*)d; + * k *= m; + * k ^= k >> r; + * k *= m; + * + * h *= m; + * h ^= k; + * d += 4; + * l -= 4; + * } + * + * The two u32s that form the key are the same value x (pulled from data) + * this premix stage will perform the same results for both values. Unrolled + * this produces just: + * x *= m; + * x ^= x >> r; + * x *= m; + * + * h *= m; + * h ^= x; + * h *= m; + * h ^= x; + * + * This appears to be fine, except what happens when m == 1? well x + * cancels out entierly, leaving just: + * x ^= x >> r; + * h ^= x; + * h ^= x; + * + * So all keys hash to the same value, but how often does m == 1? + * well, it turns out testing x for all possible values yeilds only + * 172,013,942 unique results instead of 2^32. So nearly ~4.6 bits + * are cancelled out on average! + * + * This means we have a 14.5% (rounded) chance of colliding more, which + * results in another bucket/chain for the hashtable. + * + * We fix it buy upgrading the pre and post mix ssystems to align with murmur + * hash 3. + */ +#if 1 +#define GMQCC_ROTL32(X, R) (((X) << (R)) | ((X) >> (32 - (R)))) +GMQCC_INLINE size_t util_hthash(hash_table_t *ht, const char *key) { + const unsigned char *data = (const unsigned char *)key; + const size_t len = strlen(key); + const size_t block = len / 4; + const uint32_t mask1 = 0xCC9E2D51; + const uint32_t mask2 = 0x1B873593; + const uint32_t *blocks = (const uint32_t*)(data + block * 4); + const unsigned char *tail = (const unsigned char *)(data + block * 4); + + size_t i; + uint32_t k; + uint32_t h = 0x1EF0 ^ len; + + for (i = -block; i; i++) { + k = blocks[i]; + k *= mask1; + k = GMQCC_ROTL32(k, 15); + k *= mask2; + h ^= k; + h = GMQCC_ROTL32(h, 13); + h = h * 5 + 0xE6546B64; + } + + k = 0; + switch (len & 3) { + case 3: + k ^= tail[2] << 16; + case 2: + k ^= tail[1] << 8; + case 1: + k ^= tail[0]; + k *= mask1; + k = GMQCC_ROTL32(k, 15); + k *= mask2; + h ^= k; + } + + h ^= len; + h ^= h >> 16; + h *= 0x85EBCA6B; + h ^= h >> 13; + h *= 0xC2B2AE35; + h ^= h >> 16; + + return (size_t) (h % ht->size); +} +#undef GMQCC_ROTL32 +#else +/* We keep the old for reference */ GMQCC_INLINE size_t util_hthash(hash_table_t *ht, const char *key) { const uint32_t mix = 0x5BD1E995; const uint32_t rot = 24; @@ -305,6 +482,7 @@ GMQCC_INLINE size_t util_hthash(hash_table_t *ht, const char *key) { return (size_t) (hash % ht->size); } +#endif static hash_node_t *_util_htnewpair(const char *key, void *value) { hash_node_t *node; @@ -409,6 +587,7 @@ void *util_htget(hash_table_t *ht, const char *key) { return util_htgeth(ht, key, util_hthash(ht, key)); } +void *code_util_str_htgeth(hash_table_t *ht, const char *key, size_t bin); void *code_util_str_htgeth(hash_table_t *ht, const char *key, size_t bin) { hash_node_t *pair; size_t len, keylen; @@ -449,7 +628,8 @@ void *code_util_str_htgeth(hash_table_t *ht, const char *key, size_t bin) { */ void util_htrem(hash_table_t *ht, void (*callback)(void *data)) { size_t i = 0; - for (; i < ht->size; i++) { + + for (; i < ht->size; ++i) { hash_node_t *n = ht->table[i]; hash_node_t *p; @@ -460,7 +640,7 @@ void util_htrem(hash_table_t *ht, void (*callback)(void *data)) { if (callback) callback(n->value); p = n; - n = n->next; + n = p->next; mem_d(p); } @@ -513,7 +693,7 @@ static void stat_dump_mem_contents(stat_mem_block_t *memory, uint16_t cols) { con_out("%c", (j >= memory->size) ? ' ' - : (isprint(((unsigned char*)(memory + 1))[j])) + : (util_isprint(((unsigned char*)(memory + 1))[j])) ? 0xFF & ((unsigned char*)(memory + 1)) [j] : '.' ); @@ -525,7 +705,11 @@ static void stat_dump_mem_contents(stat_mem_block_t *memory, uint16_t cols) { static void stat_dump_mem_leaks(void) { stat_mem_block_t *info; + /* we need access to the root for this */ + VALGRIND_MAKE_MEM_DEFINED(stat_mem_block_root, sizeof(stat_mem_block_t)); for (info = stat_mem_block_root; info; info = info->next) { + /* we need access to the block */ + VALGRIND_MAKE_MEM_DEFINED(info, sizeof(stat_mem_block_t)); con_out("lost: %u (bytes) at %s:%u\n", info->size, info->file, @@ -533,11 +717,19 @@ static void stat_dump_mem_leaks(void) { ); stat_dump_mem_contents(info, OPTS_OPTION_U16(OPTION_MEMDUMPCOLS)); + + /* + * we're finished with the access, the redzone should be marked + * inaccesible so that invalid read/writes that could 'step-into' + * those redzones will show up as invalid read/writes in valgrind. + */ + VALGRIND_MAKE_MEM_NOACCESS(info, sizeof(stat_mem_block_t)); } + VALGRIND_MAKE_MEM_NOACCESS(stat_mem_block_root, sizeof(stat_mem_block_t)); } static void stat_dump_mem_info(void) { - con_out("Memory information:\n\ + con_out("Memory Information:\n\ Total allocations: %llu\n\ Total deallocations: %llu\n\ Total allocated: %f (MB)\n\ @@ -575,31 +767,26 @@ static void stat_dump_stats_table(stat_size_table_t table, const char *string, u } void stat_info() { - if (OPTS_OPTION_BOOL(OPTION_DEBUG)) - stat_dump_mem_leaks(); - - if (OPTS_OPTION_BOOL(OPTION_DEBUG) || - OPTS_OPTION_BOOL(OPTION_MEMCHK)) - stat_dump_mem_info(); - if (OPTS_OPTION_BOOL(OPTION_MEMCHK) || OPTS_OPTION_BOOL(OPTION_STATISTICS)) { uint64_t mem = 0; - con_out("\nAdditional Statistics:\n\ - Total vectors allocated: %llu\n\ - Total string duplicates: %llu\n\ - Total hashtables allocated: %llu\n\ - Total unique vector sizes: %llu\n", + con_out("Memory Statistics:\n\ + Total vectors allocated: %llu\n\ + Total string duplicates: %llu\n\ + Total string duplicate memory: %f (MB)\n\ + Total hashtables allocated: %llu\n\ + Total unique vector sizes: %llu\n", stat_used_vectors, stat_used_strdups, + (float)(stat_mem_strdups) / 1048576.0f, stat_used_hashtables, stat_type_vectors ); stat_dump_stats_table ( stat_size_vectors, - " %2u| # of %4u byte vectors: %u\n", + " %2u| # of %5u byte vectors: %u\n", &mem ); @@ -610,12 +797,12 @@ void stat_info() { stat_dump_stats_table ( stat_size_hashtables, - " %2u| # of %4u element hashtables: %u\n", + " %2u| # of %5u element hashtables: %u\n", NULL ); con_out ( - " Total vector memory: %f (MB)\n", + " Total vector memory: %f (MB)\n\n", (float)(mem) / 1048576.0f ); } @@ -624,5 +811,12 @@ void stat_info() { stat_size_del(stat_size_vectors); if (stat_size_hashtables) stat_size_del(stat_size_hashtables); + + if (OPTS_OPTION_BOOL(OPTION_DEBUG) || + OPTS_OPTION_BOOL(OPTION_MEMCHK)) + stat_dump_mem_info(); + + if (OPTS_OPTION_BOOL(OPTION_DEBUG)) + stat_dump_mem_leaks(); } #undef ST_SIZE