/* 128 * 1024 * 1024 = 134217728; presumably the byte size of one corrector pool chunk -- TODO confirm against the allocator */
#define CORRECT_POOL_SIZE (128*1024*1024)
-#define CORRECT_POOL_GETLEN(X) *((size_t*)(X) - 1)
/*
* A forward allocator for the corrector. This corrector requires a lot
* of allocations. This forward allocator combats all those allocations
* and speeds us up a little. It also saves us space in a way since each
* allocation isn't wasting a little header space for when NOTRACK isn't
* defined.
- */
+ */
/*
 * File-local state of the corrector's pool allocator. All three start out
 * empty (NULL / 0). The roles noted below are inferred from the names only;
 * verify against correct_pool_alloc before relying on them.
 */
static unsigned char **correct_pool_data = NULL; /* presumably the table of chunks obtained so far -- confirm */
static unsigned char *correct_pool_this = NULL; /* presumably the chunk currently being handed out from -- confirm */
static size_t correct_pool_addr = 0; /* presumably the offset of the next free byte in that chunk -- confirm */
* have one of the fastest hash functions for strings, but if you do a
* lot of hashing (which we do, almost 3 million hashes per identifier)
* a hashtable becomes slow.
- */
+ */
correct_trie_t* correct_trie_new() {
correct_trie_t *t = (correct_trie_t*)mem_a(sizeof(correct_trie_t));
t->value = NULL;
/*
* Implementation of the corrector algorithm commences. A very efficient
* brute-force attack (thanks to tries and mempool :-)).
- */
+ */
/*
 * Look `word` up in the trie `table` and hand back whatever
 * correct_trie_get() yields for it, viewed as a pointer to size_t.
 * NOTE(review): the result for a missing word is whatever
 * correct_trie_get() returns in that case (presumably NULL) -- confirm.
 */
static GMQCC_INLINE size_t *correct_find(correct_trie_t *table, const char *word) {
    size_t *entry = (size_t*)correct_trie_get(table, word);

    return entry;
}
void correct_add(correct_trie_t* table, size_t ***size, const char *ident) {
size_t *data = NULL;
const char *add = ident;
-
+
if (!correct_update(&table, add)) {
data = (size_t*)mem_a(sizeof(size_t));
*data = 1;
* need to take a size_t ** to carry it along (would all the argument
* overhead be worth it?)
*/
-static size_t correct_deletion(const char *ident, char **array, size_t index) {
+static size_t correct_deletion(const char *ident, char **array) {
size_t itr = 0;
const size_t len = strlen(ident);
char *a = (char*)correct_pool_alloc(len+1);
memcpy(a, ident, itr);
memcpy(a + itr, ident + itr + 1, len - itr);
- array[index + itr] = a;
+ array[itr] = a;
}
return itr;
}
-static size_t correct_transposition(const char *ident, char **array, size_t index) {
+static size_t correct_transposition(const char *ident, char **array) {
size_t itr = 0;
const size_t len = strlen(ident);
tmp = a[itr];
a[itr ] = a[itr+1];
a[itr+1] = tmp;
- array[index + itr] = a;
+ array[itr] = a;
}
return itr;
}
-static size_t correct_alteration(const char *ident, char **array, size_t index) {
+static size_t correct_alteration(const char *ident, char **array) {
size_t itr = 0;
size_t jtr = 0;
size_t ktr = 0;
char *a = (char*)correct_pool_alloc(len+1);
memcpy(a, ident, len+1);
a[itr] = correct_alpha[jtr];
- array[index + ktr] = a;
+ array[ktr] = a;
}
}
return ktr;
}
-static size_t correct_insertion(const char *ident, char **array, size_t index) {
+static size_t correct_insertion(const char *ident, char **array) {
size_t itr = 0;
size_t jtr = 0;
size_t ktr = 0;
memcpy(a, ident, itr);
memcpy(a + itr + 1, ident + itr, len - itr + 1);
a[itr] = correct_alpha[jtr];
- array[index + ktr] = a;
+ array[ktr] = a;
}
}
* transposition = len - 1
* alteration = len * sizeof(correct_alpha)
* insertion = (len + 1) * sizeof(correct_alpha)
- */
+ */
register size_t len = strlen(ident);
return (len) + (len - 1) + (len * (sizeof(correct_alpha)-1)) + ((len + 1) * (sizeof(correct_alpha)-1));
if (!find)
return NULL;
- next = correct_deletion (ident, find, 0);
- next += correct_transposition(ident, find, next);
- next += correct_alteration (ident, find, next);
- /*****/ correct_insertion (ident, find, next);
+ next = correct_deletion (ident, find);
+ next += correct_transposition(ident, find+next);
+ next += correct_alteration (ident, find+next);
+ /*****/ correct_insertion (ident, find+next);
return find;
}
* We could use a hashtable but the space complexity isn't worth it
* since we're only going to determine the "did you mean?" identifier
* on error.
- */
+ */
static int correct_exist(char **array, size_t rows, char *ident) {
size_t itr;
/*
* cmpl %eax, %ebx ; ebx = &LHS[END_POS]
*
* jbe correct_cmp_eq
- * movb (%edx), %cl ; micro-optimized on even atoms :-)
+ * movb (%edx), %cl ; micro-optimized even on atoms :-)
* cmpb %cl, (%eax) ; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* jg correct_cmp_gt
* jge correct_cmp_loop
/*
 * Grow the result array `res` by one step of 32 entries when needed.
 *
 *   res       - current array of string pointers
 *   allocated - in/out: capacity of `res`, counted in entries
 *   size      - number of entries the caller needs room for
 *
 * While `size` is below the recorded capacity, `res` is returned untouched
 * and `*allocated` is left alone. Otherwise a new array with room for 32
 * additional entries is obtained from correct_pool_alloc(), the old
 * `*allocated` entries are copied into it, `*allocated` is raised by 32 and
 * the new array is returned. The old array is not released here.
 */
static GMQCC_INLINE char **correct_known_resize(char **res, size_t *allocated, size_t size) {
size_t oldallocated = *allocated;
char **out;
- if (size+1 < *allocated)
+ if (size < oldallocated)
return res;
- *allocated += 32;
- out = correct_pool_alloc(sizeof(*res) * *allocated);
/*
 * The growth step is 32 *entries*, so it has to sit inside the
 * multiplication. `sizeof(*res) * oldallocated + 32` would add only 32
 * bytes while `*allocated` below claims 32 more entries, letting later
 * writes run past the end of the allocation.
 */
+ out = correct_pool_alloc(sizeof(*res) * (oldallocated + 32));
memcpy(out, res, sizeof(*res) * oldallocated);
+
+ *allocated += 32;
return out;
}
/*
* This is the exposed interface:
* takes a table for the dictionary, a vector of sizes (used for internal
- * probability calculation, and an identifier to "correct"
- *
- * the add function works the same. Except the identifier is used to
- * add to the dictonary.
+ * probability calculation), and an identifier to "correct".
*/
char *correct_str(correct_trie_t* table, const char *ident) {
char **e1 = NULL;