From a77797d6a646e2370e6212ea8901bbfeb9e5a920 Mon Sep 17 00:00:00 2001 From: Dale Weiler Date: Tue, 13 Jan 2015 19:48:57 -0500 Subject: [PATCH] Remove spelling corrector --- correct.c | 548 ----------------------------------------------------- gmqcc.h | 21 -- include.mk | 2 +- main.c | 8 - opts.c | 1 - opts.def | 1 - parser.c | 111 ----------- parser.h | 4 - 8 files changed, 1 insertion(+), 695 deletions(-) delete mode 100644 correct.c diff --git a/correct.c b/correct.c deleted file mode 100644 index 1f7a381..0000000 --- a/correct.c +++ /dev/null @@ -1,548 +0,0 @@ -/* - * Copyright (C) 2012, 2013, 2014, 2015 - * Dale Weiler - * Wolfgang Bumiller - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished to do - * so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include "gmqcc.h" - -/* - * This is a very clever method for correcting mistakes in QuakeC code - * most notably when invalid identifiers are used or inproper assignments; - * we can proprly lookup in multiple dictonaries (depening on the rules - * of what the task is trying to acomplish) to find the best possible - * match. - * - * - * A little about how it works, and probability theory: - * - * When given an identifier (which we will denote I), we're essentially - * just trying to choose the most likely correction for that identifier. - * (the actual "correction" can very well be the identifier itself). - * There is actually no way to know for sure that certian identifers - * such as "lates", need to be corrected to "late" or "latest" or any - * other permutations that look lexically the same. This is why we - * must advocate the usage of probabilities. This means that instead of - * just guessing, instead we're trying to find the correction for C, - * out of all possible corrections that maximizes the probability of C - * for the original identifer I. - * - * Thankfully there exists some theroies for probalistic interpretations - * of data. Since we're operating on two distictive intepretations, the - * transposition from I to C. We need something that can express how much - * degree of I should rationally change to become C. this is called the - * Bayesian interpretation. You can read more about it from here: - * http://www.celiagreen.com/charlesmccreery/statistics/bayestutorial.pdf - * (which is probably the only good online documentation for bayes theroy - * no lie. Everything else just sucks ..) - * - * Bayes' Thereom suggests something like the following: - * AC P(I|C) P(C) / P(I) - * - * However since P(I) is the same for every possibility of I, we can - * completley ignore it giving just: - * AC P(I|C) P(C) - * - * This greatly helps visualize how the parts of the expression are performed - * there is essentially three, from right to left we perform the following: - * - * 1: P(C), the probability that a proposed correction C will stand on its - * own. This is called the language model. - * - * 2: P(I|C), the probability that I would be used, when the programmer - * really meant C. This is the error model. - * - * 3: AC, the control mechanisim, an enumerator if you will, one that - * enumerates all feasible values of C, to determine the one that - * gives the greatest probability score. - * - * In reality the requirement for a more complex expression involving - * two seperate models is considerably a waste. But one must recognize - * that P(C|I) is already conflating two factors. It's just much simpler - * to seperate the two models and deal with them explicitaly. To properly - * estimate P(C|I) you have to consider both the probability of C and - * probability of the transposition from C to I. It's simply much more - * cleaner, and direct to seperate the two factors. - * - * Research tells us that 80% to 95% of all spelling errors have an edit - * distance no greater than one. Knowing this we can optimize for most - * cases of mistakes without taking a performance hit. Which is what we - * base longer edit distances off of. Opposed to the original method of - * I had concieved of checking everything. - * - * A little information on additional algorithms used: - * - * Initially when I implemented this corrector, it was very slow. - * Need I remind you this is essentially a brute force attack on strings, - * and since every transformation requires dynamic memory allocations, - * you can easily imagine where most of the runtime conflated. Yes - * It went right to malloc. More than THREE MILLION malloc calls are - * performed for an identifier about 16 bytes long. This was such a - * shock to me. A forward allocator (or as some call it a bump-point - * allocator, or just a memory pool) was implemented. To combat this. - * - * But of course even other factors were making it slow. Initially - * this used a hashtable. And hashtables have a good constant lookup - * time complexity. But the problem wasn't in the hashtable, it was - * in the hashing (despite having one of the fastest hash functions - * known). Remember those 3 million mallocs? Well for every malloc - * there is also a hash. After 3 million hashes .. you start to get - * very slow. To combat this I had suggested burst tries to Blub. - * The next day he had implemented them. Sure enough this brought - * down the runtime by a factor > 100% - * - * The trie initially was designed to work on all strings, but later it - * became aparent that not only was this not a requirement. It was also - * slowing down get/sets' for the trie. To fully understand, only - * correct_alpha needs to be understood by the trie system, knowing this - * We can combat the slowness using a very clever but evil optimization. - * By Setting a fixed sized amount of branches for the trie using a - * char-to-index map into the branches. We've complelty made the trie - * accesses entierly constant in lookup time. No really, a lookup is - * literally trie[str[0]] [str[1]] [2] .... .value. - * - * - * Future Work (If we really need it) - * - * Currently we can only distinguish one source of error in the - * language model we use. This could become an issue for identifiers - * that have close colliding rates, e.g colate->coat yields collate. - * - * Currently the error model has been fairly trivial, the smaller the - * edit distance the smaller the error. This usually causes some un- - * expected problems. e.g reciet->recite yields recipt. For QuakeC - * this could become a problem when lots of identifiers are involved. - */ - - -#define CORRECT_POOL_SIZE (128*1024*1024) -/* - * A forward allcator for the corrector. This corrector requires a lot - * of allocations. This forward allocator combats all those allocations - * and speeds us up a little. It also saves us space in a way since each - * allocation isn't wasting a little header space for when NOTRACK isn't - * defined. - */ -static unsigned char **correct_pool_data = NULL; -static unsigned char *correct_pool_this = NULL; -static size_t correct_pool_addr = 0; - -static GMQCC_INLINE void correct_pool_new(void) { - correct_pool_addr = 0; - correct_pool_this = (unsigned char *)mem_a(CORRECT_POOL_SIZE); - - vec_push(correct_pool_data, correct_pool_this); -} - -static GMQCC_INLINE void *correct_pool_alloc(size_t bytes) { - void *data; - if (correct_pool_addr + bytes>= CORRECT_POOL_SIZE) - correct_pool_new(); - - data = (void*)correct_pool_this; - correct_pool_this += bytes; - correct_pool_addr += bytes; - return data; -} - -static GMQCC_INLINE void correct_pool_delete(void) { - size_t i; - for (i = 0; i < vec_size(correct_pool_data); ++i) - mem_d(correct_pool_data[i]); - - correct_pool_data = NULL; - correct_pool_this = NULL; - correct_pool_addr = 0; -} - - -static GMQCC_INLINE char *correct_pool_claim(const char *data) { - char *claim = util_strdup(data); - return claim; -} - -/* - * _ is valid in identifiers. I've yet to implement numerics however - * because they're only valid after the first character is of a _, or - * alpha character. - */ -static const char correct_alpha[] = "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "_"; /* TODO: Numbers ... */ - -static const size_t correct_alpha_index[0x80] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 52, - 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0 -}; - -/* - * A fast space efficent trie for a dictionary of identifiers. This is - * faster than a hashtable for one reason. A hashtable itself may have - * fast constant lookup time, but the hash itself must be very fast. We - * have one of the fastest hash functions for strings, but if you do a - * lost of hashing (which we do, almost 3 million hashes per identifier) - * a hashtable becomes slow. - */ -correct_trie_t* correct_trie_new() { - correct_trie_t *t = (correct_trie_t*)mem_a(sizeof(correct_trie_t)); - t->value = NULL; - t->entries = NULL; - return t; -} - -static GMQCC_INLINE void correct_trie_del_sub(correct_trie_t *t) { - size_t i; - if (!t->entries) - return; - for (i = 0; i < sizeof(correct_alpha)-1; ++i) { - correct_trie_del_sub(&t->entries[i]); - } - mem_d(t->entries); -} - -static GMQCC_INLINE void correct_trie_del(correct_trie_t *t) { - size_t i; - if (t->entries) { - for (i = 0; i < sizeof(correct_alpha)-1; ++i) - correct_trie_del_sub(&t->entries[i]); - mem_d(t->entries); - } - mem_d(t); -} - -static GMQCC_INLINE void* correct_trie_get(const correct_trie_t *t, const char *key) { - const unsigned char *data = (const unsigned char*)key; - - while (*data) { - if (!t->entries) - return NULL; - t = t->entries + correct_alpha_index[*data]; - ++data; - } - return t->value; -} - -static GMQCC_INLINE void correct_trie_set(correct_trie_t *t, const char *key, void * const value) { - const unsigned char *data = (const unsigned char*)key; - while (*data) { - if (!t->entries) { - t->entries = (correct_trie_t*)mem_a(sizeof(correct_trie_t)*(sizeof(correct_alpha)-1)); - memset(t->entries, 0, sizeof(correct_trie_t)*(sizeof(correct_alpha)-1)); - } - t = t->entries + correct_alpha_index[*data]; - ++data; - } - t->value = value; -} - - -/* - * Implementation of the corrector algorithm commences. A very efficent - * brute-force attack (thanks to tries and mempool :-)). - */ -static GMQCC_INLINE size_t *correct_find(correct_trie_t *table, const char *word) { - return (size_t*)correct_trie_get(table, word); -} - -static GMQCC_INLINE bool correct_update(correct_trie_t* *table, const char *word) { - size_t *data = correct_find(*table, word); - if (!data) - return false; - - (*data)++; - return true; -} - -void correct_add(correct_trie_t* table, size_t ***size, const char *ident) { - size_t *data = NULL; - const char *add = ident; - - if (!correct_update(&table, add)) { - data = (size_t*)mem_a(sizeof(size_t)); - *data = 1; - - vec_push((*size), data); - correct_trie_set(table, add, data); - } -} - -void correct_del(correct_trie_t* dictonary, size_t **data) { - size_t i; - const size_t vs = vec_size(data); - - for (i = 0; i < vs; i++) - mem_d(data[i]); - - vec_free(data); - correct_trie_del(dictonary); -} - -/* - * correcting logic for the following forms of transformations: - * 1) deletion - * 2) transposition - * 3) alteration - * 4) insertion - * - * These functions could take an additional size_t **size paramater - * and store back the results of their new length in an array that - * is the same as **array for the memcmp in correct_exists. I'm just - * not able to figure out how to do that just yet. As my brain is - * not in the mood to figure out that logic. This is a reminder to - * do it, or for someone else to :-) correct_edit however would also - * need to take a size_t ** to carry it along (would all the argument - * overhead be worth it?) - */ -static GMQCC_INLINE size_t correct_deletion(const char *ident, char **array) { - size_t itr = 0; - const size_t len = strlen(ident); - - for (; itr < len; itr++) { - char *a = (char*)correct_pool_alloc(len+1); - memcpy(a, ident, itr); - memcpy(a + itr, ident + itr + 1, len - itr); - array[itr] = a; - } - - return itr; -} - -static GMQCC_INLINE size_t correct_transposition(const char *ident, char **array) { - size_t itr = 0; - const size_t len = strlen(ident); - - for (; itr < len - 1; itr++) { - char tmp; - char *a = (char*)correct_pool_alloc(len+1); - memcpy(a, ident, len+1); - tmp = a[itr]; - a[itr ] = a[itr+1]; - a[itr+1] = tmp; - array[itr] = a; - } - - return itr; -} - -static GMQCC_INLINE size_t correct_alteration(const char *ident, char **array) { - size_t itr = 0; - size_t jtr = 0; - size_t ktr = 0; - const size_t len = strlen(ident); - - for (; itr < len; itr++) { - for (jtr = 0; jtr < sizeof(correct_alpha)-1; jtr++, ktr++) { - char *a = (char*)correct_pool_alloc(len+1); - memcpy(a, ident, len+1); - a[itr] = correct_alpha[jtr]; - array[ktr] = a; - } - } - - return ktr; -} - -static GMQCC_INLINE size_t correct_insertion(const char *ident, char **array) { - size_t itr = 0; - size_t jtr = 0; - const size_t len = strlen(ident); - - for (; itr <= len; itr++) { - for (jtr = 0; jtr < sizeof(correct_alpha)-1; jtr++) { - char *a = (char*)correct_pool_alloc(len+2); - memcpy(a, ident, itr); - memcpy(a + itr + 1, ident + itr, len - itr + 1); - a[itr] = correct_alpha[jtr]; - array[itr * (sizeof(correct_alpha)-1) + jtr] = a; - } - } - - return (len+1)*(sizeof(correct_alpha)-1); -} - -static GMQCC_INLINE size_t correct_size(const char *ident) { - /* - * deletion = len - * transposition = len - 1 - * alteration = len * sizeof(correct_alpha) - * insertion = (len + 1) * sizeof(correct_alpha) - */ - - register size_t len = strlen(ident); - return (len) + (len - 1) + (len * (sizeof(correct_alpha)-1)) + ((len + 1) * (sizeof(correct_alpha)-1)); -} - -static GMQCC_INLINE char **correct_edit(const char *ident, size_t **lens) { - size_t next; - size_t size = correct_size(ident); - char **find = (char**)correct_pool_alloc(size * sizeof(char*)); - - if (!find || !(*lens = (size_t*)correct_pool_alloc(size * sizeof(size_t)))) - return NULL; - - next = correct_deletion (ident, find); - next += correct_transposition(ident, find+next); - next += correct_alteration (ident, find+next); - /*****/ correct_insertion (ident, find+next); - - /* precompute lengths */ - for (next = 0; next < size; next++) - (*lens)[next] = strlen(find[next]); - - return find; -} - -static GMQCC_INLINE int correct_exist(char **array, register size_t *sizes, size_t rows, char *ident, register size_t len) { - size_t itr; - for (itr = 0; itr < rows; itr++) { - /* - * We can save tons of calls to memcmp if we simply ignore comparisions - * that we know cannot contain the same length. - */ - if (sizes[itr] == len && !memcmp(array[itr], ident, len)) - return 1; - } - - return 0; -} - -static GMQCC_INLINE char **correct_known_resize(char **res, size_t *allocated, size_t size) { - size_t oldallocated = *allocated; - char **out; - if (size < oldallocated) - return res; - - out = (char**)correct_pool_alloc(sizeof(*res) * oldallocated + 32); - memcpy(out, res, sizeof(*res) * oldallocated); - - *allocated += 32; - return out; -} - -static char **correct_known(correction_t *corr, correct_trie_t* table, char **array, size_t rows, size_t *next) { - size_t itr = 0; - size_t jtr = 0; - size_t len = 0; - size_t row = 0; - size_t nxt = 8; - char **res = (char**)correct_pool_alloc(sizeof(char *) * nxt); - char **end = NULL; - size_t *bit = NULL; - - for (; itr < rows; itr++) { - if (!array[itr][0]) - continue; - if (vec_size(corr->edits) > itr+1) { - end = corr->edits[itr+1]; - bit = corr->lens [itr+1]; - } else { - end = correct_edit(array[itr], &bit); - vec_push(corr->edits, end); - vec_push(corr->lens, bit); - } - row = correct_size(array[itr]); - - for (jtr = 0; jtr < row; jtr++) { - if (correct_find(table, end[jtr]) && !correct_exist(res, bit, len, end[jtr], bit[jtr])) { - res = correct_known_resize(res, &nxt, len+1); - res[len++] = end[jtr]; - } - } - } - - *next = len; - return res; -} - -static GMQCC_INLINE char *correct_maximum(correct_trie_t* table, char **array, size_t rows) { - char *str = NULL; - size_t *itm = NULL; - size_t itr = 0; - size_t top = 0; - - for (; itr < rows; itr++) { - if ((itm = correct_find(table, array[itr])) && (*itm > top)) { - top = *itm; - str = array[itr]; - } - } - - return str; -} - -/* - * This is the exposed interface: - * takes a table for the dictonary a vector of sizes (used for internal - * probability calculation), and an identifier to "correct". - */ -void correct_init(correction_t *c) -{ - correct_pool_new(); - c->edits = NULL; - c->lens = NULL; -} - -void correct_free(correction_t *c) -{ - vec_free(c->edits); - vec_free(c->lens); - correct_pool_delete(); -} - -char *correct_str(correction_t *corr, correct_trie_t* table, const char *ident) { - char **e1 = NULL; - char **e2 = NULL; - char *e1ident = NULL; - char *e2ident = NULL; - size_t e1rows = 0; - size_t e2rows = 0; - size_t *bits = NULL; - - /* needs to be allocated for free later */ - if (correct_find(table, ident)) - return correct_pool_claim(ident); - - if ((e1rows = correct_size(ident))) { - if (vec_size(corr->edits) > 0) - e1 = corr->edits[0]; - else { - e1 = correct_edit(ident, &bits); - vec_push(corr->edits, e1); - vec_push(corr->lens, bits); - } - - if ((e1ident = correct_maximum(table, e1, e1rows))) - return correct_pool_claim(e1ident); - } - - e2 = correct_known(corr, table, e1, e1rows, &e2rows); - if (e2rows && ((e2ident = correct_maximum(table, e2, e2rows)))) - return correct_pool_claim(e2ident); - - - return util_strdup(ident); -} diff --git a/gmqcc.h b/gmqcc.h index 43153e9..7da3d64 100644 --- a/gmqcc.h +++ b/gmqcc.h @@ -359,27 +359,6 @@ fs_dir_t *fs_dir_open (const char *); int fs_dir_close (fs_dir_t *); fs_dirent_t *fs_dir_read (fs_dir_t *); - -/* correct.c */ -typedef struct correct_trie_s { - void *value; - struct correct_trie_s *entries; -} correct_trie_t; - -correct_trie_t* correct_trie_new(void); - -typedef struct { - char ***edits; - size_t **lens; -} correction_t; - -void correct_del (correct_trie_t*, size_t **); -void correct_add (correct_trie_t*, size_t ***, const char *); -char *correct_str (correction_t *, correct_trie_t*, const char *); -void correct_init(correction_t *); -void correct_free(correction_t *); - - /* code.c */ /* Note: if you change the order, fix type_sizeof in ir.c */ diff --git a/include.mk b/include.mk index 1f90111..b1ed961 100644 --- a/include.mk +++ b/include.mk @@ -23,7 +23,7 @@ OPTIONAL_CFLAGS := OPTIONAL_LDFLAGS := #objects -OBJ_C = $(COMMON) main.o lexer.o parser.o code.o ast.o ir.o ftepp.o utf8.o correct.o fold.o intrin.o +OBJ_C = $(COMMON) main.o lexer.o parser.o code.o ast.o ir.o ftepp.o utf8.o fold.o intrin.o OBJ_P = $(COMMON) pak.o OBJ_T = $(COMMON) test.o OBJ_X = $(COMMON) exec.o diff --git a/main.c b/main.c index 9a027bd..db8aed0 100644 --- a/main.c +++ b/main.c @@ -498,14 +498,6 @@ static bool options_parse(int argc, char **argv) { OPTS_OPTION_BOOL(OPTION_QUIET) = true; break; } - else if (!strcmp(argv[0]+2, "correct")) { - OPTS_OPTION_BOOL(OPTION_CORRECTION) = true; - break; - } - else if (!strcmp(argv[0]+2, "no-correct")) { - OPTS_OPTION_BOOL(OPTION_CORRECTION) = false; - break; - } else if (!strcmp(argv[0]+2, "add-info")) { OPTS_OPTION_BOOL(OPTION_ADD_INFO) = true; break; diff --git a/opts.c b/opts.c index 7c59713..3f5c25b 100644 --- a/opts.c +++ b/opts.c @@ -59,7 +59,6 @@ opts_cmd_t opts; /* command line options */ static void opts_setdefault(void) { memset(&opts, 0, sizeof(opts_cmd_t)); - OPTS_OPTION_BOOL(OPTION_CORRECTION) = true; OPTS_OPTION_STR(OPTION_PROGSRC) = "progs.src"; /* warnings */ diff --git a/opts.def b/opts.def index 92cff14..9d8cad1 100644 --- a/opts.def +++ b/opts.def @@ -136,7 +136,6 @@ GMQCC_DEFINE_FLAG(PP_ONLY) GMQCC_DEFINE_FLAG(MAX_ARRAY_SIZE) GMQCC_DEFINE_FLAG(ADD_INFO) - GMQCC_DEFINE_FLAG(CORRECTION) GMQCC_DEFINE_FLAG(STATISTICS) GMQCC_DEFINE_FLAG(PROGSRC) GMQCC_DEFINE_FLAG(COVERAGE) diff --git a/parser.c b/parser.c index 0b78337..75014cb 100644 --- a/parser.c +++ b/parser.c @@ -1653,9 +1653,6 @@ static bool parse_sya_operand(parser_t *parser, shunt *sy, bool with_labels) if (!var) { - char *correct = NULL; - size_t i; - /* * sometimes people use preprocessing predefs without enabling them * i've done this thousands of times already myself. Lets check for @@ -1666,34 +1663,6 @@ static bool parse_sya_operand(parser_t *parser, shunt *sy, bool with_labels) return false; } - /* - * TODO: determine the best score for the identifier: be it - * a variable, a field. - * - * We should also consider adding correction tables for - * other things as well. - */ - if (OPTS_OPTION_BOOL(OPTION_CORRECTION) && strlen(parser_tokval(parser)) <= 16) { - correction_t corr; - correct_init(&corr); - - for (i = 0; i < vec_size(parser->correct_variables); i++) { - correct = correct_str(&corr, parser->correct_variables[i], parser_tokval(parser)); - if (strcmp(correct, parser_tokval(parser))) { - break; - } else { - mem_d(correct); - correct = NULL; - } - } - correct_free(&corr); - - if (correct) { - parseerror(parser, "unexpected identifier: %s (did you mean %s?)", parser_tokval(parser), correct); - mem_d(correct); - return false; - } - } parseerror(parser, "unexpected identifier: %s", parser_tokval(parser)); return false; } @@ -2045,10 +2014,6 @@ static void parser_enterblock(parser_t *parser) vec_push(parser->typedefs, util_htnew(TYPEDEF_HT_SIZE)); vec_push(parser->_blocktypedefs, vec_size(parser->_typedefs)); vec_push(parser->_block_ctx, parser_ctx(parser)); - - /* corrector */ - vec_push(parser->correct_variables, correct_trie_new()); - vec_push(parser->correct_variables_score, NULL); } static bool parser_leaveblock(parser_t *parser) @@ -2062,11 +2027,8 @@ static bool parser_leaveblock(parser_t *parser) } util_htdel(vec_last(parser->variables)); - correct_del(vec_last(parser->correct_variables), vec_last(parser->correct_variables_score)); vec_pop(parser->variables); - vec_pop(parser->correct_variables); - vec_pop(parser->correct_variables_score); if (!vec_size(parser->_blocklocals)) { parseerror(parser, "internal error: parser_leaveblock with no block (2)"); return false; @@ -2101,26 +2063,12 @@ static void parser_addlocal(parser_t *parser, const char *name, ast_expression * { vec_push(parser->_locals, e); util_htset(vec_last(parser->variables), name, (void*)e); - - /* corrector */ - correct_add ( - vec_last(parser->correct_variables), - &vec_last(parser->correct_variables_score), - name - ); } static void parser_addglobal(parser_t *parser, const char *name, ast_expression *e) { vec_push(parser->globals, e); util_htset(parser->htglobals, name, e); - - /* corrector */ - correct_add ( - parser->correct_variables[0], - &parser->correct_variables_score[0], - name - ); } static ast_expression* process_condition(parser_t *parser, ast_expression *cond, bool *_ifnot) @@ -5510,22 +5458,8 @@ static bool parse_variable(parser_t *parser, ast_block *localblock, bool nofield return false; } - /* - * add alias to aliases table and to corrector - * so corrections can apply for aliases as well. - */ util_htset(parser->aliases, var->name, find); - /* - * add to corrector so corrections can work - * even for aliases too. - */ - correct_add ( - vec_last(parser->correct_variables), - &vec_last(parser->correct_variables_score), - var->name - ); - /* generate aliases for vector components */ if (isvector) { char *buffer[3]; @@ -5541,26 +5475,6 @@ static bool parse_variable(parser_t *parser, ast_block *localblock, bool nofield mem_d(buffer[0]); mem_d(buffer[1]); mem_d(buffer[2]); - - /* - * add to corrector so corrections can work - * even for aliases too. - */ - correct_add ( - vec_last(parser->correct_variables), - &vec_last(parser->correct_variables_score), - me[0]->name - ); - correct_add ( - vec_last(parser->correct_variables), - &vec_last(parser->correct_variables_score), - me[1]->name - ); - correct_add ( - vec_last(parser->correct_variables), - &vec_last(parser->correct_variables_score), - me[2]->name - ); } } } @@ -5583,13 +5497,6 @@ static bool parse_variable(parser_t *parser, ast_block *localblock, bool nofield /* Add it to the local scope */ util_htset(vec_last(parser->variables), var->name, (void*)var); - /* corrector */ - correct_add ( - vec_last(parser->correct_variables), - &vec_last(parser->correct_variables_score), - var->name - ); - /* now rename the global */ ln = strlen(var->name); vec_append(defname, ln, var->name); @@ -5621,13 +5528,6 @@ static bool parse_variable(parser_t *parser, ast_block *localblock, bool nofield for (i = 0; i < 3; ++i) { util_htset(vec_last(parser->variables), me[i]->name, (void*)(me[i])); - /* corrector */ - correct_add( - vec_last(parser->correct_variables), - &vec_last(parser->correct_variables_score), - me[i]->name - ); - vec_shrinkto(defname, prefix_len); ln = strlen(me[i]->name); vec_append(defname, ln, me[i]->name); @@ -6164,10 +6064,6 @@ parser_t *parser_create() parser->aliases = util_htnew(PARSER_HT_SIZE); - /* corrector */ - vec_push(parser->correct_variables, correct_trie_new()); - vec_push(parser->correct_variables_score, NULL); - empty_ctx.file = ""; empty_ctx.line = 0; empty_ctx.column = 0; @@ -6280,13 +6176,6 @@ static void parser_remove_ast(parser_t *parser) vec_free(parser->_blocklocals); vec_free(parser->_locals); - /* corrector */ - for (i = 0; i < vec_size(parser->correct_variables); ++i) { - correct_del(parser->correct_variables[i], parser->correct_variables_score[i]); - } - vec_free(parser->correct_variables); - vec_free(parser->correct_variables_score); - for (i = 0; i < vec_size(parser->_typedefs); ++i) ast_delete(parser->_typedefs[i]); vec_free(parser->_typedefs); diff --git a/parser.h b/parser.h index c6fe91d..73f6d03 100644 --- a/parser.h +++ b/parser.h @@ -92,10 +92,6 @@ struct parser_s { ht htglobals; ht *typedefs; - /* same as above but for the spelling corrector */ - correct_trie_t **correct_variables; - size_t ***correct_variables_score; /* vector of vector of size_t* */ - /* not to be used directly, we use the hash table */ ast_expression **_locals; size_t *_blocklocals; -- 2.39.2