X-Git-Url: https://git.xonotic.org/?a=blobdiff_plain;f=parse.c;h=0e1c6a511d84473c70187ebbd7cc0fa2f5e4f012;hb=d5dcb3aff72f4411c83ce97eb13006931fc186f6;hp=4ec25695aebc4de1cd703d489365123698c62eda;hpb=45cbef1b8f50648a066efd9d48e24a04b64975eb;p=xonotic%2Fgmqcc.git diff --git a/parse.c b/parse.c index 4ec2569..0e1c6a5 100644 --- a/parse.c +++ b/parse.c @@ -38,40 +38,37 @@ #define PARSE_TYPE_CONTINUE 5 #define PARSE_TYPE_RETURN 6 #define PARSE_TYPE_GOTO 7 -#define PARSE_TYPE_FOR 8 // extension -#define PARSE_TYPE_INT 9 // extension -#define PARSE_TYPE_BOOL 10 // extension -#define PARSE_TYPE_VOID 11 -#define PARSE_TYPE_STRING 12 -#define PARSE_TYPE_FLOAT 13 -#define PARSE_TYPE_VECTOR 14 -#define PARSE_TYPE_ENTITY 15 -#define PARSE_TYPE_LAND 16 -#define PARSE_TYPE_LOR 17 -#define PARSE_TYPE_LTEQ 18 -#define PARSE_TYPE_GTEQ 19 -#define PARSE_TYPE_EQEQ 20 -#define PARSE_TYPE_LNEQ 21 -#define PARSE_TYPE_COMMA 22 -#define PARSE_TYPE_LNOT 23 -#define PARSE_TYPE_STAR 24 -#define PARSE_TYPE_DIVIDE 25 -#define PARSE_TYPE_LPARTH 26 -#define PARSE_TYPE_RPARTH 27 -#define PARSE_TYPE_MINUS 28 -#define PARSE_TYPE_ADD 29 -#define PARSE_TYPE_EQUAL 30 -#define PARSE_TYPE_LSS 31 // left subscript -#define PARSE_TYPE_RSS 32 -#define PARSE_TYPE_LBS 33 // left bracket scope -#define PARSE_TYPE_RBS 34 // right bracket scope -#define PARSE_TYPE_ELIP 35 // ... -#define PARSE_TYPE_DOT 36 -#define PARSE_TYPE_LT 37 -#define PARSE_TYPE_GT 38 -#define PARSE_TYPE_BAND 39 -#define PARSE_TYPE_BOR 40 -#define PARSE_TYPE_DONE 41 // finished statement +#define PARSE_TYPE_FOR 8 +#define PARSE_TYPE_VOID 9 +#define PARSE_TYPE_STRING 10 +#define PARSE_TYPE_FLOAT 11 +#define PARSE_TYPE_VECTOR 12 +#define PARSE_TYPE_ENTITY 13 +#define PARSE_TYPE_LAND 14 +#define PARSE_TYPE_LOR 15 +#define PARSE_TYPE_LTEQ 16 +#define PARSE_TYPE_GTEQ 17 +#define PARSE_TYPE_EQEQ 18 +#define PARSE_TYPE_LNEQ 19 +#define PARSE_TYPE_COMMA 20 +#define PARSE_TYPE_LNOT 21 +#define PARSE_TYPE_STAR 22 +#define PARSE_TYPE_DIVIDE 23 +#define PARSE_TYPE_LPARTH 24 +#define PARSE_TYPE_RPARTH 25 +#define PARSE_TYPE_MINUS 26 +#define PARSE_TYPE_ADD 27 +#define PARSE_TYPE_EQUAL 28 +#define PARSE_TYPE_LBS 29 +#define PARSE_TYPE_RBS 30 +#define PARSE_TYPE_ELIP 31 +#define PARSE_TYPE_DOT 32 +#define PARSE_TYPE_LT 33 +#define PARSE_TYPE_GT 34 +#define PARSE_TYPE_BAND 35 +#define PARSE_TYPE_BOR 36 +#define PARSE_TYPE_DONE 37 +#define PARSE_TYPE_IDENT 38 /* * Adds a parse type to the parse tree, this is where all the hard @@ -85,82 +82,109 @@ parsetree = parsetree->next; \ } while (0) -static const char *const parse_punct[] = { +/* + * These are all the punctuation handled in the parser, these don't + * need tokens, they're already tokens. + */ +#if 0 "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*", "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...", - "." , "<" , ">" , "&" , "|" , NULL -}; + "." 
, "<" , ">" , "&" , "|" , +#endif -#define STORE(X) { \ - printf(X); \ - break; \ +#define STORE(X) { \ + printf(X); \ + break; \ } void parse_debug(struct parsenode *tree) { - while (tree && tree->next != NULL) { - /* skip blanks */ - if (tree->type == 0) { - tree = tree->next; - continue; - } - + while (tree) { switch (tree->type) { - case PARSE_TYPE_ADD: STORE("ADD \n"); - case PARSE_TYPE_BAND: STORE("BITAND \n"); - case PARSE_TYPE_BOR: STORE("BITOR \n"); - case PARSE_TYPE_BREAK: STORE("BREAK \n"); - case PARSE_TYPE_COMMA: STORE("SEPERATOR\n"); - case PARSE_TYPE_CONTINUE: STORE("CONTINUE\n"); - case PARSE_TYPE_DIVIDE: STORE("DIVIDE\n"); - case PARSE_TYPE_EQUAL: STORE("ASSIGNMENT\n"); - case PARSE_TYPE_GOTO: STORE("GOTO\n"); - case PARSE_TYPE_DOT: STORE("DOT\n"); - + case PARSE_TYPE_ADD: STORE("OPERATOR: ADD \n"); + case PARSE_TYPE_BAND: STORE("OPERATOR: BITAND \n"); + case PARSE_TYPE_BOR: STORE("OPERATOR: BITOR \n"); + case PARSE_TYPE_COMMA: STORE("OPERATOR: SEPERATOR\n"); + case PARSE_TYPE_DOT: STORE("OPERATOR: DOT\n"); + case PARSE_TYPE_DIVIDE: STORE("OPERATOR: DIVIDE\n"); + case PARSE_TYPE_EQUAL: STORE("OPERATOR: ASSIGNMENT\n"); + + case PARSE_TYPE_BREAK: STORE("STATEMENT: BREAK \n"); + case PARSE_TYPE_CONTINUE: STORE("STATEMENT: CONTINUE\n"); + case PARSE_TYPE_GOTO: STORE("STATEMENT: GOTO\n"); + case PARSE_TYPE_RETURN: STORE("STATEMENT: RETURN\n"); + case PARSE_TYPE_DONE: STORE("STATEMENT: DONE\n"); - case PARSE_TYPE_ELIP: STORE("DECLTYPE: VALIST\n"); - case PARSE_TYPE_ENTITY: STORE("DECLTYPE: ENTITY\n"); - case PARSE_TYPE_INT: STORE("DECLTYPE: INT\n"); - case PARSE_TYPE_FLOAT: STORE("DECLTYPE: FLOAT\n"); - case PARSE_TYPE_BOOL: STORE("DECLTYPE: BOOL\n"); + case PARSE_TYPE_VOID: STORE("DECLTYPE: VOID\n"); + case PARSE_TYPE_STRING: STORE("DECLTYPE: STRING\n"); + case PARSE_TYPE_ELIP: STORE("DECLTYPE: VALIST\n"); + case PARSE_TYPE_ENTITY: STORE("DECLTYPE: ENTITY\n"); + case PARSE_TYPE_FLOAT: STORE("DECLTYPE: FLOAT\n"); + case PARSE_TYPE_VECTOR: STORE("DECLTYPE: VECTOR\n"); + + case PARSE_TYPE_GT: STORE("TEST: GREATER THAN\n"); + case PARSE_TYPE_LT: STORE("TEST: LESS THAN\n"); + case PARSE_TYPE_GTEQ: STORE("TEST: GREATER THAN OR EQUAL\n"); + case PARSE_TYPE_LTEQ: STORE("TEST: LESS THAN OR EQUAL\n"); + case PARSE_TYPE_LNEQ: STORE("TEST: NOT EQUAL\n"); + case PARSE_TYPE_EQEQ: STORE("TEST: EQUAL-EQUAL\n"); + + case PARSE_TYPE_LBS: STORE("BLOCK: BEG\n"); + case PARSE_TYPE_RBS: STORE("BLOCK: END\n"); + case PARSE_TYPE_ELSE: STORE("BLOCK: ELSE\n"); + case PARSE_TYPE_IF: STORE("BLOCK: IF\n"); - case PARSE_TYPE_GT: STORE("TEST: GREATER THAN\n"); - case PARSE_TYPE_LT: STORE("TEST: LESS THAN\n"); - case PARSE_TYPE_GTEQ: STORE("TEST: GREATER THAN OR EQUAL\n"); - case PARSE_TYPE_LTEQ: STORE("TEST: LESS THAN OR EQUAL\n"); - case PARSE_TYPE_LNEQ: STORE("TEST: NOT EQUAL\n"); - case PARSE_TYPE_EQEQ: STORE("TEST: EQUAL-EQUAL\n"); + case PARSE_TYPE_LAND: STORE("LOGICAL: AND\n"); + case PARSE_TYPE_LNOT: STORE("LOGICAL: NOT\n"); + case PARSE_TYPE_LOR: STORE("LOGICAL: OR\n"); - case PARSE_TYPE_LBS: break; - case PARSE_TYPE_RBS: break; + case PARSE_TYPE_LPARTH: STORE("PARTH: BEG\n"); + case PARSE_TYPE_RPARTH: STORE("PARTH: END\n"); - case PARSE_TYPE_LAND: STORE("LOGICAL: AND\n"); - case PARSE_TYPE_LNOT: STORE("LOGICAL: NOT\n"); - case PARSE_TYPE_LOR: STORE("LOGICAL: OR\n"); - case PARSE_TYPE_LPARTH: STORE("PARTH: END\n"); - case PARSE_TYPE_RPARTH: STORE("PARTH: BEG\n"); + case PARSE_TYPE_WHILE: STORE("LOOP: WHILE\n"); + case PARSE_TYPE_FOR: STORE("LOOP: FOR\n"); + case PARSE_TYPE_DO: STORE("LOOP: DO\n"); - 
case PARSE_TYPE_FOR: STORE("LOOP: FOR\n"); - case PARSE_TYPE_DO: STORE("LOOP: DO\n"); - case PARSE_TYPE_ELSE: STORE("BLOCK: ELSE\n"); - case PARSE_TYPE_IF: STORE("BLOCK: IF\n"); + //case PARSE_TYPE_IDENT: STORE("IDENT: ???\n"); } tree = tree->next; } } /* - * This just skips the token and throws it in the parse tree for later - * checking / optimization / codegen, it doesn't do anything with it - * like syntax check for legal use -- like it should as it's a TODO item - * which is not implemented + * Performs a parse operation: This is a macro to prevent bugs, if the + * calls to lex_token are'nt exactly enough to feed to the end of the + * actual lexees for the current thing that is being parsed, the state + * of the next iteration in the creation of the parse tree will be wrong + * and everything will fail. */ -#define PARSE_TODO(X) { \ - token = lex_token(file); \ - PARSE_TREE_ADD(X); \ - break; \ +#define PARSE_PERFORM(X,C) { \ + token = lex_token(file); \ + { C } \ + while (token != '\n') { \ + token = lex_token(file); \ + } \ + PARSE_TREE_ADD(X); \ + break; \ } -int parse(struct lex_file *file) { +void parse_clear(struct parsenode *tree) { + if (!tree) return; + struct parsenode *temp = NULL; + while (tree != NULL) { + temp = tree; + tree = tree->next; + mem_d (temp); + } + + /* free any potential typedefs */ + typedef_clear(); +} + +/* + * Generates a parse tree out of the lexees generated by the lexer. This + * is where the tree is built. This is where valid check is performed. + */ +int parse_tree(struct lex_file *file) { struct parsenode *parsetree = NULL; struct parsenode *parseroot = NULL; @@ -173,8 +197,8 @@ int parse(struct lex_file *file) { parseroot = mem_a(sizeof(struct parsenode)); if (!parseroot) return error(ERROR_INTERNAL, "Ran out of memory", " "); - parsetree = parseroot; - parsetree = parseroot; + parsetree = parseroot; + parsetree->type = -1; /* not a valid type -- root element */ } int token = 0; @@ -185,35 +209,26 @@ int parse(struct lex_file *file) { token != ERROR_PREPRO && file->length >= 0) { switch (token) { case TOKEN_IF: - token = lex_token(file); while ((token == ' ' || token == '\n') && file->length >= 0) token = lex_token(file); - - if (token != '(') - error(ERROR_PARSE, "Expected `(` after if\n", ""); - PARSE_TREE_ADD(PARSE_TYPE_IF); break; case TOKEN_ELSE: token = lex_token(file); - while ((token == ' ' || token == '\n') && file->length >= 0) - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_ELSE); break; case TOKEN_FOR: - token = lex_token(file); + //token = lex_token(file); while ((token == ' ' || token == '\n') && file->length >= 0) token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_FOR); break; - - case LEX_IDENT: - token = lex_token(file); - printf("FOO: %s\n", file->lastok); - break; - + + /* + * This is a quick and easy way to do typedefs at parse time + * all power is in typedef_add(), in typedef.c. We handle + * the tokens accordingly here. + */ case TOKEN_TYPEDEF: { char *f = NULL; char *t = NULL; @@ -224,43 +239,70 @@ int parse(struct lex_file *file) { typedef_add(f, t); - /* free new strings */ - mem_d(f); - mem_d(t); + free(f); + free(t); + + while (token != '\n') + token = lex_token(file); break; } + + /* + * Returns are addable as-is, statement checking is during + * the actual parse tree check. 
+ */ + case TOKEN_RETURN: + PARSE_TREE_ADD(PARSE_TYPE_RETURN); + break; + //PARSE_PERFORM(PARSE_TYPE_RETURN, {}); + + case TOKEN_DO: PARSE_PERFORM(PARSE_TYPE_DO, {}); + case TOKEN_WHILE: PARSE_PERFORM(PARSE_TYPE_WHILE, {}); + case TOKEN_BREAK: PARSE_PERFORM(PARSE_TYPE_BREAK, {}); + case TOKEN_CONTINUE: PARSE_PERFORM(PARSE_TYPE_CONTINUE,{}); + case TOKEN_GOTO: PARSE_PERFORM(PARSE_TYPE_GOTO, {}); + case TOKEN_VOID: PARSE_PERFORM(PARSE_TYPE_VOID, {}); + case TOKEN_STRING: PARSE_PERFORM(PARSE_TYPE_STRING, {}); + case TOKEN_FLOAT: PARSE_PERFORM(PARSE_TYPE_FLOAT, {}); + case TOKEN_VECTOR: PARSE_PERFORM(PARSE_TYPE_VECTOR, {}); + case TOKEN_ENTITY: PARSE_PERFORM(PARSE_TYPE_ENTITY, {}); - case TOKEN_DO: PARSE_TODO(PARSE_TYPE_DO); - case TOKEN_WHILE: PARSE_TODO(PARSE_TYPE_WHILE); - case TOKEN_BREAK: PARSE_TODO(PARSE_TYPE_BREAK); - case TOKEN_CONTINUE: PARSE_TODO(PARSE_TYPE_CONTINUE); - case TOKEN_RETURN: PARSE_TODO(PARSE_TYPE_RETURN); - case TOKEN_GOTO: PARSE_TODO(PARSE_TYPE_GOTO); - case TOKEN_INT: PARSE_TODO(PARSE_TYPE_INT); - case TOKEN_VOID: PARSE_TODO(PARSE_TYPE_VOID); - case TOKEN_STRING: PARSE_TODO(PARSE_TYPE_STRING); - case TOKEN_FLOAT: PARSE_TODO(PARSE_TYPE_FLOAT); - case TOKEN_VECTOR: PARSE_TODO(PARSE_TYPE_VECTOR); - case TOKEN_ENTITY: PARSE_TODO(PARSE_TYPE_ENTITY); - - /* TODO: Preprocessor */ + /* + * From here down is all language punctuation: There is no + * need to actual create tokens from these because they're already + * tokenized as these individual tokens (which are in a special area + * of the ascii table which doesn't conflict with our other tokens + * which are higer than the ascii table.) + */ case '#': + /* + * Skip the preprocessor for now: We'll implement our own + * eventually. For now we need to make sure directives are + * not accidently tokenized. + */ token = lex_token(file); token = lex_token(file); + + /* skip all tokens to end of directive */ + while (token != '\n') + token = lex_token(file); + break; + + case '.': token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_DOT); + break; + + case '(': token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_LPARTH); + break; + case ')': token = lex_token(file); - token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_RPARTH); break; - /* - * From here down is all language punctuation: There is no - * need to actual create tokens from these because they're already - * tokenized as these individual tokens (which are in a special area - * of the ascii table which doesn't conflict with our other tokens - * which are higer than the ascii table. - */ case '&': /* & */ token = lex_token(file); if (token == '&') { /* && */ @@ -327,14 +369,6 @@ int parse(struct lex_file *file) { token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_ADD); break; - case '(': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LPARTH); - break; - case ')': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_RPARTH); - break; case '{': token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_LBS); @@ -343,10 +377,21 @@ int parse(struct lex_file *file) { token = lex_token(file); PARSE_TREE_ADD(PARSE_TYPE_RBS); break; + + /* + * TODO: Fix lexer to spit out ( ) as tokens, it seems the + * using '(' or ')' in parser doesn't work properly unless + * there are spaces before them to allow the lexer to properly + * seperate identifiers. -- otherwise it eats all of it. + */ + case LEX_IDENT: + token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_IDENT); + break; } } parse_debug(parseroot); lex_reset(file); - + parse_clear(parseroot); return 1; }
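
For reference, the new parse_tree()/parse_clear() pair above treats the parse tree as a singly linked list of struct parsenode entries; the only fields this diff touches are the `type` tag and the `next` pointer (the root node gets type -1 as a sentinel, PARSE_TREE_ADD appends at the tail, and parse_clear walks the list freeing each node). Below is a minimal standalone sketch of that representation. The struct layout, the node_add/node_clear helper names, and the use of malloc/free in place of gmqcc's mem_a/mem_d are illustrative assumptions for the sketch, not the project's actual definitions.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for gmqcc's parsenode: only the two fields
 * the diff relies on (type, next) are modeled here. */
struct parsenode {
    int type;
    struct parsenode *next;
};

/* Append a node at the tail, mirroring the PARSE_TREE_ADD pattern
 * (which advances parsetree to parsetree->next after linking). */
static struct parsenode *node_add(struct parsenode *tail, int type) {
    struct parsenode *node = malloc(sizeof *node);
    if (!node) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    node->type = type;
    node->next = NULL;
    tail->next = node;
    return node;
}

/* Walk and free the list, the same loop shape parse_clear uses
 * (free here stands in for mem_d). */
static void node_clear(struct parsenode *tree) {
    while (tree != NULL) {
        struct parsenode *temp = tree;
        tree = tree->next;
        free(temp);
    }
}

int main(void) {
    /* Root sentinel with type -1, as parse_tree() sets on its root element. */
    struct parsenode root = { -1, NULL };
    struct parsenode *tail = &root;

    /* Sample type tags taken from the renumbered defines above:
     * 24 = LPARTH, 25 = RPARTH, 29 = LBS, 30 = RBS. */
    int sample[] = { 24, 25, 29, 30 };
    size_t i;
    for (i = 0; i < sizeof sample / sizeof sample[0]; i++)
        tail = node_add(tail, sample[i]);

    /* Traverse the tree the same way parse_debug() does. */
    struct parsenode *it;
    for (it = root.next; it != NULL; it = it->next)
        printf("node type %d\n", it->type);

    /* Only the heap nodes are freed; the sentinel is stack-allocated here,
     * unlike the real parser where the root also comes from mem_a. */
    node_clear(root.next);
    return 0;
}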