X-Git-Url: https://git.xonotic.org/?a=blobdiff_plain;f=parse.c;h=0e1c6a511d84473c70187ebbd7cc0fa2f5e4f012;hb=bd5ba9e0fe0e9bdaea3f869f55d8f40651a2d0fb;hp=939831949df4b6c98107f140ce7c2e747c0050e0;hpb=e6a270a7e00abd3bcc1b587594e21ed75e341f43;p=xonotic%2Fgmqcc.git diff --git a/parse.c b/parse.c index 9398319..0e1c6a5 100644 --- a/parse.c +++ b/parse.c @@ -21,22 +21,186 @@ * SOFTWARE. */ #include +#include +#include #include "gmqcc.h" +/* + * These are not lexical tokens: These are parse tree types. Most people + * perform tokenizing on language punctuation which is wrong. That stuff + * is technically already tokenized, it just needs to be parsed into a tree + */ +#define PARSE_TYPE_DO 0 +#define PARSE_TYPE_ELSE 1 +#define PARSE_TYPE_IF 2 +#define PARSE_TYPE_WHILE 3 +#define PARSE_TYPE_BREAK 4 +#define PARSE_TYPE_CONTINUE 5 +#define PARSE_TYPE_RETURN 6 +#define PARSE_TYPE_GOTO 7 +#define PARSE_TYPE_FOR 8 +#define PARSE_TYPE_VOID 9 +#define PARSE_TYPE_STRING 10 +#define PARSE_TYPE_FLOAT 11 +#define PARSE_TYPE_VECTOR 12 +#define PARSE_TYPE_ENTITY 13 +#define PARSE_TYPE_LAND 14 +#define PARSE_TYPE_LOR 15 +#define PARSE_TYPE_LTEQ 16 +#define PARSE_TYPE_GTEQ 17 +#define PARSE_TYPE_EQEQ 18 +#define PARSE_TYPE_LNEQ 19 +#define PARSE_TYPE_COMMA 20 +#define PARSE_TYPE_LNOT 21 +#define PARSE_TYPE_STAR 22 +#define PARSE_TYPE_DIVIDE 23 +#define PARSE_TYPE_LPARTH 24 +#define PARSE_TYPE_RPARTH 25 +#define PARSE_TYPE_MINUS 26 +#define PARSE_TYPE_ADD 27 +#define PARSE_TYPE_EQUAL 28 +#define PARSE_TYPE_LBS 29 +#define PARSE_TYPE_RBS 30 +#define PARSE_TYPE_ELIP 31 +#define PARSE_TYPE_DOT 32 +#define PARSE_TYPE_LT 33 +#define PARSE_TYPE_GT 34 +#define PARSE_TYPE_BAND 35 +#define PARSE_TYPE_BOR 36 +#define PARSE_TYPE_DONE 37 +#define PARSE_TYPE_IDENT 38 + +/* + * Adds a parse type to the parse tree, this is where all the hard + * work actually begins. 
+ */ +#define PARSE_TREE_ADD(X) \ + do { \ + parsetree->next = mem_a(sizeof(struct parsenode)); \ + parsetree->next->next = NULL; \ + parsetree->next->type = (X); \ + parsetree = parsetree->next; \ + } while (0) -static const char *const parse_punct[] = { +/* + * These are all the punctuation handled in the parser, these don't + * need tokens, they're already tokens. + */ +#if 0 "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*", - "/" , "(" , "-" , "+" , "=" , "[" , "]", "{", "}", "...", - "." , "<" , ">" , "#" , "&" , "|" , "$", "@", ":", NULL - /* - * $,@,: are extensions: - * $ is a shorter `self`, so instead of self.frags, $.frags - * @ is a constructor - * : is compiler builtin functions - */ -}; + "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...", + "." , "<" , ">" , "&" , "|" , +#endif + +#define STORE(X) { \ + printf(X); \ + break; \ +} + +void parse_debug(struct parsenode *tree) { + while (tree) { + switch (tree->type) { + case PARSE_TYPE_ADD: STORE("OPERATOR: ADD \n"); + case PARSE_TYPE_BAND: STORE("OPERATOR: BITAND \n"); + case PARSE_TYPE_BOR: STORE("OPERATOR: BITOR \n"); + case PARSE_TYPE_COMMA: STORE("OPERATOR: SEPERATOR\n"); + case PARSE_TYPE_DOT: STORE("OPERATOR: DOT\n"); + case PARSE_TYPE_DIVIDE: STORE("OPERATOR: DIVIDE\n"); + case PARSE_TYPE_EQUAL: STORE("OPERATOR: ASSIGNMENT\n"); + + case PARSE_TYPE_BREAK: STORE("STATEMENT: BREAK \n"); + case PARSE_TYPE_CONTINUE: STORE("STATEMENT: CONTINUE\n"); + case PARSE_TYPE_GOTO: STORE("STATEMENT: GOTO\n"); + case PARSE_TYPE_RETURN: STORE("STATEMENT: RETURN\n"); + case PARSE_TYPE_DONE: STORE("STATEMENT: DONE\n"); + + case PARSE_TYPE_VOID: STORE("DECLTYPE: VOID\n"); + case PARSE_TYPE_STRING: STORE("DECLTYPE: STRING\n"); + case PARSE_TYPE_ELIP: STORE("DECLTYPE: VALIST\n"); + case PARSE_TYPE_ENTITY: STORE("DECLTYPE: ENTITY\n"); + case PARSE_TYPE_FLOAT: STORE("DECLTYPE: FLOAT\n"); + case PARSE_TYPE_VECTOR: STORE("DECLTYPE: VECTOR\n"); + + case PARSE_TYPE_GT: STORE("TEST: GREATER THAN\n"); + case 
PARSE_TYPE_LT: STORE("TEST: LESS THAN\n"); + case PARSE_TYPE_GTEQ: STORE("TEST: GREATER THAN OR EQUAL\n"); + case PARSE_TYPE_LTEQ: STORE("TEST: LESS THAN OR EQUAL\n"); + case PARSE_TYPE_LNEQ: STORE("TEST: NOT EQUAL\n"); + case PARSE_TYPE_EQEQ: STORE("TEST: EQUAL-EQUAL\n"); + + case PARSE_TYPE_LBS: STORE("BLOCK: BEG\n"); + case PARSE_TYPE_RBS: STORE("BLOCK: END\n"); + case PARSE_TYPE_ELSE: STORE("BLOCK: ELSE\n"); + case PARSE_TYPE_IF: STORE("BLOCK: IF\n"); + + case PARSE_TYPE_LAND: STORE("LOGICAL: AND\n"); + case PARSE_TYPE_LNOT: STORE("LOGICAL: NOT\n"); + case PARSE_TYPE_LOR: STORE("LOGICAL: OR\n"); + + case PARSE_TYPE_LPARTH: STORE("PARTH: BEG\n"); + case PARSE_TYPE_RPARTH: STORE("PARTH: END\n"); + + case PARSE_TYPE_WHILE: STORE("LOOP: WHILE\n"); + case PARSE_TYPE_FOR: STORE("LOOP: FOR\n"); + case PARSE_TYPE_DO: STORE("LOOP: DO\n"); + + //case PARSE_TYPE_IDENT: STORE("IDENT: ???\n"); + } + tree = tree->next; + } +} + +/* + * Performs a parse operation: This is a macro to prevent bugs, if the + * calls to lex_token aren't exactly enough to feed to the end of the + * actual lexees for the current thing that is being parsed, the state + * of the next iteration in the creation of the parse tree will be wrong + * and everything will fail. + */ +#define PARSE_PERFORM(X,C) { \ + token = lex_token(file); \ + { C } \ + while (token != '\n') { \ + token = lex_token(file); \ + } \ + PARSE_TREE_ADD(X); \ + break; \ +} + +void parse_clear(struct parsenode *tree) { + if (!tree) return; + struct parsenode *temp = NULL; + while (tree != NULL) { + temp = tree; + tree = tree->next; + mem_d (temp); + } + + /* free any potential typedefs */ + typedef_clear(); +} -int parse(struct lex_file *file) { +/* + * Generates a parse tree out of the lexees generated by the lexer. This + * is where the tree is built. This is where validity checking is performed. 
+ */ +int parse_tree(struct lex_file *file) { + struct parsenode *parsetree = NULL; + struct parsenode *parseroot = NULL; + + /* + * Allocate memory for our parse tree: + * the parse tree is just a singly linked list which will contain + * all the data for code generation. + */ + if (!parseroot) { + parseroot = mem_a(sizeof(struct parsenode)); + if (!parseroot) + return error(ERROR_INTERNAL, "Ran out of memory", " "); + parsetree = parseroot; + parsetree->type = -1; /* not a valid type -- root element */ + } + int token = 0; while ((token = lex_token(file)) != ERROR_LEX && \ token != ERROR_COMPILER && \ @@ -45,99 +209,189 @@ int parse(struct lex_file *file) { token != ERROR_PREPRO && file->length >= 0) { switch (token) { case TOKEN_IF: + while ((token == ' ' || token == '\n') && file->length >= 0) + token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_IF); + break; + case TOKEN_ELSE: token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_ELSE); + break; + case TOKEN_FOR: + //token = lex_token(file); while ((token == ' ' || token == '\n') && file->length >= 0) token = lex_token(file); - - if (token != '(') - error(ERROR_PARSE, "Expected `(` after if\n", ""); + PARSE_TREE_ADD(PARSE_TYPE_FOR); + break; + + /* + * This is a quick and easy way to do typedefs at parse time + * all power is in typedef_add(), in typedef.c. We handle + * the tokens accordingly here. + */ + case TOKEN_TYPEDEF: { + char *f = NULL; + char *t = NULL; + token = lex_token(file); + token = lex_token(file); f = strdup(file->lastok); + token = lex_token(file); + token = lex_token(file); t = strdup(file->lastok); + + typedef_add(f, t); + + free(f); + free(t); + + while (token != '\n') + token = lex_token(file); + break; + } + + /* + * Returns are addable as-is, statement checking is during + * the actual parse tree check. 
+ */ + case TOKEN_RETURN: + PARSE_TREE_ADD(PARSE_TYPE_RETURN); break; + //PARSE_PERFORM(PARSE_TYPE_RETURN, {}); - /* TODO: Preprocessor */ + + case TOKEN_DO: PARSE_PERFORM(PARSE_TYPE_DO, {}); + case TOKEN_WHILE: PARSE_PERFORM(PARSE_TYPE_WHILE, {}); + case TOKEN_BREAK: PARSE_PERFORM(PARSE_TYPE_BREAK, {}); + case TOKEN_CONTINUE: PARSE_PERFORM(PARSE_TYPE_CONTINUE,{}); + case TOKEN_GOTO: PARSE_PERFORM(PARSE_TYPE_GOTO, {}); + case TOKEN_VOID: PARSE_PERFORM(PARSE_TYPE_VOID, {}); + case TOKEN_STRING: PARSE_PERFORM(PARSE_TYPE_STRING, {}); + case TOKEN_FLOAT: PARSE_PERFORM(PARSE_TYPE_FLOAT, {}); + case TOKEN_VECTOR: PARSE_PERFORM(PARSE_TYPE_VECTOR, {}); + case TOKEN_ENTITY: PARSE_PERFORM(PARSE_TYPE_ENTITY, {}); + + /* + * From here down is all language punctuation: There is no + * need to actually create tokens from these because they're already + * tokenized as these individual tokens (which are in a special area + * of the ASCII table which doesn't conflict with our other tokens + * which are higher than the ASCII table.) + */ case '#': + /* + * Skip the preprocessor for now: We'll implement our own + * eventually. For now we need to make sure directives are + * not accidentally tokenized. 
+ */ token = lex_token(file); token = lex_token(file); + + /* skip all tokens to end of directive */ + while (token != '\n') + token = lex_token(file); + break; + + case '.': token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_DOT); + break; + + case '(': token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_LPARTH); + break; + case ')': token = lex_token(file); - token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_RPARTH); break; - /* PUNCTUATION PARSING BEGINS */ case '&': /* & */ token = lex_token(file); if (token == '&') { /* && */ token = lex_token(file); - printf("--> LOGICAL AND\n"); - goto end; + PARSE_TREE_ADD(PARSE_TYPE_LAND); + break; } - printf("--> BITWISE AND\n"); + PARSE_TREE_ADD(PARSE_TYPE_BAND); break; case '|': /* | */ token = lex_token(file); if (token == '|') { /* || */ token = lex_token(file); - printf("--> LOGICAL OR\n"); - goto end; + PARSE_TREE_ADD(PARSE_TYPE_LOR); + break; } - printf("--> BITWISE OR\n"); + PARSE_TREE_ADD(PARSE_TYPE_BOR); break; case '!': token = lex_token(file); if (token == '=') { /* != */ token = lex_token(file); - printf("--> LOGICAL NOT EQUAL\n"); - goto end; + PARSE_TREE_ADD(PARSE_TYPE_LNEQ); + break; } - printf("--> LOGICAL NOT\n"); + PARSE_TREE_ADD(PARSE_TYPE_LNOT); break; case '<': /* < */ token = lex_token(file); if (token == '=') { /* <= */ token = lex_token(file); - printf("--> LESS THAN OR EQUALL\n"); - goto end; + PARSE_TREE_ADD(PARSE_TYPE_LTEQ); + break; } - printf("--> LESS THAN\n"); + PARSE_TREE_ADD(PARSE_TYPE_LT); break; case '>': /* > */ token = lex_token(file); if (token == '=') { /* >= */ token = lex_token(file); - printf("--> GREATER THAN OR EQUAL\n"); - goto end; + PARSE_TREE_ADD(PARSE_TYPE_GTEQ); + break; } - printf("--> GREATER THAN\n"); + PARSE_TREE_ADD(PARSE_TYPE_GT); break; case '=': token = lex_token(file); if (token == '=') { /* == */ token = lex_token(file); - printf("--> COMPARISION \n"); - goto end; + PARSE_TREE_ADD(PARSE_TYPE_EQEQ); + break; } - printf("--> ASSIGNMENT\n"); + 
PARSE_TREE_ADD(PARSE_TYPE_EQUAL); break; case ';': token = lex_token(file); - printf("--> FINISHED STATMENT\n"); + PARSE_TREE_ADD(PARSE_TYPE_DONE); break; case '-': token = lex_token(file); - printf("--> SUBTRACTION EXPRESSION\n"); + PARSE_TREE_ADD(PARSE_TYPE_MINUS); break; case '+': token = lex_token(file); - printf("--> ASSIGNMENT EXPRRESSION\n"); + PARSE_TREE_ADD(PARSE_TYPE_ADD); + break; + case '{': + token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_LBS); + break; + case '}': + token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_RBS); + break; + + /* + * TODO: Fix lexer to spit out ( ) as tokens, it seems that + * using '(' or ')' in parser doesn't work properly unless + * there are spaces before them to allow the lexer to properly + * separate identifiers. -- otherwise it eats all of it. + */ + case LEX_IDENT: + token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_IDENT); break; } - end:; } + parse_debug(parseroot); lex_reset(file); - - // "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*", - //"/" , "(" , "-" , "+" , "=" , "[" , "]", "{", "}", "...", - //"." , "<" , ">" , "#" , "&" , "|" , "$", "@", ":", NULL - + parse_clear(parseroot); return 1; }