X-Git-Url: https://git.xonotic.org/?a=blobdiff_plain;f=parse.c;h=2d52efdec35e8b14e6c4f4854b6ef669d33e3d9e;hb=e9b581adfbb079864f7ef5f198e238caaad80181;hp=3cfa59644329217db5d49f35985bd439e042fea9;hpb=0c824115cf6075f40ab5925f78df8b438b81d6bc;p=xonotic%2Fgmqcc.git diff --git a/parse.c b/parse.c index 3cfa596..2d52efd 100644 --- a/parse.c +++ b/parse.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 - * Dale Weiler + * Dale Weiler * * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in @@ -20,321 +20,285 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include -#include -#include #include "gmqcc.h" -/* - * These are not lexical tokens: These are parse tree types. Most people - * perform tokenizing on language punctuation which is wrong. That stuff - * is technically already tokenized, it just needs to be parsed into a tree - */ -#define PARSE_TYPE_DO 0 -#define PARSE_TYPE_ELSE 1 -#define PARSE_TYPE_IF 2 -#define PARSE_TYPE_WHILE 3 -#define PARSE_TYPE_BREAK 4 -#define PARSE_TYPE_CONTINUE 5 -#define PARSE_TYPE_RETURN 6 -#define PARSE_TYPE_GOTO 7 -#define PARSE_TYPE_FOR 8 // extension -#define PARSE_TYPE_VOID 9 -#define PARSE_TYPE_STRING 10 -#define PARSE_TYPE_FLOAT 11 -#define PARSE_TYPE_VECTOR 12 -#define PARSE_TYPE_ENTITY 13 -#define PARSE_TYPE_LAND 14 -#define PARSE_TYPE_LOR 15 -#define PARSE_TYPE_LTEQ 16 -#define PARSE_TYPE_GTEQ 17 -#define PARSE_TYPE_EQEQ 18 -#define PARSE_TYPE_LNEQ 19 -#define PARSE_TYPE_COMMA 20 -#define PARSE_TYPE_LNOT 21 -#define PARSE_TYPE_STAR 22 -#define PARSE_TYPE_DIVIDE 23 -#define PARSE_TYPE_LPARTH 24 -#define PARSE_TYPE_RPARTH 25 -#define PARSE_TYPE_MINUS 26 -#define PARSE_TYPE_ADD 27 -#define PARSE_TYPE_EQUAL 28 -#define PARSE_TYPE_LSS 29 // left subscript -#define PARSE_TYPE_RSS 30 -#define PARSE_TYPE_LBS 31 // left bracket scope -#define PARSE_TYPE_RBS 32 // right bracket scope -#define PARSE_TYPE_ELIP 33 // ... -#define PARSE_TYPE_DOT 34 -#define PARSE_TYPE_LT 35 -#define PARSE_TYPE_GT 36 -#define PARSE_TYPE_BAND 37 -#define PARSE_TYPE_BOR 38 -#define PARSE_TYPE_DONE 39 // finished statement - -/* - * Adds a parse type to the parse tree, this is where all the hard - * work actually begins. - */ -#define PARSE_TREE_ADD(X) \ - do { \ - parsetree->next = mem_a(sizeof(struct parsenode)); \ - parsetree->next->next = NULL; \ - parsetree->next->type = (X); \ - parsetree = parsetree->next; \ - } while (0) - -static const char *const parse_punct[] = { - "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*", - "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...", - "." 
, "<" , ">" , "&" , "|" , NULL -}; - -#define STORE(X) { \ - printf(X); \ - break; \ -} +/* compile-time constant for type constants */ +typedef struct { + char *name; + int type; + float value[3]; + char *string; /* string value if constant is string literal */ +} constant; +VECTOR_MAKE(constant, compile_constants); -void parse_debug(struct parsenode *tree) { - while (tree && tree->next != NULL) { - /* skip blanks */ - if (tree->type == 0) { - tree = tree->next; - continue; - } - - switch (tree->type) { - case PARSE_TYPE_ADD: STORE("OPERATOR: ADD \n"); - case PARSE_TYPE_BAND: STORE("OPERATOR: BITAND \n"); - case PARSE_TYPE_BOR: STORE("OPERATOR: BITOR \n"); - case PARSE_TYPE_COMMA: STORE("OPERATOR: SEPERATOR\n"); - case PARSE_TYPE_DOT: STORE("OPERATOR: DOT\n"); - case PARSE_TYPE_DIVIDE: STORE("OPERATOR: DIVIDE\n"); - case PARSE_TYPE_EQUAL: STORE("OPERATOR: ASSIGNMENT\n"); - - case PARSE_TYPE_BREAK: STORE("STATEMENT: BREAK \n"); - case PARSE_TYPE_CONTINUE: STORE("STATEMENT: CONTINUE\n"); - case PARSE_TYPE_GOTO: STORE("STATEMENT: GOTO\n"); - - - case PARSE_TYPE_ELIP: STORE("DECLTYPE: VALIST\n"); - case PARSE_TYPE_ENTITY: STORE("DECLTYPE: ENTITY\n"); - case PARSE_TYPE_FLOAT: STORE("DECLTYPE: FLOAT\n"); - - case PARSE_TYPE_GT: STORE("TEST: GREATER THAN\n"); - case PARSE_TYPE_LT: STORE("TEST: LESS THAN\n"); - case PARSE_TYPE_GTEQ: STORE("TEST: GREATER THAN OR EQUAL\n"); - case PARSE_TYPE_LTEQ: STORE("TEST: LESS THAN OR EQUAL\n"); - case PARSE_TYPE_LNEQ: STORE("TEST: NOT EQUAL\n"); - case PARSE_TYPE_EQEQ: STORE("TEST: EQUAL-EQUAL\n"); - - case PARSE_TYPE_LBS: STORE("BLOCK: BEG\n"); - case PARSE_TYPE_RBS: STORE("BLOCK: END\n"); - - case PARSE_TYPE_LAND: STORE("LOGICAL: AND\n"); - case PARSE_TYPE_LNOT: STORE("LOGICAL: NOT\n"); - case PARSE_TYPE_LOR: STORE("LOGICAL: OR\n"); - - case PARSE_TYPE_LPARTH: STORE("PARTH: BEG\n"); - case PARSE_TYPE_RPARTH: STORE("PARTH: END\n"); - - case PARSE_TYPE_FOR: STORE("LOOP: FOR\n"); - case PARSE_TYPE_DO: STORE("LOOP: DO\n"); - - case PARSE_TYPE_ELSE: STORE("BLOCK: ELSE\n"); - case PARSE_TYPE_IF: STORE("BLOCK: IF\n"); - } - tree = tree->next; - } +void compile_constant_debug() { + int iter = 0; + for(; iter < compile_constants_elements; iter++) { + constant *c = &compile_constants_data[iter]; + switch(c->type) { + case TYPE_FLOAT: printf("constant: %s FLOAT %f\n", c->name, c->value[0]); break; + case TYPE_VECTOR: printf("constant: %s VECTOR {%f,%f,%f}\n",c->name, c->value[0], c->value[1], c->value[2]); break; + case TYPE_STRING: printf("constant: %s STRING %s\n", c->name, c->string); break; + case TYPE_VOID: printf("constant: %s VOID %s\n", c->name, c->string); break; + } + } } /* - * This just skips the token and throws it in the parse tree for later - * checking / optimization / codegen, it doesn't do anything with it - * like syntax check for legal use -- like it should as it's a TODO item - * which is not implemented + * Generates a parse tree out of the lexees generated by the lexer. This + * is where the tree is built. This is where valid check is performed. */ -#define PARSE_TODO(X) { \ - token = lex_token(file); \ - PARSE_TREE_ADD(X); \ - break; \ -} - -int parse(struct lex_file *file) { - struct parsenode *parsetree = NULL; - struct parsenode *parseroot = NULL; - - /* - * Allocate memory for our parse tree: - * the parse tree is just a singly linked list which will contain - * all the data for code generation. 
- */ - if (!parseroot) { - parseroot = mem_a(sizeof(struct parsenode)); - if (!parseroot) - return error(ERROR_INTERNAL, "Ran out of memory", " "); - parsetree = parseroot; - parsetree = parseroot; - } - - int token = 0; - while ((token = lex_token(file)) != ERROR_LEX && \ - token != ERROR_COMPILER && \ - token != ERROR_INTERNAL && \ - token != ERROR_PARSE && \ - token != ERROR_PREPRO && file->length >= 0) { - switch (token) { - case TOKEN_IF: - token = lex_token(file); - while ((token == ' ' || token == '\n') && file->length >= 0) - token = lex_token(file); - - if (token != '(') - error(ERROR_PARSE, "Expected `(` after if\n", ""); - - PARSE_TREE_ADD(PARSE_TYPE_IF); - break; - case TOKEN_ELSE: - token = lex_token(file); - while ((token == ' ' || token == '\n') && file->length >= 0) - token = lex_token(file); - - PARSE_TREE_ADD(PARSE_TYPE_ELSE); - break; - case TOKEN_FOR: - token = lex_token(file); - while ((token == ' ' || token == '\n') && file->length >= 0) - token = lex_token(file); - - PARSE_TREE_ADD(PARSE_TYPE_FOR); - break; - - case LEX_IDENT: - token = lex_token(file); - printf("FOO: %s\n", file->lastok); - break; - - case TOKEN_TYPEDEF: { - char *f = NULL; - char *t = NULL; - token = lex_token(file); - token = lex_token(file); f = strdup(file->lastok); - token = lex_token(file); - token = lex_token(file); t = strdup(file->lastok); - - typedef_add(f, t); - - /* free new strings */ - mem_d(f); - mem_d(t); - break; - } - - - case TOKEN_DO: PARSE_TODO(PARSE_TYPE_DO); - case TOKEN_WHILE: PARSE_TODO(PARSE_TYPE_WHILE); - case TOKEN_BREAK: PARSE_TODO(PARSE_TYPE_BREAK); - case TOKEN_CONTINUE: PARSE_TODO(PARSE_TYPE_CONTINUE); - case TOKEN_RETURN: PARSE_TODO(PARSE_TYPE_RETURN); - case TOKEN_GOTO: PARSE_TODO(PARSE_TYPE_GOTO); - case TOKEN_VOID: PARSE_TODO(PARSE_TYPE_VOID); - case TOKEN_STRING: PARSE_TODO(PARSE_TYPE_STRING); - case TOKEN_FLOAT: PARSE_TODO(PARSE_TYPE_FLOAT); - case TOKEN_VECTOR: PARSE_TODO(PARSE_TYPE_VECTOR); - case TOKEN_ENTITY: PARSE_TODO(PARSE_TYPE_ENTITY); - - /* - * From here down is all language punctuation: There is no - * need to actual create tokens from these because they're already - * tokenized as these individual tokens (which are in a special area - * of the ascii table which doesn't conflict with our other tokens - * which are higer than the ascii table. 
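     *
     * Editor's note -- a hedged refactoring sketch, not in the source: the
     * same skip-whitespace loop is pasted after TOKEN_IF, TOKEN_ELSE and
     * TOKEN_FOR above.  A small hypothetical helper (name invented here)
     * could capture it once:
     *
     *     static int lex_skip_white(struct lex_file *file, int token) {
     *         while ((token == ' ' || token == '\n') && file->length >= 0)
     *             token = lex_token(file);
     *         return token;
     *     }
     *
     *     // usage at each keyword:  token = lex_skip_white(file, lex_token(file));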
- */ - case '&': /* & */ - token = lex_token(file); - if (token == '&') { /* && */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LAND); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_BAND); - break; - case '|': /* | */ - token = lex_token(file); - if (token == '|') { /* || */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LOR); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_BOR); - break; - case '!': - token = lex_token(file); - if (token == '=') { /* != */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LNEQ); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_LNOT); - break; - case '<': /* < */ - token = lex_token(file); - if (token == '=') { /* <= */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LTEQ); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_LT); - break; - case '>': /* > */ - token = lex_token(file); - if (token == '=') { /* >= */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_GTEQ); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_GT); - break; - case '=': - token = lex_token(file); - if (token == '=') { /* == */ - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_EQEQ); - break; - } - PARSE_TREE_ADD(PARSE_TYPE_EQUAL); - break; - case ';': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_DONE); - break; - case '-': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_MINUS); - break; - case '+': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_ADD); - break; - case '(': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LPARTH); - break; - case ')': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_RPARTH); - break; - case '{': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_LBS); - break; - case '}': - token = lex_token(file); - PARSE_TREE_ADD(PARSE_TYPE_RBS); - break; +int parse_gen(struct lex_file *file) { + int token = 0; + while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0) { + switch (token) { + case TOKEN_TYPEDEF: { + char *f; /* from */ + char *t; /* to */ + + token = lex_token(file); + token = lex_token(file); f = util_strdup(file->lastok); + token = lex_token(file); + token = lex_token(file); t = util_strdup(file->lastok); + + typedef_add(file, f, t); + mem_d(f); + mem_d(t); + + token = lex_token(file); + if (token == ' ') + token = lex_token(file); + + if (token != ';') + error(file, ERROR_PARSE, "Expected a `;` at end of typedef statement"); + + token = lex_token(file); + break; + } + + case TOKEN_VOID: goto fall; + case TOKEN_STRING: goto fall; + case TOKEN_VECTOR: goto fall; + case TOKEN_ENTITY: goto fall; + case TOKEN_FLOAT: goto fall; + { + fall:; + char *name = NULL; + int type = token; /* story copy */ + + /* skip over space */ + token = lex_token(file); + if (token == ' ') + token = lex_token(file); + + /* save name */ + name = util_strdup(file->lastok); + + /* skip spaces */ + token = lex_token(file); + if (token == ' ') + token = lex_token(file); + + if (token == ';') { + /* + * Definitions go to the defs table, they don't have + * any sort of data with them yet. 
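             *
             * Editor's note -- a hedged sketch of what that could look like,
             * mirroring the compile-time constant vector declared at the top
             * of this file.  The definition type and the compile_definitions
             * vector are hypothetical names, not yet in the source:
             *
             *     typedef struct {
             *         char *name;   // identifier being declared
             *         int   type;   // TOKEN_FLOAT, TOKEN_VECTOR, ...
             *     } definition;
             *     VECTOR_MAKE(definition, compile_definitions);
             *
             *     // inside this ';' branch:
             *     compile_definitions_add((definition){
             *         .name = util_strdup(name),
             *         .type = type
             *     });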
+ */ + } else if (token == '=') { + token = lex_token(file); + if (token == ' ') + token = lex_token(file); + + /* strings are in file->lastok */ + switch (type) { + case TOKEN_VOID: + error(file, ERROR_PARSE, "Cannot assign value to type void\n"); + + /* TODO: Validate (end quote), strip quotes for constant add, name constant */ + case TOKEN_STRING: + if (*file->lastok != '"') + error(file, ERROR_PARSE, "Expected a '\"' (quote) for string constant\n"); + /* add the compile-time constant */ + compile_constants_add((constant){ + .name = util_strdup(name), + .type = TYPE_STRING, + .value = {0,0,0}, + .string = util_strdup(file->lastok) + }); + break; + /* TODO: name constant, old qc vec literals, whitespace fixes, name constant */ + case TOKEN_VECTOR: { + float compile_calc_x = 0; + float compile_calc_y = 0; + float compile_calc_z = 0; + int compile_calc_d = 0; /* dot? */ + int compile_calc_s = 0; /* sign (-, +) */ + + char compile_data[1024]; + char *compile_eval = compile_data; + + if (token != '{') + error(file, ERROR_PARSE, "Expected initializer list {} for vector constant\n"); + + /* + * This parses a single vector element: x,y & z. This will handle all the + * complicated mechanics of a vector, and can be extended as well. This + * is a rather large macro, and is #undef'd after it's use below. + */ + #define PARSE_VEC_ELEMENT(NAME, BIT) \ + token = lex_token(file); \ + if (token == ' ') \ + token = lex_token(file); \ + if (token == '.') \ + compile_calc_d = 1; \ + if (!isdigit(token) && !compile_calc_d && token != '+' && token != '-') \ + error(file, ERROR_PARSE,"Invalid constant initializer element %c for vector, must be numeric\n", NAME); \ + if (token == '+') \ + compile_calc_s = '+'; \ + if (token == '-' && !compile_calc_s) \ + compile_calc_s = '-'; \ + while (isdigit(token) || token == '.' || token == '+' || token == '-') { \ + *compile_eval++ = token; \ + token = lex_token(file); \ + if (token == '.' && compile_calc_d) { \ + error(file, ERROR_PARSE, "Invalid constant initializer element %c for vector, must be numeric.\n", NAME); \ + token = lex_token(file); \ + } \ + if ((token == '-' || token == '+') && compile_calc_s) { \ + error(file, ERROR_PARSE, "Invalid constant initializer sign for vector element %c\n", NAME); \ + token = lex_token(file); \ + } \ + else if (token == '.' && !compile_calc_d) \ + compile_calc_d = 1; \ + else if (token == '-' && !compile_calc_s) \ + compile_calc_s = '-'; \ + else if (token == '+' && !compile_calc_s) \ + compile_calc_s = '+'; \ + } \ + if (token == ' ') \ + token = lex_token(file); \ + if (NAME != 'z') { \ + if (token != ',' && token != ' ') \ + error(file, ERROR_PARSE, "invalid constant initializer element %c for vector (missing spaces, or comma delimited list?)\n", NAME); \ + } else if (token != '}') { \ + error(file, ERROR_PARSE, "Expected `}` on end of constant initialization for vector\n"); \ + } \ + compile_calc_##BIT = atof(compile_data); \ + compile_calc_d = 0; \ + compile_calc_s = 0; \ + compile_eval = &compile_data[0]; \ + memset(compile_data, 0, sizeof(compile_data)) + + /* + * Parse all elements using the macro above. + * We must undef the macro afterwards. + */ + PARSE_VEC_ELEMENT('x', x); + PARSE_VEC_ELEMENT('y', y); + PARSE_VEC_ELEMENT('z', z); + #undef PARSE_VEC_ELEMENT + + /* Check for the semi-colon... 
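                 * Editor's note: as written, this vector branch expects a
                 * brace/comma initializer rather than the traditional QuakeC
                 * 'x y z' literal that the TODO above still lists as
                 * unhandled.  A hedged example of input that should make it
                 * through PARSE_VEC_ELEMENT's checks:
                 *
                 *     vector origin = {1.0, 2.5, -3.0};
                 *
                 * Each component is gathered character by character into
                 * compile_data and converted with atof(), so exponent
                 * notation would not get through.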
*/ + token = lex_token(file); + if (token == ' ') + token = lex_token(file); + if (token != ';') + error(file, ERROR_PARSE, "Expected `;` on end of constant initialization for vector\n"); + + /* add the compile-time constant */ + compile_constants_add((constant){ + .name = util_strdup(name), + .type = TYPE_VECTOR, + .value = { + [0] = compile_calc_x, + [1] = compile_calc_y, + [2] = compile_calc_z + }, + .string = NULL + }); + break; + } + + case TOKEN_ENTITY: + case TOKEN_FLOAT: /*TODO: validate, constant generation, name constant */ + if (!isdigit(token)) + error(file, ERROR_PARSE, "Expected numeric constant for float constant\n"); + compile_constants_add((constant){ + .name = util_strdup(name), + .type = TOKEN_FLOAT, + .value = {0,0,0}, + .string = NULL + }); + break; + } + } else if (token == '(') { + printf("FUNCTION ??\n"); + } + mem_d(name); + } + + /* + * From here down is all language punctuation: There is no + * need to actual create tokens from these because they're already + * tokenized as these individual tokens (which are in a special area + * of the ascii table which doesn't conflict with our other tokens + * which are higer than the ascii table.) + */ + case '#': + token = lex_token(file); /* skip '#' */ + if (token == ' ') + token = lex_token(file); + /* + * If we make it here we found a directive, the supported + * directives so far are #include. + */ + if (strncmp(file->lastok, "include", sizeof("include")) == 0) { + /* + * We only suport include " ", not <> like in C (why?) + * because the latter is silly. + */ + while (*file->lastok != '"' && token != '\n') + token = lex_token(file); + if (token == '\n') + return error(file, ERROR_PARSE, "Invalid use of include preprocessor directive: wanted #include \"file.h\"\n"); + + char *copy = util_strdup(file->lastok); + struct lex_file *next = lex_include(file, copy); + + if (!next) { + error(file, ERROR_INTERNAL, "Include subsystem failure\n"); + exit (-1); + } + compile_constants_add((constant) { + .name = "#include", + .type = TYPE_VOID, + .value = {0,0,0}, + .string = copy + }); + parse_gen(next); + mem_d (copy); + lex_close(next); + } + /* skip all tokens to end of directive */ + while (token != '\n') + token = lex_token(file); + break; + + case LEX_IDENT: + token = lex_token(file); + break; + } + } + compile_constant_debug(); + lex_reset(file); + /* free constants */ + { + size_t i = 0; + for (; i < compile_constants_elements; i++) { + mem_d(compile_constants_data[i].name); + mem_d(compile_constants_data[i].string); } + mem_d(compile_constants_data); } - parse_debug(parseroot); - lex_reset(file); - - return 1; -} + return 1; +}