From: Wolfgang Bumiller Date: Sun, 22 Jul 2012 10:22:33 +0000 (+0200) Subject: Merge branch 'master' into blub/bc3 X-Git-Tag: 0.1-rc1~400 X-Git-Url: https://git.xonotic.org/?p=xonotic%2Fgmqcc.git;a=commitdiff_plain;h=2bfd27271444c99d02ac8de3135c6a7981b18286;hp=d4c408b4bd23e1516c6800cfd8e890dcc24c6bd6 Merge branch 'master' into blub/bc3 --- diff --git a/Makefile b/Makefile index a0d6cc9..9101d2b 100644 --- a/Makefile +++ b/Makefile @@ -16,18 +16,14 @@ ifeq ($(CC), clang) -Wno-format-nonliteral endif -OBJ = lex.o \ - error.o \ - parse.o \ - typedef.o \ +OBJ = \ util.o \ code.o \ - asm.o \ ast.o \ - ir.o + ir.o OBJ_A = test/ast-test.o OBJ_I = test/ir-test.o -OBJ_C = main.o +OBJ_C = main.o lexer.o parser.o #default is compiler only default: gmqcc diff --git a/ast.c b/ast.c index e173ed5..582cda6 100644 --- a/ast.c +++ b/ast.c @@ -170,6 +170,18 @@ ast_binary* ast_binary_new(lex_ctx ctx, int op, self->left = left; self->right = right; + if (op >= INSTR_EQ_F && op <= INSTR_GT) + self->expression.vtype = TYPE_FLOAT; + else if (op == INSTR_AND || op == INSTR_OR || + op == INSTR_BITAND || op == INSTR_BITOR) + self->expression.vtype = TYPE_FLOAT; + else if (op == INSTR_MUL_VF || op == INSTR_MUL_FV) + self->expression.vtype = TYPE_VECTOR; + else if (op == INSTR_MUL_V) + self->expression.vtype = TYPE_FLOAT; + else + self->expression.vtype = left->expression.vtype; + return self; } @@ -518,8 +530,10 @@ bool ast_global_codegen(ast_value *self, ir_builder *ir) } v = ir_builder_create_global(ir, self->name, self->expression.vtype); - if (!v) + if (!v) { + printf("ir_builder_create_global failed\n"); return false; + } if (self->isconst) { switch (self->expression.vtype) diff --git a/ast.h b/ast.h index 26edec9..1f448bc 100644 --- a/ast.h +++ b/ast.h @@ -107,6 +107,8 @@ struct ast_value_s const char *vstring; int ventity; ast_function *vfunc; + quaternion vquat; + matrix vmat; } constval; ir_value *ir_v; diff --git a/gmqcc.h b/gmqcc.h index d837480..58e12cf 100644 --- a/gmqcc.h +++ 
b/gmqcc.h @@ -179,95 +179,6 @@ typedef char int64_size_is_correct [sizeof(int64_t) == 8?1:-1]; typedef char uintptr_size_is_correct[sizeof(intptr_t) == sizeof(int*)?1:-1]; typedef char intptr_size_is_correct [sizeof(uintptr_t)== sizeof(int*)?1:-1]; -/*===================================================================*/ -/*============================ lex.c ================================*/ -/*===================================================================*/ -typedef struct lex_file_t { - FILE *file; /* file handler */ - char *name; /* name of file */ - char peek [5]; - char lastok[8192]; - - int last; /* last token */ - int current; /* current token */ - int length; /* bytes left to parse */ - int size; /* never changes (size of file) */ - int line; /* what line are we on? */ -} lex_file; - -/* - * It's important that this table never exceed 32 keywords, the ascii - * table starts at 33 (and we don't want conflicts) - */ -enum { - TOKEN_DO , - TOKEN_ELSE , - TOKEN_IF , - TOKEN_WHILE , - TOKEN_BREAK , - TOKEN_CONTINUE , - TOKEN_RETURN , - TOKEN_GOTO , - TOKEN_FOR , /* extension */ - TOKEN_TYPEDEF , /* extension */ - - /* ensure the token types are out of the */ - /* bounds of anyothers that may conflict. */ - TOKEN_FLOAT = 110, - TOKEN_VECTOR , - TOKEN_STRING , - TOKEN_ENTITY , - TOKEN_VOID -}; - -/* - * Lexer state constants, these are numbers for where exactly in - * the lexing the lexer is at. Or where it decided to stop if a lexer - * error occurs. 
These numbers must be > where the ascii-table ends - * and > the last type token which is TOKEN_VOID - */ -enum { - LEX_COMMENT = 1128, - LEX_CHRLIT , - LEX_STRLIT , - LEX_IDENT -}; - -int lex_token (lex_file *); -void lex_reset (lex_file *); -void lex_close (lex_file *); -void lex_parse (lex_file *); -lex_file *lex_include(lex_file *, const char *); -void lex_init (const char *, lex_file **); - -/*===================================================================*/ -/*========================== error.c ================================*/ -/*===================================================================*/ -#define ERROR_LEX (SHRT_MAX+0) -#define ERROR_PARSE (SHRT_MAX+1) -#define ERROR_INTERNAL (SHRT_MAX+2) -#define ERROR_COMPILER (SHRT_MAX+3) -#define ERROR_PREPRO (SHRT_MAX+4) -int error(lex_file *, int, const char *, ...); - -/*===================================================================*/ -/*========================== parse.c ================================*/ -/*===================================================================*/ -int parse_gen(lex_file *); - -/*===================================================================*/ -/*========================== typedef.c ==============================*/ -/*===================================================================*/ -typedef struct typedef_node_t { - char *name; -} typedef_node; - -void typedef_init(); -void typedef_clear(); -typedef_node *typedef_find(const char *); -int typedef_add (lex_file *file, const char *, const char *); - - /*===================================================================*/ /*=========================== util.c ================================*/ /*===================================================================*/ @@ -364,12 +275,16 @@ enum { TYPE_FIELD , TYPE_FUNCTION , TYPE_POINTER , - /* TYPE_INTEGER , */ + TYPE_INTEGER , + TYPE_QUATERNION , + TYPE_MATRIX , TYPE_VARIANT , TYPE_COUNT }; +extern const char *type_name[TYPE_COUNT]; + extern size_t 
type_sizeof[TYPE_COUNT]; extern uint16_t type_store_instr[TYPE_COUNT]; /* could use type_store_instr + INSTR_STOREP_F - INSTR_STORE_F @@ -463,8 +378,8 @@ enum { INSTR_DONE, INSTR_MUL_F, INSTR_MUL_V, - INSTR_MUL_FV, INSTR_MUL_VF, + INSTR_MUL_FV, INSTR_DIV_F, INSTR_ADD_F, INSTR_ADD_V, @@ -527,6 +442,23 @@ enum { INSTR_BITAND, INSTR_BITOR, +/* warning: will be reordered */ + INSTR_MUL_Q, + INSTR_MUL_QF, + INSTR_MUL_M, + INSTR_MUL_MF, + INSTR_EQ_Q, + INSTR_EQ_M, + INSTR_NE_Q, + INSTR_NE_M, + INSTR_LOAD_Q, + INSTR_LOAD_M, + INSTR_STORE_Q, + INSTR_STORE_M, + INSTR_STOREP_Q, + INSTR_STOREP_M, + INSTR_INV_Q, + INSTR_INV_M, /* * Virtual instructions used by the assembler * keep at the end but before virtual instructions @@ -582,8 +514,8 @@ static const struct { { "DONE" , 1, 4 }, { "MUL_F" , 3, 5 }, { "MUL_V" , 3, 5 }, - { "MUL_FV" , 3, 6 }, { "MUL_VF" , 3, 6 }, + { "MUL_FV" , 3, 6 }, { "DIV" , 0, 3 }, { "ADD_F" , 3, 5 }, { "ADD_V" , 3, 5 }, @@ -645,6 +577,24 @@ static const struct { { "OR" , 0, 2 }, { "BITAND" , 0, 6 }, { "BITOR" , 0, 5 }, + + { "MUL_Q" , 3, 5 }, + { "MUL_QF" , 3, 6 }, + { "MUL_M" , 3, 5 }, + { "MUL_MF" , 3, 6 }, + { "EQ_Q" , 0, 4 }, + { "EQ_M" , 0, 4 }, + { "NE_Q" , 0, 4 }, + { "NE_M" , 0, 4 }, + { "FIELD_Q" , 0, 7 }, + { "FIELD_M" , 0, 7 }, + { "STORE_Q" , 0, 7 }, + { "STORE_M" , 0, 7 }, + { "STOREP_Q" , 0, 8 }, + { "STOREP_M" , 0, 8 }, + { "INV_Q" , 0, 5 }, + { "INV_M" , 0, 5 }, + { "END" , 0, 3 } /* virtual assembler instruction */ }; @@ -769,6 +719,16 @@ void Tself##_##mem##_clear(Tself *self) \ (owner)->mem##_alloc = 0; \ } +#define MEM_VECTOR_MOVE(from, mem, to, tm) \ +{ \ + (to)->tm = (from)->mem; \ + (to)->tm##_count = (from)->mem##_count; \ + (to)->tm##_alloc = (from)->mem##_alloc; \ + (from)->mem = NULL; \ + (from)->mem##_count = 0; \ + (from)->mem##_alloc = 0; \ +} + #define MEM_VEC_FUNCTIONS(Tself, Twhat, mem) \ _MEM_VEC_FUN_REMOVE(Tself, Twhat, mem) \ _MEM_VEC_FUN_ADD(Tself, Twhat, mem) @@ -790,6 +750,14 @@ typedef struct { float x, y, z; } 
vector; +typedef float matrix[4][4]; /* OpenGL layout */ +typedef float quaternion[4]; /* order: x, y, z, w */ +#define MATRIX(axis, elem) ((4*(axis)) + (elem)) +#define QUAT_X 0 +#define QUAT_Y 1 +#define QUAT_Z 2 +#define QUAT_W 3 + /* * A shallow copy of a lex_file to remember where which ast node * came from. diff --git a/ir.c b/ir.c index 2a5018b..7f9cdb1 100644 --- a/ir.c +++ b/ir.c @@ -29,6 +29,23 @@ * Type sizes used at multiple points in the IR codegen */ +const char *type_name[TYPE_COUNT] = { + "void", + "string", + "float", + "vector", + "entity", + "field", + "function", + "pointer", +#if 0 + "integer", +#endif + "quaternion", + "matrix", + "variant" +}; + size_t type_sizeof[TYPE_COUNT] = { 1, /* TYPE_VOID */ 1, /* TYPE_STRING */ @@ -41,7 +58,9 @@ size_t type_sizeof[TYPE_COUNT] = { #if 0 1, /* TYPE_INTEGER */ #endif - 3, /* TYPE_VARIANT */ + 4, /* TYPE_QUATERNION */ + 16, /* TYPE_MATRIX */ + 16, /* TYPE_VARIANT */ }; uint16_t type_store_instr[TYPE_COUNT] = { @@ -54,9 +73,12 @@ uint16_t type_store_instr[TYPE_COUNT] = { INSTR_STORE_FNC, INSTR_STORE_ENT, /* should use I */ #if 0 - INSTR_STORE_ENT, /* integer type */ + INSTR_STORE_I, /* integer type */ #endif - INSTR_STORE_V, /* variant, should never be accessed */ + INSTR_STORE_Q, + INSTR_STORE_M, + + INSTR_STORE_M, /* variant, should never be accessed */ }; uint16_t type_storep_instr[TYPE_COUNT] = { @@ -71,7 +93,10 @@ uint16_t type_storep_instr[TYPE_COUNT] = { #if 0 INSTR_STOREP_ENT, /* integer type */ #endif - INSTR_STOREP_V, /* variant, should never be accessed */ + INSTR_STOREP_Q, + INSTR_STOREP_M, + + INSTR_STOREP_M, /* variant, should never be accessed */ }; MEM_VEC_FUNCTIONS(ir_value_vector, ir_value*, v) @@ -181,9 +206,14 @@ ir_value* ir_builder_get_global(ir_builder *self, const char *name) ir_value* ir_builder_create_global(ir_builder *self, const char *name, int vtype) { - ir_value *ve = ir_builder_get_global(self, name); - if (ve) { - return NULL; + ir_value *ve; + + if (name && name[0] != '#') 
+ { + ve = ir_builder_get_global(self, name); + if (ve) { + return NULL; + } } ve = ir_value_var(name, store_global, vtype); @@ -574,6 +604,24 @@ bool ir_value_set_vector(ir_value *self, vector v) return true; } +bool ir_value_set_quaternion(ir_value *self, quaternion v) +{ + if (self->vtype != TYPE_QUATERNION) + return false; + memcpy(&self->constval.vquat, v, sizeof(self->constval.vquat)); + self->isconst = true; + return true; +} + +bool ir_value_set_matrix(ir_value *self, matrix v) +{ + if (self->vtype != TYPE_MATRIX) + return false; + memcpy(&self->constval.vmat, v, sizeof(self->constval.vmat)); + self->isconst = true; + return true; +} + bool ir_value_set_string(ir_value *self, const char *str) { if (self->vtype != TYPE_STRING) @@ -877,7 +925,6 @@ bool ir_block_create_storep(ir_block *self, ir_value *target, ir_value *what) vtype = what->vtype; op = type_storep_instr[vtype]; - return ir_block_create_store_op(self, op, target, what); } @@ -1235,6 +1282,8 @@ ir_value* ir_block_create_load_from_ent(ir_block *self, const char *label, ir_va case TYPE_POINTER: op = INSTR_LOAD_I; break; case TYPE_INTEGER: op = INSTR_LOAD_I; break; #endif + case TYPE_QUATERNION: op = INSTR_LOAD_Q; break; + case TYPE_MATRIX: op = INSTR_LOAD_M; break; default: return NULL; } @@ -1338,12 +1387,22 @@ ir_value* ir_block_create_mul(ir_block *self, case TYPE_VECTOR: op = INSTR_MUL_V; break; + case TYPE_QUATERNION: + op = INSTR_MUL_Q; + break; + case TYPE_MATRIX: + op = INSTR_MUL_M; + break; } } else { if ( (l == TYPE_VECTOR && r == TYPE_FLOAT) ) op = INSTR_MUL_VF; else if ( (l == TYPE_FLOAT && r == TYPE_VECTOR) ) op = INSTR_MUL_FV; + else if ( (l == TYPE_QUATERNION && r == TYPE_FLOAT) ) + op = INSTR_MUL_QF; + else if ( (l == TYPE_MATRIX && r == TYPE_FLOAT) ) + op = INSTR_MUL_MF; #if 0 else if ( (l == TYPE_VECTOR && r == TYPE_INTEGER) ) op = INSTR_MUL_VI; @@ -2415,6 +2474,8 @@ static bool ir_builder_gen_global(ir_builder *self, ir_value *global) return global->code.globaladdr >= 0; } case 
TYPE_VECTOR: + case TYPE_QUATERNION: + case TYPE_MATRIX: { size_t d; if (code_defs_add(def) < 0) @@ -2629,6 +2690,7 @@ void ir_value_dump(ir_value* v, int (*oprintf)(const char*, ...)) { if (v->isconst) { switch (v->vtype) { + default: case TYPE_VOID: oprintf("(void)"); break; diff --git a/ir.h b/ir.h index fb0f699..6fdeb89 100644 --- a/ir.h +++ b/ir.h @@ -55,6 +55,8 @@ typedef struct ir_value_s { char *vstring; struct ir_value_s *vpointer; struct ir_function_s *vfunc; + quaternion vquat; + matrix vmat; } constval; struct { @@ -89,6 +91,8 @@ bool GMQCC_WARN ir_value_set_string(ir_value*, const char *s); bool GMQCC_WARN ir_value_set_vector(ir_value*, vector v); /*bool ir_value_set_pointer_v(ir_value*, ir_value* p); */ /*bool ir_value_set_pointer_i(ir_value*, int i); */ +bool GMQCC_WARN ir_value_set_quaternion(ir_value*, quaternion v); +bool GMQCC_WARN ir_value_set_matrix(ir_value*, matrix v); MEM_VECTOR_PROTO(ir_value, ir_life_entry_t, life); /* merge an instruction into the life-range */ diff --git a/lexer.c b/lexer.c new file mode 100644 index 0000000..ebbe188 --- /dev/null +++ b/lexer.c @@ -0,0 +1,641 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> + +#include "gmqcc.h" +#include "lexer.h" + +MEM_VEC_FUNCTIONS(token, char, value) + +void lexerror(lex_file *lex, const char *fmt, ...)
+{ + va_list ap; + + if (lex) + printf("error %s:%lu: ", lex->name, (unsigned long)lex->sline); + else + printf("error: "); + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + + printf("\n"); +} + +token* token_new() +{ + token *tok = (token*)mem_a(sizeof(token)); + if (!tok) + return NULL; + memset(tok, 0, sizeof(*tok)); + return tok; +} + +void token_delete(token *self) +{ + if (self->next && self->next->prev == self) + self->next->prev = self->prev; + if (self->prev && self->prev->next == self) + self->prev->next = self->next; + MEM_VECTOR_CLEAR(self, value); + mem_d(self); +} + +token* token_copy(const token *cp) +{ + token* self = token_new(); + if (!self) + return NULL; + /* copy the value */ + self->value_alloc = cp->value_count + 1; + self->value_count = cp->value_count; + self->value = (char*)mem_a(self->value_alloc); + if (!self->value) { + mem_d(self); + return NULL; + } + memcpy(self->value, cp->value, cp->value_count); + self->value[self->value_alloc-1] = 0; + + /* rest */ + self->ctx = cp->ctx; + self->ttype = cp->ttype; + memcpy(&self->constval, &cp->constval, sizeof(self->constval)); + return self; +} + +void token_delete_all(token *t) +{ + token *n; + + do { + n = t->next; + token_delete(t); + t = n; + } while(t); +} + +token* token_copy_all(const token *cp) +{ + token *cur; + token *out; + + out = cur = token_copy(cp); + if (!out) + return NULL; + + while (cp->next) { + cp = cp->next; + cur->next = token_copy(cp); + if (!cur->next) { + token_delete_all(out); + return NULL; + } + cur->next->prev = cur; + cur = cur->next; + } + + return out; +} + +lex_file* lex_open(const char *file) +{ + lex_file *lex; + FILE *in = fopen(file, "rb"); + + if (!in) { + lexerror(NULL, "open failed: '%s'\n", file); + return NULL; + } + + lex = (lex_file*)mem_a(sizeof(*lex)); + if (!lex) { + fclose(in); + lexerror(NULL, "out of memory\n"); + return NULL; + } + + memset(lex, 0, sizeof(*lex)); + + lex->file = in; + lex->name = util_strdup(file); + lex->line = 1; /* 
we start counting at 1 */ + + lex->peekpos = 0; + + return lex; +} + +void lex_close(lex_file *lex) +{ + if (lex->file) + fclose(lex->file); + if (lex->tok) + token_delete(lex->tok); + mem_d(lex->name); + mem_d(lex); +} + +/* Get or put-back data + * The following two functions do NOT understand what kind of data they + * are working on. + * They are merely wrapping get/put in order to count line numbers. + */ +static int lex_getch(lex_file *lex) +{ + int ch; + + if (lex->peekpos) { + lex->peekpos--; + if (lex->peek[lex->peekpos] == '\n') + lex->line++; + return lex->peek[lex->peekpos]; + } + + ch = fgetc(lex->file); + if (ch == '\n') + lex->line++; + return ch; +} + +static void lex_ungetch(lex_file *lex, int ch) +{ + lex->peek[lex->peekpos++] = ch; + if (ch == '\n') + lex->line--; +} + +/* classify characters + * some additions to the is*() functions of ctype.h + */ + +/* Idents are alphanumeric, but they start with alpha or _ */ +static bool isident_start(int ch) +{ + return isalpha(ch) || ch == '_'; +} + +static bool isident(int ch) +{ + return isident_start(ch) || isdigit(ch); +} + +/* isxdigit_only is used when we already know it's not a digit + * and want to see if it's a hex digit anyway. + */ +static bool isxdigit_only(int ch) +{ + return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); +} + +/* Skip whitespace and comments and return the first + * non-white character. + * As this makes use of the above getch() ungetch() functions, + * we don't need to care at all about line numbering anymore. + * + * In theory, this function should only be used at the beginning + * of lexing, or when we *know* the next character is part of the token. + * Otherwise, if the parser throws an error, the linenumber may not be + * the line of the error, but the line of the next token AFTER the error. + * + * This is currently only problematic when using c-like string-continuation, + * since comments and whitespaces are allowed between 2 such strings.
+ * Example: +printf( "line one\n" +// A comment + "A continuation of the previous string" +// This line is skipped + , foo); + + * In this case, if the parser decides it didn't actually want a string, + * and uses lex->line to print an error, it will show the ', foo);' line's + * linenumber. + * + * On the other hand, the parser is supposed to remember the line of the next + * token's beginning. In this case we would want skipwhite() to be called + * AFTER reading a token, so that the parser, before reading the NEXT token, + * doesn't store the *comment's* linenumber, but the actual token's linenumber. + * + * THIS SOLUTION + * here is to store the line of the first character after skipping + * the initial whitespace in lex->sline, this happens in lex_do. + */ +static int lex_skipwhite(lex_file *lex) +{ + int ch = 0; + + do + { + ch = lex_getch(lex); + while (ch != EOF && isspace(ch)) ch = lex_getch(lex); + + if (ch == '/') { + ch = lex_getch(lex); + if (ch == '/') + { + /* one line comment */ + ch = lex_getch(lex); + + /* check for special: '/', '/', '*', '/' */ + if (ch == '*') { + ch = lex_getch(lex); + if (ch == '/') { + ch = ' '; + continue; + } + } + + while (ch != EOF && ch != '\n') { + ch = lex_getch(lex); + } + continue; + } + if (ch == '*') + { + /* multiline comment */ + while (ch != EOF) + { + ch = lex_getch(lex); + if (ch == '*') { + ch = lex_getch(lex); + if (ch == '/') { + ch = lex_getch(lex); + break; + } + } + } + if (ch == '/') /* allow *//* direct following comment */ + { + lex_ungetch(lex, ch); + ch = ' '; /* cause TRUE in the isspace check */ + } + continue; + } + /* Otherwise roll back to the slash and break out of the loop */ + lex_ungetch(lex, ch); + ch = '/'; + break; + } + } while (ch != EOF && isspace(ch)); + + return ch; +} + +/* Append a character to the token buffer */ +static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch) +{ + if (!token_value_add(lex->tok, ch)) { + lexerror(lex, "out of memory"); + return false; + } + return
true; +} + +/* Append a trailing null-byte */ +static bool GMQCC_WARN lex_endtoken(lex_file *lex) +{ + if (!token_value_add(lex->tok, 0)) { + lexerror(lex, "out of memory"); + return false; + } + lex->tok->value_count--; + return true; +} + +/* Get a token */ +static bool GMQCC_WARN lex_finish_ident(lex_file *lex) +{ + int ch; + + ch = lex_getch(lex); + while (ch != EOF && isident(ch)) + { + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + ch = lex_getch(lex); + } + + /* last ch was not an ident ch: */ + lex_ungetch(lex, ch); + + return true; +} + +static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote) +{ + int ch = 0; + + while (ch != EOF) + { + ch = lex_getch(lex); + if (ch == quote) + return TOKEN_STRINGCONST; + + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + + /* as lexer we only care about \" to not terminate the string prematurely */ + if (ch == '\\') { + ch = lex_getch(lex); + if (ch == EOF) { + lexerror(lex, "unexpected end of file"); + lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */ + return (lex->tok->ttype = TOKEN_ERROR); + } + /* so we just add the next character no matter what it actually is */ + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + } + } + lexerror(lex, "unexpected end of file within string constant"); + lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */ + return (lex->tok->ttype = TOKEN_ERROR); +} + +static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) +{ + bool ishex = false; + + int ch = lastch; + + /* parse a number... */ + lex->tok->ttype = TOKEN_INTCONST; + + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + + ch = lex_getch(lex); + if (ch != '.' 
&& !isdigit(ch)) + { + if (lastch != '0' || ch != 'x') + { + /* end of the number or EOF */ + lex_ungetch(lex, ch); + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + + lex->tok->constval.i = lastch - '0'; + return lex->tok->ttype; + } + + ishex = true; + } + + /* EOF would have been caught above */ + + if (ch != '.') + { + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + ch = lex_getch(lex); + while (isdigit(ch) || (ishex && isxdigit_only(ch))) + { + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + ch = lex_getch(lex); + } + } + /* NOT else, '.' can come from above as well */ + if (ch == '.' && !ishex) + { + /* Allow floating comma in non-hex mode */ + lex->tok->ttype = TOKEN_FLOATCONST; + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + + /* continue digits-only */ + ch = lex_getch(lex); + while (isdigit(ch)) + { + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + ch = lex_getch(lex); + } + } + /* put back the last character */ + /* but do not put back the trailing 'f' or a float */ + if (lex->tok->ttype == TOKEN_FLOATCONST && ch == 'f') + ch = lex_getch(lex); + + /* generally we don't want words to follow numbers: */ + if (isident(ch)) { + lexerror(lex, "unexpected trailing characters after number"); + return (lex->tok->ttype = TOKEN_ERROR); + } + lex_ungetch(lex, ch); + + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + if (lex->tok->ttype == TOKEN_FLOATCONST) + lex->tok->constval.f = strtod(lex->tok->value, NULL); + else + lex->tok->constval.i = strtol(lex->tok->value, NULL, 0); + return lex->tok->ttype; +} + +int lex_do(lex_file *lex) +{ + int ch, nextch; + + if (lex->tok) + token_delete(lex->tok); + lex->tok = token_new(); + if (!lex->tok) + return TOKEN_FATAL; + + ch = lex_skipwhite(lex); + lex->sline = lex->line; + lex->tok->ctx.line = lex->sline; + lex->tok->ctx.file = lex->name; + + if (ch == EOF) + return (lex->tok->ttype = 
TOKEN_EOF); + + /* single-character tokens */ + switch (ch) + { + case ';': + case '(': + case ')': + case '{': + case '}': + case '[': + case ']': + + case ',': + + case '#': + + return (lex->tok->ttype = ch); + default: + break; + } + + if (lex->flags.noops) + { + /* Detect characters early which are normally + * operators OR PART of an operator. + */ + switch (ch) + { + case '+': + case '-': + case '*': + case '/': + case '<': + case '>': + case '=': + case '&': + case '|': + case '^': + case '~': + return ch; + default: + break; + } + } + + if (ch == '+' || ch == '-' || /* ++, --, +=, -= and -> as well! */ + ch == '>' || ch == '<' || /* <<, >>, <=, >= */ + ch == '=' || /* == */ + ch == '&' || ch == '|') /* &&, ||, &=, |= */ + { + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + + nextch = lex_getch(lex); + if (nextch == ch || nextch == '=') { + if (!lex_tokench(lex, nextch)) + return (lex->tok->ttype = TOKEN_FATAL); + } else if (ch == '-' && nextch == '>') { + if (!lex_tokench(lex, nextch)) + return (lex->tok->ttype = TOKEN_FATAL); + } else + lex_ungetch(lex, nextch); + + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok->ttype = TOKEN_OPERATOR); + } + + if (ch == '^' || ch == '~' || ch == '!') + { + if (!lex_tokench(lex, ch) || + !lex_endtoken(lex)) + { + return (lex->tok->ttype = TOKEN_FATAL); + } + return (lex->tok->ttype = TOKEN_OPERATOR); + } + + if (ch == '*' || ch == '/') /* *=, /= */ + { + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + + nextch = lex_getch(lex); + if (nextch == '=') { + if (!lex_tokench(lex, nextch)) + return (lex->tok->ttype = TOKEN_FATAL); + } else + lex_ungetch(lex, nextch); + + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok->ttype = TOKEN_OPERATOR); + } + + if (isident_start(ch)) + { + const char *v; + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + if (!lex_finish_ident(lex)) { + /* error? 
*/ + return (lex->tok->ttype = TOKEN_ERROR); + } + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + lex->tok->ttype = TOKEN_IDENT; + + v = lex->tok->value; + if (!strcmp(v, "void")) { + lex->tok->ttype = TOKEN_TYPENAME; + lex->tok->constval.t = TYPE_VOID; + } else if (!strcmp(v, "int")) { + lex->tok->ttype = TOKEN_TYPENAME; + lex->tok->constval.t = TYPE_INTEGER; + } else if (!strcmp(v, "float")) { + lex->tok->ttype = TOKEN_TYPENAME; + lex->tok->constval.t = TYPE_FLOAT; + } else if (!strcmp(v, "string")) { + lex->tok->ttype = TOKEN_TYPENAME; + lex->tok->constval.t = TYPE_STRING; + } else if (!strcmp(v, "entity")) { + lex->tok->ttype = TOKEN_TYPENAME; + lex->tok->constval.t = TYPE_ENTITY; + } else if (!strcmp(v, "vector")) { + lex->tok->ttype = TOKEN_TYPENAME; + lex->tok->constval.t = TYPE_VECTOR; + } else if (!strcmp(v, "for") || + !strcmp(v, "while") || + !strcmp(v, "do") || + !strcmp(v, "var") || + !strcmp(v, "const")) + lex->tok->ttype = TOKEN_KEYWORD; + + return lex->tok->ttype; + } + + if (ch == '"') + { + lex->tok->ttype = lex_finish_string(lex, '"'); + while (lex->tok->ttype == TOKEN_STRINGCONST) + { + /* Allow c style "string" "continuation" */ + ch = lex_skipwhite(lex); + if (ch != '"') { + lex_ungetch(lex, ch); + break; + } + + lex->tok->ttype = lex_finish_string(lex, '"'); + } + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + return lex->tok->ttype; + } + + if (ch == '\'') + { + /* we parse character constants like string, + * but return TOKEN_CHARCONST, or a vector type if it fits... + * Likewise actual unescaping has to be done by the parser. + * The difference is we don't allow 'char' 'continuation'. 
+ */ + lex->tok->ttype = lex_finish_string(lex, '\''); + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + + /* It's a vector if we can successfully scan 3 floats */ + if (sscanf(lex->tok->value, " %f %f %f ", &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3) + { + lex->tok->ttype = TOKEN_VECTORCONST; + } + + return lex->tok->ttype; + } + + if (isdigit(ch)) + { + lex->tok->ttype = lex_finish_digit(lex, ch); + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + return lex->tok->ttype; + } + + lexerror(lex, "unknown token"); + return (lex->tok->ttype = TOKEN_ERROR); +} diff --git a/lexer.h b/lexer.h new file mode 100644 index 0000000..c23f944 --- /dev/null +++ b/lexer.h @@ -0,0 +1,214 @@ +#ifndef GMQCC_LEXER_HDR_ +#define GMQCC_LEXER_HDR_ + +typedef struct token_s token; + +#include "ast.h" + +struct token_s { + int ttype; + + MEM_VECTOR_MAKE(char, value); + + union { + vector v; + int i; + double f; + int t; /* type */ + } constval; + + struct token_s *next; + struct token_s *prev; + + lex_ctx ctx; +}; + +token* token_new(); +void token_delete(token*); +token* token_copy(const token *cp); +void token_delete_all(token *t); +token* token_copy_all(const token *cp); + +/* Lexer + * + */ +enum { + /* Other tokens which we can return: */ + TOKEN_NONE = 0, + TOKEN_START = 128, + + TOKEN_IDENT, + + TOKEN_TYPENAME, + + TOKEN_OPERATOR, + + TOKEN_KEYWORD, /* loop */ + + TOKEN_STRINGCONST, /* not the typename but an actual "string" */ + TOKEN_CHARCONST, + TOKEN_VECTORCONST, + TOKEN_INTCONST, + TOKEN_FLOATCONST, + + TOKEN_EOF, + + /* We use '< TOKEN_ERROR', so TOKEN_FATAL must come after it and any + * other error related tokens as well + */ + TOKEN_ERROR, + TOKEN_FATAL /* internal error, eg out of memory */ +}; + +static const char *_tokennames[] = { + "TOKEN_START", + "TOKEN_IDENT", + "TOKEN_TYPENAME", + "TOKEN_OPERATOR", + "TOKEN_KEYWORD", + "TOKEN_STRINGCONST", + "TOKEN_CHARCONST", + "TOKEN_VECTORCONST", + 
"TOKEN_INTCONST", + "TOKEN_FLOATCONST", + "TOKEN_EOF", + "TOKEN_ERROR", + "TOKEN_FATAL", +}; +typedef int +_all_tokennames_added_[ + ((TOKEN_FATAL - TOKEN_START + 1) == + (sizeof(_tokennames)/sizeof(_tokennames[0]))) + ? 1 : -1]; + +typedef struct { + FILE *file; + char *name; + size_t line; + size_t sline; /* line at the start of a token */ + + char peek[256]; + size_t peekpos; + + token *tok; + + struct { + bool noops; + } flags; +} lex_file; + +MEM_VECTOR_PROTO(lex_file, char, token); + +lex_file* lex_open (const char *file); +void lex_close(lex_file *lex); +int lex_do (lex_file *lex); + +/* Parser + * + */ + +enum { + ASSOC_LEFT, + ASSOC_RIGHT +}; + +#define OP_SUFFIX 1 +#define OP_PREFIX 2 + +typedef struct { + const char *op; + unsigned int operands; + unsigned int id; + unsigned int assoc; + unsigned int prec; + unsigned int flags; +} oper_info; + +#define opid1(a) (a) +#define opid2(a,b) ((a<<8)|b) +#define opid3(a,b,c) ((a<<16)|(b<<8)|c) + +static const oper_info operators[] = { + { "++", 1, opid3('S','+','+'), ASSOC_LEFT, 16, OP_SUFFIX}, + { "--", 1, opid3('S','-','-'), ASSOC_LEFT, 16, OP_SUFFIX}, + + { ".", 2, opid1('.'), ASSOC_LEFT, 15, 0 }, + + { "!", 1, opid2('!', 'P'), ASSOC_RIGHT, 14, 0 }, + { "~", 1, opid2('~', 'P'), ASSOC_RIGHT, 14, 0 }, + { "+", 1, opid2('+','P'), ASSOC_RIGHT, 14, OP_PREFIX }, + { "-", 1, opid2('-','P'), ASSOC_RIGHT, 14, OP_PREFIX }, + { "++", 1, opid3('+','+','P'), ASSOC_RIGHT, 14, OP_PREFIX }, + { "--", 1, opid3('-','-','P'), ASSOC_RIGHT, 14, OP_PREFIX }, +/* { "&", 1, opid2('&','P'), ASSOC_RIGHT, 14, OP_PREFIX }, */ + + { "*", 2, opid1('*'), ASSOC_LEFT, 13, 0 }, + { "/", 2, opid1('/'), ASSOC_LEFT, 13, 0 }, + { "%", 2, opid1('%'), ASSOC_LEFT, 13, 0 }, + + { "+", 2, opid1('+'), ASSOC_LEFT, 12, 0 }, + { "-", 2, opid1('-'), ASSOC_LEFT, 12, 0 }, + + { "<<", 2, opid2('<','<'), ASSOC_LEFT, 11, 0 }, + { ">>", 2, opid2('>','>'), ASSOC_LEFT, 11, 0 }, + + { "<", 2, opid1('<'), ASSOC_LEFT, 10, 0 }, + { ">", 2, opid1('>'), ASSOC_LEFT, 10, 
0 }, + { "<=", 2, opid2('<','='), ASSOC_LEFT, 10, 0 }, + { ">=", 2, opid2('>','='), ASSOC_LEFT, 10, 0 }, + + { "==", 2, opid2('=','='), ASSOC_LEFT, 9, 0 }, + { "!=", 2, opid2('!','='), ASSOC_LEFT, 9, 0 }, + + { "&", 2, opid1('&'), ASSOC_LEFT, 8, 0 }, + + { "^", 2, opid1('^'), ASSOC_LEFT, 7, 0 }, + + { "|", 2, opid1('|'), ASSOC_LEFT, 6, 0 }, + + { "&&", 2, opid2('&','&'), ASSOC_LEFT, 5, 0 }, + + { "||", 2, opid2('|','|'), ASSOC_LEFT, 4, 0 }, + + { "?", 3, opid2('?',':'), ASSOC_RIGHT, 3, 0 }, + + { "=", 2, opid1('='), ASSOC_RIGHT, 2, 0 }, + { "+=", 2, opid2('+','='), ASSOC_RIGHT, 2, 0 }, + { "-=", 2, opid2('-','='), ASSOC_RIGHT, 2, 0 }, + { "*=", 2, opid2('*','='), ASSOC_RIGHT, 2, 0 }, + { "/=", 2, opid2('/','='), ASSOC_RIGHT, 2, 0 }, + { "%=", 2, opid2('%','='), ASSOC_RIGHT, 2, 0 }, + { ">>=", 2, opid3('>','>','='), ASSOC_RIGHT, 2, 0 }, + { "<<=", 2, opid3('<','<','='), ASSOC_RIGHT, 2, 0 }, + { "&=", 2, opid2('&','='), ASSOC_RIGHT, 2, 0 }, + { "^=", 2, opid2('^','='), ASSOC_RIGHT, 2, 0 }, + { "|=", 2, opid2('|','='), ASSOC_RIGHT, 2, 0 }, +}; +static const size_t operator_count = (sizeof(operators) / sizeof(operators[0])); + +typedef struct +{ + lex_file *lex; + int error; + lex_ctx ctx; + + token *tokens; + token *lastok; + + token *tok; /* current token */ + + MEM_VECTOR_MAKE(ast_value*, globals); +} parse_file; + +MEM_VECTOR_PROTO(parse_file, ast_value*, globals); + +parse_file* parse_open(const char *file); +void parse_file_close(parse_file*); + +bool parse(parse_file*); + +bool parse_iskey(parse_file *self, const char *ident); + +void lexerror(lex_file*, const char *fmt, ...); + +#endif diff --git a/main.c b/main.c index 1ffcd99..195574e 100644 --- a/main.c +++ b/main.c @@ -21,160 +21,19 @@ * SOFTWARE. 
*/ #include "gmqcc.h" -typedef struct { char *name, type; } argitem; -VECTOR_MAKE(argitem, items); - -static int usage(const char *app) { - printf("usage:\n" - " %s -c -oprog.dat -- compile file\n" - " %s -a -oprog.dat -- assemble file\n" - " %s -c -i -oprog.dat -- compile together (allowed multiple -i)\n" - " %s -a -i -oprog.dat -- assemble together(allowed multiple -i)\n" - " example:\n" - " %s -cfoo.qc -ibar.qc -oqc.dat -afoo.qs -ibar.qs -oqs.dat\n", app, app, app, app, app); - - printf(" additional flags:\n" - " -debug -- turns on compiler debug messages\n" - " -memchk -- turns on compiler memory leak check\n" - " -help -- prints this help/usage text\n" - " -std -- select the QuakeC compile type (types below):\n"); - - printf(" -std=qcc -- original QuakeC\n" - " -std=ftqecc -- fteqcc QuakeC\n" - " -std=qccx -- qccx QuakeC\n" - " -std=gmqcc -- this compiler QuakeC (default selection)\n"); - - printf(" codegen flags:\n" - " -fdarkplaces-string-table-bug -- patches the string table to work with bugged versions of darkplaces\n" - " -fomit-nullcode -- omits the generation of null code (will break everywhere see propsal.txt)\n"); - return -1; -} +bool parser_compile(const char *filename); int main(int argc, char **argv) { - size_t itr = 0; - char *app = &argv[0][0]; - FILE *fpp = NULL; - lex_file *lex = NULL; - - /* - * Parse all command line arguments. This is rather annoying to do - * because of all tiny corner cases. 
- */ - if (argc <= 1 || (argv[1][0] != '-')) - return usage(app); - - while ((argc > 1) && argv[1][0] == '-') { - switch (argv[1][1]) { - case 'v': { - printf("GMQCC:\n" - " version: %d.%d.%d (0x%08X)\n" - " build date: %s\n" - " build time: %s\n", - (GMQCC_VERSION >> 16) & 0xFF, - (GMQCC_VERSION >> 8) & 0xFF, - (GMQCC_VERSION >> 0) & 0xFF, - (GMQCC_VERSION), - __DATE__, - __TIME__ - ); - return 0; - } - #define param_argument(argtype) do { \ - argitem item; \ - if (argv[1][2]) { \ - item.name = util_strdup(&argv[1][2]); \ - item.type = argtype; \ - items_add(item); \ - } else { \ - ++argv; \ - --argc; \ - if (argc <= 1) \ - goto clean_params_usage; \ - item.name = util_strdup(argv[1]); \ - item.type = argtype; \ - items_add(item); \ - } \ - } while (0) - - case 'c': { param_argument(0); break; } /* compile */ - case 'a': { param_argument(1); break; } /* assemble */ - case 'i': { param_argument(2); break; } /* includes */ - #undef parm_argument - default: - if (util_strncmpexact(&argv[1][1], "debug" , 5)) { opts_debug = true; break; } - if (util_strncmpexact(&argv[1][1], "memchk", 6)) { opts_memchk = true; break; } - if (util_strncmpexact(&argv[1][1], "help", 4)) { - return usage(app); - } - /* compiler type selection */ - if (util_strncmpexact(&argv[1][1], "std=qcc" , 7 )) { opts_compiler = COMPILER_QCC; break; } - if (util_strncmpexact(&argv[1][1], "std=fteqcc", 10)) { opts_compiler = COMPILER_FTEQCC; break; } - if (util_strncmpexact(&argv[1][1], "std=qccx", 8 )) { opts_compiler = COMPILER_QCCX; break; } - if (util_strncmpexact(&argv[1][1], "std=gmqcc", 9 )) { opts_compiler = COMPILER_GMQCC; break; } - if (util_strncmpexact(&argv[1][1], "std=", 4 )) { - printf("invalid std selection, supported types:\n" - " -std=qcc -- original QuakeC\n" - " -std=ftqecc -- fteqcc QuakeC\n" - " -std=qccx -- qccx QuakeC\n" - " -std=gmqcc -- this compiler QuakeC (default selection)\n"); - return 0; - } - - /* code specific switches */ - if (util_strncmpexact(&argv[1][1], 
"fdarkplaces-stringtablebug", 26)) { - opts_darkplaces_stringtablebug = true; - break; - } - if (util_strncmpexact(&argv[1][1], "fomit-nullcode", 14)) { - opts_omit_nullcode = true; - break; - } - return printf("invalid command line argument: %s\n",argv[1]); - - } - ++argv; - --argc; - } - /* - * options could depend on another option, this is where option - * validity checking like that would take place. - */ - if (opts_memchk && !opts_debug) - printf("Warning: cannot enable -memchk, without -debug.\n"); - util_debug("COM", "starting ...\n"); - /* multi file multi path compilation system */ - for (; itr < items_elements; itr++) { - switch (items_data[itr].type) { - case 0: - lex_init (items_data[itr].name, &lex); - if (lex) { - lex_parse(lex); - lex_close(lex); - } - break; - case 1: - asm_init (items_data[itr].name, &fpp); - if (fpp) { - asm_parse(fpp); - asm_close(fpp); - } - break; + + if (argc == 2) { + if (!parser_compile(argv[1])) { + printf("There were compile errors\n"); } } util_debug("COM", "cleaning ...\n"); - /* clean list */ - for (itr = 0; itr < items_elements; itr++) - mem_d(items_data[itr].name); - mem_d(items_data); util_meminfo(); return 0; - -clean_params_usage: - for (itr = 0; itr < items_elements; itr++) - mem_d(items_data[itr].name); - mem_d(items_data); - return usage(app); } diff --git a/parser.c b/parser.c new file mode 100644 index 0000000..1eeda39 --- /dev/null +++ b/parser.c @@ -0,0 +1,897 @@ +#include +#include + +#include "gmqcc.h" +#include "lexer.h" + +typedef struct { + lex_file *lex; + int tok; + + MEM_VECTOR_MAKE(ast_value*, globals); + MEM_VECTOR_MAKE(ast_function*, functions); + MEM_VECTOR_MAKE(ast_value*, imm_float); + + ast_function *function; + MEM_VECTOR_MAKE(ast_value*, locals); + size_t blocklocal; +} parser_t; + +MEM_VEC_FUNCTIONS(parser_t, ast_value*, globals) +MEM_VEC_FUNCTIONS(parser_t, ast_value*, imm_float) +MEM_VEC_FUNCTIONS(parser_t, ast_value*, locals) +MEM_VEC_FUNCTIONS(parser_t, ast_function*, functions) + 
+void parseerror(parser_t *parser, const char *fmt, ...) +{ + va_list ap; + + if (parser) + printf("error %s:%lu: ", parser->lex->tok->ctx.file, (unsigned long)parser->lex->tok->ctx.line); + else + printf("error: "); + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + + printf("\n"); +} + +bool parser_next(parser_t *parser) +{ + /* lex_do kills the previous token */ + parser->tok = lex_do(parser->lex); + if (parser->tok == TOKEN_EOF || parser->tok >= TOKEN_ERROR) + return false; + return true; +} + +/* lift a token out of the parser so it's not destroyed by parser_next */ +token *parser_lift(parser_t *parser) +{ + token *tok = parser->lex->tok; + parser->lex->tok = NULL; + return tok; +} + +#define parser_tokval(p) (p->lex->tok->value) +#define parser_token(p) (p->lex->tok) +#define parser_ctx(p) (p->lex->tok->ctx) + +ast_value* parser_const_float(parser_t *parser, double d) +{ + size_t i; + ast_value *out; + for (i = 0; i < parser->imm_float_count; ++i) { + if (parser->imm_float[i]->constval.vfloat == d) + return parser->imm_float[i]; + } + out = ast_value_new(parser_ctx(parser), "#IMMEDIATE", TYPE_FLOAT); + out->isconst = true; + out->constval.vfloat = d; + if (!parser_t_imm_float_add(parser, out)) { + ast_value_delete(out); + return NULL; + } + return out; +} + +ast_value* parser_find_global(parser_t *parser, const char *name) +{ + size_t i; + for (i = 0; i < parser->globals_count; ++i) { + if (!strcmp(parser->globals[i]->name, name)) + return parser->globals[i]; + } + return NULL; +} + +ast_value* parser_find_local(parser_t *parser, const char *name, size_t upto) +{ + size_t i; + for (i = parser->locals_count; i > upto;) { + --i; + if (!strcmp(parser->locals[i]->name, name)) + return parser->locals[i]; + } + return NULL; +} + +ast_value* parser_find_var(parser_t *parser, const char *name) +{ + ast_value *v; + v = parser_find_local(parser, name, 0); + if (!v) v = parser_find_global(parser, name); + return v; +} + +typedef struct { + 
MEM_VECTOR_MAKE(ast_value*, p); +} paramlist_t; +MEM_VEC_FUNCTIONS(paramlist_t, ast_value*, p) + +static ast_value *parser_parse_type(parser_t *parser, int basetype, bool *isfunc) +{ + paramlist_t params; + ast_value *var; + lex_ctx ctx = parser_ctx(parser); + int vtype = basetype; + int temptype; + + MEM_VECTOR_INIT(¶ms, p); + + *isfunc = false; + + if (parser->tok == '(') { + *isfunc = true; + while (true) { + ast_value *param; + bool dummy; + + if (!parser_next(parser)) { + MEM_VECTOR_CLEAR(¶ms, p); + return NULL; + } + + if (parser->tok == ')') + break; + + temptype = parser_token(parser)->constval.t; + if (!parser_next(parser)) { + MEM_VECTOR_CLEAR(¶ms, p); + return NULL; + } + param = parser_parse_type(parser, temptype, &dummy); + (void)dummy; + + if (!param) { + MEM_VECTOR_CLEAR(¶ms, p); + return NULL; + } + + if (!paramlist_t_p_add(¶ms, param)) { + MEM_VECTOR_CLEAR(¶ms, p); + parseerror(parser, "Out of memory while parsing typename"); + return NULL; + } + + if (parser->tok == ',') + continue; + if (parser->tok == ')') + break; + MEM_VECTOR_CLEAR(¶ms, p); + parseerror(parser, "Unexpected token"); + return NULL; + } + if (!parser_next(parser)) { + MEM_VECTOR_CLEAR(¶ms, p); + return NULL; + } + } + + var = ast_value_new(ctx, "", vtype); + if (!var) { + MEM_VECTOR_CLEAR(¶ms, p); + return NULL; + } + MEM_VECTOR_MOVE(¶ms, p, var, params); + return var; +} + +typedef struct +{ + size_t etype; /* 0 = expression, others are operators */ + int paren; + ast_expression *out; + ast_value *value; /* need to know if we can assign */ + lex_ctx ctx; +} sy_elem; +typedef struct +{ + MEM_VECTOR_MAKE(sy_elem, out); + MEM_VECTOR_MAKE(sy_elem, ops); +} shunt; +MEM_VEC_FUNCTIONS(shunt, sy_elem, out) +MEM_VEC_FUNCTIONS(shunt, sy_elem, ops) + +static sy_elem syexp(lex_ctx ctx, ast_expression *v) { + sy_elem e; + e.etype = 0; + e.out = v; + e.value = NULL; + e.ctx = ctx; + e.paren = 0; + return e; +} +static sy_elem syval(lex_ctx ctx, ast_value *v) { + sy_elem e; + e.etype = 0; + 
e.out = (ast_expression*)v; + e.value = v; + e.ctx = ctx; + e.paren = 0; + return e; +} + +static sy_elem syop(lex_ctx ctx, const oper_info *op) { + sy_elem e; + e.etype = 1 + (op - operators); + e.out = NULL; + e.value = NULL; + e.ctx = ctx; + e.paren = 0; + return e; +} + +static sy_elem syparen(lex_ctx ctx, int p) { + sy_elem e; + e.etype = 0; + e.out = NULL; + e.value = NULL; + e.ctx = ctx; + e.paren = p; + return e; +} + +static bool parser_sy_pop(parser_t *parser, shunt *sy) +{ + const oper_info *op; + lex_ctx ctx; + ast_expression *out = NULL; + ast_expression *exprs[3]; + ast_value *vars[3]; + size_t i; + + if (!sy->ops_count) { + parseerror(parser, "internal error: missing operator"); + return false; + } + + if (sy->ops[sy->ops_count-1].paren) { + parseerror(parser, "unmatched parenthesis"); + return false; + } + + op = &operators[sy->ops[sy->ops_count-1].etype - 1]; + ctx = sy->ops[sy->ops_count-1].ctx; + + if (sy->out_count < op->operands) { + parseerror(parser, "internal error: not enough operands: %i", sy->out_count); + return false; + } + + sy->ops_count--; + + sy->out_count -= op->operands; + for (i = 0; i < op->operands; ++i) { + exprs[i] = sy->out[sy->out_count+i].out; + vars[i] = sy->out[sy->out_count+i].value; + } + + printf("Applying operator %s\n", op->op); + switch (op->id) + { + default: + parseerror(parser, "internal error: unhandled operand"); + return false; + + case opid1('+'): + if (exprs[0]->expression.vtype != exprs[1]->expression.vtype) { + parseerror(parser, "Cannot add type %s and %s", + type_name[exprs[0]->expression.vtype], + type_name[exprs[1]->expression.vtype]); + return false; + } + switch (exprs[0]->expression.vtype) { + case TYPE_FLOAT: + out = (ast_expression*)ast_binary_new(ctx, INSTR_ADD_F, exprs[0], exprs[1]); + break; + case TYPE_VECTOR: + out = (ast_expression*)ast_binary_new(ctx, INSTR_ADD_V, exprs[0], exprs[1]); + break; + default: + parseerror(parser, "Cannot add type %s and %s", + 
type_name[exprs[0]->expression.vtype], + type_name[exprs[1]->expression.vtype]); + return false; + }; + break; + case opid1('-'): + if (exprs[0]->expression.vtype != exprs[1]->expression.vtype) { + parseerror(parser, "Cannot subtract type %s from %s", + type_name[exprs[1]->expression.vtype], + type_name[exprs[0]->expression.vtype]); + return false; + } + switch (exprs[0]->expression.vtype) { + case TYPE_FLOAT: + out = (ast_expression*)ast_binary_new(ctx, INSTR_SUB_F, exprs[0], exprs[1]); + break; + case TYPE_VECTOR: + out = (ast_expression*)ast_binary_new(ctx, INSTR_SUB_V, exprs[0], exprs[1]); + break; + default: + parseerror(parser, "Cannot add type %s from %s", + type_name[exprs[1]->expression.vtype], + type_name[exprs[0]->expression.vtype]); + return false; + }; + break; + case opid1('*'): + if (exprs[0]->expression.vtype != exprs[1]->expression.vtype && + exprs[0]->expression.vtype != TYPE_VECTOR && + exprs[0]->expression.vtype != TYPE_FLOAT && + exprs[1]->expression.vtype != TYPE_VECTOR && + exprs[1]->expression.vtype != TYPE_FLOAT) + { + parseerror(parser, "Cannot multiply type %s from %s", + type_name[exprs[1]->expression.vtype], + type_name[exprs[0]->expression.vtype]); + return false; + } + switch (exprs[0]->expression.vtype) { + case TYPE_FLOAT: + if (exprs[1]->expression.vtype == TYPE_VECTOR) + out = (ast_expression*)ast_binary_new(ctx, INSTR_MUL_FV, exprs[0], exprs[1]); + else + out = (ast_expression*)ast_binary_new(ctx, INSTR_MUL_F, exprs[0], exprs[1]); + break; + case TYPE_VECTOR: + if (exprs[1]->expression.vtype == TYPE_FLOAT) + out = (ast_expression*)ast_binary_new(ctx, INSTR_MUL_VF, exprs[0], exprs[1]); + else + out = (ast_expression*)ast_binary_new(ctx, INSTR_MUL_V, exprs[0], exprs[1]); + break; + default: + parseerror(parser, "Cannot add type %s from %s", + type_name[exprs[1]->expression.vtype], + type_name[exprs[0]->expression.vtype]); + return false; + }; + break; + case opid1('/'): + if (exprs[0]->expression.vtype != exprs[1]->expression.vtype 
|| + exprs[0]->expression.vtype != TYPE_FLOAT) + { + parseerror(parser, "Cannot divide types %s and %s", + type_name[exprs[0]->expression.vtype], + type_name[exprs[1]->expression.vtype]); + return false; + } + out = (ast_expression*)ast_binary_new(ctx, INSTR_DIV_F, exprs[0], exprs[1]); + break; + + + case opid1('='): + if (!vars[0]) { + parseerror(parser, "Cannot assign to non-variable"); + return false; + } + out = (ast_expression*)ast_store_new(ctx, + type_store_instr[vars[0]->expression.vtype], + vars[0], exprs[1]); + break; + } + + if (!out) { + parseerror(parser, "failed to apply operand %s", op->op); + return false; + } + + sy->out[sy->out_count++] = syexp(ctx, out); + return true; +} + +static ast_expression* parser_expression(parser_t *parser) +{ + ast_expression *expr = NULL; + shunt sy; + bool wantop = false; + + MEM_VECTOR_INIT(&sy, out); + MEM_VECTOR_INIT(&sy, ops); + + while (true) + { + if (!wantop) + { + bool nextwant = true; + if (parser->tok == TOKEN_IDENT) + { + /* variable */ + ast_value *var = parser_find_var(parser, parser_tokval(parser)); + if (!var) { + parseerror(parser, "unexpected ident: %s", parser_tokval(parser)); + goto onerr; + } + if (!shunt_out_add(&sy, syval(parser_ctx(parser), var))) { + parseerror(parser, "out of memory"); + goto onerr; + } + printf("Added: %s\n", var->name); + } + else if (parser->tok == TOKEN_FLOATCONST) { + ast_value *val = parser_const_float(parser, (parser_token(parser)->constval.f)); + if (!val) + return false; + if (!shunt_out_add(&sy, syexp(parser_ctx(parser), (ast_expression*)val))) { + parseerror(parser, "out of memory"); + goto onerr; + } + } + else if (parser->tok == TOKEN_INTCONST) { + ast_value *val = parser_const_float(parser, (double)(parser_token(parser)->constval.i)); + if (!val) + return false; + if (!shunt_out_add(&sy, syexp(parser_ctx(parser), (ast_expression*)val))) { + parseerror(parser, "out of memory"); + goto onerr; + } + printf("Added: %i\n", parser_token(parser)->constval.i); + } + else 
if (parser->tok == '(') { + nextwant = false; /* not expecting an operator next */ + if (!shunt_ops_add(&sy, syparen(parser_ctx(parser), 1))) { + parseerror(parser, "out of memory"); + goto onerr; + } + } + else { + /* TODO: prefix operators */ + parseerror(parser, "expected statement"); + goto onerr; + } + wantop = nextwant; + parser->lex->flags.noops = !wantop; + } else { + if (parser->tok == ')') { + /* we do expect an operator next */ + /* closing an opening paren */ + printf("Applying closing paren\n"); + if (!sy.ops_count) { + parseerror(parser, "unmatched closing paren"); + goto onerr; + } + if (sy.ops[sy.ops_count-1].paren == 1) { + parseerror(parser, "empty parenthesis expression"); + goto onerr; + } + while (sy.ops_count) { + if (sy.ops[sy.ops_count-1].paren == 1) { + sy.ops_count--; + break; + } + if (!parser_sy_pop(parser, &sy)) + goto onerr; + } + } + else if (parser->tok != TOKEN_OPERATOR) { + parseerror(parser, "expected operator or end of statement"); + goto onerr; + } + else { + /* classify the operator */ + /* TODO: suffix operators */ + const oper_info *op; + const oper_info *olast = NULL; + size_t o; + for (o = 0; o < operator_count; ++o) { + if (!(operators[o].flags & OP_PREFIX) && + !(operators[o].flags & OP_SUFFIX) && /* remove this */ + !strcmp(parser_tokval(parser), operators[o].op)) + { + break; + } + } + if (o == operator_count) { + /* no operator found... 
must be the end of the statement */ + break; + } + /* found an operator */ + op = &operators[o]; + + if (sy.ops_count && !sy.ops[sy.ops_count-1].paren) + olast = &operators[sy.ops[sy.ops_count-1].etype-1]; + + while (olast && ( + (op->prec < olast->prec) || + (op->assoc == ASSOC_LEFT && op->prec <= olast->prec) ) ) + { + if (!parser_sy_pop(parser, &sy)) + goto onerr; + if (sy.ops_count && !sy.ops[sy.ops_count-1].paren) + olast = &operators[sy.ops[sy.ops_count-1].etype-1]; + } + + if (!shunt_ops_add(&sy, syop(parser_ctx(parser), op))) + goto onerr; + printf("Added op %s\n", op->op); + } + wantop = false; + parser->lex->flags.noops = true; + } + if (!parser_next(parser)) { + goto onerr; + } + if (parser->tok == ';') { + printf("End of statement\n"); + break; + } + } + if (!parser_next(parser)) { + parseerror(parser, "Unexpected end of file"); + goto onerr; + } + + while (sy.ops_count) { + if (!parser_sy_pop(parser, &sy)) + goto onerr; + } + + parser->lex->flags.noops = true; + if (!sy.out_count) { + parseerror(parser, "empty expression"); + expr = NULL; + } else + expr = sy.out[0].out; + MEM_VECTOR_CLEAR(&sy, out); + MEM_VECTOR_CLEAR(&sy, ops); + return expr; + +onerr: + parser->lex->flags.noops = true; + MEM_VECTOR_CLEAR(&sy, out); + MEM_VECTOR_CLEAR(&sy, ops); + return NULL; +} + +static bool parser_variable(parser_t *parser, ast_block *localblock); +static bool parser_body_do(parser_t *parser, ast_block *block) +{ + if (parser->tok == TOKEN_TYPENAME) + { + /* local variable */ + if (!parser_variable(parser, block)) + return false; + return true; + } + else if (parser->tok == '{') + { + /* a block */ + parseerror(parser, "TODO: inner blocks"); + return false; + } + else + { + ast_expression *exp = parser_expression(parser); + if (!exp) + return false; + if (!ast_block_exprs_add(block, exp)) + return false; + return true; + } +} + +static ast_block* parser_parse_block(parser_t *parser) +{ + size_t oldblocklocal; + ast_block *block = NULL; + + oldblocklocal = 
parser->blocklocal; + parser->blocklocal = parser->locals_count; + + if (!parser_next(parser)) { /* skip the '{' */ + parseerror(parser, "expected function body"); + goto cleanup; + } + + block = ast_block_new(parser_ctx(parser)); + + while (parser->tok != TOKEN_EOF && parser->tok < TOKEN_ERROR) + { + if (parser->tok == '}') + break; + + if (!parser_body_do(parser, block)) { + ast_block_delete(block); + block = NULL; + goto cleanup; + } + } + + if (parser->tok != '}') { + ast_block_delete(block); + block = NULL; + } else { + (void)parser_next(parser); + } + +cleanup: + parser->blocklocal = oldblocklocal; + return block; +} + +static bool parser_variable(parser_t *parser, ast_block *localblock) +{ + bool isfunc = false; + ast_function *func = NULL; + lex_ctx ctx; + ast_value *var; + + int basetype = parser_token(parser)->constval.t; + + while (true) + { + if (!parser_next(parser)) { /* skip basetype or comma */ + parseerror(parser, "expected variable declaration"); + return false; + } + + isfunc = false; + func = NULL; + ctx = parser_ctx(parser); + var = parser_parse_type(parser, basetype, &isfunc); + + if (!var) + return false; + + if (parser->tok != TOKEN_IDENT) { + parseerror(parser, "expected variable name\n"); + return false; + } + + if (!localblock && parser_find_global(parser, parser_tokval(parser))) { + ast_value_delete(var); + parseerror(parser, "global already exists: %s\n", parser_tokval(parser)); + return false; + } + + if (localblock && parser_find_local(parser, parser_tokval(parser), parser->blocklocal)) { + ast_value_delete(var); + parseerror(parser, "local variable already exists: %s\n", parser_tokval(parser)); + return false; + } + + if (!ast_value_set_name(var, parser_tokval(parser))) { + parseerror(parser, "failed to set variable name\n"); + ast_value_delete(var); + return false; + } + + if (isfunc) { + /* a function was defined */ + ast_value *fval; + + /* turn var into a value of TYPE_FUNCTION, with the old var + * as return type + */ + fval = 
ast_value_new(ctx, var->name, TYPE_FUNCTION); + func = ast_function_new(ctx, var->name, fval); + if (!fval || !func) { + ast_value_delete(var); + if (fval) ast_value_delete(fval); + if (func) ast_function_delete(func); + return false; + } + + fval->expression.next = (ast_expression*)var; + MEM_VECTOR_MOVE(var, params, fval, params); + + if (!parser_t_functions_add(parser, func)) { + ast_value_delete(var); + if (fval) ast_value_delete(fval); + if (func) ast_function_delete(func); + return false; + } + + var = fval; + } + + if ( (!localblock && !parser_t_globals_add(parser, var)) || + ( localblock && !parser_t_locals_add(parser, var)) ) + { + ast_value_delete(var); + return false; + } + if (localblock && !ast_block_locals_add(localblock, var)) + { + parser->locals_count--; + ast_value_delete(var); + return false; + } + + if (!parser_next(parser)) { + ast_value_delete(var); + return false; + } + + if (parser->tok == ';') { + if (!parser_next(parser)) + return parser->tok == TOKEN_EOF; + return true; + } + + if (parser->tok == ',') { + /* another var */ + continue; + } + + if (parser->tok != '=') { + parseerror(parser, "expected '=' or ';'"); + return false; + } + + if (!parser_next(parser)) + return false; + + if (parser->tok == '#') { + if (localblock) { + parseerror(parser, "cannot declare builtins within functions"); + return false; + } + if (!isfunc || !func) { + parseerror(parser, "unexpected builtin number, '%s' is not a function", var->name); + return false; + } + if (!parser_next(parser)) { + parseerror(parser, "expected builtin number"); + return false; + } + if (parser->tok != TOKEN_INTCONST) { + parseerror(parser, "builtin number must be an integer constant"); + return false; + } + if (parser_token(parser)->constval.i <= 0) { + parseerror(parser, "builtin number must be positive integer greater than zero"); + return false; + } + + func->builtin = -parser_token(parser)->constval.i; + } else if (parser->tok == '{') { + /* function body */ + ast_block *block; 
+ ast_function *old = parser->function; + + if (localblock) { + parseerror(parser, "cannot declare functions within functions"); + return false; + } + + parser->function = func; + block = parser_parse_block(parser); + parser->function = old; + + if (!block) + return false; + + if (!ast_function_blocks_add(func, block)) { + ast_block_delete(block); + return false; + } + return true; + } else { + parseerror(parser, "TODO, const assignment"); + } + + if (!parser_next(parser)) + return false; + + if (parser->tok == ',') { + /* another */ + continue; + } + + if (parser->tok != ';') { + parseerror(parser, "expected semicolon"); + return false; + } + + (void)parser_next(parser); + + return true; + } +} + +static bool parser_do(parser_t *parser) +{ + if (parser->tok == TOKEN_TYPENAME) + { + return parser_variable(parser, NULL); + } + else if (parser->tok == TOKEN_KEYWORD) + { + /* handle 'var' and 'const' */ + return false; + } + else if (parser->tok == '.') + { + /* entity-member declaration */ + return false; + } + else + { + parseerror(parser, "unexpected token: %s", parser->lex->tok->value); + return false; + } + return true; +} + +bool parser_compile(const char *filename) +{ + size_t i; + parser_t *parser; + ir_builder *ir; + + parser = (parser_t*)mem_a(sizeof(parser_t)); + if (!parser) + return false; + + memset(parser, 0, sizeof(parser)); + + MEM_VECTOR_INIT(parser, globals); + MEM_VECTOR_INIT(parser, locals); + parser->lex = lex_open(filename); + + if (!parser->lex) { + printf("failed to open file \"%s\"\n", filename); + return false; + } + + /* initial lexer/parser state */ + parser->lex->flags.noops = true; + + if (parser_next(parser)) + { + while (parser->tok != TOKEN_EOF && parser->tok < TOKEN_ERROR) + { + if (!parser_do(parser)) { + if (parser->tok == TOKEN_EOF) + parseerror(parser, "unexpected eof"); + else + parseerror(parser, "parse error\n"); + lex_close(parser->lex); + mem_d(parser); + return false; + } + } + } + + lex_close(parser->lex); + + ir = 
ir_builder_new("gmqcc_out"); + if (!ir) { + printf("failed to allocate builder\n"); + goto cleanup; + } + + for (i = 0; i < parser->imm_float_count; ++i) { + if (!ast_global_codegen(parser->imm_float[i], ir)) { + printf("failed to generate global %s\n", parser->imm_float[i]->name); + } + } + for (i = 0; i < parser->globals_count; ++i) { + if (!ast_global_codegen(parser->globals[i], ir)) { + printf("failed to generate global %s\n", parser->globals[i]->name); + } + } + for (i = 0; i < parser->functions_count; ++i) { + if (!ast_function_codegen(parser->functions[i], ir)) { + printf("failed to generate function %s\n", parser->functions[i]->name); + } + if (!ir_function_finalize(parser->functions[i]->ir_func)) { + printf("failed to finalize function %s\n", parser->functions[i]->name); + } + } + + ir_builder_dump(ir, printf); + +cleanup: + for (i = 0; i < parser->globals_count; ++i) { + ast_value_delete(parser->globals[i]); + } + MEM_VECTOR_CLEAR(parser, globals); + + mem_d(parser); + return true; +}