]> git.xonotic.org Git - xonotic/gmqcc.git/blobdiff - parse.c
Removed primitive AST tree generator ... I'm planning a rewrite as we speak.
[xonotic/gmqcc.git] / parse.c
diff --git a/parse.c b/parse.c
index 7320c9e465b011e3f7da57b6b7b261aa78701d38..cc228ed1b9c268970de55f072a89d8c9e0352a5a 100644 (file)
--- a/parse.c
+++ b/parse.c
  */
 #include <limits.h>
 #include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
 #include "gmqcc.h"
 
-/*
- * These are not lexical tokens:  These are parse tree types.  Most people
- * perform tokenizing on language punctuation which is wrong.  That stuff
- * is technically already tokenized, it just needs to be parsed into a tree
- */
-#define PARSE_TYPE_DO       0
-#define PARSE_TYPE_ELSE     1
-#define PARSE_TYPE_IF       2
-#define PARSE_TYPE_WHILE    3
-#define PARSE_TYPE_BREAK    4
-#define PARSE_TYPE_CONTINUE 5
-#define PARSE_TYPE_RETURN   6
-#define PARSE_TYPE_GOTO     7
-#define PARSE_TYPE_FOR      8   // extension
-#define PARSE_TYPE_INT      9   // extension
-#define PARSE_TYPE_BOOL     10  // extension
-#define PARSE_TYPE_VOID     11
-#define PARSE_TYPE_STRING   12
-#define PARSE_TYPE_FLOAT    13
-#define PARSE_TYPE_VECTOR   14
-#define PARSE_TYPE_ENTITY   15
-#define PARSE_TYPE_LAND     16
-#define PARSE_TYPE_LOR      17
-#define PARSE_TYPE_LTEQ     18
-#define PARSE_TYPE_GTEQ     19
-#define PARSE_TYPE_EQEQ     20
-#define PARSE_TYPE_LNEQ     21
-#define PARSE_TYPE_COMMA    22
-#define PARSE_TYPE_LNOT     23
-#define PARSE_TYPE_STAR     24
-#define PARSE_TYPE_DIVIDE   25
-#define PARSE_TYPE_LPARTH   26
-#define PARSE_TYPE_RPARTH   27
-#define PARSE_TYPE_MINUS    28
-#define PARSE_TYPE_ADD      29
-#define PARSE_TYPE_EQUAL    30
-#define PARSE_TYPE_LSS      31 // left subscript
-#define PARSE_TYPE_RSS      32
-#define PARSE_TYPE_LBS      33 // left  bracket scope
-#define PARSE_TYPE_RBS      34 // right bracket scope
-#define PARSE_TYPE_ELIP     35 // ...
-#define PARSE_TYPE_DOT      36
-#define PARSE_TYPE_LT       37
-#define PARSE_TYPE_GT       38
-#define PARSE_TYPE_BAND     39
-#define PARSE_TYPE_BOR      40
-#define PARSE_TYPE_DONE     41 // finished statement
-
-/*
- * Adds a parse type to the parse tree, this is where all the hard
- * work actually begins.
- */
-#define PARSE_TREE_ADD(X)                                        \
-       do {                                                         \
-               parsetree->next       = mem_a(sizeof(struct parsenode)); \
-               parsetree->next->next = NULL;                            \
-               parsetree->next->type = (X);                             \
-               parsetree             = parsetree->next;                 \
-       } while (0)
-
-static const char *const parse_punct[] = {
-       "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
-       "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
-       "." , "<" , ">" , "&" , "|" , NULL
-};
-
-#define STORE(X) {     \
-       printf(X);         \
-       break;             \
-}
-
-void parse_debug(struct parsenode *tree) {
-       while (tree && tree->next != NULL) {
-               /* skip blanks */
-               if (tree->type == 0) {
-                       tree = tree->next;
-                       continue;
-               }
-                       
-               switch (tree->type) {
-                       case PARSE_TYPE_ADD:       STORE("ADD    \n");
-                       case PARSE_TYPE_BAND:      STORE("BITAND \n");
-                       case PARSE_TYPE_BOR:       STORE("BITOR  \n");
-                       case PARSE_TYPE_BREAK:     STORE("BREAK  \n");
-                       case PARSE_TYPE_COMMA:     STORE("SEPERATOR\n");
-                       case PARSE_TYPE_CONTINUE:  STORE("CONTINUE\n");
-                       case PARSE_TYPE_DIVIDE:    STORE("DIVIDE\n");
-                       case PARSE_TYPE_EQUAL:     STORE("ASSIGNMENT\n");
-                       case PARSE_TYPE_GOTO:      STORE("GOTO\n");
-                       case PARSE_TYPE_DOT:       STORE("DOT\n");
-
-
-                       case PARSE_TYPE_ELIP:      STORE("DECLTYPE: VALIST\n");
-                       case PARSE_TYPE_ENTITY:    STORE("DECLTYPE: ENTITY\n");
-                       case PARSE_TYPE_INT:       STORE("DECLTYPE: INT\n");
-                       case PARSE_TYPE_FLOAT:     STORE("DECLTYPE: FLOAT\n");
-                       case PARSE_TYPE_BOOL:      STORE("DECLTYPE: BOOL\n");
-                       
-                       case PARSE_TYPE_GT:        STORE("TEST:     GREATER THAN\n");
-                       case PARSE_TYPE_LT:        STORE("TEST:     LESS THAN\n");
-                       case PARSE_TYPE_GTEQ:      STORE("TEST:     GREATER THAN OR EQUAL\n");
-                       case PARSE_TYPE_LTEQ:      STORE("TEST:     LESS THAN OR EQUAL\n");
-                       case PARSE_TYPE_LNEQ:      STORE("TEST:     NOT EQUAL\n");
-                       case PARSE_TYPE_EQEQ:      STORE("TEST:     EQUAL-EQUAL\n");
-                       
-                       case PARSE_TYPE_LBS:       break;
-                       case PARSE_TYPE_RBS:       break;
-                       
-                       case PARSE_TYPE_LAND:      STORE("LOGICAL:  AND\n");
-                       case PARSE_TYPE_LNOT:      STORE("LOGICAL:  NOT\n");
-                       case PARSE_TYPE_LOR:       STORE("LOGICAL:  OR\n");
-                       case PARSE_TYPE_LPARTH:    STORE("PARTH:    END\n");
-                       case PARSE_TYPE_RPARTH:    STORE("PARTH:    BEG\n");
-                       
-                       case PARSE_TYPE_FOR:       STORE("LOOP:     FOR\n");
-                       case PARSE_TYPE_DO:        STORE("LOOP:     DO\n");
-                       case PARSE_TYPE_ELSE:      STORE("BLOCK:    ELSE\n");
-                       case PARSE_TYPE_IF:        STORE("BLOCK:    IF\n");
-               }
-               tree = tree->next;
-       }
-}
+/*
+ * A named compile-time constant, recorded by the parser when a declaration
+ * is followed by `= <initializer>`.  One entry is appended per initialized
+ * declaration through compile_constants_add().
+ */
+typedef struct {
+       char *name; /* identifier of the declared constant (a util_strdup'd copy) */
+       int   type; /* kind tag; the initializers in this file store TYPE_STRING, TYPE_VECTOR or TOKEN_FLOAT */
+       float value[3]; /* x, y, z for vector constants; the string and float initializers leave it {0,0,0} */
+       char *string; /* string value if constant is string literal, NULL otherwise */
+} constant;
+VECTOR_MAKE(constant, compile_constants); /* NOTE(review): presumably generates compile_constants_add() -- confirm against gmqcc.h */
 
 /*
- * This just skips the token and throws it in the parse tree for later
- * checking / optimization / codegen, it doesn't do anything with it
- * like syntax check for legal use -- like it should as it's a TODO item
- * which is not implemented
+ * Generates a parse tree out of the lexemes produced by the lexer.  This
+ * is where the tree is built and where validity checking is performed.
  */
-#define PARSE_TODO(X) {       \
-       token = lex_token(file);  \
-       PARSE_TREE_ADD(X);        \
-       break;                    \
-}
-
-int parse(struct lex_file *file) {
-       struct parsenode *parsetree = NULL;
-       struct parsenode *parseroot = NULL;
-       
-       /*
-        * Allocate memory for our parse tree:
-        * the parse tree is just a singly linked list which will contain
-        * all the data for code generation.
-        */
-       if (!parseroot) {
-               parseroot = mem_a(sizeof(struct parsenode));
-               if (!parseroot)
-                       return error(ERROR_INTERNAL, "Ran out of memory", " ");
-               parsetree = parseroot;
-               parsetree = parseroot;
-       }
-       
+int parse_gen(struct lex_file *file) { 
        int     token = 0;
        while ((token = lex_token(file)) != ERROR_LEX      && \
                    token                    != ERROR_COMPILER && \
@@ -183,149 +47,237 @@ int parse(struct lex_file *file) {
                    token                    != ERROR_PARSE    && \
                    token                    != ERROR_PREPRO   && file->length >= 0) {
                switch (token) {
-                       case TOKEN_IF:
+                       case TOKEN_TYPEDEF: {
+                               char *f; /* from */
+                               char *t; /* to   */
+                               
+                               token = lex_token(file); 
+                               token = lex_token(file); f = util_strdup(file->lastok);
+                               token = lex_token(file); 
+                               token = lex_token(file); t = util_strdup(file->lastok);
+                               
+                               typedef_add(f, t);
+                               mem_d(f);
+                               mem_d(t);
+                               
                                token = lex_token(file);
-                               while ((token == ' ' || token == '\n') && file->length >= 0)
+                               if (token == ' ')
                                        token = lex_token(file);
                                        
-                               if (token != '(')
-                                       error(ERROR_PARSE, "Expected `(` after if\n", "");
+                               if (token != ';')
+                                       error(ERROR_PARSE, "%s:%d Expected a `;` at end of typedef statement\n", file->name, file->line);
                                        
-                               PARSE_TREE_ADD(PARSE_TYPE_IF);
-                               break;
-                       case TOKEN_ELSE:
                                token = lex_token(file);
-                               while ((token == ' ' || token == '\n') && file->length >= 0)
-                                       token = lex_token(file);
-                                       
-                               PARSE_TREE_ADD(PARSE_TYPE_ELSE);
                                break;
-                       case TOKEN_FOR:
+                       }
+                       
+                       case TOKEN_VOID:   goto fall;
+                       case TOKEN_STRING: goto fall;
+                       case TOKEN_VECTOR: goto fall;
+                       case TOKEN_ENTITY: goto fall;
+                       case TOKEN_FLOAT:  goto fall;
+                       {
+                       fall:;
+                               char *name = NULL;
+                               int   type = token; /* store a copy */
+                               
+                               /* skip over space */
                                token = lex_token(file);
-                               while ((token == ' ' || token == '\n') && file->length >= 0)
+                               if (token == ' ')
                                        token = lex_token(file);
-                                       
-                               PARSE_TREE_ADD(PARSE_TYPE_FOR);
-                               break;
                                
-                       case TOKEN_DO:        PARSE_TODO(PARSE_TYPE_DO);
-                       case TOKEN_WHILE:     PARSE_TODO(PARSE_TYPE_WHILE);
-                       case TOKEN_BREAK:     PARSE_TODO(PARSE_TYPE_BREAK);
-                       case TOKEN_CONTINUE:  PARSE_TODO(PARSE_TYPE_CONTINUE);
-                       case TOKEN_RETURN:    PARSE_TODO(PARSE_TYPE_RETURN);
-                       case TOKEN_GOTO:      PARSE_TODO(PARSE_TYPE_GOTO);
-                       case TOKEN_INT:       PARSE_TODO(PARSE_TYPE_INT);
-                       case TOKEN_VOID:      PARSE_TODO(PARSE_TYPE_VOID);
-                       case TOKEN_STRING:    PARSE_TODO(PARSE_TYPE_STRING);
-                       case TOKEN_FLOAT:     PARSE_TODO(PARSE_TYPE_FLOAT);
-                       case TOKEN_VECTOR:    PARSE_TODO(PARSE_TYPE_VECTOR);
-                       case TOKEN_ENTITY:    PARSE_TODO(PARSE_TYPE_ENTITY);
-                       
-                       /* TODO: Preprocessor */
-                       case '#':
-                               token = lex_token(file);
-                               token = lex_token(file);
-                               token = lex_token(file);
-                               token = lex_token(file);
-                               token = lex_token(file);
+                               /* save name */
+                               name = util_strdup(file->lastok);
+                               
+                               /* skip spaces */
                                token = lex_token(file);
-                               break;
+                               if (token == ' ')
+                                       token = lex_token(file);
+                                       
+                               if (token == ';') {
+                                       /*
+                                        * Definitions go to the defs table, they don't have
+                                        * any sort of data with them yet.
+                                        */
+                               } else if (token == '=') {
+                                       token = lex_token(file);
+                                       if (token == ' ')
+                                               token = lex_token(file);
+                                       
+                                       /* strings are in file->lastok */
+                                       switch (type) {
+                                               case TOKEN_VOID:
+                                                       return error(ERROR_PARSE, "%s:%d Cannot assign value to type void\n", file->name, file->line);
+                                                       
+                                               /* TODO: Validate (end quote), strip quotes for constant add, name constant */
+                                               case TOKEN_STRING:
+                                                       if (*file->lastok != '"')
+                                                               error(ERROR_PARSE, "%s:%d Expected a '\"' (quote) for string constant\n", file->name, file->line);
+                                                       /* add the compile-time constant */
+                                                       compile_constants_add((constant){
+                                                               .name   = util_strdup(name),
+                                                               .type   = TYPE_STRING,
+                                                               .value  = {0,0,0},
+                                                               .string = util_strdup(file->lastok)
+                                                       });
+                                                       break;
+                                               /* TODO: name constant, old qc vec literals, whitespace fixes, name constant */
+                                               case TOKEN_VECTOR: {
+                                                       float compile_calc_x = 0;
+                                                       float compile_calc_y = 0;
+                                                       float compile_calc_z = 0;
+                                                       int   compile_calc_d = 0; /* dot?        */
+                                                       int   compile_calc_s = 0; /* sign (-, +) */
+                                                       
+                                                       char  compile_data[1024];
+                                                       char *compile_eval = compile_data;
+                                                       
+                                                       if (token != '{')
+                                                               error(ERROR_PARSE, "%s:%d Expected initializer list `{`,`}` for vector constant\n", file->name, file->line);    
+                                                       
+                                                       /*
+                                                        * This parses a single vector element: x,y & z.  This will handle all the
+                                                        * complicated mechanics of a vector, and can be extended as well.  This
+                                                        * is a rather large macro, and is #undef'd after its use below.
+                                                        */
+                                                       #define PARSE_VEC_ELEMENT(NAME, BIT)                                                                                                                                   \
+                                                           token = lex_token(file);                                                                                                                                           \
+                                                           if (token == ' ')                                                                                                                                                  \
+                                                               token = lex_token(file);                                                                                                                                       \
+                                                           if (token == '.')                                                                                                                                                  \
+                                                               compile_calc_d = 1;                                                                                                                                            \
+                                                           if (!isdigit(token) && !compile_calc_d && token != '+' && token != '-')                                                                                            \
+                                                               error(ERROR_PARSE,"%s:%d Invalid constant initializer element %c for vector, must be numeric\n", file->name, file->line, NAME);                                \
+                                                           if (token == '+')                                                                                                                                                  \
+                                                               compile_calc_s = '+';                                                                                                                                          \
+                                                           if (token == '-' && !compile_calc_s)                                                                                                                               \
+                                                               compile_calc_s = '-';                                                                                                                                          \
+                                                           while (isdigit(token) || token == '.' || token == '+' || token == '-') {                                                                                           \
+                                                               *compile_eval++ = token;                                                                                                                                       \
+                                                               token           = lex_token(file);                                                                                                                             \
+                                                               if (token == '.' && compile_calc_d) {                                                                                                                          \
+                                                                   error(ERROR_PARSE, "%s:%d Invalid constant initializer element %c for vector, must be numeric.\n", file->name, file->line, NAME);                          \
+                                                                   token = lex_token(file);                                                                                                                                   \
+                                                               }                                                                                                                                                              \
+                                                               if ((token == '-' || token == '+') && compile_calc_s) {                                                                                                        \
+                                                                   error(ERROR_PARSE, "%s:%d Invalid constant initializer sign for vector element %c\n", file->name, file->line, NAME);                                       \
+                                                                   token = lex_token(file);                                                                                                                                   \
+                                                               }                                                                                                                                                              \
+                                                               else if (token == '.' && !compile_calc_d)                                                                                                                      \
+                                                                   compile_calc_d = 1;                                                                                                                                        \
+                                                               else if (token == '-' && !compile_calc_s)                                                                                                                      \
+                                                                   compile_calc_s = '-';                                                                                                                                      \
+                                                               else if (token == '+' && !compile_calc_s)                                                                                                                      \
+                                                                   compile_calc_s = '+';                                                                                                                                      \
+                                                           }                                                                                                                                                                  \
+                                                           if (token == ' ')                                                                                                                                                  \
+                                                               token = lex_token(file);                                                                                                                                       \
+                                                           if (NAME != 'z') {                                                                                                                                                 \
+                                                               if (token != ',' && token != ' ')                                                                                                                              \
+                                                                   error(ERROR_PARSE, "%s:%d invalid constant initializer element %c for vector (missing spaces, or comma delimited list?)\n", file->name, file->line, NAME); \
+                                                           } else if (token != '}') {                                                                                                                                         \
+                                                               error(ERROR_PARSE, "%s:%d Expected `}` on end of constant initialization for vector\n", file->name, file->line);                                               \
+                                                           }                                                                                                                                                                  \
+                                                           compile_calc_##BIT = atof(compile_data);                                                                                                                           \
+                                                           compile_calc_d = 0;                                                                                                                                                \
+                                                           compile_calc_s = 0;                                                                                                                                                \
+                                                           compile_eval   = &compile_data[0];                                                                                                                                 \
+                                                           memset(compile_data, 0, sizeof(compile_data))
+                                                       
+                                                       /*
+                                                        * Parse all elements using the macro above.
+                                                        * We must undef the macro afterwards.
+                                                        */
+                                                       PARSE_VEC_ELEMENT('x', x);
+                                                       PARSE_VEC_ELEMENT('y', y);
+                                                       PARSE_VEC_ELEMENT('z', z);
+                                                       #undef PARSE_VEC_ELEMENT
+                                                       
+                                                       /* Check for the semi-colon... */
+                                                       token = lex_token(file);
+                                                       if (token == ' ')
+                                                               token = lex_token(file);
+                                                       if (token != ';')
+                                                               error(ERROR_PARSE, "%s:%d Expected `;` on end of constant initialization for vector\n", file->name, file->line);
+                                                               
+                                                       /* add the compile-time constant */
+                                                       compile_constants_add((constant){
+                                                               .name   = util_strdup(name),
+                                                               .type   = TYPE_VECTOR,
+                                                               .value  = {
+                                                                       [0] = compile_calc_x,
+                                                                       [1] = compile_calc_y,
+                                                                       [2] = compile_calc_z
+                                                               },
+                                                               .string = NULL
+                                                       });
+                                                       break;
+                                               }
+                                                       
+                                               case TOKEN_ENTITY:
+                                               case TOKEN_FLOAT: /*TODO: validate, constant generation, name constant */
+                                                       if (!isdigit(token))
+                                                               error(ERROR_PARSE, "%s:%d Expected numeric constant for float constant\n");
+                                                       compile_constants_add((constant){
+                                                               .name   = util_strdup(name),
+                                                               .type   = TOKEN_FLOAT,
+                                                               .value  = {0,0,0},
+                                                               .string = NULL
+                                                       });
+                                                       break;
+                                       }
+                               } else if (token == '(') {
+                                       printf("FUNCTION ??\n");
+                               }
+                               mem_d(name);
+                       }
                                
                        /*
                         * From here down is all language punctuation:  There is no
                         * need to actual create tokens from these because they're already
                         * tokenized as these individual tokens (which are in a special area
                         * of the ascii table which doesn't conflict with our other tokens
-                        * which are higer than the ascii table.
+                        * which are higher than the ascii table.)
                         */
-                       case '&':               /* &  */
-                               token = lex_token(file);
-                               if (token == '&') { /* && */
-                                       token = lex_token(file);
-                                       PARSE_TREE_ADD(PARSE_TYPE_LAND);
-                                       goto end;
-                               }
-                               PARSE_TREE_ADD(PARSE_TYPE_BAND);
-                               printf("--> BITWISE AND\n");
-                               break;
-                       case '|':               /* |  */
-                               token = lex_token(file);
-                               if (token == '|') { /* || */
-                                       token = lex_token(file);
-                                       PARSE_TREE_ADD(PARSE_TYPE_LOR);
-                                       goto end;
-                               }
-                               PARSE_TREE_ADD(PARSE_TYPE_BOR);
-                               break;
-                       case '!':
-                               token = lex_token(file);
-                               if (token == '=') { /* != */
-                                       token = lex_token(file);
-                                       PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
-                                       goto end;
-                               }
-                               PARSE_TREE_ADD(PARSE_TYPE_LNOT);
-                               break;
-                       case '<':               /* <  */
-                               token = lex_token(file);
-                               if (token == '=') { /* <= */
-                                       token = lex_token(file);
-                                       PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
-                                       goto end;
-                               }
-                               PARSE_TREE_ADD(PARSE_TYPE_LT);
-                               break;
-                       case '>':               /* >  */
-                               token = lex_token(file);
-                               if (token == '=') { /* >= */
+                       case '#':
+                               token = lex_token(file); /* skip '#' */
+                               if (token == ' ')
                                        token = lex_token(file);
-                                       PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
-                                       goto end;
+                               /*
+                                * If we make it here we found a directive; the only
+                                * directive supported so far is #include.
+                                */
+                               if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
+                                       /*
+                                        * We only support include " ", not <> like in C (why?)
+                                        * because the latter is silly.
+                                        */
+                                       while (*file->lastok != '"' && token != '\n')
+                                               token = lex_token(file);
+                                       if (token == '\n')
+                                               return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line-1);
+                                               
+                                       char            *copy = util_strdup(file->lastok);
+                                       struct lex_file *next = lex_include(file,   copy);
+                                       
+                                       if (!next) {
+                                               error(ERROR_INTERNAL, "Include subsystem failure\n");
+                                               exit (-1);
+                                       }
+                                       parse_gen(next);
+                                       mem_d    (copy);
+                                       lex_close(next);
                                }
-                               PARSE_TREE_ADD(PARSE_TYPE_GT);
-                               break;
-                       case '=':
-                               token = lex_token(file);
-                               if (token == '=') { /* == */
+                               /* skip all tokens to end of directive */
+                               while (token != '\n')
                                        token = lex_token(file);
-                                       PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
-                                       goto end;
-                               }
-                               PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
-                               break;
-                       case ';':
-                               token = lex_token(file);
-                               PARSE_TREE_ADD(PARSE_TYPE_DONE);
-                               break;
-                       case '-':
-                               token = lex_token(file);
-                               PARSE_TREE_ADD(PARSE_TYPE_MINUS);
                                break;
-                       case '+':
-                               token = lex_token(file);
-                               PARSE_TREE_ADD(PARSE_TYPE_ADD);
-                               break;
-                       case '(':
-                               token = lex_token(file);
-                               PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
-                               break;
-                       case ')':
-                               token = lex_token(file);
-                               PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
-                               break;
-                       case '{':
-                               token = lex_token(file);
-                               PARSE_TREE_ADD(PARSE_TYPE_LBS);
-                               break;
-                       case '}':
+                               
+                       case LEX_IDENT:
                                token = lex_token(file);
-                               PARSE_TREE_ADD(PARSE_TYPE_RBS);
                                break;
                }
-               end:;
        }
-       parse_debug(parseroot);
        lex_reset(file);
-       
        return 1;
 }