parse.c

   1 /*
   2  * Copyright (C) 2012
   3  *      Dale Weiler
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   6  * this software and associated documentation files (the "Software"), to deal in
   7  * the Software without restriction, including without limitation the rights to
   8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is furnished to do
  10  * so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in all
  13  * copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23 #include <limits.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include "gmqcc.h"
  28
  29 /* compile-time constant for constants */
  30 typedef struct {
  31         char *name;
  32         int   type;
  33         float value[3];
  34         char *string; /* string value if constant is string literal */
  35 } constant;
  36 VECTOR_MAKE(constant, compile_constants);
  37
  38 /*
  39  * These are not lexical tokens:  These are parse tree types.  Most people
  40  * perform tokenizing on language punctuation which is wrong.  That stuff
  41  * is technically already tokenized, it just needs to be parsed into a tree
  42  */
  43 #define PARSE_TYPE_DO       0
  44 #define PARSE_TYPE_ELSE     1
  45 #define PARSE_TYPE_IF       2
  46 #define PARSE_TYPE_WHILE    3
  47 #define PARSE_TYPE_BREAK    4
  48 #define PARSE_TYPE_CONTINUE 5
  49 #define PARSE_TYPE_RETURN   6
  50 #define PARSE_TYPE_GOTO     7
  51 #define PARSE_TYPE_FOR      8
  52 #define PARSE_TYPE_VOID     9
  53 #define PARSE_TYPE_STRING   10
  54 #define PARSE_TYPE_FLOAT    11
  55 #define PARSE_TYPE_VECTOR   12
  56 #define PARSE_TYPE_ENTITY   13
  57 #define PARSE_TYPE_LAND     14
  58 #define PARSE_TYPE_LOR      15
  59 #define PARSE_TYPE_LTEQ     16
  60 #define PARSE_TYPE_GTEQ     17
  61 #define PARSE_TYPE_EQEQ     18
  62 #define PARSE_TYPE_LNEQ     19
  63 #define PARSE_TYPE_COMMA    20
  64 #define PARSE_TYPE_LNOT     21
  65 #define PARSE_TYPE_STAR     22
  66 #define PARSE_TYPE_DIVIDE   23
  67 #define PARSE_TYPE_LPARTH   24
  68 #define PARSE_TYPE_RPARTH   25
  69 #define PARSE_TYPE_MINUS    26
  70 #define PARSE_TYPE_ADD      27
  71 #define PARSE_TYPE_EQUAL    28
  72 #define PARSE_TYPE_LBS      29
  73 #define PARSE_TYPE_RBS      30
  74 #define PARSE_TYPE_ELIP     31
  75 #define PARSE_TYPE_DOT      32
  76 #define PARSE_TYPE_LT       33
  77 #define PARSE_TYPE_GT       34
  78 #define PARSE_TYPE_BAND     35
  79 #define PARSE_TYPE_BOR      36
  80 #define PARSE_TYPE_DONE     37
  81 #define PARSE_TYPE_IDENT    38
  82
  83 /*
  84  * Adds a parse type to the parse tree, this is where all the hard
  85  * work actually begins.
  86  */
  87 #define PARSE_TREE_ADD(X)                                        \
  88     do {                                                         \
  89         parsetree->next       = mem_a(sizeof(struct parsenode)); \
  90         parsetree->next->next = NULL;                            \
  91         parsetree->next->type = (X);                             \
  92         parsetree             = parsetree->next;                 \
  93     } while (0)
  94 #define STORE(X) { \
  95         printf(X);     \
  96         break;         \
  97 }
  98
  99 void parse_debug(struct parsenode *tree) {
 100         long fill = 0;
 101         while (tree) {
 102                 switch (tree->type) {
 103                         case PARSE_TYPE_ADD:       STORE("OPERATOR:  ADD    \n");
 104                         case PARSE_TYPE_BAND:      STORE("OPERATOR:  BITAND \n");
 105                         case PARSE_TYPE_BOR:       STORE("OPERATOR:  BITOR  \n");
 106                         case PARSE_TYPE_COMMA:     STORE("OPERATOR:  SEPERATOR\n");
 107                         case PARSE_TYPE_DOT:       STORE("OPERATOR:  DOT\n");
 108                         case PARSE_TYPE_DIVIDE:    STORE("OPERATOR:  DIVIDE\n");
 109                         case PARSE_TYPE_EQUAL:     STORE("OPERATOR:  ASSIGNMENT\n");
 110
 111                         case PARSE_TYPE_BREAK:     STORE("STATEMENT: BREAK  \n");
 112                         case PARSE_TYPE_CONTINUE:  STORE("STATEMENT: CONTINUE\n");
 113                         case PARSE_TYPE_GOTO:      STORE("STATEMENT: GOTO\n");
 114                         case PARSE_TYPE_RETURN:    STORE("STATEMENT: RETURN\n");
 115                         case PARSE_TYPE_DONE:      STORE("STATEMENT: DONE\n");
 116
 117                         case PARSE_TYPE_VOID:      STORE("DECLTYPE:  VOID\n");
 118                         case PARSE_TYPE_STRING:    STORE("DECLTYPE:  STRING\n");
 119                         case PARSE_TYPE_ELIP:      STORE("DECLTYPE:  VALIST\n");
 120                         case PARSE_TYPE_ENTITY:    STORE("DECLTYPE:  ENTITY\n");
 121                         case PARSE_TYPE_FLOAT:     STORE("DECLTYPE:  FLOAT\n");
 122                         case PARSE_TYPE_VECTOR:    STORE("DECLTYPE:  VECTOR\n");
 123
 124                         case PARSE_TYPE_GT:        STORE("TEST:      GREATER THAN\n");
 125                         case PARSE_TYPE_LT:        STORE("TEST:      LESS THAN\n");
 126                         case PARSE_TYPE_GTEQ:      STORE("TEST:      GREATER THAN OR EQUAL\n");
 127                         case PARSE_TYPE_LTEQ:      STORE("TEST:      LESS THAN OR EQUAL\n");
 128                         case PARSE_TYPE_LNEQ:      STORE("TEST:      NOT EQUAL\n");
 129                         case PARSE_TYPE_EQEQ:      STORE("TEST:      EQUAL-EQUAL\n");
 130
 131                         case PARSE_TYPE_LBS:       STORE("BLOCK:     BEG\n");
 132                         case PARSE_TYPE_RBS:       STORE("BLOCK:     END\n");
 133                         case PARSE_TYPE_ELSE:      STORE("BLOCK:     ELSE\n");
 134                         case PARSE_TYPE_IF:        STORE("BLOCK:     IF\n");
 135
 136                         case PARSE_TYPE_LAND:      STORE("LOGICAL:   AND\n");
 137                         case PARSE_TYPE_LNOT:      STORE("LOGICAL:   NOT\n");
 138                         case PARSE_TYPE_LOR:       STORE("LOGICAL:   OR\n");
 139
 140                         case PARSE_TYPE_LPARTH:    STORE("PARTH:     BEG\n");
 141                         case PARSE_TYPE_RPARTH:    STORE("PARTH:     END\n");
 142
 143                         case PARSE_TYPE_WHILE:     STORE("LOOP:      WHILE\n");
 144                         case PARSE_TYPE_FOR:       STORE("LOOP:      FOR\n");
 145                         case PARSE_TYPE_DO:        STORE("LOOP:      DO\n");
 146                 }
 147                 tree = tree->next;
 148         }
 149 }
 150
 151 /*
 152  * Performs a parse operation:  This is a macro to prevent bugs, if the
 153  * calls to lex_token are'nt exactly enough to feed to the end of the
 154  * actual lexees for the current thing that is being parsed, the state
 155  * of the next iteration in the creation of the parse tree will be wrong
 156  * and everything will fail.
 157  */
 158 #define PARSE_PERFORM(X,C) {     \
 159     token = lex_token(file);     \
 160     { C }                        \
 161     while (token != '\n') {      \
 162         token = lex_token(file); \
 163     }                            \
 164     PARSE_TREE_ADD(X);           \
 165     break;                       \
 166 }
 167
 168 void parse_clear(struct parsenode *tree) {
 169         if (!tree) return;
 170         struct parsenode *temp = NULL;
 171         while (tree != NULL) {
 172                 temp = tree;
 173                 tree = tree->next;
 174                 mem_d (temp);
 175         }
 176
 177         /* free any potential typedefs */
 178         typedef_clear();
 179 }
 180
 181 /*
 182  * Generates a parse tree out of the lexees generated by the lexer.  This
 183  * is where the tree is built.  This is where valid check is performed.
 184  */
 185 int parse_tree(struct lex_file *file) {
 186         struct parsenode *parsetree = NULL;
 187         struct parsenode *parseroot = NULL;
 188
 189         /*
 190          * Allocate memory for our parse tree:
 191          * the parse tree is just a singly linked list which will contain
 192          * all the data for code generation.
 193          */
 194         if (!parseroot) {
 195                 parseroot = mem_a(sizeof(struct parsenode));
 196                 if (!parseroot)
 197                         return error(ERROR_INTERNAL, "Ran out of memory", " ");
 198                 parsetree       = parseroot;
 199                 parsetree->type = -1; /* not a valid type -- root element */
 200         }
 201
 202         int     token = 0;
 203         while ((token = lex_token(file)) != ERROR_LEX      && \
 204                     token                    != ERROR_COMPILER && \
 205                     token                    != ERROR_INTERNAL && \
 206                     token                    != ERROR_PARSE    && \
 207                     token                    != ERROR_PREPRO   && file->length >= 0) {
 208                 switch (token) {
 209                         case TOKEN_TYPEDEF: {
 210                                 char *f; /* from */
 211                                 char *t; /* to   */
 212
 213                                 token = lex_token(file);
 214                                 token = lex_token(file); f = util_strdup(file->lastok);
 215                                 token = lex_token(file);
 216                                 token = lex_token(file); t = util_strdup(file->lastok);
 217
 218                                 typedef_add(f, t);
 219                                 mem_d(f);
 220                                 mem_d(t);
 221
 222                                 token = lex_token(file);
 223                                 if (token == ' ')
 224                                         token = lex_token(file);
 225
 226                                 if (token != ';')
 227                                         error(ERROR_PARSE, "%s:%d Expected a `;` at end of typedef statement\n", file->name, file->line);
 228
 229                                 token = lex_token(file);
 230                                 break;
 231                         }
 232
 233                         case TOKEN_VOID:      PARSE_TREE_ADD(PARSE_TYPE_VOID);   goto fall;
 234                         case TOKEN_STRING:    PARSE_TREE_ADD(PARSE_TYPE_STRING); goto fall;
 235                         case TOKEN_VECTOR:    PARSE_TREE_ADD(PARSE_TYPE_VECTOR); goto fall;
 236                         case TOKEN_ENTITY:    PARSE_TREE_ADD(PARSE_TYPE_ENTITY); goto fall;
 237                         case TOKEN_FLOAT:     PARSE_TREE_ADD(PARSE_TYPE_FLOAT);  goto fall;
 238                         {
 239                         fall:;
 240                                 char *name = NULL;
 241                                 int   type = token; /* story copy */
 242
 243                                 /* skip over space */
 244                                 token = lex_token(file);
 245                                 if (token == ' ')
 246                                         token = lex_token(file);
 247
 248                                 /* save name */
 249                                 name = util_strdup(file->lastok);
 250
 251                                 /* skip spaces */
 252                                 token = lex_token(file);
 253                                 if (token == ' ')
 254                                         token = lex_token(file);
 255
 256                                 if (token == ';') {
 257                                         /*
 258                                          * Definitions go to the defs table, they don't have
 259                                          * any sort of data with them yet.
 260                                          */
 261                                 } else if (token == '=') {
 262                                         token = lex_token(file);
 263                                         if (token == ' ')
 264                                                 token = lex_token(file);
 265
 266                                         /* strings are in file->lastok */
 267                                         switch (type) {
 268                                                 case TOKEN_VOID:
 269                                                         return error(ERROR_PARSE, "%s:%d Cannot assign value to type void\n", file->name, file->line);
 270
 271                                                 /* TODO: Validate (end quote), strip quotes for constant add, name constant */
 272                                                 case TOKEN_STRING:
 273                                                         if (*file->lastok != '"')
 274                                                                 error(ERROR_PARSE, "%s:%d Expected a '\"' (quote) for string constant\n", file->name, file->line);
 275                                                         /* add the compile-time constant */
 276                                                         compile_constants_add((constant){
 277                                                                 .name   = util_strdup(name),
 278                                                                 .type   = TYPE_STRING,
 279                                                                 .value  = {0,0,0},
 280                                                                 .string = util_strdup(file->lastok)
 281                                                         });
 282                                                         break;
 283                                                 /* TODO: name constant, old qc vec literals, whitespace fixes, name constant */
 284                                                 case TOKEN_VECTOR: {
 285                                                         float compile_calc_x = 0;
 286                                                         float compile_calc_y = 0;
 287                                                         float compile_calc_z = 0;
 288                                                         int   compile_calc_d = 0; /* dot?        */
 289                                                         int   compile_calc_s = 0; /* sign (-, +) */
 290
 291                                                         char  compile_data[1024];
 292                                                         char *compile_eval = compile_data;
 293
 294                                                         if (token != '{')
 295                                                                 error(ERROR_PARSE, "%s:%d Expected initializer list `{`,`}` for vector constant\n", file->name, file->line);
 296
 297                                                         /*
 298                                                          * This parses a single vector element: x,y & z.  This will handle all the
 299                                                          * complicated mechanics of a vector, and can be extended as well.  This
 300                                                          * is a rather large macro, and is #undef'd after it's use below.
 301                                                          */
 302                                                         #define PARSE_VEC_ELEMENT(NAME, BIT)                                                                                                                                   \
 303                                                             token = lex_token(file);                                                                                                                                           \
 304                                                             if (token == ' ')                                                                                                                                                  \
 305                                                                 token = lex_token(file);                                                                                                                                       \
 306                                                             if (token == '.')                                                                                                                                                  \
 307                                                                 compile_calc_d = 1;                                                                                                                                            \
 308                                                             if (!isdigit(token) && !compile_calc_d && token != '+' && token != '-')                                                                                            \
 309                                                                 error(ERROR_PARSE,"%s:%d Invalid constant initializer element %c for vector, must be numeric\n", file->name, file->line, NAME);                                \
 310                                                             if (token == '+')                                                                                                                                                  \
 311                                                                 compile_calc_s = '+';                                                                                                                                          \
 312                                                             if (token == '-' && !compile_calc_s)                                                                                                                               \
 313                                                                 compile_calc_s = '-';                                                                                                                                          \
 314                                                             while (isdigit(token) || token == '.' || token == '+' || token == '-') {                                                                                           \
 315                                                                 *compile_eval++ = token;                                                                                                                                       \
 316                                                                 token           = lex_token(file);                                                                                                                             \
 317                                                                 if (token == '.' && compile_calc_d) {                                                                                                                          \
 318                                                                     error(ERROR_PARSE, "%s:%d Invalid constant initializer element %c for vector, must be numeric.\n", file->name, file->line, NAME);                          \
 319                                                                     token = lex_token(file);                                                                                                                                   \
 320                                                                 }                                                                                                                                                              \
 321                                                                 if ((token == '-' || token == '+') && compile_calc_s) {                                                                                                        \
 322                                                                     error(ERROR_PARSE, "%s:%d Invalid constant initializer sign for vector element %c\n", file->name, file->line, NAME);                                       \
 323                                                                     token = lex_token(file);                                                                                                                                   \
 324                                                                 }                                                                                                                                                              \
 325                                                                 else if (token == '.' && !compile_calc_d)                                                                                                                      \
 326                                                                     compile_calc_d = 1;                                                                                                                                        \
 327                                                                 else if (token == '-' && !compile_calc_s)                                                                                                                      \
 328                                                                     compile_calc_s = '-';                                                                                                                                      \
 329                                                                 else if (token == '+' && !compile_calc_s)                                                                                                                      \
 330                                                                     compile_calc_s = '+';                                                                                                                                      \
 331                                                             }                                                                                                                                                                  \
 332                                                             if (token == ' ')                                                                                                                                                  \
 333                                                                 token = lex_token(file);                                                                                                                                       \
 334                                                             if (NAME != 'z') {                                                                                                                                                 \
 335                                                                 if (token != ',' && token != ' ')                                                                                                                              \
 336                                                                     error(ERROR_PARSE, "%s:%d invalid constant initializer element %c for vector (missing spaces, or comma delimited list?)\n", file->name, file->line, NAME); \
 337                                                             } else if (token != '}') {                                                                                                                                         \
 338                                                                 error(ERROR_PARSE, "%s:%d Expected `}` on end of constant initialization for vector\n", file->name, file->line);                                               \
 339                                                             }                                                                                                                                                                  \
 340                                                             compile_calc_##BIT = atof(compile_data);                                                                                                                           \
 341                                                             compile_calc_d = 0;                                                                                                                                                \
 342                                                             compile_calc_s = 0;                                                                                                                                                \
 343                                                             compile_eval   = &compile_data[0];                                                                                                                                 \
 344                                                             memset(compile_data, 0, sizeof(compile_data))
 345
 346                                                         /*
 347                                                          * Parse all elements using the macro above.
 348                                                          * We must undef the macro afterwards.
 349                                                          */
 350                                                         PARSE_VEC_ELEMENT('x', x);
 351                                                         PARSE_VEC_ELEMENT('y', y);
 352                                                         PARSE_VEC_ELEMENT('z', z);
 353                                                         #undef PARSE_VEC_ELEMENT
 354
 355                                                         /* Check for the semi-colon... */
 356                                                         token = lex_token(file);
 357                                                         if (token == ' ')
 358                                                                 token = lex_token(file);
 359                                                         if (token != ';')
 360                                                                 error(ERROR_PARSE, "%s:%d Expected `;` on end of constant initialization for vector\n", file->name, file->line);
 361
 362                                                         /* add the compile-time constant */
 363                                                         compile_constants_add((constant){
 364                                                                 .name   = util_strdup(name),
 365                                                                 .type   = TYPE_VECTOR,
 366                                                                 .value  = {
 367                                                                         [0] = compile_calc_x,
 368                                                                         [1] = compile_calc_y,
 369                                                                         [2] = compile_calc_z
 370                                                                 },
 371                                                                 .string = NULL
 372                                                         });
 373                                                         break;
 374                                                 }
 375
 376                                                 case TOKEN_ENTITY:
 377                                                 case TOKEN_FLOAT: /*TODO: validate, constant generation, name constant */
 378                                                         if (!isdigit(token))
 379                                                                 error(ERROR_PARSE, "%s:%d Expected numeric constant for float constant\n");
 380                                                         compile_constants_add((constant){
 381                                                                 .name   = util_strdup(name),
 382                                                                 .type   = TOKEN_FLOAT,
 383                                                                 .value  = {0,0,0},
 384                                                                 .string = NULL
 385                                                         });
 386                                                         break;
 387                                         }
 388                                 } else if (token == '(') {
 389                                         printf("FUNCTION ??\n");
 390                                 }
 391                                 mem_d(name);
 392                         }
 393
 394                         /*
 395                          * From here down is all language punctuation:  There is no
 396                          * need to actual create tokens from these because they're already
 397                          * tokenized as these individual tokens (which are in a special area
 398                          * of the ascii table which doesn't conflict with our other tokens
 399                          * which are higer than the ascii table.)
 400                          */
 401                         case '#':
 402                                 token = lex_token(file); /* skip '#' */
 403                                 if (token == ' ')
 404                                         token = lex_token(file);
 405                                 /*
 406                                  * If we make it here we found a directive, the supported
 407                                  * directives so far are #include.
 408                                  */
 409                                 if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
 410                                         /*
 411                                          * We only suport include " ", not <> like in C (why?)
 412                                          * because the latter is silly.
 413                                          */
 414                                         while (*file->lastok != '"' && token != '\n')
 415                                                 token = lex_token(file);
 416                                         if (token == '\n')
 417                                                 return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line-1);
 418
 419                                         char            *copy = util_strdup(file->lastok);
 420                                         struct lex_file *next = lex_include(file,   copy);
 421
 422                                         if (!next) {
 423                                                 error(ERROR_INTERNAL, "Include subsystem failure\n");
 424                                                 exit (-1);
 425                                         }
 426                                         parse_tree(next);
 427                                         mem_d     (copy);
 428                                         lex_close (next);
 429                                 }
 430                                 /* skip all tokens to end of directive */
 431                                 while (token != '\n')
 432                                         token = lex_token(file);
 433                                 break;
 434
 435                         case LEX_IDENT:
 436                                 token = lex_token(file);
 437                                 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
 438                                 break;
 439                 }
 440         }
 441         parse_debug(parseroot);
 442         lex_reset(file);
 443         parse_clear(parseroot);
 444         return 1;
 445 }