parse.c

   1 /*
   2  * Copyright (C) 2012
   3  *      Dale Weiler
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   6  * this software and associated documentation files (the "Software"), to deal in
   7  * the Software without restriction, including without limitation the rights to
   8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is furnished to do
  10  * so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in all
  13  * copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23 #include <limits.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include "gmqcc.h"
  28
  29 /*
  30  * These are not lexical tokens:  These are parse tree types.  Most people
  31  * perform tokenizing on language punctuation which is wrong.  That stuff
  32  * is technically already tokenized, it just needs to be parsed into a tree
  33  */
  34 #define PARSE_TYPE_DO       0
  35 #define PARSE_TYPE_ELSE     1
  36 #define PARSE_TYPE_IF       2
  37 #define PARSE_TYPE_WHILE    3
  38 #define PARSE_TYPE_BREAK    4
  39 #define PARSE_TYPE_CONTINUE 5
  40 #define PARSE_TYPE_RETURN   6
  41 #define PARSE_TYPE_GOTO     7
  42 #define PARSE_TYPE_FOR      8
  43 #define PARSE_TYPE_VOID     9
  44 #define PARSE_TYPE_STRING   10
  45 #define PARSE_TYPE_FLOAT    11
  46 #define PARSE_TYPE_VECTOR   12
  47 #define PARSE_TYPE_ENTITY   13
  48 #define PARSE_TYPE_LAND     14
  49 #define PARSE_TYPE_LOR      15
  50 #define PARSE_TYPE_LTEQ     16
  51 #define PARSE_TYPE_GTEQ     17
  52 #define PARSE_TYPE_EQEQ     18
  53 #define PARSE_TYPE_LNEQ     19
  54 #define PARSE_TYPE_COMMA    20
  55 #define PARSE_TYPE_LNOT     21
  56 #define PARSE_TYPE_STAR     22
  57 #define PARSE_TYPE_DIVIDE   23
  58 #define PARSE_TYPE_LPARTH   24
  59 #define PARSE_TYPE_RPARTH   25
  60 #define PARSE_TYPE_MINUS    26
  61 #define PARSE_TYPE_ADD      27
  62 #define PARSE_TYPE_EQUAL    28
  63 #define PARSE_TYPE_LBS      29
  64 #define PARSE_TYPE_RBS      30
  65 #define PARSE_TYPE_ELIP     31
  66 #define PARSE_TYPE_DOT      32
  67 #define PARSE_TYPE_LT       33
  68 #define PARSE_TYPE_GT       34
  69 #define PARSE_TYPE_BAND     35
  70 #define PARSE_TYPE_BOR      36
  71 #define PARSE_TYPE_DONE     37
  72 #define PARSE_TYPE_IDENT    38
  73
  74 int parse[PARSE_TYPE_IDENT] = {
  75         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  76         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  77         0,0,0,0,0,0,0,0
  78 };
  79
  80 /*
  81  * Adds a parse type to the parse tree, this is where all the hard
  82  * work actually begins.
  83  */
  84 #define PARSE_TREE_ADD(X)                                        \
  85         do {                                                         \
  86                 parsetree->next       = mem_a(sizeof(struct parsenode)); \
  87                 parsetree->next->next = NULL;                            \
  88                 parsetree->next->type = (X);                             \
  89                 parsetree             = parsetree->next;                 \
  90         } while (0)
  91
  92 #define PARSE_TREE_CHK(X,Y,Z)                                        \
  93         do { \
  94                 if(parse[X]) { \
  95                         error(ERROR_PARSE, "Expected %c for %c\n", Y, Z); \
  96                         exit (1); \
  97                 } \
  98         } while (0)
  99
 100 #define PARSE_TREE_PUT(X) do { parse[X] = 1; } while (0)
 101
 102 /*
 103  * This is all the punctuation handled in the parser, these don't
 104  * need tokens, they're already tokens.
 105  */
 106 #if 0
 107         "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
 108         "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
 109         "." , "<" , ">" , "&" , "|" ,
 110 #endif
 111
 112 #define STORE(X) {  \
 113         printf(X);      \
 114         break;          \
 115 }
 116
 117 void parse_debug(struct parsenode *tree) {
 118         while (tree) {
 119                 switch (tree->type) {
 120                         case PARSE_TYPE_ADD:       STORE("OPERATOR:  ADD    \n");
 121                         case PARSE_TYPE_BAND:      STORE("OPERATOR:  BITAND \n");
 122                         case PARSE_TYPE_BOR:       STORE("OPERATOR:  BITOR  \n");
 123                         case PARSE_TYPE_COMMA:     STORE("OPERATOR:  SEPERATOR\n");
 124                         case PARSE_TYPE_DOT:       STORE("OPERATOR:  DOT\n");
 125                         case PARSE_TYPE_DIVIDE:    STORE("OPERATOR:  DIVIDE\n");
 126                         case PARSE_TYPE_EQUAL:     STORE("OPERATOR:  ASSIGNMENT\n");
 127
 128                         case PARSE_TYPE_BREAK:     STORE("STATEMENT: BREAK  \n");
 129                         case PARSE_TYPE_CONTINUE:  STORE("STATEMENT: CONTINUE\n");
 130                         case PARSE_TYPE_GOTO:      STORE("STATEMENT: GOTO\n");
 131                         case PARSE_TYPE_RETURN:    STORE("STATEMENT: RETURN\n");
 132                         case PARSE_TYPE_DONE:      STORE("STATEMENT: DONE\n");
 133
 134                         case PARSE_TYPE_VOID:      STORE("DECLTYPE:  VOID\n");
 135                         case PARSE_TYPE_STRING:    STORE("DECLTYPE:  STRING\n");
 136                         case PARSE_TYPE_ELIP:      STORE("DECLTYPE:  VALIST\n");
 137                         case PARSE_TYPE_ENTITY:    STORE("DECLTYPE:  ENTITY\n");
 138                         case PARSE_TYPE_FLOAT:     STORE("DECLTYPE:  FLOAT\n");
 139                         case PARSE_TYPE_VECTOR:    STORE("DECLTYPE:  VECTOR\n");
 140
 141                         case PARSE_TYPE_GT:        STORE("TEST:      GREATER THAN\n");
 142                         case PARSE_TYPE_LT:        STORE("TEST:      LESS THAN\n");
 143                         case PARSE_TYPE_GTEQ:      STORE("TEST:      GREATER THAN OR EQUAL\n");
 144                         case PARSE_TYPE_LTEQ:      STORE("TEST:      LESS THAN OR EQUAL\n");
 145                         case PARSE_TYPE_LNEQ:      STORE("TEST:      NOT EQUAL\n");
 146                         case PARSE_TYPE_EQEQ:      STORE("TEST:      EQUAL-EQUAL\n");
 147
 148                         case PARSE_TYPE_LBS:       STORE("BLOCK:     BEG\n");
 149                         case PARSE_TYPE_RBS:       STORE("BLOCK:     END\n");
 150                         case PARSE_TYPE_ELSE:      STORE("BLOCK:     ELSE\n");
 151                         case PARSE_TYPE_IF:        STORE("BLOCK:     IF\n");
 152
 153                         case PARSE_TYPE_LAND:      STORE("LOGICAL:   AND\n");
 154                         case PARSE_TYPE_LNOT:      STORE("LOGICAL:   NOT\n");
 155                         case PARSE_TYPE_LOR:       STORE("LOGICAL:   OR\n");
 156
 157                         case PARSE_TYPE_LPARTH:    STORE("PARTH:     BEG\n");
 158                         case PARSE_TYPE_RPARTH:    STORE("PARTH:     END\n");
 159
 160                         case PARSE_TYPE_WHILE:     STORE("LOOP:      WHILE\n");
 161                         case PARSE_TYPE_FOR:       STORE("LOOP:      FOR\n");
 162                         case PARSE_TYPE_DO:        STORE("LOOP:      DO\n");
 163
 164                         //case PARSE_TYPE_IDENT:     STORE("IDENT:     ???\n");
 165                 }
 166                 tree = tree->next;
 167         }
 168 }
 169
 170 /*
 171  * Performs a parse operation:  This is a macro to prevent bugs, if the
 172  * calls to lex_token are'nt exactly enough to feed to the end of the
 173  * actual lexees for the current thing that is being parsed, the state
 174  * of the next iteration in the creation of the parse tree will be wrong
 175  * and everything will fail.
 176  */
 177 #define PARSE_PERFORM(X,C) {     \
 178     token = lex_token(file);     \
 179     { C }                        \
 180     while (token != '\n') {      \
 181             token = lex_token(file); \
 182     }                            \
 183     PARSE_TREE_ADD(X);           \
 184     break;                       \
 185 }
 186
 187 void parse_clear(struct parsenode *tree) {
 188         if (!tree) return;
 189         struct parsenode *temp = NULL;
 190         while (tree != NULL) {
 191                 temp = tree;
 192                 tree = tree->next;
 193                 mem_d (temp);
 194         }
 195
 196         /* free any potential typedefs */
 197         typedef_clear();
 198 }
 199
 200 /*
 201  * Generates a parse tree out of the lexees generated by the lexer.  This
 202  * is where the tree is built.  This is where valid check is performed.
 203  */
 204 int parse_tree(struct lex_file *file) {
 205         struct parsenode *parsetree = NULL;
 206         struct parsenode *parseroot = NULL;
 207
 208         /*
 209          * Allocate memory for our parse tree:
 210          * the parse tree is just a singly linked list which will contain
 211          * all the data for code generation.
 212          */
 213         if (!parseroot) {
 214                 parseroot = mem_a(sizeof(struct parsenode));
 215                 if (!parseroot)
 216                         return error(ERROR_INTERNAL, "Ran out of memory", " ");
 217                 parsetree       = parseroot;
 218                 parsetree->type = -1; /* not a valid type -- root element */
 219         }
 220
 221         int     token = 0;
 222         while ((token = lex_token(file)) != ERROR_LEX      && \
 223                     token                    != ERROR_COMPILER && \
 224                     token                    != ERROR_INTERNAL && \
 225                     token                    != ERROR_PARSE    && \
 226                     token                    != ERROR_PREPRO   && file->length >= 0) {
 227                 switch (token) {
 228                         case TOKEN_IF:
 229                                 token = lex_token(file);
 230                                 if (token != '(')
 231                                         error(ERROR_PARSE, "Expected `(` on if statement:\n");
 232                                 PARSE_TREE_ADD(PARSE_TYPE_IF);
 233                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
 234                                 PARSE_TREE_CHK(PARSE_TYPE_LPARTH, ')', '(');
 235                                 PARSE_TREE_PUT(PARSE_TYPE_LPARTH);
 236                                 break;
 237                         case TOKEN_ELSE:
 238                                 token = lex_token(file);
 239                                 PARSE_TREE_ADD(PARSE_TYPE_ELSE);
 240                                 break;
 241                         case TOKEN_FOR:
 242                                 //token = lex_token(file);
 243                                 while ((token == ' ' || token == '\n') && file->length >= 0)
 244                                         token = lex_token(file);
 245                                 PARSE_TREE_ADD(PARSE_TYPE_FOR);
 246                                 break;
 247
 248                         /*
 249                          * This is a quick and easy way to do typedefs at parse time
 250                          * all power is in typedef_add(), in typedef.c.  We handle
 251                          * the tokens accordingly here.
 252                          */
 253                         case TOKEN_TYPEDEF: {
 254                                 char *f,*t;
 255
 256                                 token = lex_token(file);
 257                                 token = lex_token(file); f = util_strdup(file->lastok);
 258                                 token = lex_token(file);
 259                                 token = lex_token(file); t = util_strdup(file->lastok);
 260
 261                                 typedef_add(f, t);
 262
 263                                 mem_d(f);
 264                                 mem_d(t);
 265
 266                                 while (token != '\n')
 267                                         token = lex_token(file);
 268                                 break;
 269                         }
 270
 271                         /*
 272                          * Returns are addable as-is, statement checking is during
 273                          * the actual parse tree check.
 274                          */
 275                         case TOKEN_RETURN:
 276                                 token = lex_token(file);
 277                                 PARSE_TREE_ADD(PARSE_TYPE_RETURN);
 278                                 break;
 279                         case TOKEN_CONTINUE:
 280                                 PARSE_TREE_ADD(PARSE_TYPE_CONTINUE);
 281                                 break;
 282
 283                         /*
 284                          * DO loops work like so:
 285                          * __do_loop_beg:
 286                          * IFNOT __do_loop_beg#
 287                          *      GOTO __do_loop_end
 288                          * INSTR1
 289                          * INSTR2
 290                          * ......
 291                          * GOTO __do_loop_beg
 292                          */
 293
 294                         case TOKEN_DO:        PARSE_PERFORM(PARSE_TYPE_DO,      {});
 295                         case TOKEN_WHILE:     PARSE_PERFORM(PARSE_TYPE_WHILE,   {});
 296                         case TOKEN_BREAK:     PARSE_PERFORM(PARSE_TYPE_BREAK,   {});
 297                         case TOKEN_GOTO:      PARSE_PERFORM(PARSE_TYPE_GOTO,    {});
 298                         case TOKEN_VOID:      PARSE_PERFORM(PARSE_TYPE_VOID,    {});
 299                         case TOKEN_STRING:    PARSE_PERFORM(PARSE_TYPE_STRING,  {});
 300                         case TOKEN_FLOAT:     PARSE_PERFORM(PARSE_TYPE_FLOAT,   {});
 301                         case TOKEN_VECTOR:    PARSE_PERFORM(PARSE_TYPE_VECTOR,  {});
 302                         case TOKEN_ENTITY:    PARSE_PERFORM(PARSE_TYPE_ENTITY,  {});
 303
 304                         /*
 305                          * From here down is all language punctuation:  There is no
 306                          * need to actual create tokens from these because they're already
 307                          * tokenized as these individual tokens (which are in a special area
 308                          * of the ascii table which doesn't conflict with our other tokens
 309                          * which are higer than the ascii table.)
 310                          */
 311                         case '#':
 312                                 token = lex_token(file); /* skip '#' */
 313                                 while (isspace(token)) {
 314                                         if (token == '\n') {
 315                                                 return error(ERROR_PARSE, "Expected valid preprocessor directive after `#` %s\n");
 316                                         }
 317                                         token = lex_token(file); /* try again */
 318                                 }
 319                                 /*
 320                                  * If we make it here we found a directive, the supported
 321                                  * directives so far are #include.
 322                                  */
 323                                 if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
 324                                         //lex_include("file");
 325                                         /*
 326                                          * We need to compose a name till we find either a
 327                                          * '"',> or a <, (for includes), if we hit a '\n' then
 328                                          * clearly the person miswrote the include.
 329                                          */
 330                                         while (*file->lastok != '"' && token != '\n')
 331                                                 token = lex_token(file);
 332
 333                                         if (token == '\n')
 334                                                 return error(ERROR_PARSE, "Invalid use of include preprocessor directive: wanted #include \"file.h\"\n");
 335
 336
 337                                         //lex_token(file);
 338                                         //lex_token(file);
 339                                         printf("include: %s\n", file->lastok);
 340                                 }
 341
 342                                 /* skip all tokens to end of directive */
 343                                 while (token != '\n')
 344                                         token = lex_token(file);
 345                                 break;
 346
 347                         case '.':
 348                                 //token = lex_token(file);
 349                                 PARSE_TREE_ADD(PARSE_TYPE_DOT);
 350                                 break;
 351                         case '(':
 352                                 //token = lex_token(file);
 353                                 PARSE_TREE_PUT(PARSE_TYPE_LPARTH);
 354                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
 355                                 break;
 356                         case ')':
 357                                 //token = lex_token(file);
 358                                 parse[PARSE_TYPE_LPARTH] = 0;
 359                                 PARSE_TREE_PUT(PARSE_TYPE_RPARTH);
 360                                 PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
 361                                 break;
 362
 363                         case '&':                               /* &  */
 364                                 token = lex_token(file);
 365                                 if (token == '&') { /* && */
 366                                         token = lex_token(file);
 367                                         PARSE_TREE_ADD(PARSE_TYPE_LAND);
 368                                         break;
 369                                 }
 370                                 PARSE_TREE_ADD(PARSE_TYPE_BAND);
 371                                 break;
 372                         case '|':                               /* |  */
 373                                 token = lex_token(file);
 374                                 if (token == '|') { /* || */
 375                                         token = lex_token(file);
 376                                         PARSE_TREE_ADD(PARSE_TYPE_LOR);
 377                                         break;
 378                                 }
 379                                 PARSE_TREE_ADD(PARSE_TYPE_BOR);
 380                                 break;
 381                         case '!':                               /* !  */
 382                                 token = lex_token(file);
 383                                 if (token == '=') { /* != */
 384                                         token = lex_token(file);
 385                                         PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
 386                                         break;
 387                                 }
 388                                 PARSE_TREE_ADD(PARSE_TYPE_LNOT);
 389                                 break;
 390                         case '<':                               /* <  */
 391                                 token = lex_token(file);
 392                                 if (token == '=') { /* <= */
 393                                         token = lex_token(file);
 394                                         PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
 395                                         break;
 396                                 }
 397                                 PARSE_TREE_ADD(PARSE_TYPE_LT);
 398                                 break;
 399                         case '>':                               /* >  */
 400                                 token = lex_token(file);
 401                                 if (token == '=') { /* >= */
 402                                         token = lex_token(file);
 403                                         PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
 404                                         break;
 405                                 }
 406                                 PARSE_TREE_ADD(PARSE_TYPE_GT);
 407                                 break;
 408                         case '=':                               /* =  */
 409                                 token = lex_token(file);
 410                                 if (token == '=') { /* == */
 411                                         token = lex_token(file);
 412                                         PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
 413                                         break;
 414                                 }
 415                                 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
 416                                 break;
 417                         case ';':
 418                                 token = lex_token(file);
 419                                 PARSE_TREE_ADD(PARSE_TYPE_DONE);
 420                                 break;
 421                         case '-':
 422                                 token = lex_token(file);
 423                                 PARSE_TREE_ADD(PARSE_TYPE_MINUS);
 424                                 break;
 425                         case '+':
 426                                 token = lex_token(file);
 427                                 PARSE_TREE_ADD(PARSE_TYPE_ADD);
 428                                 break;
 429                         case '{':
 430                                 token = lex_token(file);
 431                                 PARSE_TREE_ADD(PARSE_TYPE_LBS);
 432                                 break;
 433                         case '}':
 434                                 token = lex_token(file);
 435                                 PARSE_TREE_ADD(PARSE_TYPE_RBS);
 436                                 break;
 437
 438                         /*
 439                          * TODO: Fix lexer to spit out ( ) as tokens, it seems the
 440                          * using '(' or ')' in parser doesn't work properly unless
 441                          * there are spaces before them to allow the lexer to properly
 442                          * seperate identifiers. -- otherwise it eats all of it.
 443                          */
 444                         case LEX_IDENT:
 445                                 token = lex_token(file);
 446                                 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
 447                                 break;
 448                 }
 449         }
 450         parse_debug(parseroot);
 451         lex_reset(file);
 452         parse_clear(parseroot);
 453         return 1;
 454 }