parse.c

   1 /*
   2  * Copyright (C) 2012
   3  *      Dale Weiler
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   6  * this software and associated documentation files (the "Software"), to deal in
   7  * the Software without restriction, including without limitation the rights to
   8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is furnished to do
  10  * so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in all
  13  * copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23 #include <limits.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include "gmqcc.h"
  28
  29 /*
  30  * These are not lexical tokens:  These are parse tree types.  Most people
  31  * perform tokenizing on language punctuation which is wrong.  That stuff
  32  * is technically already tokenized, it just needs to be parsed into a tree
  33  */
  34 #define PARSE_TYPE_DO       0
  35 #define PARSE_TYPE_ELSE     1
  36 #define PARSE_TYPE_IF       2
  37 #define PARSE_TYPE_WHILE    3
  38 #define PARSE_TYPE_BREAK    4
  39 #define PARSE_TYPE_CONTINUE 5
  40 #define PARSE_TYPE_RETURN   6
  41 #define PARSE_TYPE_GOTO     7
  42 #define PARSE_TYPE_FOR      8
  43 #define PARSE_TYPE_VOID     9
  44 #define PARSE_TYPE_STRING   10
  45 #define PARSE_TYPE_FLOAT    11
  46 #define PARSE_TYPE_VECTOR   12
  47 #define PARSE_TYPE_ENTITY   13
  48 #define PARSE_TYPE_LAND     14
  49 #define PARSE_TYPE_LOR      15
  50 #define PARSE_TYPE_LTEQ     16
  51 #define PARSE_TYPE_GTEQ     17
  52 #define PARSE_TYPE_EQEQ     18
  53 #define PARSE_TYPE_LNEQ     19
  54 #define PARSE_TYPE_COMMA    20
  55 #define PARSE_TYPE_LNOT     21
  56 #define PARSE_TYPE_STAR     22
  57 #define PARSE_TYPE_DIVIDE   23
  58 #define PARSE_TYPE_LPARTH   24
  59 #define PARSE_TYPE_RPARTH   25
  60 #define PARSE_TYPE_MINUS    26
  61 #define PARSE_TYPE_ADD      27
  62 #define PARSE_TYPE_EQUAL    28
  63 #define PARSE_TYPE_LBS      29
  64 #define PARSE_TYPE_RBS      30
  65 #define PARSE_TYPE_ELIP     31
  66 #define PARSE_TYPE_DOT      32
  67 #define PARSE_TYPE_LT       33
  68 #define PARSE_TYPE_GT       34
  69 #define PARSE_TYPE_BAND     35
  70 #define PARSE_TYPE_BOR      36
  71 #define PARSE_TYPE_DONE     37
  72 #define PARSE_TYPE_IDENT    38
  73
  74 /*
  75  * Adds a parse type to the parse tree, this is where all the hard
  76  * work actually begins.
  77  */
  78 #define PARSE_TREE_ADD(X)                                        \
  79         do {                                                         \
  80                 parsetree->next       = mem_a(sizeof(struct parsenode)); \
  81                 parsetree->next->next = NULL;                            \
  82                 parsetree->next->type = (X);                             \
  83                 parsetree             = parsetree->next;                 \
  84         } while (0)
  85
  86 /*
  87  * This is all the punctuation handled in the parser, these don't
  88  * need tokens, they're already tokens.
  89  */
  90 #if 0
  91         "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
  92         "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
  93         "." , "<" , ">" , "&" , "|" ,
  94 #endif
  95
  96 #define STORE(X,C) {  \
  97     long f = fill;    \
  98     while(f--) {      \
  99       putchar(' ');   \
 100     }                 \
 101     fill C;           \
 102         printf(X);        \
 103         break;            \
 104 }
 105
 106 void parse_debug(struct parsenode *tree) {
 107         long fill = 0;
 108         while (tree) {
 109                 switch (tree->type) {
 110                         case PARSE_TYPE_ADD:       STORE("OPERATOR:  ADD    \n", -=0);
 111                         case PARSE_TYPE_BAND:      STORE("OPERATOR:  BITAND \n",-=0);
 112                         case PARSE_TYPE_BOR:       STORE("OPERATOR:  BITOR  \n",-=0);
 113                         case PARSE_TYPE_COMMA:     STORE("OPERATOR:  SEPERATOR\n",-=0);
 114                         case PARSE_TYPE_DOT:       STORE("OPERATOR:  DOT\n",-=0);
 115                         case PARSE_TYPE_DIVIDE:    STORE("OPERATOR:  DIVIDE\n",-=0);
 116                         case PARSE_TYPE_EQUAL:     STORE("OPERATOR:  ASSIGNMENT\n",-=0);
 117
 118                         case PARSE_TYPE_BREAK:     STORE("STATEMENT: BREAK  \n",-=0);
 119                         case PARSE_TYPE_CONTINUE:  STORE("STATEMENT: CONTINUE\n",-=0);
 120                         case PARSE_TYPE_GOTO:      STORE("STATEMENT: GOTO\n",-=0);
 121                         case PARSE_TYPE_RETURN:    STORE("STATEMENT: RETURN\n",-=0);
 122                         case PARSE_TYPE_DONE:      STORE("STATEMENT: DONE\n",-=0);
 123
 124                         case PARSE_TYPE_VOID:      STORE("DECLTYPE:  VOID\n",-=0);
 125                         case PARSE_TYPE_STRING:    STORE("DECLTYPE:  STRING\n",-=0);
 126                         case PARSE_TYPE_ELIP:      STORE("DECLTYPE:  VALIST\n",-=0);
 127                         case PARSE_TYPE_ENTITY:    STORE("DECLTYPE:  ENTITY\n",-=0);
 128                         case PARSE_TYPE_FLOAT:     STORE("DECLTYPE:  FLOAT\n",-=0);
 129                         case PARSE_TYPE_VECTOR:    STORE("DECLTYPE:  VECTOR\n",-=0);
 130
 131                         case PARSE_TYPE_GT:        STORE("TEST:      GREATER THAN\n",-=0);
 132                         case PARSE_TYPE_LT:        STORE("TEST:      LESS THAN\n",-=0);
 133                         case PARSE_TYPE_GTEQ:      STORE("TEST:      GREATER THAN OR EQUAL\n",-=0);
 134                         case PARSE_TYPE_LTEQ:      STORE("TEST:      LESS THAN OR EQUAL\n",-=0);
 135                         case PARSE_TYPE_LNEQ:      STORE("TEST:      NOT EQUAL\n",-=0);
 136                         case PARSE_TYPE_EQEQ:      STORE("TEST:      EQUAL-EQUAL\n",-=0);
 137
 138                         case PARSE_TYPE_LBS:       STORE("BLOCK:     BEG\n",+=4);
 139                         case PARSE_TYPE_RBS:       STORE("BLOCK:     END\n",-=4);
 140                         case PARSE_TYPE_ELSE:      STORE("BLOCK:     ELSE\n",+=0);
 141                         case PARSE_TYPE_IF:        STORE("BLOCK:     IF\n",+=0);
 142
 143                         case PARSE_TYPE_LAND:      STORE("LOGICAL:   AND\n",-=0);
 144                         case PARSE_TYPE_LNOT:      STORE("LOGICAL:   NOT\n",-=0);
 145                         case PARSE_TYPE_LOR:       STORE("LOGICAL:   OR\n",-=0);
 146
 147                         case PARSE_TYPE_LPARTH:    STORE("PARTH:     BEG\n",-=0);
 148                         case PARSE_TYPE_RPARTH:    STORE("PARTH:     END\n",-=0);
 149
 150                         case PARSE_TYPE_WHILE:     STORE("LOOP:      WHILE\n",-=0);
 151                         case PARSE_TYPE_FOR:       STORE("LOOP:      FOR\n",-=0);
 152                         case PARSE_TYPE_DO:        STORE("LOOP:      DO\n",-=0);
 153                 }
 154                 tree = tree->next;
 155         }
 156 }
 157
 158 /*
 159  * Performs a parse operation:  This is a macro to prevent bugs, if the
 160  * calls to lex_token are'nt exactly enough to feed to the end of the
 161  * actual lexees for the current thing that is being parsed, the state
 162  * of the next iteration in the creation of the parse tree will be wrong
 163  * and everything will fail.
 164  */
 165 #define PARSE_PERFORM(X,C) {     \
 166     token = lex_token(file);     \
 167     { C }                        \
 168     while (token != '\n') {      \
 169             token = lex_token(file); \
 170     }                            \
 171     PARSE_TREE_ADD(X);           \
 172     break;                       \
 173 }
 174
 175 void parse_clear(struct parsenode *tree) {
 176         if (!tree) return;
 177         struct parsenode *temp = NULL;
 178         while (tree != NULL) {
 179                 temp = tree;
 180                 tree = tree->next;
 181                 mem_d (temp);
 182         }
 183
 184         /* free any potential typedefs */
 185         typedef_clear();
 186 }
 187
 188 const char *STRING_(char ch) {
 189         if (ch == ' ')
 190                 return "<space>";
 191         if (ch == '\n')
 192                 return "<newline>";
 193         if (ch == '\0')
 194                 return "<null>";
 195
 196         return &ch;
 197 }
 198
 199 #define TOKEN_SKIPWHITE()        \
 200         token = lex_token(file);     \
 201         while (token == ' ') {       \
 202                 token = lex_token(file); \
 203         }
 204
 205 /*
 206  * Generates a parse tree out of the lexees generated by the lexer.  This
 207  * is where the tree is built.  This is where valid check is performed.
 208  */
 209 int parse_tree(struct lex_file *file) {
 210         struct parsenode *parsetree = NULL;
 211         struct parsenode *parseroot = NULL;
 212
 213         /*
 214          * Allocate memory for our parse tree:
 215          * the parse tree is just a singly linked list which will contain
 216          * all the data for code generation.
 217          */
 218         if (!parseroot) {
 219                 parseroot = mem_a(sizeof(struct parsenode));
 220                 if (!parseroot)
 221                         return error(ERROR_INTERNAL, "Ran out of memory", " ");
 222                 parsetree       = parseroot;
 223                 parsetree->type = -1; /* not a valid type -- root element */
 224         }
 225
 226         int     token = 0;
 227         while ((token = lex_token(file)) != ERROR_LEX      && \
 228                     token                    != ERROR_COMPILER && \
 229                     token                    != ERROR_INTERNAL && \
 230                     token                    != ERROR_PARSE    && \
 231                     token                    != ERROR_PREPRO   && file->length >= 0) {
 232                 switch (token) {
 233                         case TOKEN_IF:
 234                                 TOKEN_SKIPWHITE();
 235                                 if (token != '(')
 236                                         error(ERROR_PARSE, "%s:%d Expected `(` after `if` for if statement\n", file->name, file->line);
 237                                 PARSE_TREE_ADD(PARSE_TYPE_IF);
 238                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
 239                                 break;
 240                         case TOKEN_ELSE:
 241                                 token = lex_token(file);
 242                                 PARSE_TREE_ADD(PARSE_TYPE_ELSE);
 243                                 break;
 244                         case TOKEN_FOR:
 245                                 while ((token == ' ' || token == '\n') && file->length >= 0)
 246                                         token = lex_token(file);
 247                                 PARSE_TREE_ADD(PARSE_TYPE_FOR);
 248                                 break;
 249
 250                         /*
 251                          * This is a quick and easy way to do typedefs at parse time
 252                          * all power is in typedef_add(), in typedef.c.  We handle
 253                          * the tokens accordingly here.
 254                          */
 255                         case TOKEN_TYPEDEF: {
 256                                 char *f,*t;
 257
 258                                 token = lex_token(file);
 259                                 token = lex_token(file); f = util_strdup(file->lastok);
 260                                 token = lex_token(file);
 261                                 token = lex_token(file); t = util_strdup(file->lastok);
 262
 263                                 typedef_add(f, t);
 264
 265                                 printf("TYPEDEF %s as %s\n", f, t);
 266
 267                                 mem_d(f);
 268                                 mem_d(t);
 269
 270                                 //while (token != '\n')
 271                                 token = lex_token(file);
 272                                 if (token != ';')
 273                                         error(ERROR_PARSE, "%s:%d Expected `;` on typedef\n", file->name, file->line);
 274
 275                                 token = lex_token(file);
 276                                 printf("TOK: %c\n", token);
 277                                 break;
 278                         }
 279
 280                         /*
 281                          * Returns are addable as-is, statement checking is during
 282                          * the actual parse tree check.
 283                          */
 284                         case TOKEN_RETURN:
 285                                 token = lex_token(file);
 286                                 PARSE_TREE_ADD(PARSE_TYPE_RETURN);
 287                                 break;
 288                         case TOKEN_CONTINUE:
 289                                 PARSE_TREE_ADD(PARSE_TYPE_CONTINUE);
 290                                 break;
 291
 292                         case TOKEN_DO:        PARSE_PERFORM(PARSE_TYPE_DO,      {});
 293                         case TOKEN_WHILE:     PARSE_PERFORM(PARSE_TYPE_WHILE,   {});
 294                         case TOKEN_BREAK:     PARSE_PERFORM(PARSE_TYPE_BREAK,   {});
 295                         case TOKEN_GOTO:      PARSE_PERFORM(PARSE_TYPE_GOTO,    {});
 296                         case TOKEN_VOID:      PARSE_PERFORM(PARSE_TYPE_VOID,    {});
 297
 298                         case TOKEN_STRING:    PARSE_TREE_ADD(PARSE_TYPE_STRING); goto fall;
 299                         case TOKEN_VECTOR:    PARSE_TREE_ADD(PARSE_TYPE_VECTOR); goto fall;
 300                         case TOKEN_ENTITY:    PARSE_TREE_ADD(PARSE_TYPE_ENTITY); goto fall;
 301                         case TOKEN_FLOAT:     PARSE_TREE_ADD(PARSE_TYPE_FLOAT);  goto fall;
 302                         /* fall into this for all types */
 303                         {
 304                         fall:;
 305                                 char *name = NULL;
 306                                 TOKEN_SKIPWHITE();
 307                                 name  = util_strdup(file->lastok);
 308                                 token = lex_token  (file);
 309
 310                                 /* is it NOT a definition? */
 311                                 if (token != ';') {
 312                                         while (token == ' ')
 313                                                 token = lex_token(file);
 314
 315                                         /* it's a function? */
 316                                         if (token == '(') {
 317                                                 /*
 318                                                  * Now I essentially have to do a ton of parsing for
 319                                                  * function definition.
 320                                                  */
 321                                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
 322                                                 token = lex_token(file);
 323                                                 while (token != '\n' && token != ')') {
 324                                                         switch (token) {
 325                                                                 case TOKEN_VOID:    PARSE_TREE_ADD(PARSE_TYPE_VOID);   break;
 326                                                                 case TOKEN_STRING:  PARSE_TREE_ADD(PARSE_TYPE_STRING); break;
 327                                                                 case TOKEN_ENTITY:  PARSE_TREE_ADD(PARSE_TYPE_ENTITY); break;
 328                                                                 case TOKEN_FLOAT:   PARSE_TREE_ADD(PARSE_TYPE_FLOAT);  break;
 329                                                                 /*
 330                                                                  * TODO:  Need to parse function pointers:  I have no clue how
 331                                                                  * I'm actually going to pull that off, it's going to be hard
 332                                                                  * since you can have a function pointer-pointer-pointer ....
 333                                                                  */
 334                                                         }
 335                                                 }
 336                                                 /* just a definition */
 337                                                 if (token == ')') {
 338                                                         /*
 339                                                          * I like to put my { on the same line as the ) for
 340                                                          * functions, ifs, elses, so we must support that!.
 341                                                          */
 342                                                         PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
 343                                                         token = lex_token(file);
 344                                                         token = lex_token(file);
 345                                                         if(token == '{')
 346                                                                 PARSE_TREE_ADD(PARSE_TYPE_LBS);
 347                                                 }
 348                                                 else if (token == '\n')
 349                                                         error(ERROR_COMPILER, "%s:%d Expecting `;` after function definition %s\n", file->name, file->line, name);
 350
 351                                         } else if (token == '=') {
 352                                                 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
 353                                         } else {
 354                                                 error(ERROR_COMPILER, "%s:%d Invalid decltype: expected `(` [function], or `=` [constant], or `;` [definition] for %s\n", file->name, file->line, name);
 355                                         }
 356                                 } else {
 357                                         /* definition */
 358                                         printf("FOUND DEFINITION\n");
 359                                 }
 360                                 mem_d(name);
 361                         }
 362
 363                         /*
 364                          * From here down is all language punctuation:  There is no
 365                          * need to actual create tokens from these because they're already
 366                          * tokenized as these individual tokens (which are in a special area
 367                          * of the ascii table which doesn't conflict with our other tokens
 368                          * which are higer than the ascii table.)
 369                          */
 370                         case '#':
 371                                 token = lex_token(file); /* skip '#' */
 372                                 /*
 373                                  * If we make it here we found a directive, the supported
 374                                  * directives so far are #include.
 375                                  */
 376                                 if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
 377                                         /*
 378                                          * We only suport include " ", not <> like in C (why?)
 379                                          * because the latter is silly.
 380                                          */
 381                                         while (*file->lastok != '"' && token != '\n')
 382                                                 token = lex_token(file);
 383
 384                                         /* we handle lexing at that point now */
 385                                         if (token == '\n')
 386                                                 return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line);
 387                                 }
 388
 389                                 /* skip all tokens to end of directive */
 390                                 while (token != '\n')
 391                                         token = lex_token(file);
 392                                 break;
 393
 394                         case '.':
 395                                 PARSE_TREE_ADD(PARSE_TYPE_DOT);
 396                                 break;
 397                         case '(':
 398                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
 399                                 break;
 400                         case ')':
 401                                 PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
 402                                 break;
 403
 404                         case '&':                               /* &  */
 405                                 token = lex_token(file);
 406                                 if (token == '&') { /* && */
 407                                         token = lex_token(file);
 408                                         PARSE_TREE_ADD(PARSE_TYPE_LAND);
 409                                         break;
 410                                 }
 411                                 PARSE_TREE_ADD(PARSE_TYPE_BAND);
 412                                 break;
 413                         case '|':                               /* |  */
 414                                 token = lex_token(file);
 415                                 if (token == '|') { /* || */
 416                                         token = lex_token(file);
 417                                         PARSE_TREE_ADD(PARSE_TYPE_LOR);
 418                                         break;
 419                                 }
 420                                 PARSE_TREE_ADD(PARSE_TYPE_BOR);
 421                                 break;
 422                         case '!':                               /* !  */
 423                                 token = lex_token(file);
 424                                 if (token == '=') { /* != */
 425                                         token = lex_token(file);
 426                                         PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
 427                                         break;
 428                                 }
 429                                 PARSE_TREE_ADD(PARSE_TYPE_LNOT);
 430                                 break;
 431                         case '<':                               /* <  */
 432                                 token = lex_token(file);
 433                                 if (token == '=') { /* <= */
 434                                         token = lex_token(file);
 435                                         PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
 436                                         break;
 437                                 }
 438                                 PARSE_TREE_ADD(PARSE_TYPE_LT);
 439                                 break;
 440                         case '>':                               /* >  */
 441                                 token = lex_token(file);
 442                                 if (token == '=') { /* >= */
 443                                         token = lex_token(file);
 444                                         PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
 445                                         break;
 446                                 }
 447                                 PARSE_TREE_ADD(PARSE_TYPE_GT);
 448                                 break;
 449                         case '=':                               /* =  */
 450                                 token = lex_token(file);
 451                                 if (token == '=') { /* == */
 452                                         token = lex_token(file);
 453                                         PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
 454                                         break;
 455                                 }
 456                                 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
 457                                 break;
 458                         case ';':
 459                                 token = lex_token(file);
 460                                 PARSE_TREE_ADD(PARSE_TYPE_DONE);
 461                                 break;
 462                         case '-':
 463                                 token = lex_token(file);
 464                                 PARSE_TREE_ADD(PARSE_TYPE_MINUS);
 465                                 break;
 466                         case '+':
 467                                 token = lex_token(file);
 468                                 PARSE_TREE_ADD(PARSE_TYPE_ADD);
 469                                 break;
 470                         case '{':
 471                                 token = lex_token(file);
 472                                 PARSE_TREE_ADD(PARSE_TYPE_LBS);
 473                                 break;
 474                         case '}':
 475                                 token = lex_token(file);
 476                                 PARSE_TREE_ADD(PARSE_TYPE_RBS);
 477                                 break;
 478
 479                         /*
 480                          * TODO: Fix lexer to spit out ( ) as tokens, it seems the
 481                          * using '(' or ')' in parser doesn't work properly unless
 482                          * there are spaces before them to allow the lexer to properly
 483                          * seperate identifiers. -- otherwise it eats all of it.
 484                          */
 485                         case LEX_IDENT:
 486                                 token = lex_token(file);
 487                                 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
 488                                 break;
 489                 }
 490         }
 491         parse_debug(parseroot);
 492         lex_reset(file);
 493         parse_clear(parseroot);
 494         return 1;
 495 }