parse.c

   1 /*
   2  * Copyright (C) 2012
   3  *      Dale Weiler
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   6  * this software and associated documentation files (the "Software"), to deal in
   7  * the Software without restriction, including without limitation the rights to
   8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is furnished to do
  10  * so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in all
  13  * copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23 #include <limits.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include "gmqcc.h"
  28
  29 /*
  30  * These are not lexical tokens:  These are parse tree types.  Most people
  31  * perform tokenizing on language punctuation which is wrong.  That stuff
  32  * is technically already tokenized, it just needs to be parsed into a tree
  33  */
  34 #define PARSE_TYPE_DO       0
  35 #define PARSE_TYPE_ELSE     1
  36 #define PARSE_TYPE_IF       2
  37 #define PARSE_TYPE_WHILE    3
  38 #define PARSE_TYPE_BREAK    4
  39 #define PARSE_TYPE_CONTINUE 5
  40 #define PARSE_TYPE_RETURN   6
  41 #define PARSE_TYPE_GOTO     7
  42 #define PARSE_TYPE_FOR      8
  43 #define PARSE_TYPE_VOID     9
  44 #define PARSE_TYPE_STRING   10
  45 #define PARSE_TYPE_FLOAT    11
  46 #define PARSE_TYPE_VECTOR   12
  47 #define PARSE_TYPE_ENTITY   13
  48 #define PARSE_TYPE_LAND     14
  49 #define PARSE_TYPE_LOR      15
  50 #define PARSE_TYPE_LTEQ     16
  51 #define PARSE_TYPE_GTEQ     17
  52 #define PARSE_TYPE_EQEQ     18
  53 #define PARSE_TYPE_LNEQ     19
  54 #define PARSE_TYPE_COMMA    20
  55 #define PARSE_TYPE_LNOT     21
  56 #define PARSE_TYPE_STAR     22
  57 #define PARSE_TYPE_DIVIDE   23
  58 #define PARSE_TYPE_LPARTH   24
  59 #define PARSE_TYPE_RPARTH   25
  60 #define PARSE_TYPE_MINUS    26
  61 #define PARSE_TYPE_ADD      27
  62 #define PARSE_TYPE_EQUAL    28
  63 #define PARSE_TYPE_LBS      29
  64 #define PARSE_TYPE_RBS      30
  65 #define PARSE_TYPE_ELIP     31
  66 #define PARSE_TYPE_DOT      32
  67 #define PARSE_TYPE_LT       33
  68 #define PARSE_TYPE_GT       34
  69 #define PARSE_TYPE_BAND     35
  70 #define PARSE_TYPE_BOR      36
  71 #define PARSE_TYPE_DONE     37
  72 #define PARSE_TYPE_IDENT    38
  73
  74 /*
  75  * Adds a parse type to the parse tree, this is where all the hard
  76  * work actually begins.
  77  */
  78 #define PARSE_TREE_ADD(X)                                        \
  79         do {                                                         \
  80                 parsetree->next       = mem_a(sizeof(struct parsenode)); \
  81                 parsetree->next->next = NULL;                            \
  82                 parsetree->next->type = (X);                             \
  83                 parsetree             = parsetree->next;                 \
  84         } while (0)
  85
  86 /*
  87  * This is all the punctuation handled in the parser, these don't
  88  * need tokens, they're already tokens.
  89  */
  90 #if 0
  91         "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
  92         "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
  93         "." , "<" , ">" , "&" , "|" ,
  94 #endif
  95
  96 #define STORE(X,C) {  \
  97     long f = fill;    \
  98     while(f--) {      \
  99       putchar(' ');   \
 100     }                 \
 101     fill C;           \
 102         printf(X);        \
 103         break;            \
 104 }
 105
 106 void parse_debug(struct parsenode *tree) {
 107         long fill = 0;
 108         while (tree) {
 109                 switch (tree->type) {
 110                         case PARSE_TYPE_ADD:       STORE("OPERATOR:  ADD    \n", -=0);
 111                         case PARSE_TYPE_BAND:      STORE("OPERATOR:  BITAND \n",-=0);
 112                         case PARSE_TYPE_BOR:       STORE("OPERATOR:  BITOR  \n",-=0);
 113                         case PARSE_TYPE_COMMA:     STORE("OPERATOR:  SEPERATOR\n",-=0);
 114                         case PARSE_TYPE_DOT:       STORE("OPERATOR:  DOT\n",-=0);
 115                         case PARSE_TYPE_DIVIDE:    STORE("OPERATOR:  DIVIDE\n",-=0);
 116                         case PARSE_TYPE_EQUAL:     STORE("OPERATOR:  ASSIGNMENT\n",-=0);
 117
 118                         case PARSE_TYPE_BREAK:     STORE("STATEMENT: BREAK  \n",-=0);
 119                         case PARSE_TYPE_CONTINUE:  STORE("STATEMENT: CONTINUE\n",-=0);
 120                         case PARSE_TYPE_GOTO:      STORE("STATEMENT: GOTO\n",-=0);
 121                         case PARSE_TYPE_RETURN:    STORE("STATEMENT: RETURN\n",-=0);
 122                         case PARSE_TYPE_DONE:      STORE("STATEMENT: DONE\n",-=0);
 123
 124                         case PARSE_TYPE_VOID:      STORE("DECLTYPE:  VOID\n",-=0);
 125                         case PARSE_TYPE_STRING:    STORE("DECLTYPE:  STRING\n",-=0);
 126                         case PARSE_TYPE_ELIP:      STORE("DECLTYPE:  VALIST\n",-=0);
 127                         case PARSE_TYPE_ENTITY:    STORE("DECLTYPE:  ENTITY\n",-=0);
 128                         case PARSE_TYPE_FLOAT:     STORE("DECLTYPE:  FLOAT\n",-=0);
 129                         case PARSE_TYPE_VECTOR:    STORE("DECLTYPE:  VECTOR\n",-=0);
 130
 131                         case PARSE_TYPE_GT:        STORE("TEST:      GREATER THAN\n",-=0);
 132                         case PARSE_TYPE_LT:        STORE("TEST:      LESS THAN\n",-=0);
 133                         case PARSE_TYPE_GTEQ:      STORE("TEST:      GREATER THAN OR EQUAL\n",-=0);
 134                         case PARSE_TYPE_LTEQ:      STORE("TEST:      LESS THAN OR EQUAL\n",-=0);
 135                         case PARSE_TYPE_LNEQ:      STORE("TEST:      NOT EQUAL\n",-=0);
 136                         case PARSE_TYPE_EQEQ:      STORE("TEST:      EQUAL-EQUAL\n",-=0);
 137
 138                         case PARSE_TYPE_LBS:       STORE("BLOCK:     BEG\n",+=4);
 139                         case PARSE_TYPE_RBS:       STORE("BLOCK:     END\n",-=4);
 140                         case PARSE_TYPE_ELSE:      STORE("BLOCK:     ELSE\n",+=0);
 141                         case PARSE_TYPE_IF:        STORE("BLOCK:     IF\n",+=0);
 142
 143                         case PARSE_TYPE_LAND:      STORE("LOGICAL:   AND\n",-=0);
 144                         case PARSE_TYPE_LNOT:      STORE("LOGICAL:   NOT\n",-=0);
 145                         case PARSE_TYPE_LOR:       STORE("LOGICAL:   OR\n",-=0);
 146
 147                         case PARSE_TYPE_LPARTH:    STORE("PARTH:     BEG\n",-=0);
 148                         case PARSE_TYPE_RPARTH:    STORE("PARTH:     END\n",-=0);
 149
 150                         case PARSE_TYPE_WHILE:     STORE("LOOP:      WHILE\n",-=0);
 151                         case PARSE_TYPE_FOR:       STORE("LOOP:      FOR\n",-=0);
 152                         case PARSE_TYPE_DO:        STORE("LOOP:      DO\n",-=0);
 153                 }
 154                 tree = tree->next;
 155         }
 156 }
 157
 158 /*
 159  * Performs a parse operation:  This is a macro to prevent bugs, if the
 160  * calls to lex_token are'nt exactly enough to feed to the end of the
 161  * actual lexees for the current thing that is being parsed, the state
 162  * of the next iteration in the creation of the parse tree will be wrong
 163  * and everything will fail.
 164  */
 165 #define PARSE_PERFORM(X,C) {     \
 166     token = lex_token(file);     \
 167     { C }                        \
 168     while (token != '\n') {      \
 169             token = lex_token(file); \
 170     }                            \
 171     PARSE_TREE_ADD(X);           \
 172     break;                       \
 173 }
 174
 175 void parse_clear(struct parsenode *tree) {
 176         if (!tree) return;
 177         struct parsenode *temp = NULL;
 178         while (tree != NULL) {
 179                 temp = tree;
 180                 tree = tree->next;
 181                 mem_d (temp);
 182         }
 183
 184         /* free any potential typedefs */
 185         typedef_clear();
 186 }
 187
 188 const char *STRING_(char ch) {
 189         if (ch == ' ')
 190                 return "<space>";
 191         if (ch == '\n')
 192                 return "<newline>";
 193         if (ch == '\0')
 194                 return "<null>";
 195
 196         return &ch;
 197 }
 198
 199 #define TOKEN_SKIPWHITE()        \
 200         token = lex_token(file);     \
 201         while (token == ' ') {       \
 202                 token = lex_token(file); \
 203         }
 204
 205 /*
 206  * Generates a parse tree out of the lexees generated by the lexer.  This
 207  * is where the tree is built.  This is where valid check is performed.
 208  */
 209 int parse_tree(struct lex_file *file) {
 210         struct parsenode *parsetree = NULL;
 211         struct parsenode *parseroot = NULL;
 212
 213         /*
 214          * Allocate memory for our parse tree:
 215          * the parse tree is just a singly linked list which will contain
 216          * all the data for code generation.
 217          */
 218         if (!parseroot) {
 219                 parseroot = mem_a(sizeof(struct parsenode));
 220                 if (!parseroot)
 221                         return error(ERROR_INTERNAL, "Ran out of memory", " ");
 222                 parsetree       = parseroot;
 223                 parsetree->type = -1; /* not a valid type -- root element */
 224         }
 225
 226         int     token = 0;
 227         while ((token = lex_token(file)) != ERROR_LEX      && \
 228                     token                    != ERROR_COMPILER && \
 229                     token                    != ERROR_INTERNAL && \
 230                     token                    != ERROR_PARSE    && \
 231                     token                    != ERROR_PREPRO   && file->length >= 0) {
 232                 switch (token) {
 233                         case TOKEN_IF:
 234                                 TOKEN_SKIPWHITE();
 235                                 if (token != '(')
 236                                         error(ERROR_PARSE, "%s:%d Expected `(` after `if` for if statement\n", file->name, file->line);
 237                                 PARSE_TREE_ADD(PARSE_TYPE_IF);
 238                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
 239                                 break;
 240                         case TOKEN_ELSE:
 241                                 token = lex_token(file);
 242                                 PARSE_TREE_ADD(PARSE_TYPE_ELSE);
 243                                 break;
 244                         case TOKEN_FOR:
 245                                 while ((token == ' ' || token == '\n') && file->length >= 0)
 246                                         token = lex_token(file);
 247                                 PARSE_TREE_ADD(PARSE_TYPE_FOR);
 248                                 break;
 249
 250                         /*
 251                          * This is a quick and easy way to do typedefs at parse time
 252                          * all power is in typedef_add(), in typedef.c.  We handle
 253                          * the tokens accordingly here.
 254                          */
 255                         case TOKEN_TYPEDEF: {
 256                                 char *f,*t;
 257
 258                                 token = lex_token(file);
 259                                 token = lex_token(file); f = util_strdup(file->lastok);
 260                                 token = lex_token(file);
 261                                 token = lex_token(file); t = util_strdup(file->lastok);
 262
 263                                 typedef_add(f, t);
 264
 265                                 mem_d(f);
 266                                 mem_d(t);
 267
 268                                 while (token != '\n')
 269                                         token = lex_token(file);
 270                                 break;
 271                         }
 272
 273                         /*
 274                          * Returns are addable as-is, statement checking is during
 275                          * the actual parse tree check.
 276                          */
 277                         case TOKEN_RETURN:
 278                                 token = lex_token(file);
 279                                 PARSE_TREE_ADD(PARSE_TYPE_RETURN);
 280                                 break;
 281                         case TOKEN_CONTINUE:
 282                                 PARSE_TREE_ADD(PARSE_TYPE_CONTINUE);
 283                                 break;
 284
 285                         case TOKEN_DO:        PARSE_PERFORM(PARSE_TYPE_DO,      {});
 286                         case TOKEN_WHILE:     PARSE_PERFORM(PARSE_TYPE_WHILE,   {});
 287                         case TOKEN_BREAK:     PARSE_PERFORM(PARSE_TYPE_BREAK,   {});
 288                         case TOKEN_GOTO:      PARSE_PERFORM(PARSE_TYPE_GOTO,    {});
 289                         case TOKEN_VOID:      PARSE_PERFORM(PARSE_TYPE_VOID,    {});
 290
 291                         case TOKEN_STRING:    PARSE_TREE_ADD(PARSE_TYPE_STRING);
 292                         case TOKEN_VECTOR:    PARSE_TREE_ADD(PARSE_TYPE_VECTOR);
 293                         case TOKEN_ENTITY:    PARSE_TREE_ADD(PARSE_TYPE_ENTITY);
 294                         case TOKEN_FLOAT:     PARSE_TREE_ADD(PARSE_TYPE_FLOAT);
 295                         /* fall into this for all types */
 296                         {
 297                                 char *name = NULL;
 298                                 TOKEN_SKIPWHITE();
 299                                 name  = util_strdup(file->lastok);
 300                                 //token = lex_token  (file);
 301
 302                                 /* is it NOT a definition? */
 303                                 if (token != ';') {
 304                                         while (token == ' ')
 305                                                 token = lex_token(file);
 306
 307                                         /* it's a function? */
 308                                         if (token == '(') {
 309                                                 /*
 310                                                  * Now I essentially have to do a ton of parsing for
 311                                                  * function definition.
 312                                                  */
 313                                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
 314                                                 token = lex_token(file);
 315                                                 while (token != '\n' && token != ')') {
 316                                                         switch (token) {
 317                                                                 case TOKEN_VOID:    PARSE_TREE_ADD(PARSE_TYPE_VOID);   break;
 318                                                                 case TOKEN_STRING:  PARSE_TREE_ADD(PARSE_TYPE_STRING); break;
 319                                                                 case TOKEN_ENTITY:  PARSE_TREE_ADD(PARSE_TYPE_ENTITY); break;
 320                                                                 case TOKEN_FLOAT:   PARSE_TREE_ADD(PARSE_TYPE_FLOAT);  break;
 321                                                                 /*
 322                                                                  * TODO:  Need to parse function pointers:  I have no clue how
 323                                                                  * I'm actually going to pull that off, it's going to be hard
 324                                                                  * since you can have a function pointer-pointer-pointer ....
 325                                                                  */
 326                                                         }
 327                                                 }
 328                                                 /* just a definition */
 329                                                 if (token == ')') {
 330                                                         /*
 331                                                          * I like to put my { on the same line as the ) for
 332                                                          * functions, ifs, elses, so we must support that!.
 333                                                          */
 334                                                         PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
 335                                                         token = lex_token(file);
 336                                                         token = lex_token(file);
 337                                                         if(token == '{')
 338                                                                 PARSE_TREE_ADD(PARSE_TYPE_LBS);
 339                                                 }
 340                                                 else if (token == '\n')
 341                                                         error(ERROR_COMPILER, "%s:%d Expecting `;` after function definition %s\n", file->name, file->line, name);
 342
 343                                         } else if (token == '=') {
 344                                                 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
 345                                         } else {
 346                                                 error(ERROR_COMPILER, "%s:%d Invalid decltype: expected `(` [function], or `=` [constant] for %s\n", file->name, file->line, name);
 347                                         }
 348                                 } else {
 349                                         /* definition */
 350                                         printf("FOUND DEFINITION\n");
 351                                 }
 352                                 mem_d(name);
 353                         }
 354
 355                         /*
 356                          * From here down is all language punctuation:  There is no
 357                          * need to actual create tokens from these because they're already
 358                          * tokenized as these individual tokens (which are in a special area
 359                          * of the ascii table which doesn't conflict with our other tokens
 360                          * which are higer than the ascii table.)
 361                          */
 362                         case '#':
 363                                 token = lex_token(file); /* skip '#' */
 364                                 //while (isspace(token)) {
 365                                 //      if (token == '\n')
 366                                 //              return error(ERROR_PARSE, "Expected valid preprocessor directive after `#` %s\n");
 367                                 //      token = lex_token(file); /* try again */
 368                                 //}
 369                                 /*
 370                                  * If we make it here we found a directive, the supported
 371                                  * directives so far are #include.
 372                                  */
 373                                 if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
 374                                         /*
 375                                          * We only suport include " ", not <> like in C (why?)
 376                                          * because the latter is silly.
 377                                          */
 378                                         while (*file->lastok != '"' && token != '\n')
 379                                                 token = lex_token(file);
 380
 381                                         /* we handle lexing at that point now */
 382                                         if (token == '\n')
 383                                                 return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line);
 384                                 }
 385
 386                                 /* skip all tokens to end of directive */
 387                                 while (token != '\n')
 388                                         token = lex_token(file);
 389                                 break;
 390
 391                         case '.':
 392                                 PARSE_TREE_ADD(PARSE_TYPE_DOT);
 393                                 break;
 394                         case '(':
 395                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
 396                                 break;
 397                         case ')':
 398                                 PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
 399                                 break;
 400
 401                         case '&':                               /* &  */
 402                                 token = lex_token(file);
 403                                 if (token == '&') { /* && */
 404                                         token = lex_token(file);
 405                                         PARSE_TREE_ADD(PARSE_TYPE_LAND);
 406                                         break;
 407                                 }
 408                                 PARSE_TREE_ADD(PARSE_TYPE_BAND);
 409                                 break;
 410                         case '|':                               /* |  */
 411                                 token = lex_token(file);
 412                                 if (token == '|') { /* || */
 413                                         token = lex_token(file);
 414                                         PARSE_TREE_ADD(PARSE_TYPE_LOR);
 415                                         break;
 416                                 }
 417                                 PARSE_TREE_ADD(PARSE_TYPE_BOR);
 418                                 break;
 419                         case '!':                               /* !  */
 420                                 token = lex_token(file);
 421                                 if (token == '=') { /* != */
 422                                         token = lex_token(file);
 423                                         PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
 424                                         break;
 425                                 }
 426                                 PARSE_TREE_ADD(PARSE_TYPE_LNOT);
 427                                 break;
 428                         case '<':                               /* <  */
 429                                 token = lex_token(file);
 430                                 if (token == '=') { /* <= */
 431                                         token = lex_token(file);
 432                                         PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
 433                                         break;
 434                                 }
 435                                 PARSE_TREE_ADD(PARSE_TYPE_LT);
 436                                 break;
 437                         case '>':                               /* >  */
 438                                 token = lex_token(file);
 439                                 if (token == '=') { /* >= */
 440                                         token = lex_token(file);
 441                                         PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
 442                                         break;
 443                                 }
 444                                 PARSE_TREE_ADD(PARSE_TYPE_GT);
 445                                 break;
 446                         case '=':                               /* =  */
 447                                 token = lex_token(file);
 448                                 if (token == '=') { /* == */
 449                                         token = lex_token(file);
 450                                         PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
 451                                         break;
 452                                 }
 453                                 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
 454                                 break;
 455                         case ';':
 456                                 token = lex_token(file);
 457                                 PARSE_TREE_ADD(PARSE_TYPE_DONE);
 458                                 break;
 459                         case '-':
 460                                 token = lex_token(file);
 461                                 PARSE_TREE_ADD(PARSE_TYPE_MINUS);
 462                                 break;
 463                         case '+':
 464                                 token = lex_token(file);
 465                                 PARSE_TREE_ADD(PARSE_TYPE_ADD);
 466                                 break;
 467                         case '{':
 468                                 token = lex_token(file);
 469                                 PARSE_TREE_ADD(PARSE_TYPE_LBS);
 470                                 break;
 471                         case '}':
 472                                 token = lex_token(file);
 473                                 PARSE_TREE_ADD(PARSE_TYPE_RBS);
 474                                 break;
 475
 476                         /*
 477                          * TODO: Fix lexer to spit out ( ) as tokens, it seems the
 478                          * using '(' or ')' in parser doesn't work properly unless
 479                          * there are spaces before them to allow the lexer to properly
 480                          * seperate identifiers. -- otherwise it eats all of it.
 481                          */
 482                         case LEX_IDENT:
 483                                 token = lex_token(file);
 484                                 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
 485                                 break;
 486                 }
 487         }
 488         parse_debug(parseroot);
 489         lex_reset(file);
 490         parse_clear(parseroot);
 491         return 1;
 492 }