parse.c

   1 /*
   2  * Copyright (C) 2012
   3  *      Dale Weiler
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   6  * this software and associated documentation files (the "Software"), to deal in
   7  * the Software without restriction, including without limitation the rights to
   8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is furnished to do
  10  * so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in all
  13  * copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23 #include <limits.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include "gmqcc.h"
  27
  28 /*
  29  * These are not lexical tokens:  These are parse tree types.  Most people
  30  * perform tokenizing on language punctuation which is wrong.  That stuff
  31  * is technically already tokenized, it just needs to be parsed into a tree
  32  */
  33 #define PARSE_TYPE_DO       0
  34 #define PARSE_TYPE_ELSE     1
  35 #define PARSE_TYPE_IF       2
  36 #define PARSE_TYPE_WHILE    3
  37 #define PARSE_TYPE_BREAK    4
  38 #define PARSE_TYPE_CONTINUE 5
  39 #define PARSE_TYPE_RETURN   6
  40 #define PARSE_TYPE_GOTO     7
  41 #define PARSE_TYPE_FOR      8
  42 #define PARSE_TYPE_VOID     9
  43 #define PARSE_TYPE_STRING   10
  44 #define PARSE_TYPE_FLOAT    11
  45 #define PARSE_TYPE_VECTOR   12
  46 #define PARSE_TYPE_ENTITY   13
  47 #define PARSE_TYPE_LAND     14
  48 #define PARSE_TYPE_LOR      15
  49 #define PARSE_TYPE_LTEQ     16
  50 #define PARSE_TYPE_GTEQ     17
  51 #define PARSE_TYPE_EQEQ     18
  52 #define PARSE_TYPE_LNEQ     19
  53 #define PARSE_TYPE_COMMA    20
  54 #define PARSE_TYPE_LNOT     21
  55 #define PARSE_TYPE_STAR     22
  56 #define PARSE_TYPE_DIVIDE   23
  57 #define PARSE_TYPE_LPARTH   24
  58 #define PARSE_TYPE_RPARTH   25
  59 #define PARSE_TYPE_MINUS    26
  60 #define PARSE_TYPE_ADD      27
  61 #define PARSE_TYPE_EQUAL    28
  62 #define PARSE_TYPE_LBS      29
  63 #define PARSE_TYPE_RBS      30
  64 #define PARSE_TYPE_ELIP     31
  65 #define PARSE_TYPE_DOT      32
  66 #define PARSE_TYPE_LT       33
  67 #define PARSE_TYPE_GT       34
  68 #define PARSE_TYPE_BAND     35
  69 #define PARSE_TYPE_BOR      36
  70 #define PARSE_TYPE_DONE     37
  71 #define PARSE_TYPE_IDENT    38
  72
  73 /*
  74  * Adds a parse type to the parse tree, this is where all the hard
  75  * work actually begins.
  76  */
  77 #define PARSE_TREE_ADD(X)                                        \
  78         do {                                                         \
  79                 parsetree->next       = mem_a(sizeof(struct parsenode)); \
  80                 parsetree->next->next = NULL;                            \
  81                 parsetree->next->type = (X);                             \
  82                 parsetree             = parsetree->next;                 \
  83         } while (0)
  84
/*
 * This is all of the punctuation handled in the parser.  None of it
 * needs a token constant of its own: each one is already a token.
 */
  89 #if 0
  90         "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
  91         "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
  92         "." , "<" , ">" , "&" , "|" ,
  93 #endif
  94
  95 #define STORE(X) {  \
  96         printf(X);      \
  97         break;          \
  98 }
  99
 100 void parse_debug(struct parsenode *tree) {
 101         while (tree) {
 102                 switch (tree->type) {
 103                         case PARSE_TYPE_ADD:       STORE("OPERATOR:  ADD    \n");
 104                         case PARSE_TYPE_BAND:      STORE("OPERATOR:  BITAND \n");
 105                         case PARSE_TYPE_BOR:       STORE("OPERATOR:  BITOR  \n");
 106                         case PARSE_TYPE_COMMA:     STORE("OPERATOR:  SEPERATOR\n");
 107                         case PARSE_TYPE_DOT:       STORE("OPERATOR:  DOT\n");
 108                         case PARSE_TYPE_DIVIDE:    STORE("OPERATOR:  DIVIDE\n");
 109                         case PARSE_TYPE_EQUAL:     STORE("OPERATOR:  ASSIGNMENT\n");
 110
 111                         case PARSE_TYPE_BREAK:     STORE("STATEMENT: BREAK  \n");
 112                         case PARSE_TYPE_CONTINUE:  STORE("STATEMENT: CONTINUE\n");
 113                         case PARSE_TYPE_GOTO:      STORE("STATEMENT: GOTO\n");
 114                         case PARSE_TYPE_RETURN:    STORE("STATEMENT: RETURN\n");
 115                         case PARSE_TYPE_DONE:      STORE("STATEMENT: DONE\n");
 116
 117                         case PARSE_TYPE_VOID:      STORE("DECLTYPE:  VOID\n");
 118                         case PARSE_TYPE_STRING:    STORE("DECLTYPE:  STRING\n");
 119                         case PARSE_TYPE_ELIP:      STORE("DECLTYPE:  VALIST\n");
 120                         case PARSE_TYPE_ENTITY:    STORE("DECLTYPE:  ENTITY\n");
 121                         case PARSE_TYPE_FLOAT:     STORE("DECLTYPE:  FLOAT\n");
 122                         case PARSE_TYPE_VECTOR:    STORE("DECLTYPE:  VECTOR\n");
 123
 124                         case PARSE_TYPE_GT:        STORE("TEST:      GREATER THAN\n");
 125                         case PARSE_TYPE_LT:        STORE("TEST:      LESS THAN\n");
 126                         case PARSE_TYPE_GTEQ:      STORE("TEST:      GREATER THAN OR EQUAL\n");
 127                         case PARSE_TYPE_LTEQ:      STORE("TEST:      LESS THAN OR EQUAL\n");
 128                         case PARSE_TYPE_LNEQ:      STORE("TEST:      NOT EQUAL\n");
 129                         case PARSE_TYPE_EQEQ:      STORE("TEST:      EQUAL-EQUAL\n");
 130
 131                         case PARSE_TYPE_LBS:       STORE("BLOCK:     BEG\n");
 132                         case PARSE_TYPE_RBS:       STORE("BLOCK:     END\n");
 133                         case PARSE_TYPE_ELSE:      STORE("BLOCK:     ELSE\n");
 134                         case PARSE_TYPE_IF:        STORE("BLOCK:     IF\n");
 135
 136                         case PARSE_TYPE_LAND:      STORE("LOGICAL:   AND\n");
 137                         case PARSE_TYPE_LNOT:      STORE("LOGICAL:   NOT\n");
 138                         case PARSE_TYPE_LOR:       STORE("LOGICAL:   OR\n");
 139
 140                         case PARSE_TYPE_LPARTH:    STORE("PARTH:     BEG\n");
 141                         case PARSE_TYPE_RPARTH:    STORE("PARTH:     END\n");
 142
 143                         case PARSE_TYPE_WHILE:     STORE("LOOP:      WHILE\n");
 144                         case PARSE_TYPE_FOR:       STORE("LOOP:      FOR\n");
 145                         case PARSE_TYPE_DO:        STORE("LOOP:      DO\n");
 146
 147                         //case PARSE_TYPE_IDENT:     STORE("IDENT:     ???\n");
 148                 }
 149                 tree = tree->next;
 150         }
 151 }
 152
 153 /*
 154  * Performs a parse operation:  This is a macro to prevent bugs, if the
 155  * calls to lex_token are'nt exactly enough to feed to the end of the
 156  * actual lexees for the current thing that is being parsed, the state
 157  * of the next iteration in the creation of the parse tree will be wrong
 158  * and everything will fail.
 159  */
 160 #define PARSE_PERFORM(X,C) {     \
 161         token = lex_token(file);     \
 162         { C }                        \
 163         while (token != '\n') {      \
 164                 token = lex_token(file); \
 165         }                            \
 166         PARSE_TREE_ADD(X);           \
 167         break;                       \
 168 }
 169
 170 void parse_clear(struct parsenode *tree) {
 171         if (!tree) return;
 172         struct parsenode *temp = NULL;
 173         while (tree != NULL) {
 174                 temp = tree;
 175                 tree = tree->next;
 176                 mem_d (temp);
 177         }
 178
 179         /* free any potential typedefs */
 180         typedef_clear();
 181 }
 182
 183 /*
 184  * Generates a parse tree out of the lexees generated by the lexer.  This
 185  * is where the tree is built.  This is where valid check is performed.
 186  */
 187 int parse_tree(struct lex_file *file) {
 188         struct parsenode *parsetree = NULL;
 189         struct parsenode *parseroot = NULL;
 190
 191         /*
 192          * Allocate memory for our parse tree:
 193          * the parse tree is just a singly linked list which will contain
 194          * all the data for code generation.
 195          */
 196         if (!parseroot) {
 197                 parseroot = mem_a(sizeof(struct parsenode));
 198                 if (!parseroot)
 199                         return error(ERROR_INTERNAL, "Ran out of memory", " ");
 200                 parsetree       = parseroot;
 201                 parsetree->type = -1; /* not a valid type -- root element */
 202         }
 203
 204         int     token = 0;
 205         while ((token = lex_token(file)) != ERROR_LEX      && \
 206                     token                    != ERROR_COMPILER && \
 207                     token                    != ERROR_INTERNAL && \
 208                     token                    != ERROR_PARSE    && \
 209                     token                    != ERROR_PREPRO   && file->length >= 0) {
 210                 switch (token) {
 211                         case TOKEN_IF:
 212                                 while ((token == ' ' || token == '\n') && file->length >= 0)
 213                                         token = lex_token(file);
 214                                 PARSE_TREE_ADD(PARSE_TYPE_IF);
 215                                 break;
 216                         case TOKEN_ELSE:
 217                                 token = lex_token(file);
 218                                 PARSE_TREE_ADD(PARSE_TYPE_ELSE);
 219                                 break;
 220                         case TOKEN_FOR:
 221                                 //token = lex_token(file);
 222                                 while ((token == ' ' || token == '\n') && file->length >= 0)
 223                                         token = lex_token(file);
 224                                 PARSE_TREE_ADD(PARSE_TYPE_FOR);
 225                                 break;
 226
 227                         /*
 228                          * This is a quick and easy way to do typedefs at parse time
 229                          * all power is in typedef_add(), in typedef.c.  We handle
 230                          * the tokens accordingly here.
 231                          */
 232                         case TOKEN_TYPEDEF: {
 233                                 char *f = NULL;
 234                                 char *t = NULL;
 235                                 token = lex_token(file);
 236                                 token = lex_token(file); f = strdup(file->lastok);
 237                                 token = lex_token(file);
 238                                 token = lex_token(file); t = strdup(file->lastok);
 239
 240                                 typedef_add(f, t);
 241
 242                                 free(f);
 243                                 free(t);
 244
 245                                 while (token != '\n')
 246                                         token = lex_token(file);
 247                                 break;
 248                         }
 249
 250                         /*
 251                          * Returns are addable as-is, statement checking is during
 252                          * the actual parse tree check.
 253                          */
 254                         case TOKEN_RETURN:
 255                                 PARSE_TREE_ADD(PARSE_TYPE_RETURN);
 256                                 break;
 257                                 //PARSE_PERFORM(PARSE_TYPE_RETURN,  {});
 258
 259
 260                         case TOKEN_DO:        PARSE_PERFORM(PARSE_TYPE_DO,      {});
 261                         case TOKEN_WHILE:     PARSE_PERFORM(PARSE_TYPE_WHILE,   {});
 262                         case TOKEN_BREAK:     PARSE_PERFORM(PARSE_TYPE_BREAK,   {});
 263                         case TOKEN_CONTINUE:  PARSE_PERFORM(PARSE_TYPE_CONTINUE,{});
 264                         case TOKEN_GOTO:      PARSE_PERFORM(PARSE_TYPE_GOTO,    {});
 265                         case TOKEN_VOID:      PARSE_PERFORM(PARSE_TYPE_VOID,    {});
 266                         case TOKEN_STRING:    PARSE_PERFORM(PARSE_TYPE_STRING,  {});
 267                         case TOKEN_FLOAT:     PARSE_PERFORM(PARSE_TYPE_FLOAT,   {});
 268                         case TOKEN_VECTOR:    PARSE_PERFORM(PARSE_TYPE_VECTOR,  {});
 269                         case TOKEN_ENTITY:    PARSE_PERFORM(PARSE_TYPE_ENTITY,  {});
 270
 271                         /*
 272                          * From here down is all language punctuation:  There is no
 273                          * need to actual create tokens from these because they're already
 274                          * tokenized as these individual tokens (which are in a special area
 275                          * of the ascii table which doesn't conflict with our other tokens
 276                          * which are higer than the ascii table.)
 277                          */
 278                         case '#':
 279                                 /*
 280                                  * Skip the preprocessor for now:  We'll implement our own
 281                                  * eventually.  For now we need to make sure directives are
 282                                  * not accidently tokenized.
 283                                  */
 284                                 token = lex_token(file);
 285                                 token = lex_token(file);
 286
 287                                 /* skip all tokens to end of directive */
 288                                 while (token != '\n')
 289                                         token = lex_token(file);
 290                                 break;
 291
 292                         case '.':
 293                                 token = lex_token(file);
 294                                 PARSE_TREE_ADD(PARSE_TYPE_DOT);
 295                                 break;
 296
 297                         case '(':
 298                                 token = lex_token(file);
 299                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
 300                                 break;
 301                         case ')':
 302                                 token = lex_token(file);
 303                                 PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
 304                                 break;
 305
 306                         case '&':               /* &  */
 307                                 token = lex_token(file);
 308                                 if (token == '&') { /* && */
 309                                         token = lex_token(file);
 310                                         PARSE_TREE_ADD(PARSE_TYPE_LAND);
 311                                         break;
 312                                 }
 313                                 PARSE_TREE_ADD(PARSE_TYPE_BAND);
 314                                 break;
 315                         case '|':               /* |  */
 316                                 token = lex_token(file);
 317                                 if (token == '|') { /* || */
 318                                         token = lex_token(file);
 319                                         PARSE_TREE_ADD(PARSE_TYPE_LOR);
 320                                         break;
 321                                 }
 322                                 PARSE_TREE_ADD(PARSE_TYPE_BOR);
 323                                 break;
 324                         case '!':
 325                                 token = lex_token(file);
 326                                 if (token == '=') { /* != */
 327                                         token = lex_token(file);
 328                                         PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
 329                                         break;
 330                                 }
 331                                 PARSE_TREE_ADD(PARSE_TYPE_LNOT);
 332                                 break;
 333                         case '<':               /* <  */
 334                                 token = lex_token(file);
 335                                 if (token == '=') { /* <= */
 336                                         token = lex_token(file);
 337                                         PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
 338                                         break;
 339                                 }
 340                                 PARSE_TREE_ADD(PARSE_TYPE_LT);
 341                                 break;
 342                         case '>':               /* >  */
 343                                 token = lex_token(file);
 344                                 if (token == '=') { /* >= */
 345                                         token = lex_token(file);
 346                                         PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
 347                                         break;
 348                                 }
 349                                 PARSE_TREE_ADD(PARSE_TYPE_GT);
 350                                 break;
 351                         case '=':
 352                                 token = lex_token(file);
 353                                 if (token == '=') { /* == */
 354                                         token = lex_token(file);
 355                                         PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
 356                                         break;
 357                                 }
 358                                 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
 359                                 break;
 360                         case ';':
 361                                 token = lex_token(file);
 362                                 PARSE_TREE_ADD(PARSE_TYPE_DONE);
 363                                 break;
 364                         case '-':
 365                                 token = lex_token(file);
 366                                 PARSE_TREE_ADD(PARSE_TYPE_MINUS);
 367                                 break;
 368                         case '+':
 369                                 token = lex_token(file);
 370                                 PARSE_TREE_ADD(PARSE_TYPE_ADD);
 371                                 break;
 372                         case '{':
 373                                 token = lex_token(file);
 374                                 PARSE_TREE_ADD(PARSE_TYPE_LBS);
 375                                 break;
 376                         case '}':
 377                                 token = lex_token(file);
 378                                 PARSE_TREE_ADD(PARSE_TYPE_RBS);
 379                                 break;
 380
 381                         /*
 382                          * TODO: Fix lexer to spit out ( ) as tokens, it seems the
 383                          * using '(' or ')' in parser doesn't work properly unless
 384                          * there are spaces before them to allow the lexer to properly
 385                          * seperate identifiers. -- otherwise it eats all of it.
 386                          */
 387                         case LEX_IDENT:
 388                                 token = lex_token(file);
 389                                 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
 390                                 break;
 391                 }
 392         }
 393         parse_debug(parseroot);
 394         lex_reset(file);
 395         parse_clear(parseroot);
 396         return 1;
 397 }