]> git.xonotic.org Git - xonotic/gmqcc.git/blob - parse.c
More parsing stuff (still totally broken)
[xonotic/gmqcc.git] / parse.c
1 /*
2  * Copyright (C) 2012 
3  *      Dale Weiler
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
6  * this software and associated documentation files (the "Software"), to deal in
7  * the Software without restriction, including without limitation the rights to
8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is furnished to do
10  * so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in all
13  * copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 #include <limits.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <ctype.h>
27 #include "gmqcc.h"
28
/*
 * Parse tree node types.  These are NOT lexical tokens: punctuation and
 * keywords arrive from the lexer already tokenized, and the values below
 * only identify what kind of node is stored in the parse tree.
 */
enum {
    /* statements and keywords */
    PARSE_TYPE_DO       = 0,
    PARSE_TYPE_ELSE     = 1,
    PARSE_TYPE_IF       = 2,
    PARSE_TYPE_WHILE    = 3,
    PARSE_TYPE_BREAK    = 4,
    PARSE_TYPE_CONTINUE = 5,
    PARSE_TYPE_RETURN   = 6,
    PARSE_TYPE_GOTO     = 7,
    PARSE_TYPE_FOR      = 8,

    /* declaration types */
    PARSE_TYPE_VOID     = 9,
    PARSE_TYPE_STRING   = 10,
    PARSE_TYPE_FLOAT    = 11,
    PARSE_TYPE_VECTOR   = 12,
    PARSE_TYPE_ENTITY   = 13,

    /* operators and punctuation */
    PARSE_TYPE_LAND     = 14,
    PARSE_TYPE_LOR      = 15,
    PARSE_TYPE_LTEQ     = 16,
    PARSE_TYPE_GTEQ     = 17,
    PARSE_TYPE_EQEQ     = 18,
    PARSE_TYPE_LNEQ     = 19,
    PARSE_TYPE_COMMA    = 20,
    PARSE_TYPE_LNOT     = 21,
    PARSE_TYPE_STAR     = 22,
    PARSE_TYPE_DIVIDE   = 23,
    PARSE_TYPE_LPARTH   = 24,
    PARSE_TYPE_RPARTH   = 25,
    PARSE_TYPE_MINUS    = 26,
    PARSE_TYPE_ADD      = 27,
    PARSE_TYPE_EQUAL    = 28,
    PARSE_TYPE_LBS      = 29,
    PARSE_TYPE_RBS      = 30,
    PARSE_TYPE_ELIP     = 31,
    PARSE_TYPE_DOT      = 32,
    PARSE_TYPE_LT       = 33,
    PARSE_TYPE_GT       = 34,
    PARSE_TYPE_BAND     = 35,
    PARSE_TYPE_BOR      = 36,

    /* statement terminator and identifiers */
    PARSE_TYPE_DONE     = 37,
    PARSE_TYPE_IDENT    = 38
};
73
/*
 * Appends a new node of type X to the parse tree and advances the
 * `parsetree` cursor to it.
 *
 * Expects a `struct parsenode *parsetree` pointing at the current tail
 * node to be in scope at the expansion site.
 *
 * If the allocation fails the list is left untouched and the failure is
 * reported through error(), instead of dereferencing a NULL pointer.
 */
#define PARSE_TREE_ADD(X)                                                  \
    do {                                                                   \
        struct parsenode *parse_tree_add_node_ =                           \
            mem_a(sizeof(struct parsenode));                               \
        if (parse_tree_add_node_) {                                        \
            parse_tree_add_node_->next = NULL;                             \
            parse_tree_add_node_->type = (X);                              \
            parsetree->next            = parse_tree_add_node_;             \
            parsetree                  = parse_tree_add_node_;             \
        } else {                                                           \
            error(ERROR_INTERNAL, "Ran out of memory", " ");               \
        }                                                                  \
    } while (0)
85
/*
 * This is all the punctuation handled in the parser.  These don't
 * need tokens of their own; they're already tokens.
 */
90 #if 0
91         "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
92         "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
93         "." , "<" , ">" , "&" , "|" , 
94 #endif
95
/*
 * Debug-print helper for use directly inside a `switch` case.
 *
 *   X - text to print for the node
 *   C - compound-assignment suffix applied to the indent, e.g. `+=4`
 *
 * Expects a `long fill` (current indent width) in the enclosing scope.
 * Prints `fill` spaces, applies C to `fill`, prints X, then `break`s out
 * of the enclosing switch.
 *
 * The pad loop stops at zero and `fill` is clamped so that an unmatched
 * block end cannot drive the indent negative and spin the loop forever.
 * X is printed through "%s" so it is never interpreted as a format.
 */
#define STORE(X,C) {                  \
    long f;                           \
    for (f = fill; f > 0; f--) {      \
        putchar(' ');                 \
    }                                 \
    fill C;                           \
    if (fill < 0) {                   \
        fill = 0;                     \
    }                                 \
    printf("%s", X);                  \
    break;                            \
}
105
106 void parse_debug(struct parsenode *tree) {
107         long fill = 0;
108         while (tree) {  
109                 switch (tree->type) {
110                         case PARSE_TYPE_ADD:       STORE("OPERATOR:  ADD    \n", -=0);
111                         case PARSE_TYPE_BAND:      STORE("OPERATOR:  BITAND \n",-=0);
112                         case PARSE_TYPE_BOR:       STORE("OPERATOR:  BITOR  \n",-=0);
113                         case PARSE_TYPE_COMMA:     STORE("OPERATOR:  SEPERATOR\n",-=0);
114                         case PARSE_TYPE_DOT:       STORE("OPERATOR:  DOT\n",-=0);
115                         case PARSE_TYPE_DIVIDE:    STORE("OPERATOR:  DIVIDE\n",-=0);
116                         case PARSE_TYPE_EQUAL:     STORE("OPERATOR:  ASSIGNMENT\n",-=0);
117                         
118                         case PARSE_TYPE_BREAK:     STORE("STATEMENT: BREAK  \n",-=0);
119                         case PARSE_TYPE_CONTINUE:  STORE("STATEMENT: CONTINUE\n",-=0);
120                         case PARSE_TYPE_GOTO:      STORE("STATEMENT: GOTO\n",-=0);
121                         case PARSE_TYPE_RETURN:    STORE("STATEMENT: RETURN\n",-=0);
122                         case PARSE_TYPE_DONE:      STORE("STATEMENT: DONE\n",-=0);
123
124                         case PARSE_TYPE_VOID:      STORE("DECLTYPE:  VOID\n",-=0);
125                         case PARSE_TYPE_STRING:    STORE("DECLTYPE:  STRING\n",-=0);
126                         case PARSE_TYPE_ELIP:      STORE("DECLTYPE:  VALIST\n",-=0);
127                         case PARSE_TYPE_ENTITY:    STORE("DECLTYPE:  ENTITY\n",-=0);
128                         case PARSE_TYPE_FLOAT:     STORE("DECLTYPE:  FLOAT\n",-=0);
129                         case PARSE_TYPE_VECTOR:    STORE("DECLTYPE:  VECTOR\n",-=0);
130                         
131                         case PARSE_TYPE_GT:        STORE("TEST:      GREATER THAN\n",-=0);
132                         case PARSE_TYPE_LT:        STORE("TEST:      LESS THAN\n",-=0);
133                         case PARSE_TYPE_GTEQ:      STORE("TEST:      GREATER THAN OR EQUAL\n",-=0);
134                         case PARSE_TYPE_LTEQ:      STORE("TEST:      LESS THAN OR EQUAL\n",-=0);
135                         case PARSE_TYPE_LNEQ:      STORE("TEST:      NOT EQUAL\n",-=0);
136                         case PARSE_TYPE_EQEQ:      STORE("TEST:      EQUAL-EQUAL\n",-=0);
137                         
138                         case PARSE_TYPE_LBS:       STORE("BLOCK:     BEG\n",+=4);
139                         case PARSE_TYPE_RBS:       STORE("BLOCK:     END\n",-=4);
140                         case PARSE_TYPE_ELSE:      STORE("BLOCK:     ELSE\n",+=0);
141                         case PARSE_TYPE_IF:        STORE("BLOCK:     IF\n",+=0);
142                         
143                         case PARSE_TYPE_LAND:      STORE("LOGICAL:   AND\n",-=0);
144                         case PARSE_TYPE_LNOT:      STORE("LOGICAL:   NOT\n",-=0);
145                         case PARSE_TYPE_LOR:       STORE("LOGICAL:   OR\n",-=0);
146                         
147                         case PARSE_TYPE_LPARTH:    STORE("PARTH:     BEG\n",-=0);
148                         case PARSE_TYPE_RPARTH:    STORE("PARTH:     END\n",-=0);
149                         
150                         case PARSE_TYPE_WHILE:     STORE("LOOP:      WHILE\n",-=0);
151                         case PARSE_TYPE_FOR:       STORE("LOOP:      FOR\n",-=0);
152                         case PARSE_TYPE_DO:        STORE("LOOP:      DO\n",-=0);
153                 }
154                 tree = tree->next;
155         }
156 }
157
/*
 * Performs a parse operation for a statement that occupies the rest of
 * the current line.
 *
 *   X - parse type appended to the tree once the line has been consumed
 *   C - extra statements run right after the first token is fetched
 *
 * This is a macro to prevent bugs: if the calls to lex_token() aren't
 * exactly enough to feed to the end of the lexees for the thing being
 * parsed, the state of the next iteration in the creation of the parse
 * tree will be wrong and everything will fail.
 *
 * Expects `token`, `file` and `parsetree` in scope and must be expanded
 * directly inside a `switch` case, because it ends with `break`.  The
 * skip loop also stops once `file->length` goes negative, the same
 * condition the main parse loop uses, so a final line without a
 * trailing newline cannot make it spin forever.
 */
#define PARSE_PERFORM(X,C) {                          \
    token = lex_token(file);                          \
    { C }                                             \
    while (token != '\n' && file->length >= 0) {      \
        token = lex_token(file);                      \
    }                                                 \
    PARSE_TREE_ADD(X);                                \
    break;                                            \
}
174
175 void parse_clear(struct parsenode *tree) {
176         if (!tree) return;
177         struct parsenode *temp = NULL;
178         while (tree != NULL) {
179                 temp = tree;
180                 tree = tree->next;
181                 mem_d (temp);
182         }
183         
184         /* free any potential typedefs */
185         typedef_clear();
186 }
187
/*
 * Returns a printable, NUL-terminated spelling of a single character.
 *
 * Space, newline and NUL are spelled out as "<space>", "<newline>" and
 * "<null>".  Every other character is returned as a one-character
 * string.
 *
 * The one-character strings live in a static table with one slot per
 * character value, so the returned pointer stays valid after the call
 * and results for different characters do not overwrite each other.
 * (Returning the address of the by-value parameter, as before, handed
 * the caller a dangling pointer to an unterminated char.)
 */
const char *STRING_(char ch) {
    static char   single[UCHAR_MAX + 1][2];
    unsigned char slot = (unsigned char)ch;

    if (ch == ' ')
        return "<space>";
    if (ch == '\n')
        return "<newline>";
    if (ch == '\0')
        return "<null>";

    single[slot][0] = ch;
    single[slot][1] = '\0';
    return single[slot];
}
198
/*
 * Fetches the next token, skipping over any run of space tokens.
 *
 * Expects `token` and `file` in scope.  Written as a single do/while
 * statement so that `TOKEN_SKIPWHITE();` behaves like one statement
 * wherever it is used (including as the body of an unbraced `if`).
 */
#define TOKEN_SKIPWHITE()            \
    do {                             \
        token = lex_token(file);     \
    } while (token == ' ')
204
205 /*
206  * Generates a parse tree out of the lexees generated by the lexer.  This
207  * is where the tree is built.  This is where valid check is performed.
208  */
209 int parse_tree(struct lex_file *file) {
210         struct parsenode *parsetree = NULL;
211         struct parsenode *parseroot = NULL;
212         
213         /*
214          * Allocate memory for our parse tree:
215          * the parse tree is just a singly linked list which will contain
216          * all the data for code generation.
217          */
218         if (!parseroot) {
219                 parseroot = mem_a(sizeof(struct parsenode));
220                 if (!parseroot)
221                         return error(ERROR_INTERNAL, "Ran out of memory", " ");
222                 parsetree       = parseroot;
223                 parsetree->type = -1; /* not a valid type -- root element */
224         }
225         
226         int     token = 0;
227         while ((token = lex_token(file)) != ERROR_LEX      && \
228                     token                    != ERROR_COMPILER && \
229                     token                    != ERROR_INTERNAL && \
230                     token                    != ERROR_PARSE    && \
231                     token                    != ERROR_PREPRO   && file->length >= 0) {
232                 switch (token) {
233                         case TOKEN_IF:
234                                 TOKEN_SKIPWHITE();
235                                 if (token != '(')
236                                         error(ERROR_PARSE, "%s:%d Expected `(` after `if` for if statement\n", file->name, file->line);
237                                 PARSE_TREE_ADD(PARSE_TYPE_IF);
238                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
239                                 break;
240                         case TOKEN_ELSE:
241                                 token = lex_token(file);
242                                 PARSE_TREE_ADD(PARSE_TYPE_ELSE);
243                                 break;
244                         case TOKEN_FOR:
245                                 while ((token == ' ' || token == '\n') && file->length >= 0)
246                                         token = lex_token(file);
247                                 PARSE_TREE_ADD(PARSE_TYPE_FOR);
248                                 break;
249                         
250                         /*
251                          * This is a quick and easy way to do typedefs at parse time
252                          * all power is in typedef_add(), in typedef.c.  We handle 
253                          * the tokens accordingly here.
254                          */
255                         case TOKEN_TYPEDEF: {
256                                 char *f,*t;
257                                 
258                                 token = lex_token(file); 
259                                 token = lex_token(file); f = util_strdup(file->lastok);
260                                 token = lex_token(file); 
261                                 token = lex_token(file); t = util_strdup(file->lastok);
262                                 
263                                 typedef_add(f, t);
264                                 
265                                 mem_d(f);
266                                 mem_d(t);
267                                 
268                                 while (token != '\n')
269                                         token = lex_token(file);
270                                 break;
271                         }
272                         
273                         /*
274                          * Returns are addable as-is, statement checking is during
275                          * the actual parse tree check.
276                          */
277                         case TOKEN_RETURN:
278                                 token = lex_token(file);
279                                 PARSE_TREE_ADD(PARSE_TYPE_RETURN);
280                                 break;
281                         case TOKEN_CONTINUE:
282                                 PARSE_TREE_ADD(PARSE_TYPE_CONTINUE);
283                                 break;
284                         
285                         case TOKEN_DO:        PARSE_PERFORM(PARSE_TYPE_DO,      {});
286                         case TOKEN_WHILE:     PARSE_PERFORM(PARSE_TYPE_WHILE,   {});
287                         case TOKEN_BREAK:     PARSE_PERFORM(PARSE_TYPE_BREAK,   {});
288                         case TOKEN_GOTO:      PARSE_PERFORM(PARSE_TYPE_GOTO,    {});
289                         case TOKEN_VOID:      PARSE_PERFORM(PARSE_TYPE_VOID,    {});
290                         
291                         case TOKEN_STRING:    PARSE_TREE_ADD(PARSE_TYPE_STRING);
292                         case TOKEN_VECTOR:    PARSE_TREE_ADD(PARSE_TYPE_VECTOR);
293                         case TOKEN_ENTITY:    PARSE_TREE_ADD(PARSE_TYPE_ENTITY);
294                         case TOKEN_FLOAT:     PARSE_TREE_ADD(PARSE_TYPE_FLOAT);
295                         /* fall into this for all types */
296                         {
297                                 char *name = NULL;
298                                 TOKEN_SKIPWHITE();
299                                 name  = util_strdup(file->lastok);
300                                 //token = lex_token  (file);
301                                 
302                                 /* is it NOT a definition? */
303                                 if (token != ';') {
304                                         while (token == ' ')
305                                                 token = lex_token(file);
306                                         
307                                         /* it's a function? */
308                                         if (token == '(') {
309                                                 /*
310                                                  * Now I essentially have to do a ton of parsing for
311                                                  * function definition.
312                                                  */
313                                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
314                                                 token = lex_token(file);
315                                                 while (token != '\n' && token != ')') {
316                                                         switch (token) {
317                                                                 case TOKEN_VOID:    PARSE_TREE_ADD(PARSE_TYPE_VOID);   break;
318                                                                 case TOKEN_STRING:  PARSE_TREE_ADD(PARSE_TYPE_STRING); break;
319                                                                 case TOKEN_ENTITY:  PARSE_TREE_ADD(PARSE_TYPE_ENTITY); break;
320                                                                 case TOKEN_FLOAT:   PARSE_TREE_ADD(PARSE_TYPE_FLOAT);  break;
321                                                                 /*
322                                                                  * TODO:  Need to parse function pointers:  I have no clue how
323                                                                  * I'm actually going to pull that off, it's going to be hard
324                                                                  * since you can have a function pointer-pointer-pointer ....
325                                                                  */
326                                                         }
327                                                 }
328                                                 /* just a definition */
329                                                 if (token == ')') {
330                                                         /*
331                                                          * I like to put my { on the same line as the ) for
332                                                          * functions, ifs, elses, so we must support that!.
333                                                          */
334                                                         PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
335                                                         token = lex_token(file);
336                                                         token = lex_token(file);
337                                                         if(token == '{')
338                                                                 PARSE_TREE_ADD(PARSE_TYPE_LBS);
339                                                 }
340                                                 else if (token == '\n')
341                                                         error(ERROR_COMPILER, "%s:%d Expecting `;` after function definition %s\n", file->name, file->line, name);
342                                                          
343                                         } else if (token == '=') {
344                                                 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
345                                         } else {
346                                                 error(ERROR_COMPILER, "%s:%d Invalid decltype: expected `(` [function], or `=` [constant] for %s\n", file->name, file->line, name);
347                                         } 
348                                 } else {
349                                         /* definition */
350                                         printf("FOUND DEFINITION\n");
351                                 }
352                                 mem_d(name);
353                         }
354                                 
355                         /*
356                          * From here down is all language punctuation:  There is no
357                          * need to actual create tokens from these because they're already
358                          * tokenized as these individual tokens (which are in a special area
359                          * of the ascii table which doesn't conflict with our other tokens
360                          * which are higer than the ascii table.)
361                          */
362                         case '#':
363                                 token = lex_token(file); /* skip '#' */
364                                 //while (isspace(token)) {
365                                 //      if (token == '\n')
366                                 //              return error(ERROR_PARSE, "Expected valid preprocessor directive after `#` %s\n");
367                                 //      token = lex_token(file); /* try again */
368                                 //}
369                                 /*
370                                  * If we make it here we found a directive, the supported
371                                  * directives so far are #include.
372                                  */
373                                 if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
374                                         /*
375                                          * We only suport include " ", not <> like in C (why?)
376                                          * because the latter is silly.
377                                          */
378                                         while (*file->lastok != '"' && token != '\n')
379                                                 token = lex_token(file);
380                                         
381                                         /* we handle lexing at that point now */
382                                         if (token == '\n')
383                                                 return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line);
384                                 }
385                         
386                                 /* skip all tokens to end of directive */
387                                 while (token != '\n')
388                                         token = lex_token(file);
389                                 break;
390                                 
391                         case '.':
392                                 PARSE_TREE_ADD(PARSE_TYPE_DOT);
393                                 break;
394                         case '(':
395                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
396                                 break;
397                         case ')':
398                                 PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
399                                 break;
400                                 
401                         case '&':                               /* &  */
402                                 token = lex_token(file);
403                                 if (token == '&') { /* && */
404                                         token = lex_token(file);
405                                         PARSE_TREE_ADD(PARSE_TYPE_LAND);
406                                         break;
407                                 }
408                                 PARSE_TREE_ADD(PARSE_TYPE_BAND);
409                                 break;
410                         case '|':                               /* |  */
411                                 token = lex_token(file);
412                                 if (token == '|') { /* || */
413                                         token = lex_token(file);
414                                         PARSE_TREE_ADD(PARSE_TYPE_LOR);
415                                         break;
416                                 }
417                                 PARSE_TREE_ADD(PARSE_TYPE_BOR);
418                                 break;
419                         case '!':                               /* !  */
420                                 token = lex_token(file);
421                                 if (token == '=') { /* != */
422                                         token = lex_token(file);
423                                         PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
424                                         break;
425                                 }
426                                 PARSE_TREE_ADD(PARSE_TYPE_LNOT);
427                                 break;
428                         case '<':                               /* <  */
429                                 token = lex_token(file);
430                                 if (token == '=') { /* <= */
431                                         token = lex_token(file);
432                                         PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
433                                         break;
434                                 }
435                                 PARSE_TREE_ADD(PARSE_TYPE_LT);
436                                 break;
437                         case '>':                               /* >  */
438                                 token = lex_token(file);
439                                 if (token == '=') { /* >= */
440                                         token = lex_token(file);
441                                         PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
442                                         break;
443                                 }
444                                 PARSE_TREE_ADD(PARSE_TYPE_GT);
445                                 break;
446                         case '=':                               /* =  */
447                                 token = lex_token(file);
448                                 if (token == '=') { /* == */
449                                         token = lex_token(file);
450                                         PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
451                                         break;
452                                 }
453                                 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
454                                 break;
455                         case ';':
456                                 token = lex_token(file);
457                                 PARSE_TREE_ADD(PARSE_TYPE_DONE);
458                                 break;
459                         case '-':
460                                 token = lex_token(file);
461                                 PARSE_TREE_ADD(PARSE_TYPE_MINUS);
462                                 break;
463                         case '+':
464                                 token = lex_token(file);
465                                 PARSE_TREE_ADD(PARSE_TYPE_ADD);
466                                 break;
467                         case '{':
468                                 token = lex_token(file);
469                                 PARSE_TREE_ADD(PARSE_TYPE_LBS);
470                                 break;
471                         case '}':
472                                 token = lex_token(file);
473                                 PARSE_TREE_ADD(PARSE_TYPE_RBS);
474                                 break;
475                                 
476                         /*
477                          * TODO: Fix lexer to spit out ( ) as tokens, it seems the
478                          * using '(' or ')' in parser doesn't work properly unless
479                          * there are spaces before them to allow the lexer to properly
480                          * seperate identifiers. -- otherwise it eats all of it.
481                          */
482                         case LEX_IDENT:
483                                 token = lex_token(file);
484                                 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
485                                 break;
486                 }
487         }
488         parse_debug(parseroot);
489         lex_reset(file);
490         parse_clear(parseroot);
491         return 1;
492 }