]> git.xonotic.org Git - xonotic/gmqcc.git/blob - parse.c
38c33324ccb0246eee0929731e0a123a063cbb50
[xonotic/gmqcc.git] / parse.c
1 /*
2  * Copyright (C) 2012 
3  *      Dale Weiler
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
6  * this software and associated documentation files (the "Software"), to deal in
7  * the Software without restriction, including without limitation the rights to
8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is furnished to do
10  * so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in all
13  * copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 #include <limits.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <ctype.h>
27 #include "gmqcc.h"
28
29 /*
30  * These are not lexical tokens:  These are parse tree types.  Most people
31  * perform tokenizing on language punctuation which is wrong.  That stuff
32  * is technically already tokenized, it just needs to be parsed into a tree
33  */
34 #define PARSE_TYPE_DO       0
35 #define PARSE_TYPE_ELSE     1
36 #define PARSE_TYPE_IF       2
37 #define PARSE_TYPE_WHILE    3
38 #define PARSE_TYPE_BREAK    4
39 #define PARSE_TYPE_CONTINUE 5
40 #define PARSE_TYPE_RETURN   6
41 #define PARSE_TYPE_GOTO     7
42 #define PARSE_TYPE_FOR      8
43 #define PARSE_TYPE_VOID     9
44 #define PARSE_TYPE_STRING   10
45 #define PARSE_TYPE_FLOAT    11
46 #define PARSE_TYPE_VECTOR   12
47 #define PARSE_TYPE_ENTITY   13
48 #define PARSE_TYPE_LAND     14
49 #define PARSE_TYPE_LOR      15
50 #define PARSE_TYPE_LTEQ     16
51 #define PARSE_TYPE_GTEQ     17
52 #define PARSE_TYPE_EQEQ     18
53 #define PARSE_TYPE_LNEQ     19
54 #define PARSE_TYPE_COMMA    20
55 #define PARSE_TYPE_LNOT     21
56 #define PARSE_TYPE_STAR     22
57 #define PARSE_TYPE_DIVIDE   23
58 #define PARSE_TYPE_LPARTH   24
59 #define PARSE_TYPE_RPARTH   25
60 #define PARSE_TYPE_MINUS    26
61 #define PARSE_TYPE_ADD      27
62 #define PARSE_TYPE_EQUAL    28
63 #define PARSE_TYPE_LBS      29
64 #define PARSE_TYPE_RBS      30
65 #define PARSE_TYPE_ELIP     31
66 #define PARSE_TYPE_DOT      32
67 #define PARSE_TYPE_LT       33
68 #define PARSE_TYPE_GT       34
69 #define PARSE_TYPE_BAND     35
70 #define PARSE_TYPE_BOR      36
71 #define PARSE_TYPE_DONE     37
72 #define PARSE_TYPE_IDENT    38
73
74 int parse[PARSE_TYPE_IDENT] = {
75         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
76         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
77         0,0,0,0,0,0,0,0
78 };
79
80 /*
81  * Adds a parse type to the parse tree, this is where all the hard
82  * work actually begins.
83  */
84 #define PARSE_TREE_ADD(X)                                        \
85         do {                                                         \
86                 parsetree->next       = mem_a(sizeof(struct parsenode)); \
87                 parsetree->next->next = NULL;                            \
88                 parsetree->next->type = (X);                             \
89                 parsetree             = parsetree->next;                 \
90         } while (0)
91
92 #define PARSE_TREE_CHK(X,Y,Z)                                        \
93         do { \
94                 if(parse[X]) { \
95                         error(ERROR_PARSE, "Expected %c for %c\n", Y, Z); \
96                         exit (1); \
97                 } \
98         } while (0)
99         
100 #define PARSE_TREE_PUT(X) do { parse[X] = 1; } while (0)
101
102 /*
103  * This is all the punctuation handled in the parser, these don't
104  * need tokens, they're already tokens.
105  */
106 #if 0
107         "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
108         "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
109         "." , "<" , ">" , "&" , "|" , 
110 #endif
111
112 #define STORE(X) {  \
113         printf(X);      \
114         break;          \
115 }
116
117 void parse_debug(struct parsenode *tree) {
118         while (tree) {  
119                 switch (tree->type) {
120                         case PARSE_TYPE_ADD:       STORE("OPERATOR:  ADD    \n");
121                         case PARSE_TYPE_BAND:      STORE("OPERATOR:  BITAND \n");
122                         case PARSE_TYPE_BOR:       STORE("OPERATOR:  BITOR  \n");
123                         case PARSE_TYPE_COMMA:     STORE("OPERATOR:  SEPERATOR\n");
124                         case PARSE_TYPE_DOT:       STORE("OPERATOR:  DOT\n");
125                         case PARSE_TYPE_DIVIDE:    STORE("OPERATOR:  DIVIDE\n");
126                         case PARSE_TYPE_EQUAL:     STORE("OPERATOR:  ASSIGNMENT\n");
127                         
128                         case PARSE_TYPE_BREAK:     STORE("STATEMENT: BREAK  \n");
129                         case PARSE_TYPE_CONTINUE:  STORE("STATEMENT: CONTINUE\n");
130                         case PARSE_TYPE_GOTO:      STORE("STATEMENT: GOTO\n");
131                         case PARSE_TYPE_RETURN:    STORE("STATEMENT: RETURN\n");
132                         case PARSE_TYPE_DONE:      STORE("STATEMENT: DONE\n");
133
134                         case PARSE_TYPE_VOID:      STORE("DECLTYPE:  VOID\n");
135                         case PARSE_TYPE_STRING:    STORE("DECLTYPE:  STRING\n");
136                         case PARSE_TYPE_ELIP:      STORE("DECLTYPE:  VALIST\n");
137                         case PARSE_TYPE_ENTITY:    STORE("DECLTYPE:  ENTITY\n");
138                         case PARSE_TYPE_FLOAT:     STORE("DECLTYPE:  FLOAT\n");
139                         case PARSE_TYPE_VECTOR:    STORE("DECLTYPE:  VECTOR\n");
140                         
141                         case PARSE_TYPE_GT:        STORE("TEST:      GREATER THAN\n");
142                         case PARSE_TYPE_LT:        STORE("TEST:      LESS THAN\n");
143                         case PARSE_TYPE_GTEQ:      STORE("TEST:      GREATER THAN OR EQUAL\n");
144                         case PARSE_TYPE_LTEQ:      STORE("TEST:      LESS THAN OR EQUAL\n");
145                         case PARSE_TYPE_LNEQ:      STORE("TEST:      NOT EQUAL\n");
146                         case PARSE_TYPE_EQEQ:      STORE("TEST:      EQUAL-EQUAL\n");
147                         
148                         case PARSE_TYPE_LBS:       STORE("BLOCK:     BEG\n");
149                         case PARSE_TYPE_RBS:       STORE("BLOCK:     END\n");
150                         case PARSE_TYPE_ELSE:      STORE("BLOCK:     ELSE\n");
151                         case PARSE_TYPE_IF:        STORE("BLOCK:     IF\n");
152                         
153                         case PARSE_TYPE_LAND:      STORE("LOGICAL:   AND\n");
154                         case PARSE_TYPE_LNOT:      STORE("LOGICAL:   NOT\n");
155                         case PARSE_TYPE_LOR:       STORE("LOGICAL:   OR\n");
156                         
157                         case PARSE_TYPE_LPARTH:    STORE("PARTH:     BEG\n");
158                         case PARSE_TYPE_RPARTH:    STORE("PARTH:     END\n");
159                         
160                         case PARSE_TYPE_WHILE:     STORE("LOOP:      WHILE\n");
161                         case PARSE_TYPE_FOR:       STORE("LOOP:      FOR\n");
162                         case PARSE_TYPE_DO:        STORE("LOOP:      DO\n");
163                         
164                         //case PARSE_TYPE_IDENT:     STORE("IDENT:     ???\n");
165                 }
166                 tree = tree->next;
167         }
168 }
169
170 /*
171  * Performs a parse operation:  This is a macro to prevent bugs, if the
172  * calls to lex_token are'nt exactly enough to feed to the end of the
173  * actual lexees for the current thing that is being parsed, the state 
174  * of the next iteration in the creation of the parse tree will be wrong
175  * and everything will fail.
176  */
177 #define PARSE_PERFORM(X,C) {     \
178     token = lex_token(file);     \
179     { C }                        \
180     while (token != '\n') {      \
181             token = lex_token(file); \
182     }                            \
183     PARSE_TREE_ADD(X);           \
184     break;                       \
185 }
186
187 void parse_clear(struct parsenode *tree) {
188         if (!tree) return;
189         struct parsenode *temp = NULL;
190         while (tree != NULL) {
191                 temp = tree;
192                 tree = tree->next;
193                 mem_d (temp);
194         }
195         
196         /* free any potential typedefs */
197         typedef_clear();
198 }
199
200 /*
201  * Generates a parse tree out of the lexees generated by the lexer.  This
202  * is where the tree is built.  This is where valid check is performed.
203  */
204 int parse_tree(struct lex_file *file) {
205         struct parsenode *parsetree = NULL;
206         struct parsenode *parseroot = NULL;
207         
208         /*
209          * Allocate memory for our parse tree:
210          * the parse tree is just a singly linked list which will contain
211          * all the data for code generation.
212          */
213         if (!parseroot) {
214                 parseroot = mem_a(sizeof(struct parsenode));
215                 if (!parseroot)
216                         return error(ERROR_INTERNAL, "Ran out of memory", " ");
217                 parsetree       = parseroot;
218                 parsetree->type = -1; /* not a valid type -- root element */
219         }
220         
221         int     token = 0;
222         while ((token = lex_token(file)) != ERROR_LEX      && \
223                     token                    != ERROR_COMPILER && \
224                     token                    != ERROR_INTERNAL && \
225                     token                    != ERROR_PARSE    && \
226                     token                    != ERROR_PREPRO   && file->length >= 0) {
227                 switch (token) {
228                         case TOKEN_IF:
229                                 token = lex_token(file);
230                                 if (token != '(')
231                                         error(ERROR_PARSE, "Expected `(` on if statement:\n");
232                                 PARSE_TREE_ADD(PARSE_TYPE_IF);
233                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
234                                 PARSE_TREE_CHK(PARSE_TYPE_LPARTH, ')', '(');
235                                 PARSE_TREE_PUT(PARSE_TYPE_LPARTH);
236                                 break;
237                         case TOKEN_ELSE:
238                                 token = lex_token(file);
239                                 PARSE_TREE_ADD(PARSE_TYPE_ELSE);
240                                 break;
241                         case TOKEN_FOR:
242                                 //token = lex_token(file);
243                                 while ((token == ' ' || token == '\n') && file->length >= 0)
244                                         token = lex_token(file);
245                                 PARSE_TREE_ADD(PARSE_TYPE_FOR);
246                                 break;
247                         
248                         /*
249                          * This is a quick and easy way to do typedefs at parse time
250                          * all power is in typedef_add(), in typedef.c.  We handle 
251                          * the tokens accordingly here.
252                          */
253                         case TOKEN_TYPEDEF: {
254                                 char *f,*t;
255                                 
256                                 token = lex_token(file); 
257                                 token = lex_token(file); f = util_strdup(file->lastok);
258                                 token = lex_token(file); 
259                                 token = lex_token(file); t = util_strdup(file->lastok);
260                                 
261                                 typedef_add(f, t);
262                                 
263                                 mem_d(f);
264                                 mem_d(t);
265                                 
266                                 while (token != '\n')
267                                         token = lex_token(file);
268                                 break;
269                         }
270                         
271                         /*
272                          * Returns are addable as-is, statement checking is during
273                          * the actual parse tree check.
274                          */
275                         case TOKEN_RETURN:
276                                 token = lex_token(file);
277                                 PARSE_TREE_ADD(PARSE_TYPE_RETURN);
278                                 break;
279                         case TOKEN_CONTINUE:
280                                 PARSE_TREE_ADD(PARSE_TYPE_CONTINUE);
281                                 break;
282                         
283                         /*
284                          * DO loops work like so:
285                          * __do_loop_beg:
286                          * IFNOT __do_loop_beg#
287                          *      GOTO __do_loop_end
288                          * INSTR1
289                          * INSTR2
290                          * ......
291                          * GOTO __do_loop_beg
292                          */
293                         
294                         case TOKEN_DO:        PARSE_PERFORM(PARSE_TYPE_DO,      {});
295                         case TOKEN_WHILE:     PARSE_PERFORM(PARSE_TYPE_WHILE,   {});
296                         case TOKEN_BREAK:     PARSE_PERFORM(PARSE_TYPE_BREAK,   {});
297                         case TOKEN_GOTO:      PARSE_PERFORM(PARSE_TYPE_GOTO,    {});
298                         case TOKEN_VOID:      PARSE_PERFORM(PARSE_TYPE_VOID,    {});
299                         case TOKEN_STRING:    PARSE_PERFORM(PARSE_TYPE_STRING,  {});
300                         case TOKEN_FLOAT:     PARSE_PERFORM(PARSE_TYPE_FLOAT,   {});
301                         case TOKEN_VECTOR:    PARSE_PERFORM(PARSE_TYPE_VECTOR,  {});
302                         case TOKEN_ENTITY:    PARSE_PERFORM(PARSE_TYPE_ENTITY,  {});
303                                 
304                         /*
305                          * From here down is all language punctuation:  There is no
306                          * need to actual create tokens from these because they're already
307                          * tokenized as these individual tokens (which are in a special area
308                          * of the ascii table which doesn't conflict with our other tokens
309                          * which are higer than the ascii table.)
310                          */
311                         case '#':
312                                 token = lex_token(file); /* skip '#' */
313                                 while (isspace(token)) {
314                                         if (token == '\n') {
315                                                 return error(ERROR_PARSE, "Expected valid preprocessor directive after `#` %s\n");
316                                         }
317                                         token = lex_token(file); /* try again */
318                                 }
319                                 /*
320                                  * If we make it here we found a directive, the supported
321                                  * directives so far are #include.
322                                  */
323                                 if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
324                                         //lex_include("file");
325                                         /*
326                                          * We need to compose a name till we find either a
327                                          * '"',> or a <, (for includes), if we hit a '\n' then
328                                          * clearly the person miswrote the include.
329                                          */
330                                         while (*file->lastok != '"' && token != '\n')
331                                                 token = lex_token(file);
332                                                 
333                                         if (token == '\n')
334                                                 return error(ERROR_PARSE, "Invalid use of include preprocessor directive: wanted #include \"file.h\"\n");
335                                                 
336                                         
337                                         //lex_token(file);
338                                         //lex_token(file);
339                                         printf("include: %s\n", file->lastok);
340                                 }
341                         
342                                 /* skip all tokens to end of directive */
343                                 while (token != '\n')
344                                         token = lex_token(file);
345                                 break;
346                                 
347                         case '.':
348                                 //token = lex_token(file);
349                                 PARSE_TREE_ADD(PARSE_TYPE_DOT);
350                                 break;
351                         case '(':
352                                 //token = lex_token(file);
353                                 PARSE_TREE_PUT(PARSE_TYPE_LPARTH);
354                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
355                                 break;
356                         case ')':
357                                 //token = lex_token(file);
358                                 parse[PARSE_TYPE_LPARTH] = 0;
359                                 PARSE_TREE_PUT(PARSE_TYPE_RPARTH);
360                                 PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
361                                 break;
362                                 
363                         case '&':                               /* &  */
364                                 token = lex_token(file);
365                                 if (token == '&') { /* && */
366                                         token = lex_token(file);
367                                         PARSE_TREE_ADD(PARSE_TYPE_LAND);
368                                         break;
369                                 }
370                                 PARSE_TREE_ADD(PARSE_TYPE_BAND);
371                                 break;
372                         case '|':                               /* |  */
373                                 token = lex_token(file);
374                                 if (token == '|') { /* || */
375                                         token = lex_token(file);
376                                         PARSE_TREE_ADD(PARSE_TYPE_LOR);
377                                         break;
378                                 }
379                                 PARSE_TREE_ADD(PARSE_TYPE_BOR);
380                                 break;
381                         case '!':                               /* !  */
382                                 token = lex_token(file);
383                                 if (token == '=') { /* != */
384                                         token = lex_token(file);
385                                         PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
386                                         break;
387                                 }
388                                 PARSE_TREE_ADD(PARSE_TYPE_LNOT);
389                                 break;
390                         case '<':                               /* <  */
391                                 token = lex_token(file);
392                                 if (token == '=') { /* <= */
393                                         token = lex_token(file);
394                                         PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
395                                         break;
396                                 }
397                                 PARSE_TREE_ADD(PARSE_TYPE_LT);
398                                 break;
399                         case '>':                               /* >  */
400                                 token = lex_token(file);
401                                 if (token == '=') { /* >= */
402                                         token = lex_token(file);
403                                         PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
404                                         break;
405                                 }
406                                 PARSE_TREE_ADD(PARSE_TYPE_GT);
407                                 break;
408                         case '=':                               /* =  */
409                                 token = lex_token(file);
410                                 if (token == '=') { /* == */
411                                         token = lex_token(file);
412                                         PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
413                                         break;
414                                 }
415                                 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
416                                 break;
417                         case ';':
418                                 token = lex_token(file);
419                                 PARSE_TREE_ADD(PARSE_TYPE_DONE);
420                                 break;
421                         case '-':
422                                 token = lex_token(file);
423                                 PARSE_TREE_ADD(PARSE_TYPE_MINUS);
424                                 break;
425                         case '+':
426                                 token = lex_token(file);
427                                 PARSE_TREE_ADD(PARSE_TYPE_ADD);
428                                 break;
429                         case '{':
430                                 token = lex_token(file);
431                                 PARSE_TREE_ADD(PARSE_TYPE_LBS);
432                                 break;
433                         case '}':
434                                 token = lex_token(file);
435                                 PARSE_TREE_ADD(PARSE_TYPE_RBS);
436                                 break;
437                                 
438                         /*
439                          * TODO: Fix lexer to spit out ( ) as tokens, it seems the
440                          * using '(' or ')' in parser doesn't work properly unless
441                          * there are spaces before them to allow the lexer to properly
442                          * seperate identifiers. -- otherwise it eats all of it.
443                          */
444                         case LEX_IDENT:
445                                 token = lex_token(file);
446                                 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
447                                 break;
448                 }
449         }
450         parse_debug(parseroot);
451         lex_reset(file);
452         parse_clear(parseroot);
453         return 1;
454 }