]> git.xonotic.org Git - xonotic/gmqcc.git/blob - parse.c
Fixes for '#' tokens
[xonotic/gmqcc.git] / parse.c
1 /*
2  * Copyright (C) 2012 
3  *      Dale Weiler
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
6  * this software and associated documentation files (the "Software"), to deal in
7  * the Software without restriction, including without limitation the rights to
8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is furnished to do
10  * so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in all
13  * copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 #include <limits.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include "gmqcc.h"
27
/*
 * These are not lexical tokens: they are parse tree node types.  Most
 * people perform tokenizing on language punctuation, which is wrong.  That
 * stuff is technically already tokenized; it just needs to be parsed into a
 * tree.
 *
 * The value 0 is deliberately not assigned to any type: parse_debug()
 * treats a node whose type is 0 as a blank and skips it, so a real node
 * type equal to 0 could never be reported.
 */
#define PARSE_TYPE_DO       1
#define PARSE_TYPE_ELSE     2
#define PARSE_TYPE_IF       3
#define PARSE_TYPE_WHILE    4
#define PARSE_TYPE_BREAK    5
#define PARSE_TYPE_CONTINUE 6
#define PARSE_TYPE_RETURN   7
#define PARSE_TYPE_GOTO     8
#define PARSE_TYPE_FOR      9
#define PARSE_TYPE_VOID     10
#define PARSE_TYPE_STRING   11
#define PARSE_TYPE_FLOAT    12
#define PARSE_TYPE_VECTOR   13
#define PARSE_TYPE_ENTITY   14
#define PARSE_TYPE_LAND     15
#define PARSE_TYPE_LOR      16
#define PARSE_TYPE_LTEQ     17
#define PARSE_TYPE_GTEQ     18
#define PARSE_TYPE_EQEQ     19
#define PARSE_TYPE_LNEQ     20
#define PARSE_TYPE_COMMA    21
#define PARSE_TYPE_LNOT     22
#define PARSE_TYPE_STAR     23
#define PARSE_TYPE_DIVIDE   24
#define PARSE_TYPE_LPARTH   25
#define PARSE_TYPE_RPARTH   26
#define PARSE_TYPE_MINUS    27
#define PARSE_TYPE_ADD      28
#define PARSE_TYPE_EQUAL    29
#define PARSE_TYPE_LBS      30
#define PARSE_TYPE_RBS      31
#define PARSE_TYPE_ELIP     32
#define PARSE_TYPE_DOT      33
#define PARSE_TYPE_LT       34
#define PARSE_TYPE_GT       35
#define PARSE_TYPE_BAND     36
#define PARSE_TYPE_BOR      37
#define PARSE_TYPE_DONE     38
71
/*
 * Appends a freshly allocated node of type X to the tail of the parse list
 * and makes that node the new tail; this is where all the hard work
 * actually begins.  A `struct parsenode *parsetree` tail pointer must be in
 * scope wherever the macro is expanded.
 */
#define PARSE_TREE_ADD(X)                                           \
    do {                                                            \
        struct parsenode *fresh_ = mem_a(sizeof(struct parsenode)); \
        fresh_->next    = NULL;                                     \
        fresh_->type    = (X);                                      \
        parsetree->next = fresh_;                                   \
        parsetree       = fresh_;                                   \
    } while (0)
83
84 /*
85  * These are all the punctuation handled in the parser, these don't
86  * need tokens, they're already tokens.
87  */
88 #if 0
89         "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
90         "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
91         "." , "<" , ">" , "&" , "|" , 
92 #endif
93
/*
 * Body of a single `case`: writes the label X to stdout and leaves the
 * enclosing switch.  The label is written with fputs() rather than being
 * handed to printf() as a format string, so a '%' inside a label is printed
 * literally instead of being interpreted as a conversion.
 */
#define STORE(X) {          \
    fputs((X), stdout);     \
    break;                  \
}
98
99 void parse_debug(struct parsenode *tree) {
100         while (tree && tree->next != NULL) {
101                 /* skip blanks */
102                 if (tree->type == 0) {
103                         tree = tree->next;
104                         continue;
105                 }
106                         
107                 switch (tree->type) {
108                         case PARSE_TYPE_ADD:       STORE("OPERATOR:  ADD    \n");
109                         case PARSE_TYPE_BAND:      STORE("OPERATOR:  BITAND \n");
110                         case PARSE_TYPE_BOR:       STORE("OPERATOR:  BITOR  \n");
111                         case PARSE_TYPE_COMMA:     STORE("OPERATOR:  SEPERATOR\n");
112                         case PARSE_TYPE_DOT:       STORE("OPERATOR:  DOT\n");
113                         case PARSE_TYPE_DIVIDE:    STORE("OPERATOR:  DIVIDE\n");
114                         case PARSE_TYPE_EQUAL:     STORE("OPERATOR:  ASSIGNMENT\n");
115                         
116                         case PARSE_TYPE_BREAK:     STORE("STATEMENT: BREAK  \n");
117                         case PARSE_TYPE_CONTINUE:  STORE("STATEMENT: CONTINUE\n");
118                         case PARSE_TYPE_GOTO:      STORE("STATEMENT: GOTO\n");
119                         case PARSE_TYPE_RETURN:    STORE("STATEMENT: RETURN\n");
120                         case PARSE_TYPE_DONE:      STORE("STATEMENT: DONE\n");
121
122
123                         case PARSE_TYPE_ELIP:      STORE("DECLTYPE:  VALIST\n");
124                         case PARSE_TYPE_ENTITY:    STORE("DECLTYPE:  ENTITY\n");
125                         case PARSE_TYPE_FLOAT:     STORE("DECLTYPE:  FLOAT\n");
126                         
127                         case PARSE_TYPE_GT:        STORE("TEST:      GREATER THAN\n");
128                         case PARSE_TYPE_LT:        STORE("TEST:      LESS THAN\n");
129                         case PARSE_TYPE_GTEQ:      STORE("TEST:      GREATER THAN OR EQUAL\n");
130                         case PARSE_TYPE_LTEQ:      STORE("TEST:      LESS THAN OR EQUAL\n");
131                         case PARSE_TYPE_LNEQ:      STORE("TEST:      NOT EQUAL\n");
132                         case PARSE_TYPE_EQEQ:      STORE("TEST:      EQUAL-EQUAL\n");
133                         
134                         case PARSE_TYPE_LBS:       STORE("BLOCK:     BEG\n");
135                         case PARSE_TYPE_RBS:       STORE("BLOCK:     END\n");
136                         case PARSE_TYPE_ELSE:      STORE("BLOCK:     ELSE\n");
137                         case PARSE_TYPE_IF:        STORE("BLOCK:     IF\n");
138                         
139                         case PARSE_TYPE_LAND:      STORE("LOGICAL:   AND\n");
140                         case PARSE_TYPE_LNOT:      STORE("LOGICAL:   NOT\n");
141                         case PARSE_TYPE_LOR:       STORE("LOGICAL:   OR\n");
142                         
143                         case PARSE_TYPE_LPARTH:    STORE("PARTH:     BEG\n");
144                         case PARSE_TYPE_RPARTH:    STORE("PARTH:     END\n");
145                         
146                         case PARSE_TYPE_WHILE:     STORE("LOOP:      WHILE\n");
147                         case PARSE_TYPE_FOR:       STORE("LOOP:      FOR\n");
148                         case PARSE_TYPE_DO:        STORE("LOOP:      DO\n");
149                         
150
151                 }
152                 tree = tree->next;
153         }
154 }
155
/*
 * Placeholder handling for one `case` of a token switch: reads the next
 * token from `file` into `token`, appends a node of type X to the parse
 * tree for later checking / optimization / codegen, and leaves the
 * enclosing switch.  Nothing is validated here -- checking that the token
 * is used legally is a TODO item which is not implemented.
 */
#define PARSE_TODO(X) {            \
    token = lex_token(file);       \
    PARSE_TREE_ADD(X);             \
    break;                         \
}
167
168 int parse(struct lex_file *file) {
169         struct parsenode *parsetree = NULL;
170         struct parsenode *parseroot = NULL;
171         
172         /*
173          * Allocate memory for our parse tree:
174          * the parse tree is just a singly linked list which will contain
175          * all the data for code generation.
176          */
177         if (!parseroot) {
178                 parseroot = mem_a(sizeof(struct parsenode));
179                 if (!parseroot)
180                         return error(ERROR_INTERNAL, "Ran out of memory", " ");
181                 parsetree = parseroot;
182         }
183         
184         int     token = 0;
185         while ((token = lex_token(file)) != ERROR_LEX      && \
186                     token                    != ERROR_COMPILER && \
187                     token                    != ERROR_INTERNAL && \
188                     token                    != ERROR_PARSE    && \
189                     token                    != ERROR_PREPRO   && file->length >= 0) {
190                 switch (token) {
191                         case TOKEN_IF:
192                                 token = lex_token(file);
193                                 while ((token == ' ' || token == '\n') && file->length >= 0)
194                                         token = lex_token(file);
195                                         
196                                 if (token != '(')
197                                         error(ERROR_PARSE, "Expected `(` after if\n", "");
198                                         
199                                 PARSE_TREE_ADD(PARSE_TYPE_IF);
200                                 break;
201                         case TOKEN_ELSE:
202                                 token = lex_token(file);
203                                 while ((token == ' ' || token == '\n') && file->length >= 0)
204                                         token = lex_token(file);
205                                         
206                                 PARSE_TREE_ADD(PARSE_TYPE_ELSE);
207                                 break;
208                         case TOKEN_FOR:
209                                 token = lex_token(file);
210                                 while ((token == ' ' || token == '\n') && file->length >= 0)
211                                         token = lex_token(file);
212                                         
213                                 PARSE_TREE_ADD(PARSE_TYPE_FOR);
214                                 break;
215                                 
216                         case LEX_IDENT:
217                                 token = lex_token(file);
218                                 break;
219                         
220                         /*
221                          * This is a quick and easy way to do typedefs at parse time
222                          * all power is in typedef_add(), in typedef.c.  We handle 
223                          * the tokens accordingly here.
224                          */
225                         case TOKEN_TYPEDEF: {
226                                 char *f = NULL;
227                                 char *t = NULL;
228                                 token = lex_token(file); 
229                                 token = lex_token(file); f = strdup(file->lastok);
230                                 token = lex_token(file); 
231                                 token = lex_token(file); t = strdup(file->lastok);
232                                 
233                                 typedef_add(f, t);
234                                 
235                                 /* free stdup strings */
236                                 mem_d(f);
237                                 mem_d(t);
238                                 break;
239                         }
240                                 
241                                 
242                         case TOKEN_DO:        PARSE_TODO(PARSE_TYPE_DO);
243                         case TOKEN_WHILE:     PARSE_TODO(PARSE_TYPE_WHILE);
244                         case TOKEN_BREAK:     PARSE_TODO(PARSE_TYPE_BREAK);
245                         case TOKEN_CONTINUE:  PARSE_TODO(PARSE_TYPE_CONTINUE);
246                         case TOKEN_RETURN:    PARSE_TODO(PARSE_TYPE_RETURN);
247                         case TOKEN_GOTO:      PARSE_TODO(PARSE_TYPE_GOTO);
248                         case TOKEN_VOID:      PARSE_TODO(PARSE_TYPE_VOID);
249                         case TOKEN_STRING:    PARSE_TODO(PARSE_TYPE_STRING);
250                         case TOKEN_FLOAT:     PARSE_TODO(PARSE_TYPE_FLOAT);
251                         case TOKEN_VECTOR:    PARSE_TODO(PARSE_TYPE_VECTOR);
252                         case TOKEN_ENTITY:    PARSE_TODO(PARSE_TYPE_ENTITY);
253                                 
254                         /*
255                          * From here down is all language punctuation:  There is no
256                          * need to actual create tokens from these because they're already
257                          * tokenized as these individual tokens (which are in a special area
258                          * of the ascii table which doesn't conflict with our other tokens
259                          * which are higer than the ascii table.)
260                          */
261                         case '#':
262                                 /*
263                                  * Skip the preprocessor for now:  We'll implement our own
264                                  * eventually.  For now we need to make sure directives are
265                                  * not accidently tokenized.
266                                  */
267                                 token = lex_token(file);
268                                 token = lex_token(file);
269                                 
270                                 /* skip all tokens to end of directive */
271                                 while (token != '\n')
272                                         token = lex_token(file);
273                                 break;
274                                 
275                         case '&':               /* &  */
276                                 token = lex_token(file);
277                                 if (token == '&') { /* && */
278                                         token = lex_token(file);
279                                         PARSE_TREE_ADD(PARSE_TYPE_LAND);
280                                         break;
281                                 }
282                                 PARSE_TREE_ADD(PARSE_TYPE_BAND);
283                                 break;
284                         case '|':               /* |  */
285                                 token = lex_token(file);
286                                 if (token == '|') { /* || */
287                                         token = lex_token(file);
288                                         PARSE_TREE_ADD(PARSE_TYPE_LOR);
289                                         break;
290                                 }
291                                 PARSE_TREE_ADD(PARSE_TYPE_BOR);
292                                 break;
293                         case '!':
294                                 token = lex_token(file);
295                                 if (token == '=') { /* != */
296                                         token = lex_token(file);
297                                         PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
298                                         break;
299                                 }
300                                 PARSE_TREE_ADD(PARSE_TYPE_LNOT);
301                                 break;
302                         case '<':               /* <  */
303                                 token = lex_token(file);
304                                 if (token == '=') { /* <= */
305                                         token = lex_token(file);
306                                         PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
307                                         break;
308                                 }
309                                 PARSE_TREE_ADD(PARSE_TYPE_LT);
310                                 break;
311                         case '>':               /* >  */
312                                 token = lex_token(file);
313                                 if (token == '=') { /* >= */
314                                         token = lex_token(file);
315                                         PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
316                                         break;
317                                 }
318                                 PARSE_TREE_ADD(PARSE_TYPE_GT);
319                                 break;
320                         case '=':
321                                 token = lex_token(file);
322                                 if (token == '=') { /* == */
323                                         token = lex_token(file);
324                                         PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
325                                         break;
326                                 }
327                                 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
328                                 break;
329                         case ';':
330                                 token = lex_token(file);
331                                 PARSE_TREE_ADD(PARSE_TYPE_DONE);
332                                 break;
333                         case '-':
334                                 token = lex_token(file);
335                                 PARSE_TREE_ADD(PARSE_TYPE_MINUS);
336                                 break;
337                         case '+':
338                                 token = lex_token(file);
339                                 PARSE_TREE_ADD(PARSE_TYPE_ADD);
340                                 break;
341                         case '(':
342                                 token = lex_token(file);
343                                 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
344                                 break;
345                         case ')':
346                                 token = lex_token(file);
347                                 PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
348                                 break;
349                         case '{':
350                                 token = lex_token(file);
351                                 PARSE_TREE_ADD(PARSE_TYPE_LBS);
352                                 break;
353                         case '}':
354                                 token = lex_token(file);
355                                 PARSE_TREE_ADD(PARSE_TYPE_RBS);
356                                 break;
357                 }
358         }
359         parse_debug(parseroot);
360         lex_reset(file);
361         
362         return 1;
363 }