/* parse.c -- xonotic/gmqcc.git (git.xonotic.org blob 3555035848a11bdb8c597de1d162accb2c652385) */
/*
 * Copyright (C) 2012
 *      Dale Weiler
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gmqcc.h"

/*
 * Parse tree node types.
 *
 * These are NOT lexical tokens: they identify the kind of each node stored
 * in the parse tree (the `type` field of struct parsenode).  Language
 * punctuation already arrives from the lexer as individual tokens, so it
 * does not need to be tokenized again, only placed into the tree.
 *
 * The numeric values are kept exactly as they were when these were
 * individual #define constants.
 */
enum {
    PARSE_TYPE_DO       = 0,
    PARSE_TYPE_ELSE     = 1,
    PARSE_TYPE_IF       = 2,
    PARSE_TYPE_WHILE    = 3,
    PARSE_TYPE_BREAK    = 4,
    PARSE_TYPE_CONTINUE = 5,
    PARSE_TYPE_RETURN   = 6,
    PARSE_TYPE_GOTO     = 7,
    PARSE_TYPE_FOR      = 8,
    PARSE_TYPE_VOID     = 9,
    PARSE_TYPE_STRING   = 10,
    PARSE_TYPE_FLOAT    = 11,
    PARSE_TYPE_VECTOR   = 12,
    PARSE_TYPE_ENTITY   = 13,
    PARSE_TYPE_LAND     = 14,
    PARSE_TYPE_LOR      = 15,
    PARSE_TYPE_LTEQ     = 16,
    PARSE_TYPE_GTEQ     = 17,
    PARSE_TYPE_EQEQ     = 18,
    PARSE_TYPE_LNEQ     = 19,
    PARSE_TYPE_COMMA    = 20,
    PARSE_TYPE_LNOT     = 21,
    PARSE_TYPE_STAR     = 22,
    PARSE_TYPE_DIVIDE   = 23,
    PARSE_TYPE_LPARTH   = 24,
    PARSE_TYPE_RPARTH   = 25,
    PARSE_TYPE_MINUS    = 26,
    PARSE_TYPE_ADD      = 27,
    PARSE_TYPE_EQUAL    = 28,
    PARSE_TYPE_LBS      = 29,
    PARSE_TYPE_RBS      = 30,
    PARSE_TYPE_ELIP     = 31,
    PARSE_TYPE_DOT      = 32,
    PARSE_TYPE_LT       = 33,
    PARSE_TYPE_GT       = 34,
    PARSE_TYPE_BAND     = 35,
    PARSE_TYPE_BOR      = 36,
    PARSE_TYPE_DONE     = 37,
    PARSE_TYPE_IDENT    = 38
};

/*
 * Appends a new node of parse type X to the parse list and makes that node
 * the current tail.
 *
 * Requirements at the point of use:
 *   - a `struct parsenode *parsetree` variable pointing at the current tail
 *     of the list must be in scope;
 *   - the enclosing function must return int: if the allocation fails the
 *     macro returns from that function with the result of
 *     error(ERROR_INTERNAL, ...) instead of dereferencing a NULL pointer.
 */
#define PARSE_TREE_ADD(X)                                                   \
    do {                                                                    \
        struct parsenode *parse_node_new = mem_a(sizeof(*parse_node_new));  \
        if (!parse_node_new)                                                \
            return error(ERROR_INTERNAL, "Ran out of memory", " ");         \
        parse_node_new->next = NULL;                                        \
        parse_node_new->type = (X);                                         \
        parsetree->next      = parse_node_new;                              \
        parsetree            = parse_node_new;                              \
    } while (0)

/*
 * This is all the punctuation handled by the parser.  None of it needs a
 * token constant of its own: each piece of punctuation is already a token.
 * The list below is kept for reference only and is compiled out.
 */
#if 0
        "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
        "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
        "." , "<" , ">" , "&" , "|" , 
#endif

/*
 * Emits one line of parse_debug() output.
 *
 *   X - the text to print (printed verbatim, not used as a format string)
 *   C - a compound-assignment suffix applied to `fill` (for example `+=4`)
 *
 * The line is indented by the current value of `fill` BEFORE `fill C` is
 * applied; a zero or negative `fill` prints no indentation.
 *
 * Must be used as the body of a `case` inside a `switch`, in a scope where a
 * `long fill` variable is visible.  The expansion ends in `break` so that it
 * leaves that switch, which is why it is a plain brace block rather than the
 * usual do { } while (0) wrapper (which would swallow the `break`).
 */
#define STORE(X,C) {        \
    long f = fill;          \
    while (f-- > 0) {       \
        putchar(' ');       \
    }                       \
    fill C;                 \
    fputs((X), stdout);     \
    break;                  \
}

106 void parse_debug(struct parsenode *tree) {
107         long fill = 0;
108         while (tree) {  
109                 switch (tree->type) {
110                         case PARSE_TYPE_ADD:       STORE("OPERATOR:  ADD    \n", -=0);
111                         case PARSE_TYPE_BAND:      STORE("OPERATOR:  BITAND \n",-=0);
112                         case PARSE_TYPE_BOR:       STORE("OPERATOR:  BITOR  \n",-=0);
113                         case PARSE_TYPE_COMMA:     STORE("OPERATOR:  SEPERATOR\n",-=0);
114                         case PARSE_TYPE_DOT:       STORE("OPERATOR:  DOT\n",-=0);
115                         case PARSE_TYPE_DIVIDE:    STORE("OPERATOR:  DIVIDE\n",-=0);
116                         case PARSE_TYPE_EQUAL:     STORE("OPERATOR:  ASSIGNMENT\n",-=0);
117                         
118                         case PARSE_TYPE_BREAK:     STORE("STATEMENT: BREAK  \n",-=0);
119                         case PARSE_TYPE_CONTINUE:  STORE("STATEMENT: CONTINUE\n",-=0);
120                         case PARSE_TYPE_GOTO:      STORE("STATEMENT: GOTO\n",-=0);
121                         case PARSE_TYPE_RETURN:    STORE("STATEMENT: RETURN\n",-=0);
122                         case PARSE_TYPE_DONE:      STORE("STATEMENT: DONE\n",-=0);
123
124                         case PARSE_TYPE_VOID:      STORE("DECLTYPE:  VOID\n",-=0);
125                         case PARSE_TYPE_STRING:    STORE("DECLTYPE:  STRING\n",-=0);
126                         case PARSE_TYPE_ELIP:      STORE("DECLTYPE:  VALIST\n",-=0);
127                         case PARSE_TYPE_ENTITY:    STORE("DECLTYPE:  ENTITY\n",-=0);
128                         case PARSE_TYPE_FLOAT:     STORE("DECLTYPE:  FLOAT\n",-=0);
129                         case PARSE_TYPE_VECTOR:    STORE("DECLTYPE:  VECTOR\n",-=0);
130                         
131                         case PARSE_TYPE_GT:        STORE("TEST:      GREATER THAN\n",-=0);
132                         case PARSE_TYPE_LT:        STORE("TEST:      LESS THAN\n",-=0);
133                         case PARSE_TYPE_GTEQ:      STORE("TEST:      GREATER THAN OR EQUAL\n",-=0);
134                         case PARSE_TYPE_LTEQ:      STORE("TEST:      LESS THAN OR EQUAL\n",-=0);
135                         case PARSE_TYPE_LNEQ:      STORE("TEST:      NOT EQUAL\n",-=0);
136                         case PARSE_TYPE_EQEQ:      STORE("TEST:      EQUAL-EQUAL\n",-=0);
137                         
138                         case PARSE_TYPE_LBS:       STORE("BLOCK:     BEG\n",+=4);
139                         case PARSE_TYPE_RBS:       STORE("BLOCK:     END\n",-=4);
140                         case PARSE_TYPE_ELSE:      STORE("BLOCK:     ELSE\n",+=0);
141                         case PARSE_TYPE_IF:        STORE("BLOCK:     IF\n",+=0);
142                         
143                         case PARSE_TYPE_LAND:      STORE("LOGICAL:   AND\n",-=0);
144                         case PARSE_TYPE_LNOT:      STORE("LOGICAL:   NOT\n",-=0);
145                         case PARSE_TYPE_LOR:       STORE("LOGICAL:   OR\n",-=0);
146                         
147                         case PARSE_TYPE_LPARTH:    STORE("PARTH:     BEG\n",-=0);
148                         case PARSE_TYPE_RPARTH:    STORE("PARTH:     END\n",-=0);
149                         
150                         case PARSE_TYPE_WHILE:     STORE("LOOP:      WHILE\n",-=0);
151                         case PARSE_TYPE_FOR:       STORE("LOOP:      FOR\n",-=0);
152                         case PARSE_TYPE_DO:        STORE("LOOP:      DO\n",-=0);
153                 }
154                 tree = tree->next;
155         }
156 }
157
/*
 * Performs one parse operation.
 *
 *   X - parse type of the node appended once the operation is finished
 *   C - statements to run right after the first token has been fetched
 *
 * The expansion fetches a token, runs C, discards every following token up
 * to and including the next '\n', appends a node of type X with
 * PARSE_TREE_ADD and finally executes `break`.
 *
 * It is a macro on purpose: if the number of lex_token calls does not match
 * the lexemes that belong to the construct being parsed, the next iteration
 * of the tree builder starts from the wrong state and everything after it
 * fails.  Keeping the sequence in one place avoids that class of bug.
 *
 * Expects `token` and `file` to be in scope, plus whatever PARSE_TREE_ADD
 * itself needs.
 */
#define PARSE_PERFORM(X,C) {                              \
    token = lex_token(file);                              \
    { C }                                                 \
    for (; token != '\n'; token = lex_token(file)) {      \
    }                                                     \
    PARSE_TREE_ADD(X);                                    \
    break;                                                \
}

175 void parse_clear(struct parsenode *tree) {
176         if (!tree) return;
177         struct parsenode *temp = NULL;
178         while (tree != NULL) {
179                 temp = tree;
180                 tree = tree->next;
181                 mem_d (temp);
182         }
183         
184         /* free any potential typedefs */
185         typedef_clear();
186 }
187
/*
 * Returns a printable, NUL-terminated name for a character.
 *
 * ch - the character to describe
 *
 * Space, newline and NUL are returned as the string literals "<space>",
 * "<newline>" and "<null>".  Any other character is returned as a
 * one-character string held in a static buffer: that result stays valid only
 * until the next call with such a character, and the function is therefore
 * not reentrant.  (Returning the address of the `ch` parameter itself would
 * hand out a dangling, unterminated pointer.)
 */
const char *STRING_(char ch) {
    static char single[2];

    switch (ch) {
        case ' ':
            return "<space>";
        case '\n':
            return "<newline>";
        case '\0':
            return "<null>";
        default:
            break;
    }

    single[0] = ch;
    single[1] = '\0';
    return single;
}

/*
 * Fetches the next token and keeps fetching while the token is a space, so
 * that `token` ends up holding the first non-space token.
 *
 * Expects `token` and `file` to be in scope.  Wrapped in do { } while (0) so
 * that `TOKEN_SKIPWHITE();` behaves as a single statement, including when it
 * is the unbraced body of an if/else.
 */
#define TOKEN_SKIPWHITE()                \
    do {                                 \
        token = lex_token(file);         \
        while (token == ' ') {           \
            token = lex_token(file);     \
        }                                \
    } while (0)

205 /*
206  * Generates a parse tree out of the lexees generated by the lexer.  This
207  * is where the tree is built.  This is where valid check is performed.
208  */
209 int parse_tree(struct lex_file *file) {
210         struct parsenode *parsetree = NULL;
211         struct parsenode *parseroot = NULL;
212         
213         /*
214          * Allocate memory for our parse tree:
215          * the parse tree is just a singly linked list which will contain
216          * all the data for code generation.
217          */
218         if (!parseroot) {
219                 parseroot = mem_a(sizeof(struct parsenode));
220                 if (!parseroot)
221                         return error(ERROR_INTERNAL, "Ran out of memory", " ");
222                 parsetree       = parseroot;
223                 parsetree->type = -1; /* not a valid type -- root element */
224         }
225         
226         int     token = 0;
227         long    line  = 0;
228         while ((token = lex_token(file)) != ERROR_LEX      && \
229                     token                    != ERROR_COMPILER && \
230                     token                    != ERROR_INTERNAL && \
231                     token                    != ERROR_PARSE    && \
232                     token                    != ERROR_PREPRO   && file->length >= 0) {
233                 line = file->line;
234                 switch (token) {
235                         case TOKEN_TYPEDEF: {
236                                 char *f; /* from */
237                                 char *t; /* to   */
238                                 
239                                 token = lex_token(file); 
240                                 token = lex_token(file); f = util_strdup(file->lastok);
241                                 token = lex_token(file); 
242                                 token = lex_token(file); t = util_strdup(file->lastok);
243                                 
244                                 typedef_add(f, t);
245                                 mem_d(f);
246                                 mem_d(t);
247                                 
248                                 token = lex_token(file);
249                                 if (token == ' ')
250                                         token = lex_token(file);
251                                         
252                                 if (token != ';')
253                                         error(ERROR_PARSE, "%s:%d Expected a `;` at end of typedef statement\n", file->name, file->line);
254                                         
255                                 token = lex_token(file);
256                                 break;
257                         }
258                         
259                         case TOKEN_VOID:      PARSE_TREE_ADD(PARSE_TYPE_VOID);   goto fall;
260                         case TOKEN_STRING:    PARSE_TREE_ADD(PARSE_TYPE_STRING); goto fall;
261                         case TOKEN_VECTOR:    PARSE_TREE_ADD(PARSE_TYPE_VECTOR); goto fall;
262                         case TOKEN_ENTITY:    PARSE_TREE_ADD(PARSE_TYPE_ENTITY); goto fall;
263                         case TOKEN_FLOAT:     PARSE_TREE_ADD(PARSE_TYPE_FLOAT);  goto fall;
264                         {
265                         fall:;
266                                 char *name = NULL;
267                                 int   type = token; /* story copy */
268                                 
269                                 /* skip over space */
270                                 token = lex_token(file);
271                                 if (token == ' ')
272                                         token = lex_token(file);
273                                 
274                                 /* save name */
275                                 name = util_strdup(file->lastok);
276                                 
277                                 /* skip spaces */
278                                 token = lex_token(file);
279                                 if (token == ' ')
280                                         token = lex_token(file);
281                                         
282                                 if (token == ';') {
283                                         /*
284                                          * Definitions go to the defs table, they don't have
285                                          * any sort of data with them yet.
286                                          */
287                                 } else if (token == '=') {
288                                         token = lex_token(file);
289                                         if (token == ' ')
290                                                 token = lex_token(file);
291                                         
292                                         /* strings are in file->lastok */
293                                         switch (type) {
294                                                 case TOKEN_VOID:
295                                                         return error(ERROR_PARSE, "%s:%d Cannot assign value to type void\n", file->name, file->line);
296                                                 case TOKEN_STRING:
297                                                         if (*file->lastok != '"')
298                                                                 error(ERROR_PARSE, "%s:%d Expected a '\"' (quote) for string constant\n", file->name, file->line);
299                                                         break;
300                                                 case TOKEN_VECTOR: {
301                                                         float compile_calc_x = 0;
302                                                         float compile_calc_y = 0;
303                                                         float compile_calc_z = 0;
304                                                         int   compile_calc_d = 0; /* dot?        */
305                                                         int   compile_calc_s = 0; /* sign (-, +) */
306                                                         
307                                                         char  compile_data[1024];
308                                                         char *compile_eval = compile_data;
309                                                         
310                                                         if (token != '{')
311                                                                 error(ERROR_PARSE, "%s:%d Expected initializer list `{`,`}` for vector constant\n", file->name, file->line);    
312                                                         
313                                                         /*
314                                                          * This parses a single vector element: x,y & z.  This will handle all the
315                                                          * complicated mechanics of a vector, and can be extended as well.  This
316                                                          * is a rather large macro, and is #undef after it's use below.
317                                                          */
318                                                         #define PARSE_VEC_ELEMENT(NAME, BIT)                                                                                                                                   \
319                                                             token = lex_token(file);                                                                                                                                           \
320                                                             if (token == ' ') {                                                                                                                                                \
321                                                                 token = lex_token(file);                                                                                                                                       \
322                                                             }                                                                                                                                                                  \
323                                                             if (token == '.') {                                                                                                                                                \
324                                                                 compile_calc_d = 1;                                                                                                                                            \
325                                                             }                                                                                                                                                                  \
326                                                             if (!isdigit(token) && !compile_calc_d && token != '+' && token != '-')  {                                                                                         \
327                                                                 error(ERROR_PARSE,"%s:%d Invalid constant initializer element %c for vector, must be numeric\n", file->name, file->line, NAME);                                \
328                                                             }                                                                                                                                                                  \
329                                                             if (token == '+') {                                                                                                                                                \
330                                                                 compile_calc_s = '+';                                                                                                                                          \
331                                                             }                                                                                                                                                                  \
332                                                             if (token == '-' && !compile_calc_s) {                                                                                                                             \
333                                                                 compile_calc_s = '-';                                                                                                                                          \
334                                                             }                                                                                                                                                                  \
335                                                             while (isdigit(token) || token == '.' || token == '+' || token == '-') {                                                                                           \
336                                                                 *compile_eval++ = token;                                                                                                                                       \
337                                                                 token           = lex_token(file);                                                                                                                             \
338                                                                 if (token == '.' && compile_calc_d) {                                                                                                                          \
339                                                                     error(ERROR_PARSE, "%s:%d Invalid constant initializer element %c for vector, must be numeric.\n", file->name, file->line, NAME);                          \
340                                                                     token = lex_token(file);                                                                                                                                   \
341                                                                 }                                                                                                                                                              \
342                                                                 if ((token == '-' || token == '+') && compile_calc_s) {                                                                                                        \
343                                                                     error(ERROR_PARSE, "%s:%d Invalid constant initializer sign for vector element %c\n", file->name, file->line, NAME);                                       \
344                                                                     token = lex_token(file);                                                                                                                                   \
345                                                                 } else if (token == '.' && !compile_calc_d) {                                                                                                                  \
346                                                                     compile_calc_d = 1;                                                                                                                                        \
347                                                                 } else if (token == '-' && !compile_calc_s) {                                                                                                                  \
348                                                                     compile_calc_s = '-';                                                                                                                                      \
349                                                                 } else if (token == '+' && !compile_calc_s) {                                                                                                                  \
350                                                                     compile_calc_s = '+';                                                                                                                                      \
351                                                                 }                                                                                                                                                              \
352                                                             }                                                                                                                                                                  \
353                                                             if (token == ' ') {                                                                                                                                                \
354                                                                 token = lex_token(file);                                                                                                                                       \
355                                                             }                                                                                                                                                                  \
356                                                             if (NAME != 'z') {                                                                                                                                                 \
357                                                                 if (token != ',' && token != ' ')  {                                                                                                                           \
358                                                                     error(ERROR_PARSE, "%s:%d invalid constant initializer element %c for vector (missing spaces, or comma delimited list?)\n", NAME, file->name, file->line); \
359                                                                 }                                                                                                                                                              \
360                                                             } else if (token != '}') {                                                                                                                                         \
361                                                                 error(ERROR_PARSE, "%s:%d Expected `}` on end of constant initialization for vector\n", file->name, file->line);                                               \
362                                                             }                                                                                                                                                                  \
363                                                             compile_calc_##BIT = atof(compile_data);                                                                                                                           \
364                                                             compile_calc_d = 0;                                                                                                                                                \
365                                                             compile_calc_s = 0;                                                                                                                                                \
366                                                             compile_eval   = &compile_data[0];                                                                                                                                 \
367                                                             memset(compile_data, 0, sizeof(compile_data))
368                                                         
369                                                         /*
370                                                          * Parse all elements using the macro above.
371                                                          * We must undef the macro afterwards.
372                                                          */
373                                                         PARSE_VEC_ELEMENT('x', x);
374                                                         PARSE_VEC_ELEMENT('y', y);
375                                                         PARSE_VEC_ELEMENT('z', z);
376                                                         #undef PARSE_VEC_ELEMENT
377                                                         
378                                                         /*
379                                                          * Check for the semi-colon... This is insane
380                                                          * the amount of parsing here that is.
381                                                          */
382                                                         token = lex_token(file);
383                                                         if (token == ' ')
384                                                                 token = lex_token(file);
385                                                         if (token != ';')
386                                                                 error(ERROR_PARSE, "%s:%d Expected `;` on end of constant initialization for vector\n", file->name, file->line);
387                                                                 
388                                                         printf("VEC_X: %f\n", compile_calc_x);
389                                                         printf("VEC_Y: %f\n", compile_calc_y);
390                                                         printf("VEC_Z: %f\n", compile_calc_z);
391                                                         break;
392                                                 }
393                                                         
394                                                 case TOKEN_ENTITY:
395                                                 case TOKEN_FLOAT:
396                                                         
397                                                         if (!isdigit(token))
398                                                                 error(ERROR_PARSE, "%s:%d Expected numeric constant for float constant\n");
399                                                         break;
400                                         }
401                                 } else if (token == '(') {
402                                         printf("FUNCTION ??\n");
403                                 }
404                                 mem_d(name);
405                         }
406                                 
407                         /*
408                          * From here down is all language punctuation:  There is no
409                          * need to actual create tokens from these because they're already
410                          * tokenized as these individual tokens (which are in a special area
411                          * of the ascii table which doesn't conflict with our other tokens
412                          * which are higer than the ascii table.)
413                          */
414                         case '#':
415                                 token = lex_token(file); /* skip '#' */
416                                 if (token == ' ')
417                                         token = lex_token(file);
418                                 /*
419                                  * If we make it here we found a directive, the supported
420                                  * directives so far are #include.
421                                  */
422                                 if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
423                                         /*
424                                          * We only suport include " ", not <> like in C (why?)
425                                          * because the latter is silly.
426                                          */
427                                         while (*file->lastok != '"' && token != '\n')
428                                                 token = lex_token(file);
429                                         if (token == '\n')
430                                                 return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line-1);
431                                 }
432                         
433                                 /* skip all tokens to end of directive */
434                                 while (token != '\n')
435                                         token = lex_token(file);
436                                 break;
437                                 
438                         case LEX_IDENT:
439                                 token = lex_token(file);
440                                 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
441                                 break;
442                 }
443         }
444         parse_debug(parseroot);
445         lex_reset(file);
446         parse_clear(parseroot);
447         return 1;
448 }