5 * Permission is hereby granted, free of charge, to any person obtaining a copy of
6 * this software and associated documentation files (the "Software"), to deal in
7 * the Software without restriction, including without limitation the rights to
8 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is furnished to do
10 * so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in all
13 * copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * These are not lexical tokens: These are parse tree types. Most people
31 * perform tokenizing on language punctuation which is wrong. That stuff
32 * is technically already tokenized, it just needs to be parsed into a tree
34 #define PARSE_TYPE_DO 0
35 #define PARSE_TYPE_ELSE 1
36 #define PARSE_TYPE_IF 2
37 #define PARSE_TYPE_WHILE 3
38 #define PARSE_TYPE_BREAK 4
39 #define PARSE_TYPE_CONTINUE 5
40 #define PARSE_TYPE_RETURN 6
41 #define PARSE_TYPE_GOTO 7
42 #define PARSE_TYPE_FOR 8
43 #define PARSE_TYPE_VOID 9
44 #define PARSE_TYPE_STRING 10
45 #define PARSE_TYPE_FLOAT 11
46 #define PARSE_TYPE_VECTOR 12
47 #define PARSE_TYPE_ENTITY 13
48 #define PARSE_TYPE_LAND 14
49 #define PARSE_TYPE_LOR 15
50 #define PARSE_TYPE_LTEQ 16
51 #define PARSE_TYPE_GTEQ 17
52 #define PARSE_TYPE_EQEQ 18
53 #define PARSE_TYPE_LNEQ 19
54 #define PARSE_TYPE_COMMA 20
55 #define PARSE_TYPE_LNOT 21
56 #define PARSE_TYPE_STAR 22
57 #define PARSE_TYPE_DIVIDE 23
58 #define PARSE_TYPE_LPARTH 24
59 #define PARSE_TYPE_RPARTH 25
60 #define PARSE_TYPE_MINUS 26
61 #define PARSE_TYPE_ADD 27
62 #define PARSE_TYPE_EQUAL 28
63 #define PARSE_TYPE_LBS 29
64 #define PARSE_TYPE_RBS 30
65 #define PARSE_TYPE_ELIP 31
66 #define PARSE_TYPE_DOT 32
67 #define PARSE_TYPE_LT 33
68 #define PARSE_TYPE_GT 34
69 #define PARSE_TYPE_BAND 35
70 #define PARSE_TYPE_BOR 36
71 #define PARSE_TYPE_DONE 37
72 #define PARSE_TYPE_IDENT 38
75 * Adds a parse type to the parse tree, this is where all the hard
76 * work actually begins.
78 #define PARSE_TREE_ADD(X) \
80 parsetree->next = mem_a(sizeof(struct parsenode)); \
81 parsetree->next->next = NULL; \
82 parsetree->next->type = (X); \
83 parsetree = parsetree->next; \
87 * This is all the punctuation handled in the parser, these don't
88 * need tokens, they're already tokens.
91 "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
92 "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
93 "." , "<" , ">" , "&" , "|" ,
96 #define STORE(X,C) { \
106 void parse_debug(struct parsenode *tree) {
109 switch (tree->type) {
110 case PARSE_TYPE_ADD: STORE("OPERATOR: ADD \n", -=0);
111 case PARSE_TYPE_BAND: STORE("OPERATOR: BITAND \n",-=0);
112 case PARSE_TYPE_BOR: STORE("OPERATOR: BITOR \n",-=0);
113 case PARSE_TYPE_COMMA: STORE("OPERATOR: SEPERATOR\n",-=0);
114 case PARSE_TYPE_DOT: STORE("OPERATOR: DOT\n",-=0);
115 case PARSE_TYPE_DIVIDE: STORE("OPERATOR: DIVIDE\n",-=0);
116 case PARSE_TYPE_EQUAL: STORE("OPERATOR: ASSIGNMENT\n",-=0);
118 case PARSE_TYPE_BREAK: STORE("STATEMENT: BREAK \n",-=0);
119 case PARSE_TYPE_CONTINUE: STORE("STATEMENT: CONTINUE\n",-=0);
120 case PARSE_TYPE_GOTO: STORE("STATEMENT: GOTO\n",-=0);
121 case PARSE_TYPE_RETURN: STORE("STATEMENT: RETURN\n",-=0);
122 case PARSE_TYPE_DONE: STORE("STATEMENT: DONE\n",-=0);
124 case PARSE_TYPE_VOID: STORE("DECLTYPE: VOID\n",-=0);
125 case PARSE_TYPE_STRING: STORE("DECLTYPE: STRING\n",-=0);
126 case PARSE_TYPE_ELIP: STORE("DECLTYPE: VALIST\n",-=0);
127 case PARSE_TYPE_ENTITY: STORE("DECLTYPE: ENTITY\n",-=0);
128 case PARSE_TYPE_FLOAT: STORE("DECLTYPE: FLOAT\n",-=0);
129 case PARSE_TYPE_VECTOR: STORE("DECLTYPE: VECTOR\n",-=0);
131 case PARSE_TYPE_GT: STORE("TEST: GREATER THAN\n",-=0);
132 case PARSE_TYPE_LT: STORE("TEST: LESS THAN\n",-=0);
133 case PARSE_TYPE_GTEQ: STORE("TEST: GREATER THAN OR EQUAL\n",-=0);
134 case PARSE_TYPE_LTEQ: STORE("TEST: LESS THAN OR EQUAL\n",-=0);
135 case PARSE_TYPE_LNEQ: STORE("TEST: NOT EQUAL\n",-=0);
136 case PARSE_TYPE_EQEQ: STORE("TEST: EQUAL-EQUAL\n",-=0);
138 case PARSE_TYPE_LBS: STORE("BLOCK: BEG\n",+=4);
139 case PARSE_TYPE_RBS: STORE("BLOCK: END\n",-=4);
140 case PARSE_TYPE_ELSE: STORE("BLOCK: ELSE\n",+=0);
141 case PARSE_TYPE_IF: STORE("BLOCK: IF\n",+=0);
143 case PARSE_TYPE_LAND: STORE("LOGICAL: AND\n",-=0);
144 case PARSE_TYPE_LNOT: STORE("LOGICAL: NOT\n",-=0);
145 case PARSE_TYPE_LOR: STORE("LOGICAL: OR\n",-=0);
147 case PARSE_TYPE_LPARTH: STORE("PARTH: BEG\n",-=0);
148 case PARSE_TYPE_RPARTH: STORE("PARTH: END\n",-=0);
150 case PARSE_TYPE_WHILE: STORE("LOOP: WHILE\n",-=0);
151 case PARSE_TYPE_FOR: STORE("LOOP: FOR\n",-=0);
152 case PARSE_TYPE_DO: STORE("LOOP: DO\n",-=0);
159 * Performs a parse operation: This is a macro to prevent bugs, if the
160 * calls to lex_token aren't exactly enough to feed to the end of the
161 * actual lexees for the current thing that is being parsed, the state
162 * of the next iteration in the creation of the parse tree will be wrong
163 * and everything will fail.
165 #define PARSE_PERFORM(X,C) { \
166 token = lex_token(file); \
168 while (token != '\n') { \
169 token = lex_token(file); \
175 void parse_clear(struct parsenode *tree) {
177 struct parsenode *temp = NULL;
178 while (tree != NULL) {
184 /* free any potential typedefs */
188 const char *STRING_(char ch) {
199 #define TOKEN_SKIPWHITE() \
200 token = lex_token(file); \
201 while (token == ' ') { \
202 token = lex_token(file); \
206 * Generates a parse tree out of the lexees generated by the lexer. This
207 * is where the tree is built. This is where validity checking is performed.
209 int parse_tree(struct lex_file *file) {
210 struct parsenode *parsetree = NULL;
211 struct parsenode *parseroot = NULL;
214 * Allocate memory for our parse tree:
215 * the parse tree is just a singly linked list which will contain
216 * all the data for code generation.
219 parseroot = mem_a(sizeof(struct parsenode));
221 return error(ERROR_INTERNAL, "Ran out of memory", " ");
222 parsetree = parseroot;
223 parsetree->type = -1; /* not a valid type -- root element */
227 while ((token = lex_token(file)) != ERROR_LEX && \
228 token != ERROR_COMPILER && \
229 token != ERROR_INTERNAL && \
230 token != ERROR_PARSE && \
231 token != ERROR_PREPRO && file->length >= 0) {
236 error(ERROR_PARSE, "%s:%d Expected `(` after `if` for if statement\n", file->name, file->line);
237 PARSE_TREE_ADD(PARSE_TYPE_IF);
238 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
241 token = lex_token(file);
242 PARSE_TREE_ADD(PARSE_TYPE_ELSE);
245 while ((token == ' ' || token == '\n') && file->length >= 0)
246 token = lex_token(file);
247 PARSE_TREE_ADD(PARSE_TYPE_FOR);
251 * This is a quick and easy way to do typedefs at parse time:
252 * all the power is in typedef_add(), in typedef.c. We handle
253 * the tokens accordingly here.
255 case TOKEN_TYPEDEF: {
258 token = lex_token(file);
259 token = lex_token(file); f = util_strdup(file->lastok);
260 token = lex_token(file);
261 token = lex_token(file); t = util_strdup(file->lastok);
265 printf("TYPEDEF %s as %s\n", f, t);
270 //while (token != '\n')
271 token = lex_token(file);
273 error(ERROR_PARSE, "%s:%d Expected `;` on typedef\n", file->name, file->line);
275 token = lex_token(file);
276 printf("TOK: %c\n", token);
281 * Returns are addable as-is, statement checking is during
282 * the actual parse tree check.
285 token = lex_token(file);
286 PARSE_TREE_ADD(PARSE_TYPE_RETURN);
289 PARSE_TREE_ADD(PARSE_TYPE_CONTINUE);
292 case TOKEN_DO: PARSE_PERFORM(PARSE_TYPE_DO, {});
293 case TOKEN_WHILE: PARSE_PERFORM(PARSE_TYPE_WHILE, {});
294 case TOKEN_BREAK: PARSE_PERFORM(PARSE_TYPE_BREAK, {});
295 case TOKEN_GOTO: PARSE_PERFORM(PARSE_TYPE_GOTO, {});
296 case TOKEN_VOID: PARSE_PERFORM(PARSE_TYPE_VOID, {});
298 case TOKEN_STRING: PARSE_TREE_ADD(PARSE_TYPE_STRING); goto fall;
299 case TOKEN_VECTOR: PARSE_TREE_ADD(PARSE_TYPE_VECTOR); goto fall;
300 case TOKEN_ENTITY: PARSE_TREE_ADD(PARSE_TYPE_ENTITY); goto fall;
301 case TOKEN_FLOAT: PARSE_TREE_ADD(PARSE_TYPE_FLOAT); goto fall;
302 /* fall into this for all types */
307 name = util_strdup(file->lastok);
308 token = lex_token (file);
310 /* is it NOT a definition? */
313 token = lex_token(file);
315 /* it's a function? */
318 * Now I essentially have to do a ton of parsing for
319 * function definition.
321 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
322 token = lex_token(file);
323 while (token != '\n' && token != ')') {
325 case TOKEN_VOID: PARSE_TREE_ADD(PARSE_TYPE_VOID); break;
326 case TOKEN_STRING: PARSE_TREE_ADD(PARSE_TYPE_STRING); break;
327 case TOKEN_ENTITY: PARSE_TREE_ADD(PARSE_TYPE_ENTITY); break;
328 case TOKEN_FLOAT: PARSE_TREE_ADD(PARSE_TYPE_FLOAT); break;
330 * TODO: Need to parse function pointers: I have no clue how
331 * I'm actually going to pull that off, it's going to be hard
332 * since you can have a function pointer-pointer-pointer ....
336 /* just a definition */
339 * I like to put my { on the same line as the ) for
340 * functions, ifs, elses, so we must support that!
342 PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
343 token = lex_token(file);
344 token = lex_token(file);
346 PARSE_TREE_ADD(PARSE_TYPE_LBS);
348 else if (token == '\n')
349 error(ERROR_COMPILER, "%s:%d Expecting `;` after function definition %s\n", file->name, file->line, name);
351 } else if (token == '=') {
352 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
354 error(ERROR_COMPILER, "%s:%d Invalid decltype: expected `(` [function], or `=` [constant], or `;` [definition] for %s\n", file->name, file->line, name);
358 printf("FOUND DEFINITION\n");
364 * From here down is all language punctuation: There is no
365 * need to actually create tokens from these because they're already
366 * tokenized as these individual tokens (which are in a special area
367 * of the ascii table which doesn't conflict with our other tokens
368 * which are higher than the ascii table.)
371 token = lex_token(file); /* skip '#' */
373 * If we make it here we found a directive, the supported
374 * directives so far are #include.
376 if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
378 * We only support include " ", not <> like in C (why?)
379 * because the latter is silly.
381 while (*file->lastok != '"' && token != '\n')
382 token = lex_token(file);
384 /* we handle lexing at that point now */
386 return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line);
389 /* skip all tokens to end of directive */
390 while (token != '\n')
391 token = lex_token(file);
395 PARSE_TREE_ADD(PARSE_TYPE_DOT);
398 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
401 PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
405 token = lex_token(file);
406 if (token == '&') { /* && */
407 token = lex_token(file);
408 PARSE_TREE_ADD(PARSE_TYPE_LAND);
411 PARSE_TREE_ADD(PARSE_TYPE_BAND);
414 token = lex_token(file);
415 if (token == '|') { /* || */
416 token = lex_token(file);
417 PARSE_TREE_ADD(PARSE_TYPE_LOR);
420 PARSE_TREE_ADD(PARSE_TYPE_BOR);
423 token = lex_token(file);
424 if (token == '=') { /* != */
425 token = lex_token(file);
426 PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
429 PARSE_TREE_ADD(PARSE_TYPE_LNOT);
432 token = lex_token(file);
433 if (token == '=') { /* <= */
434 token = lex_token(file);
435 PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
438 PARSE_TREE_ADD(PARSE_TYPE_LT);
441 token = lex_token(file);
442 if (token == '=') { /* >= */
443 token = lex_token(file);
444 PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
447 PARSE_TREE_ADD(PARSE_TYPE_GT);
450 token = lex_token(file);
451 if (token == '=') { /* == */
452 token = lex_token(file);
453 PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
456 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
459 token = lex_token(file);
460 PARSE_TREE_ADD(PARSE_TYPE_DONE);
463 token = lex_token(file);
464 PARSE_TREE_ADD(PARSE_TYPE_MINUS);
467 token = lex_token(file);
468 PARSE_TREE_ADD(PARSE_TYPE_ADD);
471 token = lex_token(file);
472 PARSE_TREE_ADD(PARSE_TYPE_LBS);
475 token = lex_token(file);
476 PARSE_TREE_ADD(PARSE_TYPE_RBS);
480 * TODO: Fix lexer to spit out ( ) as tokens, it seems that
481 * using '(' or ')' in parser doesn't work properly unless
482 * there are spaces before them to allow the lexer to properly
483 * separate identifiers. -- otherwise it eats all of it.
486 token = lex_token(file);
487 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
491 parse_debug(parseroot);
493 parse_clear(parseroot);