5 * Permission is hereby granted, free of charge, to any person obtaining a copy of
6 * this software and associated documentation files (the "Software"), to deal in
7 * the Software without restriction, including without limitation the rights to
8 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is furnished to do
10 * so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in all
13 * copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 * These are not lexical tokens: These are parse tree types. Most people
30 * perform tokenizing on language punctuation which is wrong. That stuff
31 * is technically already tokenized, it just needs to be parsed into a tree
/*
 * Parse tree node type identifiers.  PARSE_TREE_ADD() stores one of
 * these values in the `type` member of each struct parsenode, and
 * parse_debug() switches on it.  The values are consecutive, starting
 * at 0.
 */

/* Keywords for loops, branches and jump statements (0-8). */
33 #define PARSE_TYPE_DO 0
34 #define PARSE_TYPE_ELSE 1
35 #define PARSE_TYPE_IF 2
36 #define PARSE_TYPE_WHILE 3
37 #define PARSE_TYPE_BREAK 4
38 #define PARSE_TYPE_CONTINUE 5
39 #define PARSE_TYPE_RETURN 6
40 #define PARSE_TYPE_GOTO 7
41 #define PARSE_TYPE_FOR 8
/* Declaration type keywords (9-13). */
42 #define PARSE_TYPE_VOID 9
43 #define PARSE_TYPE_STRING 10
44 #define PARSE_TYPE_FLOAT 11
45 #define PARSE_TYPE_VECTOR 12
46 #define PARSE_TYPE_ENTITY 13
/* Operators, comparisons, brackets and other punctuation (14-36). */
47 #define PARSE_TYPE_LAND 14
48 #define PARSE_TYPE_LOR 15
49 #define PARSE_TYPE_LTEQ 16
50 #define PARSE_TYPE_GTEQ 17
51 #define PARSE_TYPE_EQEQ 18
52 #define PARSE_TYPE_LNEQ 19
53 #define PARSE_TYPE_COMMA 20
54 #define PARSE_TYPE_LNOT 21
55 #define PARSE_TYPE_STAR 22
56 #define PARSE_TYPE_DIVIDE 23
57 #define PARSE_TYPE_LPARTH 24
58 #define PARSE_TYPE_RPARTH 25
59 #define PARSE_TYPE_MINUS 26
60 #define PARSE_TYPE_ADD 27
61 #define PARSE_TYPE_EQUAL 28
62 #define PARSE_TYPE_LBS 29
63 #define PARSE_TYPE_RBS 30
64 #define PARSE_TYPE_ELIP 31
65 #define PARSE_TYPE_DOT 32
66 #define PARSE_TYPE_LT 33
67 #define PARSE_TYPE_GT 34
68 #define PARSE_TYPE_BAND 35
69 #define PARSE_TYPE_BOR 36
/*
 * Printed by parse_debug() as "STATEMENT: DONE".
 * NOTE(review): presumably the statement terminator -- confirm against
 * the case label that emits it in parse_tree().
 */
70 #define PARSE_TYPE_DONE 37
/*
 * Identifier node.  This value is also used as the element count of
 * the parse[] flag array, so parse[] has one slot for every type
 * below it (0-37).
 */
71 #define PARSE_TYPE_IDENT 38
73 int parse[PARSE_TYPE_IDENT] = {
74 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
75 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
80 * Adds a parse type to the parse tree, this is where all the hard
81 * work actually begins.
83 #define PARSE_TREE_ADD(X) \
85 parsetree->next = mem_a(sizeof(struct parsenode)); \
86 parsetree->next->next = NULL; \
87 parsetree->next->type = (X); \
88 parsetree = parsetree->next; \
91 #define PARSE_TREE_CHK(X,Y,Z) \
94 error(ERROR_PARSE, "Expected %c for %c\n", Y, Z); \
/*
 * Marks parse type X as seen by setting its slot in the parse[] flag
 * array to 1.
 *
 * X is a PARSE_TYPE_* value used as an index into parse[]; it must be
 * smaller than the array's element count.  The parameter is
 * parenthesized in the expansion, and the body is wrapped in
 * do { } while (0) so the macro behaves as a single statement (for
 * example in an unbraced if/else).
 */
#define PARSE_TREE_PUT(X) do { parse[(X)] = 1; } while (0)
102 * This is all the punctuation handled in the parser, these don't
103 * need tokens, they're already tokens.
106 "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
107 "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
108 "." , "<" , ">" , "&" , "|" ,
116 void parse_debug(struct parsenode *tree) {
118 switch (tree->type) {
119 case PARSE_TYPE_ADD: STORE("OPERATOR: ADD \n");
120 case PARSE_TYPE_BAND: STORE("OPERATOR: BITAND \n");
121 case PARSE_TYPE_BOR: STORE("OPERATOR: BITOR \n");
122 case PARSE_TYPE_COMMA: STORE("OPERATOR: SEPERATOR\n");
123 case PARSE_TYPE_DOT: STORE("OPERATOR: DOT\n");
124 case PARSE_TYPE_DIVIDE: STORE("OPERATOR: DIVIDE\n");
125 case PARSE_TYPE_EQUAL: STORE("OPERATOR: ASSIGNMENT\n");
127 case PARSE_TYPE_BREAK: STORE("STATEMENT: BREAK \n");
128 case PARSE_TYPE_CONTINUE: STORE("STATEMENT: CONTINUE\n");
129 case PARSE_TYPE_GOTO: STORE("STATEMENT: GOTO\n");
130 case PARSE_TYPE_RETURN: STORE("STATEMENT: RETURN\n");
131 case PARSE_TYPE_DONE: STORE("STATEMENT: DONE\n");
133 case PARSE_TYPE_VOID: STORE("DECLTYPE: VOID\n");
134 case PARSE_TYPE_STRING: STORE("DECLTYPE: STRING\n");
135 case PARSE_TYPE_ELIP: STORE("DECLTYPE: VALIST\n");
136 case PARSE_TYPE_ENTITY: STORE("DECLTYPE: ENTITY\n");
137 case PARSE_TYPE_FLOAT: STORE("DECLTYPE: FLOAT\n");
138 case PARSE_TYPE_VECTOR: STORE("DECLTYPE: VECTOR\n");
140 case PARSE_TYPE_GT: STORE("TEST: GREATER THAN\n");
141 case PARSE_TYPE_LT: STORE("TEST: LESS THAN\n");
142 case PARSE_TYPE_GTEQ: STORE("TEST: GREATER THAN OR EQUAL\n");
143 case PARSE_TYPE_LTEQ: STORE("TEST: LESS THAN OR EQUAL\n");
144 case PARSE_TYPE_LNEQ: STORE("TEST: NOT EQUAL\n");
145 case PARSE_TYPE_EQEQ: STORE("TEST: EQUAL-EQUAL\n");
147 case PARSE_TYPE_LBS: STORE("BLOCK: BEG\n");
148 case PARSE_TYPE_RBS: STORE("BLOCK: END\n");
149 case PARSE_TYPE_ELSE: STORE("BLOCK: ELSE\n");
150 case PARSE_TYPE_IF: STORE("BLOCK: IF\n");
152 case PARSE_TYPE_LAND: STORE("LOGICAL: AND\n");
153 case PARSE_TYPE_LNOT: STORE("LOGICAL: NOT\n");
154 case PARSE_TYPE_LOR: STORE("LOGICAL: OR\n");
156 case PARSE_TYPE_LPARTH: STORE("PARTH: BEG\n");
157 case PARSE_TYPE_RPARTH: STORE("PARTH: END\n");
159 case PARSE_TYPE_WHILE: STORE("LOOP: WHILE\n");
160 case PARSE_TYPE_FOR: STORE("LOOP: FOR\n");
161 case PARSE_TYPE_DO: STORE("LOOP: DO\n");
163 //case PARSE_TYPE_IDENT: STORE("IDENT: ???\n");
170 * Performs a parse operation: This is a macro to prevent bugs, if the
171 * calls to lex_token aren't exactly enough to feed to the end of the
172 * actual lexees for the current thing that is being parsed, the state
173 * of the next iteration in the creation of the parse tree will be wrong
174 * and everything will fail.
176 #define PARSE_PERFORM(X,C) { \
177 token = lex_token(file); \
179 while (token != '\n') { \
180 token = lex_token(file); \
186 void parse_clear(struct parsenode *tree) {
188 struct parsenode *temp = NULL;
189 while (tree != NULL) {
195 /* free any potential typedefs */
200 * Generates a parse tree out of the lexees generated by the lexer. This
201 * is where the tree is built. This is also where validity checking is performed.
203 int parse_tree(struct lex_file *file) {
204 struct parsenode *parsetree = NULL;
205 struct parsenode *parseroot = NULL;
208 * Allocate memory for our parse tree:
209 * the parse tree is just a singly linked list which will contain
210 * all the data for code generation.
213 parseroot = mem_a(sizeof(struct parsenode));
215 return error(ERROR_INTERNAL, "Ran out of memory", " ");
216 parsetree = parseroot;
217 parsetree->type = -1; /* not a valid type -- root element */
221 while ((token = lex_token(file)) != ERROR_LEX && \
222 token != ERROR_COMPILER && \
223 token != ERROR_INTERNAL && \
224 token != ERROR_PARSE && \
225 token != ERROR_PREPRO && file->length >= 0) {
228 token = lex_token(file);
230 error(ERROR_PARSE, "Expected `(` on if statement:\n");
231 PARSE_TREE_ADD(PARSE_TYPE_IF);
232 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
233 PARSE_TREE_CHK(PARSE_TYPE_LPARTH, ')', '(');
234 PARSE_TREE_PUT(PARSE_TYPE_LPARTH);
237 token = lex_token(file);
238 PARSE_TREE_ADD(PARSE_TYPE_ELSE);
241 //token = lex_token(file);
242 while ((token == ' ' || token == '\n') && file->length >= 0)
243 token = lex_token(file);
244 PARSE_TREE_ADD(PARSE_TYPE_FOR);
248 * This is a quick and easy way to do typedefs at parse time
249 * all power is in typedef_add(), in typedef.c. We handle
250 * the tokens accordingly here.
252 case TOKEN_TYPEDEF: {
255 token = lex_token(file);
256 token = lex_token(file); f = util_strdup(file->lastok);
257 token = lex_token(file);
258 token = lex_token(file); t = util_strdup(file->lastok);
265 while (token != '\n')
266 token = lex_token(file);
271 * Returns are addable as-is, statement checking is during
272 * the actual parse tree check.
275 token = lex_token(file);
276 PARSE_TREE_ADD(PARSE_TYPE_RETURN);
279 PARSE_TREE_ADD(PARSE_TYPE_CONTINUE);
282 case TOKEN_DO: PARSE_PERFORM(PARSE_TYPE_DO, {});
283 case TOKEN_WHILE: PARSE_PERFORM(PARSE_TYPE_WHILE, {});
284 case TOKEN_BREAK: PARSE_PERFORM(PARSE_TYPE_BREAK, {});
285 case TOKEN_GOTO: PARSE_PERFORM(PARSE_TYPE_GOTO, {});
286 case TOKEN_VOID: PARSE_PERFORM(PARSE_TYPE_VOID, {});
287 case TOKEN_STRING: PARSE_PERFORM(PARSE_TYPE_STRING, {});
288 case TOKEN_FLOAT: PARSE_PERFORM(PARSE_TYPE_FLOAT, {});
289 case TOKEN_VECTOR: PARSE_PERFORM(PARSE_TYPE_VECTOR, {});
290 case TOKEN_ENTITY: PARSE_PERFORM(PARSE_TYPE_ENTITY, {});
293 * From here down is all language punctuation: There is no
294 * need to actually create tokens from these because they're already
295 * tokenized as these individual tokens (which are in a special area
296 * of the ASCII table which doesn't conflict with our other tokens
297 * which are higher than the ASCII table.)
301 * Skip the preprocessor for now: We'll implement our own
302 * eventually. For now we need to make sure directives are
303 * not accidentally tokenized.
305 token = lex_token(file);
306 token = lex_token(file);
308 /* skip all tokens to end of directive */
309 while (token != '\n')
310 token = lex_token(file);
314 //token = lex_token(file);
315 PARSE_TREE_ADD(PARSE_TYPE_DOT);
318 //token = lex_token(file);
319 PARSE_TREE_PUT(PARSE_TYPE_LPARTH);
320 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
323 //token = lex_token(file);
324 parse[PARSE_TYPE_LPARTH] = 0;
325 PARSE_TREE_PUT(PARSE_TYPE_RPARTH);
326 PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
330 token = lex_token(file);
331 if (token == '&') { /* && */
332 token = lex_token(file);
333 PARSE_TREE_ADD(PARSE_TYPE_LAND);
336 PARSE_TREE_ADD(PARSE_TYPE_BAND);
339 token = lex_token(file);
340 if (token == '|') { /* || */
341 token = lex_token(file);
342 PARSE_TREE_ADD(PARSE_TYPE_LOR);
345 PARSE_TREE_ADD(PARSE_TYPE_BOR);
348 token = lex_token(file);
349 if (token == '=') { /* != */
350 token = lex_token(file);
351 PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
354 PARSE_TREE_ADD(PARSE_TYPE_LNOT);
357 token = lex_token(file);
358 if (token == '=') { /* <= */
359 token = lex_token(file);
360 PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
363 PARSE_TREE_ADD(PARSE_TYPE_LT);
366 token = lex_token(file);
367 if (token == '=') { /* >= */
368 token = lex_token(file);
369 PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
372 PARSE_TREE_ADD(PARSE_TYPE_GT);
375 token = lex_token(file);
376 if (token == '=') { /* == */
377 token = lex_token(file);
378 PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
381 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
384 token = lex_token(file);
385 PARSE_TREE_ADD(PARSE_TYPE_DONE);
388 token = lex_token(file);
389 PARSE_TREE_ADD(PARSE_TYPE_MINUS);
392 token = lex_token(file);
393 PARSE_TREE_ADD(PARSE_TYPE_ADD);
396 token = lex_token(file);
397 PARSE_TREE_ADD(PARSE_TYPE_LBS);
400 token = lex_token(file);
401 PARSE_TREE_ADD(PARSE_TYPE_RBS);
405 * TODO: Fix lexer to spit out ( ) as tokens, it seems that
406 * using '(' or ')' in parser doesn't work properly unless
407 * there are spaces before them to allow the lexer to properly
408 * separate identifiers. -- otherwise it eats all of it.
411 token = lex_token(file);
412 PARSE_TREE_ADD(PARSE_TYPE_IDENT);
416 parse_debug(parseroot);
418 parse_clear(parseroot);