5 * Permission is hereby granted, free of charge, to any person obtaining a copy of
6 * this software and associated documentation files (the "Software"), to deal in
7 * the Software without restriction, including without limitation the rights to
8 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is furnished to do
10 * so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in all
13 * copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 * These are not lexical tokens; they are parse tree types. Most people
29 * tokenize language punctuation, which is unnecessary: that input is
30 * technically already tokenized, it just needs to be parsed into a tree.
/*
 * Parse tree node types, numbered consecutively from 0 to 41.
 * NOTE(review): parse_debug() compares a node type against a literal 0,
 * which is the value of PARSE_TYPE_DO.
 */
/* Statement and control-flow keywords. */
32 #define PARSE_TYPE_DO 0
33 #define PARSE_TYPE_ELSE 1
34 #define PARSE_TYPE_IF 2
35 #define PARSE_TYPE_WHILE 3
36 #define PARSE_TYPE_BREAK 4
37 #define PARSE_TYPE_CONTINUE 5
38 #define PARSE_TYPE_RETURN 6
39 #define PARSE_TYPE_GOTO 7
40 #define PARSE_TYPE_FOR 8 // extension
/* Declaration type keywords. */
41 #define PARSE_TYPE_INT 9 // extension
42 #define PARSE_TYPE_BOOL 10 // extension
43 #define PARSE_TYPE_VOID 11
44 #define PARSE_TYPE_STRING 12
45 #define PARSE_TYPE_FLOAT 13
46 #define PARSE_TYPE_VECTOR 14
47 #define PARSE_TYPE_ENTITY 15
/* Operators and punctuation. */
48 #define PARSE_TYPE_LAND 16
49 #define PARSE_TYPE_LOR 17
50 #define PARSE_TYPE_LTEQ 18
51 #define PARSE_TYPE_GTEQ 19
52 #define PARSE_TYPE_EQEQ 20
53 #define PARSE_TYPE_LNEQ 21
54 #define PARSE_TYPE_COMMA 22
55 #define PARSE_TYPE_LNOT 23
56 #define PARSE_TYPE_STAR 24
57 #define PARSE_TYPE_DIVIDE 25
58 #define PARSE_TYPE_LPARTH 26
59 #define PARSE_TYPE_RPARTH 27
60 #define PARSE_TYPE_MINUS 28
61 #define PARSE_TYPE_ADD 29
62 #define PARSE_TYPE_EQUAL 30
63 #define PARSE_TYPE_LSS 31 // left subscript
64 #define PARSE_TYPE_RSS 32
65 #define PARSE_TYPE_LBS 33 // left bracket scope
66 #define PARSE_TYPE_RBS 34 // right bracket scope
67 #define PARSE_TYPE_ELIP 35 // ...
68 #define PARSE_TYPE_DOT 36
69 #define PARSE_TYPE_LT 37
70 #define PARSE_TYPE_GT 38
71 #define PARSE_TYPE_BAND 39
72 #define PARSE_TYPE_BOR 40
/* Statement terminator. */
73 #define PARSE_TYPE_DONE 41 // finished statement
76 * Adds a parse type to the parse tree; this is where all the hard
77 * work actually begins.
/*
 * PARSE_TREE_ADD(X): append a new node of type X after the current node.
 *
 * Expands in place and uses a `parsetree` pointer that must be in scope at
 * the expansion site. The new node is allocated with mem_a(), its `next` is
 * set to NULL, its `type` is set to X, and `parsetree` is advanced to it.
 *
 * NOTE(review): the pointer returned by mem_a() is dereferenced without a
 * NULL check in this macro -- confirm whether mem_a() can return NULL.
 */
79 #define PARSE_TREE_ADD(X) \
81 parsetree->next = mem_a(sizeof(struct parsenode)); \
82 parsetree->next->next = NULL; \
83 parsetree->next->type = (X); \
84 parsetree = parsetree->next; \
/*
 * Table of punctuation spellings, ended by a NULL sentinel.
 *
 * NOTE(review): the order here does not line up one-to-one with the
 * PARSE_TYPE_* punctuation values: ";" sits between "!=" and ",", whereas
 * PARSE_TYPE_LNEQ (21) is followed directly by PARSE_TYPE_COMMA (22).
 * Verify any code that indexes this table by parse type.
 */
87 static const char *const parse_punct[] = {
88 "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
89 "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
90 "." , "<" , ">" , "&" , "|" , NULL
/*
 * parse_debug: walk the parse list starting at `tree` and pass a
 * human-readable label for each node's type to STORE.
 *
 * tree: first node of the list; may be NULL, in which case nothing is done.
 *
 * STORE is not defined inside this function; presumably it emits the label
 * and leaves the switch -- confirm against its definition.
 */
98 void parse_debug(struct parsenode *tree) {
/*
 * The body only runs for nodes that have a successor, so the node whose
 * `next` is NULL is not labelled by this loop.
 */
99 while (tree && tree->next != NULL) {
/* NOTE(review): 0 is also the value of PARSE_TYPE_DO. */
101 if (tree->type == 0) {
106 switch (tree->type) {
107 case PARSE_TYPE_ADD: STORE("ADD \n");
108 case PARSE_TYPE_BAND: STORE("BITAND \n");
109 case PARSE_TYPE_BOR: STORE("BITOR \n");
110 case PARSE_TYPE_BREAK: STORE("BREAK \n");
111 case PARSE_TYPE_COMMA: STORE("SEPERATOR\n");
112 case PARSE_TYPE_CONTINUE: STORE("CONTINUE\n");
113 case PARSE_TYPE_DIVIDE: STORE("DIVIDE\n");
114 case PARSE_TYPE_EQUAL: STORE("ASSIGNMENT\n");
115 case PARSE_TYPE_GOTO: STORE("GOTO\n");
116 case PARSE_TYPE_DOT: STORE("DOT\n");
/* Declaration types. */
119 case PARSE_TYPE_ELIP: STORE("DECLTYPE: VALIST\n");
120 case PARSE_TYPE_ENTITY: STORE("DECLTYPE: ENTITY\n");
121 case PARSE_TYPE_INT: STORE("DECLTYPE: INT\n");
122 case PARSE_TYPE_FLOAT: STORE("DECLTYPE: FLOAT\n");
123 case PARSE_TYPE_BOOL: STORE("DECLTYPE: BOOL\n");
/* Comparison operators. */
125 case PARSE_TYPE_GT: STORE("TEST: GREATER THAN\n");
126 case PARSE_TYPE_LT: STORE("TEST: LESS THAN\n");
127 case PARSE_TYPE_GTEQ: STORE("TEST: GREATER THAN OR EQUAL\n");
128 case PARSE_TYPE_LTEQ: STORE("TEST: LESS THAN OR EQUAL\n");
129 case PARSE_TYPE_LNEQ: STORE("TEST: NOT EQUAL\n");
130 case PARSE_TYPE_EQEQ: STORE("TEST: EQUAL-EQUAL\n");
/* Scope brackets get no label. */
132 case PARSE_TYPE_LBS: break;
133 case PARSE_TYPE_RBS: break;
/* Logical operators. */
135 case PARSE_TYPE_LAND: STORE("LOGICAL: AND\n");
136 case PARSE_TYPE_LNOT: STORE("LOGICAL: NOT\n");
137 case PARSE_TYPE_LOR: STORE("LOGICAL: OR\n");
/*
 * The left parenthesis begins a parenthesised group and the right one
 * ends it; the two labels were previously exchanged.
 */
138 case PARSE_TYPE_LPARTH: STORE("PARTH: BEG\n");
139 case PARSE_TYPE_RPARTH: STORE("PARTH: END\n");
/* Loops and conditional blocks. */
141 case PARSE_TYPE_FOR: STORE("LOOP: FOR\n");
142 case PARSE_TYPE_DO: STORE("LOOP: DO\n");
143 case PARSE_TYPE_ELSE: STORE("BLOCK: ELSE\n");
144 case PARSE_TYPE_IF: STORE("BLOCK: IF\n");
151 * This just skips the token and adds it to the parse tree for later
152 * checking / optimization / codegen. It does not yet do anything else with
153 * the token, such as checking that its use is syntactically legal -- it
154 * should, but that is a TODO item which is not implemented.
/*
 * PARSE_TODO(X): opens a braced block and first advances `token` to the
 * next lexer token via lex_token(file). Both `token` and `file` must be in
 * scope at the expansion site.
 */
156 #define PARSE_TODO(X) { \
157 token = lex_token(file); \
162 int parse(struct lex_file *file) {
163 struct parsenode *parsetree = NULL;
164 struct parsenode *parseroot = NULL;
167 * Allocate memory for our parse tree:
168 * the parse tree is just a singly linked list which will contain
169 * all the data for code generation.
172 parseroot = mem_a(sizeof(struct parsenode));
174 return error(ERROR_INTERNAL, "Ran out of memory", " ");
175 parsetree = parseroot;
176 parsetree = parseroot;
180 while ((token = lex_token(file)) != ERROR_LEX && \
181 token != ERROR_COMPILER && \
182 token != ERROR_INTERNAL && \
183 token != ERROR_PARSE && \
184 token != ERROR_PREPRO && file->length >= 0) {
187 token = lex_token(file);
188 while ((token == ' ' || token == '\n') && file->length >= 0)
189 token = lex_token(file);
192 error(ERROR_PARSE, "Expected `(` after if\n", "");
194 PARSE_TREE_ADD(PARSE_TYPE_IF);
197 token = lex_token(file);
198 while ((token == ' ' || token == '\n') && file->length >= 0)
199 token = lex_token(file);
201 PARSE_TREE_ADD(PARSE_TYPE_ELSE);
204 token = lex_token(file);
205 while ((token == ' ' || token == '\n') && file->length >= 0)
206 token = lex_token(file);
208 PARSE_TREE_ADD(PARSE_TYPE_FOR);
212 token = lex_token(file);
213 printf("FOO: %s\n", file->lastok);
217 case TOKEN_DO: PARSE_TODO(PARSE_TYPE_DO);
218 case TOKEN_WHILE: PARSE_TODO(PARSE_TYPE_WHILE);
219 case TOKEN_BREAK: PARSE_TODO(PARSE_TYPE_BREAK);
220 case TOKEN_CONTINUE: PARSE_TODO(PARSE_TYPE_CONTINUE);
221 case TOKEN_RETURN: PARSE_TODO(PARSE_TYPE_RETURN);
222 case TOKEN_GOTO: PARSE_TODO(PARSE_TYPE_GOTO);
223 case TOKEN_INT: PARSE_TODO(PARSE_TYPE_INT);
224 case TOKEN_VOID: PARSE_TODO(PARSE_TYPE_VOID);
225 case TOKEN_STRING: PARSE_TODO(PARSE_TYPE_STRING);
226 case TOKEN_FLOAT: PARSE_TODO(PARSE_TYPE_FLOAT);
227 case TOKEN_VECTOR: PARSE_TODO(PARSE_TYPE_VECTOR);
228 case TOKEN_ENTITY: PARSE_TODO(PARSE_TYPE_ENTITY);
230 /* TODO: Preprocessor */
232 token = lex_token(file);
233 token = lex_token(file);
234 token = lex_token(file);
235 token = lex_token(file);
236 token = lex_token(file);
237 token = lex_token(file);
241 * From here down is all language punctuation: There is no
242 * need to actually create tokens from these because they're already
243 * tokenized as these individual tokens (which are in a special area
244 * of the ascii table which doesn't conflict with our other tokens
245 * which are higher than the ascii table).
248 token = lex_token(file);
249 if (token == '&') { /* && */
250 token = lex_token(file);
251 PARSE_TREE_ADD(PARSE_TYPE_LAND);
254 PARSE_TREE_ADD(PARSE_TYPE_BAND);
257 token = lex_token(file);
258 if (token == '|') { /* || */
259 token = lex_token(file);
260 PARSE_TREE_ADD(PARSE_TYPE_LOR);
263 PARSE_TREE_ADD(PARSE_TYPE_BOR);
266 token = lex_token(file);
267 if (token == '=') { /* != */
268 token = lex_token(file);
269 PARSE_TREE_ADD(PARSE_TYPE_LNEQ);
272 PARSE_TREE_ADD(PARSE_TYPE_LNOT);
275 token = lex_token(file);
276 if (token == '=') { /* <= */
277 token = lex_token(file);
278 PARSE_TREE_ADD(PARSE_TYPE_LTEQ);
281 PARSE_TREE_ADD(PARSE_TYPE_LT);
284 token = lex_token(file);
285 if (token == '=') { /* >= */
286 token = lex_token(file);
287 PARSE_TREE_ADD(PARSE_TYPE_GTEQ);
290 PARSE_TREE_ADD(PARSE_TYPE_GT);
293 token = lex_token(file);
294 if (token == '=') { /* == */
295 token = lex_token(file);
296 PARSE_TREE_ADD(PARSE_TYPE_EQEQ);
299 PARSE_TREE_ADD(PARSE_TYPE_EQUAL);
302 token = lex_token(file);
303 PARSE_TREE_ADD(PARSE_TYPE_DONE);
306 token = lex_token(file);
307 PARSE_TREE_ADD(PARSE_TYPE_MINUS);
310 token = lex_token(file);
311 PARSE_TREE_ADD(PARSE_TYPE_ADD);
314 token = lex_token(file);
315 PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
318 token = lex_token(file);
319 PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
322 token = lex_token(file);
323 PARSE_TREE_ADD(PARSE_TYPE_LBS);
326 token = lex_token(file);
327 PARSE_TREE_ADD(PARSE_TYPE_RBS);
331 parse_debug(parseroot);