#include <limits.h>
#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
#include "gmqcc.h"
/*
#define PARSE_TYPE_CONTINUE 5
#define PARSE_TYPE_RETURN 6
#define PARSE_TYPE_GOTO 7
-#define PARSE_TYPE_FOR 8 // extension
+#define PARSE_TYPE_FOR 8
#define PARSE_TYPE_VOID 9
#define PARSE_TYPE_STRING 10
#define PARSE_TYPE_FLOAT 11
#define PARSE_TYPE_MINUS 26
#define PARSE_TYPE_ADD 27
#define PARSE_TYPE_EQUAL 28
-#define PARSE_TYPE_LBS 29 // left bracket scope
-#define PARSE_TYPE_RBS 30 // right bracket scope
-#define PARSE_TYPE_ELIP 31 // ...
+#define PARSE_TYPE_LBS 29
+#define PARSE_TYPE_RBS 30
+#define PARSE_TYPE_ELIP 31
#define PARSE_TYPE_DOT 32
#define PARSE_TYPE_LT 33
#define PARSE_TYPE_GT 34
#define PARSE_TYPE_BAND 35
#define PARSE_TYPE_BOR 36
-#define PARSE_TYPE_DONE 37 // finished statement
+#define PARSE_TYPE_DONE 37
+#define PARSE_TYPE_IDENT 38
/*
* Adds a parse type to the parse tree, this is where all the hard
parsetree = parsetree->next; \
} while (0)
-static const char *const parse_punct[] = {
+/*
+ * This is all the punctuation handled in the parser, these don't
+ * need tokens, they're already tokens.
+ */
+#if 0
"&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
"/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
- "." , "<" , ">" , "&" , "|" , NULL
-};
+ "." , "<" , ">" , "&" , "|" ,
+#endif
-#define STORE(X) { \
- printf(X); \
- break; \
+#define STORE(X) { \
+ printf(X); \
+ break; \
}
void parse_debug(struct parsenode *tree) {
- while (tree && tree->next != NULL) {
- /* skip blanks */
- if (tree->type == 0) {
- tree = tree->next;
- continue;
- }
-
+ while (tree) {
switch (tree->type) {
case PARSE_TYPE_ADD: STORE("OPERATOR: ADD \n");
case PARSE_TYPE_BAND: STORE("OPERATOR: BITAND \n");
case PARSE_TYPE_RETURN: STORE("STATEMENT: RETURN\n");
case PARSE_TYPE_DONE: STORE("STATEMENT: DONE\n");
-
+ case PARSE_TYPE_VOID: STORE("DECLTYPE: VOID\n");
+ case PARSE_TYPE_STRING: STORE("DECLTYPE: STRING\n");
case PARSE_TYPE_ELIP: STORE("DECLTYPE: VALIST\n");
case PARSE_TYPE_ENTITY: STORE("DECLTYPE: ENTITY\n");
case PARSE_TYPE_FLOAT: STORE("DECLTYPE: FLOAT\n");
+ case PARSE_TYPE_VECTOR: STORE("DECLTYPE: VECTOR\n");
case PARSE_TYPE_GT: STORE("TEST: GREATER THAN\n");
case PARSE_TYPE_LT: STORE("TEST: LESS THAN\n");
case PARSE_TYPE_FOR: STORE("LOOP: FOR\n");
case PARSE_TYPE_DO: STORE("LOOP: DO\n");
-
+ //case PARSE_TYPE_IDENT: STORE("IDENT: ???\n");
}
tree = tree->next;
}
}
/*
- * This just skips the token and throws it in the parse tree for later
- * checking / optimization / codegen, it doesn't do anything with it
- * like syntax check for legal use -- like it should as it's a TODO item
- * which is not implemented
+ * Performs a parse operation: This is a macro to prevent bugs, if the
+ * calls to lex_token aren't exactly enough to feed to the end of the
+ * actual lexees for the current thing that is being parsed, the state
+ * of the next iteration in the creation of the parse tree will be wrong
+ * and everything will fail.
*/
-#define PARSE_TODO(X) { \
- token = lex_token(file); \
- PARSE_TREE_ADD(X); \
- break; \
+#define PARSE_PERFORM(X,C) { /* X: parse type handed to PARSE_TREE_ADD; C: extra statements */ \
+ token = lex_token(file); /* fetch the next token */ \
+ { C } /* caller-supplied statements, run in their own scope */ \
+ while (token != '\n') { /* discard tokens up to the newline; NOTE(review): no end-of-input check here */ \
+ token = lex_token(file); \
+ } \
+ PARSE_TREE_ADD(X); \
+ break; /* leaves the enclosing switch; the macro is used as a case body */ \
+}
+/*
+ * Releases a parse tree: walks the list from `tree` through each
+ * node's `next` pointer, handing every node to mem_d, and then calls
+ * typedef_clear().  A NULL `tree` returns immediately, in which case
+ * typedef_clear() is not called.
+ */
+void parse_clear(struct parsenode *tree) {
+ if (!tree) return;
+ struct parsenode *temp = NULL;
+ while (tree != NULL) {
+ temp = tree;
+ tree = tree->next; /* read the link before the node is released */
+ mem_d (temp);
+ }
+ 
+ /* free any potential typedefs */
+ typedef_clear();
}
-int parse(struct lex_file *file) {
+/*
+ * Generates a parse tree out of the lexees generated by the lexer. This
+ * is where the tree is built. This is where validity checking is performed.
+ */
+int parse_tree(struct lex_file *file) {
struct parsenode *parsetree = NULL;
struct parsenode *parseroot = NULL;
parseroot = mem_a(sizeof(struct parsenode));
if (!parseroot)
return error(ERROR_INTERNAL, "Ran out of memory", " ");
- parsetree = parseroot;
- parsetree = parseroot;
+ parsetree = parseroot;
+ parsetree->type = -1; /* not a valid type -- root element */
}
int token = 0;
switch (token) {
case TOKEN_IF:
token = lex_token(file);
- while ((token == ' ' || token == '\n') && file->length >= 0)
- token = lex_token(file);
-
if (token != '(')
- error(ERROR_PARSE, "Expected `(` after if\n", "");
-
+ error(ERROR_PARSE, "Expected `(` on if statement:\n");
PARSE_TREE_ADD(PARSE_TYPE_IF);
+ PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
break;
case TOKEN_ELSE:
token = lex_token(file);
- while ((token == ' ' || token == '\n') && file->length >= 0)
- token = lex_token(file);
-
PARSE_TREE_ADD(PARSE_TYPE_ELSE);
break;
case TOKEN_FOR:
- token = lex_token(file);
+ //token = lex_token(file);
while ((token == ' ' || token == '\n') && file->length >= 0)
token = lex_token(file);
-
PARSE_TREE_ADD(PARSE_TYPE_FOR);
break;
-
- case LEX_IDENT:
- token = lex_token(file);
- break;
/*
* This is a quick and easy way to do typedefs at parse time
* the tokens accordingly here.
*/
case TOKEN_TYPEDEF: {
- char *f = NULL;
- char *t = NULL;
+ char *f,*t;
+
token = lex_token(file);
- token = lex_token(file); f = strdup(file->lastok);
+ token = lex_token(file); f = util_strdup(file->lastok);
token = lex_token(file);
- token = lex_token(file); t = strdup(file->lastok);
+ token = lex_token(file); t = util_strdup(file->lastok);
typedef_add(f, t);
- /* free stdup strings */
mem_d(f);
mem_d(t);
+
+ while (token != '\n')
+ token = lex_token(file);
break;
}
-
-
- case TOKEN_DO: PARSE_TODO(PARSE_TYPE_DO);
- case TOKEN_WHILE: PARSE_TODO(PARSE_TYPE_WHILE);
- case TOKEN_BREAK: PARSE_TODO(PARSE_TYPE_BREAK);
- case TOKEN_CONTINUE: PARSE_TODO(PARSE_TYPE_CONTINUE);
- case TOKEN_RETURN: PARSE_TODO(PARSE_TYPE_RETURN);
- case TOKEN_GOTO: PARSE_TODO(PARSE_TYPE_GOTO);
- case TOKEN_VOID: PARSE_TODO(PARSE_TYPE_VOID);
- case TOKEN_STRING: PARSE_TODO(PARSE_TYPE_STRING);
- case TOKEN_FLOAT: PARSE_TODO(PARSE_TYPE_FLOAT);
- case TOKEN_VECTOR: PARSE_TODO(PARSE_TYPE_VECTOR);
- case TOKEN_ENTITY: PARSE_TODO(PARSE_TYPE_ENTITY);
+
+ /*
+ * Returns are addable as-is, statement checking is during
+ * the actual parse tree check.
+ */
+ case TOKEN_RETURN:
+ token = lex_token(file);
+ PARSE_TREE_ADD(PARSE_TYPE_RETURN);
+ break;
+ case TOKEN_CONTINUE:
+ PARSE_TREE_ADD(PARSE_TYPE_CONTINUE);
+ break;
+
+ case TOKEN_DO: PARSE_PERFORM(PARSE_TYPE_DO, {});
+ case TOKEN_WHILE: PARSE_PERFORM(PARSE_TYPE_WHILE, {});
+ case TOKEN_BREAK: PARSE_PERFORM(PARSE_TYPE_BREAK, {});
+ case TOKEN_GOTO: PARSE_PERFORM(PARSE_TYPE_GOTO, {});
+ case TOKEN_VOID: PARSE_PERFORM(PARSE_TYPE_VOID, {});
+ case TOKEN_STRING: PARSE_PERFORM(PARSE_TYPE_STRING, {});
+ case TOKEN_FLOAT: PARSE_PERFORM(PARSE_TYPE_FLOAT, {});
+ case TOKEN_VECTOR: PARSE_PERFORM(PARSE_TYPE_VECTOR, {});
+ case TOKEN_ENTITY: PARSE_PERFORM(PARSE_TYPE_ENTITY, {});
/*
* From here down is all language punctuation: There is no
* of the ascii table which doesn't conflict with our other tokens
 * which are higher than the ascii table.)
*/
- case '&': /* & */
+ case '#':
+ token = lex_token(file); /* skip '#' */
+ while (isspace(token)) {
+ if (token == '\n')
+ return error(ERROR_PARSE, "Expected valid preprocessor directive after `#` %s\n");
+ token = lex_token(file); /* try again */
+ }
+ /*
+ * If we make it here we found a directive, the supported
+ * directives so far are #include.
+ */
+ if (strncmp(file->lastok, "include", sizeof("include")) == 0) {
+ /*
+ * We only support include " ", not <> like in C (why?)
+ * because the latter is silly.
+ */
+ while (*file->lastok != '"' && token != '\n')
+ token = lex_token(file);
+
+ /* we handle lexing at that point now */
+ if (token == '\n')
+ return error(ERROR_PARSE, "%d: Invalid use of include preprocessor directive: wanted #include \"file.h\"\n", file->line);
+ }
+
+ /* skip all tokens to end of directive */
+ while (token != '\n')
+ token = lex_token(file);
+ break;
+
+ case '.':
+ PARSE_TREE_ADD(PARSE_TYPE_DOT);
+ break;
+ case '(':
+ PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
+ break;
+ case ')':
+ PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
+ break;
+
+ case '&': /* & */
token = lex_token(file);
if (token == '&') { /* && */
token = lex_token(file);
}
PARSE_TREE_ADD(PARSE_TYPE_BAND);
break;
- case '|': /* | */
+ case '|': /* | */
token = lex_token(file);
if (token == '|') { /* || */
token = lex_token(file);
}
PARSE_TREE_ADD(PARSE_TYPE_BOR);
break;
- case '!':
+ case '!': /* ! */
token = lex_token(file);
if (token == '=') { /* != */
token = lex_token(file);
}
PARSE_TREE_ADD(PARSE_TYPE_LNOT);
break;
- case '<': /* < */
+ case '<': /* < */
token = lex_token(file);
if (token == '=') { /* <= */
token = lex_token(file);
}
PARSE_TREE_ADD(PARSE_TYPE_LT);
break;
- case '>': /* > */
+ case '>': /* > */
token = lex_token(file);
if (token == '=') { /* >= */
token = lex_token(file);
}
PARSE_TREE_ADD(PARSE_TYPE_GT);
break;
- case '=':
+ case '=': /* = */
token = lex_token(file);
if (token == '=') { /* == */
token = lex_token(file);
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_ADD);
break;
- case '(':
- token = lex_token(file);
- PARSE_TREE_ADD(PARSE_TYPE_LPARTH);
- break;
- case ')':
- token = lex_token(file);
- PARSE_TREE_ADD(PARSE_TYPE_RPARTH);
- break;
case '{':
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_LBS);
token = lex_token(file);
PARSE_TREE_ADD(PARSE_TYPE_RBS);
break;
+
+ /*
+ * TODO: Fix lexer to spit out ( ) as tokens, it seems the
+ * using '(' or ')' in parser doesn't work properly unless
+ * there are spaces before them to allow the lexer to properly
+ * separate identifiers. -- otherwise it eats all of it.
+ */
+ case LEX_IDENT:
+ token = lex_token(file);
+ PARSE_TREE_ADD(PARSE_TYPE_IDENT);
+ break;
}
}
parse_debug(parseroot);
lex_reset(file);
-
+ parse_clear(parseroot);
return 1;
}