/*
- * Copyright (C) 2012
- * Dale Weiler
+ * Copyright (C) 2012
+ * Dale Weiler
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include <stdio.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
#include "gmqcc.h"
+/*
+ * Keywords are multichar, punctuation lexing is a bit more complicated
+ * than keyword lexing.
+ */
static const char *const lex_keywords[] = {
- "do", "else", "if", "while",
- "break", "continue", "return", "goto",
- "for",
-
- /* types */
- "int",
- "bool",
- "void",
- "string",
- "float",
- "vector",
- "entity",
+ "do", "else", "if", "while",
+ "break", "continue", "return", "goto",
+ "for", "typedef"
};
-struct lex_file *lex_open(FILE *fp) {
- struct lex_file *lex = mem_a(sizeof(struct lex_file));
- if (lex) {
- lex->file = fp;
- fseek(lex->file, 0, SEEK_END);
- lex->length = ftell(lex->file);
- lex->size = lex->length; /* copy, this is never changed */
- fseek(lex->file, 0, SEEK_SET);
- lex->last = 0;
-
- memset(lex->peek, 0, sizeof(lex->peek));
- }
- return lex;
+void lex_init(const char *file, lex_file **set) {
+ lex_file *lex = mem_a(sizeof(lex_file));
+ if (!lex)
+ return;
+
+ lex->file = fopen(file, "r");
+ if (!lex->file) {
+ mem_d(lex);
+ return;
+ }
+
+ fseek(lex->file, 0, SEEK_END);
+ lex->length = ftell(lex->file);
+ lex->size = lex->length; /* copy, this is never changed */
+ fseek(lex->file, 0, SEEK_SET);
+ lex->last = 0;
+ lex->line = 1;
+
+ memset(lex->peek, 0, sizeof(lex->peek));
+ *set = lex;
}
-int lex_close(struct lex_file *file) {
- int ret = -1;
- if (file) {
- ret = fclose(file->file);
- mem_d(file);
- }
- return ret;
+void lex_close(lex_file *file) {
+ if (!file) return;
+
+ fclose(file->file); /* may already be closed */
+ mem_d (file);
}
-static void lex_addch(int ch, struct lex_file *file) {
- if (file->current < sizeof(file->lastok)-1)
- file->lastok[file->current++] = (char)ch;
- if (file->current == sizeof(file->lastok)-1)
- file->lastok[file->current] = (char)'\0';
+static void lex_addch(int ch, lex_file *file) {
+ if (file->current < sizeof(file->lastok)-1)
+ file->lastok[file->current++] = (char)ch;
+ if (file->current == sizeof(file->lastok)-1)
+ file->lastok[file->current] = (char)'\0';
}
-static inline void lex_clear(struct lex_file *file) {
- file->current = 0;
+static GMQCC_INLINE void lex_clear(lex_file *file) {
+ file->current = 0;
}
/*
* This doesn't play with file streams, the lexer has
* it's own internal state for this.
*/
-static int lex_inget(struct lex_file *file) {
- file->length --;
- if (file->last > 0)
- return file->peek[--file->last];
- return fgetc(file->file);
+static int lex_inget(lex_file *file) {
+ char get;
+ file->length --;
+
+ if (file->last > 0) {
+ if ((get = file->peek[--file->last]) == '\n')
+ file->line ++;
+ return get;
+ }
+ if ((get = fgetc(file->file)) == '\n')
+ file->line++;
+
+ return get;
}
-static void lex_unget(int ch, struct lex_file *file) {
- if (file->last < sizeof(file->peek))
- file->peek[file->last++] = ch;
- file->length ++;
+static void lex_unget(int ch, lex_file *file) {
+ if (file->last < sizeof(file->peek)) {
+ if (ch == '\n')
+ file->line --;
+ file->peek[file->last++] = ch;
+ }
+ file->length ++;
}
/*
* This is trigraph and digraph support, a feature not qc compiler
* supports. Moving up in this world!
*/
-static int lex_trigraph(struct lex_file *file) {
- int ch;
- if ((ch = lex_inget(file)) != '?') {
- lex_unget(ch, file);
- return '?';
- }
-
- ch = lex_inget(file);
- switch (ch) {
- case '(' : return '[' ;
- case ')' : return ']' ;
- case '/' : return '\\';
- case '\'': return '^' ;
- case '<' : return '{' ;
- case '>' : return '}' ;
- case '!' : return '|' ;
- case '-' : return '~' ;
- case '=' : return '#' ;
- default:
- lex_unget('?', file);
- lex_unget(ch , file);
- return '?';
- }
- return '?';
+static int lex_trigraph(lex_file *file) {
+ int ch;
+ if ((ch = lex_inget(file)) != '?') {
+ lex_unget(ch, file);
+ return '?';
+ }
+
+ ch = lex_inget(file);
+ switch (ch) {
+ case '(' : return '[' ;
+ case ')' : return ']' ;
+ case '/' : return '\\';
+ case '\'': return '^' ;
+ case '<' : return '{' ;
+ case '>' : return '}' ;
+ case '!' : return '|' ;
+ case '-' : return '~' ;
+ case '=' : return '#' ;
+ default:
+ lex_unget('?', file);
+ lex_unget(ch , file);
+ }
+ return '?';
}
-static int lex_digraph(struct lex_file *file, int first) {
- int ch = lex_inget(file);
- switch (first) {
- case '<':
- if (ch == '%') return '{';
- if (ch == ':') return '[';
- break;
- case '%':
- if (ch == '>') return '}';
- if (ch == ':') return '#';
- break;
- case ':':
- if (ch == '>') return ']';
- break;
- }
-
- lex_unget(ch, file);
- return first;
+static int lex_digraph(lex_file *file, int first) {
+ int ch = lex_inget(file);
+ switch (first) {
+ case '<':
+ if (ch == '%') return '{';
+ if (ch == ':') return '[';
+ break;
+ case '%':
+ if (ch == '>') return '}';
+ if (ch == ':') return '#';
+ break;
+ case ':':
+ if (ch == '>') return ']';
+ break;
+ }
+
+ lex_unget(ch, file);
+ return first;
}
-static int lex_getch(struct lex_file *file) {
- int ch = lex_inget(file);
- if (ch == '?')
- return lex_trigraph(file);
- if (ch == '<' || ch == ':' || ch == '%')
- return lex_digraph (file, ch);
-
- return ch;
+static int lex_getch(lex_file *file) {
+ int ch = lex_inget(file);
+ if (ch == '?')
+ return lex_trigraph(file);
+ if (ch == '<' || ch == ':' || ch == '%')
+ return lex_digraph(file, ch);
+ return ch;
}
-static int lex_get(struct lex_file *file) {
- int ch;
- if (!isspace(ch = lex_getch(file)))
- return ch;
-
- /* skip over all spaces */
- while (isspace(ch) && ch != '\n')
- ch = lex_getch(file);
-
- if (ch == '\n')
- return ch;
-
- lex_unget(ch, file);
- return ' ';
+static int lex_get(lex_file *file) {
+ int ch;
+ if (!isspace(ch = lex_getch(file)))
+ return ch;
+
+ /* skip over all spaces */
+ while (isspace(ch) && ch != '\n')
+ ch = lex_getch(file);
+
+ if (ch == '\n')
+ return ch;
+ lex_unget(ch, file);
+ return ' ';
}
-static int lex_skipchr(struct lex_file *file) {
- int ch;
- int it;
-
- lex_clear(file);
- lex_addch('\'', file);
-
- for (it = 0; it < 2 && ((ch = lex_inget(file)) != '\''); it++) {
- lex_addch(ch, file);
-
- if (ch == '\n')
- return ERROR_LEX;
- if (ch == '\\')
- lex_addch(lex_getch(file), file);
- }
- lex_addch('\'', file);
- lex_addch('\0', file);
-
- if (it > 2)
- return ERROR_LEX;
-
- return LEX_CHRLIT;
+static int lex_skipchr(lex_file *file) {
+ int ch;
+ int it;
+
+ lex_clear(file);
+ lex_addch('\'', file);
+
+ for (it = 0; it < 2 && ((ch = lex_inget(file)) != '\''); it++) {
+ lex_addch(ch, file);
+
+ if (ch == '\n')
+ return ERROR_LEX;
+ if (ch == '\\')
+ lex_addch(lex_getch(file), file);
+ }
+ lex_addch('\'', file);
+ lex_addch('\0', file);
+
+ if (it > 2)
+ return ERROR_LEX;
+
+ return LEX_CHRLIT;
}
-static int lex_skipstr(struct lex_file *file) {
- int ch;
- lex_clear(file);
- lex_addch('"', file);
-
- while ((ch = lex_getch(file)) != '"') {
- if (ch == '\n' || ch == EOF)
- return ERROR_LEX;
-
- lex_addch(ch, file);
- if (ch == '\\')
- lex_addch(lex_inget(file), file);
- }
-
- lex_addch('"', file);
- lex_addch('\0', file);
-
- return LEX_STRLIT;
+static int lex_skipstr(lex_file *file) {
+ int ch;
+ lex_clear(file);
+ lex_addch('"', file);
+
+ while ((ch = lex_getch(file)) != '"') {
+ if (ch == '\n' || ch == EOF)
+ return ERROR_LEX;
+
+ lex_addch(ch, file);
+ if (ch == '\\')
+ lex_addch(lex_inget(file), file);
+ }
+
+ lex_addch('"', file);
+ lex_addch('\0', file);
+
+ return LEX_STRLIT;
}
-static int lex_skipcmt(struct lex_file *file) {
- int ch;
- lex_clear(file);
- ch = lex_getch(file);
-
- if (ch == '/') {
- lex_addch('/', file);
- lex_addch('/', file);
-
- while ((ch = lex_getch(file)) != '\n') {
- if (ch == '\\') {
- lex_addch(ch, file);
- lex_addch(lex_getch(file), file);
- } else {
- lex_addch(ch, file);
- }
- }
- lex_addch('\0', file);
- return LEX_COMMENT;
- }
-
- if (ch != '*') {
- lex_unget(ch, file);
- return '/';
- }
-
- lex_addch('/', file);
-
- /* hate this */
- do {
- lex_addch(ch, file);
- while ((ch = lex_getch(file)) != '*') {
- if (ch == EOF)
- return error(ERROR_LEX, "malformatted comment", " ");
- else
- lex_addch(ch, file);
- }
- lex_addch(ch, file);
- } while ((ch = lex_getch(file)) != '/');
-
- lex_addch('/', file);
- lex_addch('\0', file);
-
- return LEX_COMMENT;
+static int lex_skipcmt(lex_file *file) {
+ int ch;
+ lex_clear(file);
+ ch = lex_getch(file);
+
+ if (ch == '/') {
+ lex_addch('/', file);
+ lex_addch('/', file);
+
+ while ((ch = lex_getch(file)) != '\n') {
+ if (ch == '\\') {
+ lex_addch(ch, file);
+ lex_addch(lex_getch(file), file);
+ } else {
+ lex_addch(ch, file);
+ }
+ }
+ lex_addch('\0', file);
+ return LEX_COMMENT;
+ }
+
+ if (ch != '*') {
+ lex_unget(ch, file);
+ return '/';
+ }
+
+ lex_addch('/', file);
+
+ /* hate this */
+ do {
+ lex_addch(ch, file);
+ while ((ch = lex_getch(file)) != '*') {
+ if (ch == EOF)
+ return error(file, ERROR_LEX, "malformatted comment");
+ else
+ lex_addch(ch, file);
+ }
+ lex_addch(ch, file);
+ } while ((ch = lex_getch(file)) != '/');
+
+ lex_addch('/', file);
+ lex_addch('\0', file);
+
+ return LEX_COMMENT;
}
-static int lex_getsource(struct lex_file *file) {
- int ch = lex_get(file);
-
- /* skip char/string/comment */
- switch (ch) {
- case '\'': return lex_skipchr(file);
- case '"': return lex_skipstr(file);
- case '/': return lex_skipcmt(file);
- default: return ch;
- }
+static int lex_getsource(lex_file *file) {
+ int ch = lex_get(file);
+
+ /* skip char/string/comment */
+ switch (ch) {
+ case '\'': return lex_skipchr(file);
+ case '"': return lex_skipstr(file);
+ case '/': return lex_skipcmt(file);
+ default:
+ return ch;
+ }
}
-int lex_token(struct lex_file *file) {
- int ch = lex_getsource(file);
- int it;
-
- /* valid identifier */
- if (ch > 0 && (ch == '_' || isalpha(ch))) {
- lex_clear(file);
- while (ch > 0 && (isalpha(ch) || ch == '_')) {
- lex_addch(ch, file);
- ch = lex_getsource(file);
- }
- lex_unget(ch, file);
- lex_addch('\0', file);
-
- /* look inside the table for a keyword .. */
- for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++)
- if (!strncmp(file->lastok, lex_keywords[it], sizeof(lex_keywords[it])))
- return it;
-
- return LEX_IDENT;
- }
- return ch;
+int lex_token(lex_file *file) {
+ int ch = lex_getsource(file);
+ int it;
+
+ /* valid identifier */
+ if (ch > 0 && (ch == '_' || isalpha(ch))) {
+ lex_clear(file);
+
+ while (ch > 0 && (ch == '_' || isalpha(ch))) {
+ lex_addch(ch, file);
+ ch = lex_getsource(file);
+ }
+ lex_unget(ch, file);
+ lex_addch('\0', file);
+
+ /* look inside the table for a keyword .. */
+ for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++)
+ if (!strncmp(file->lastok, lex_keywords[it], strlen(lex_keywords[it])))
+ return it;
+
+ /* try a type? */
+ #define TEST_TYPE(X) \
+ do { \
+ if (!strncmp(X, "float", sizeof("float"))) \
+ return TOKEN_FLOAT; \
+ if (!strncmp(X, "vector", sizeof("vector"))) \
+ return TOKEN_VECTOR; \
+ if (!strncmp(X, "string", sizeof("string"))) \
+ return TOKEN_STRING; \
+ if (!strncmp(X, "entity", sizeof("entity"))) \
+ return TOKEN_ENTITY; \
+ if (!strncmp(X, "void" , sizeof("void"))) \
+ return TOKEN_VOID; \
+ } while(0)
+
+ TEST_TYPE(file->lastok);
+
+ /* try the hashtable for typedefs? */
+ if (typedef_find(file->lastok))
+ TEST_TYPE(typedef_find(file->lastok)->name);
+
+ #undef TEST_TYPE
+ return LEX_IDENT;
+ }
+ return (ch != ' ') ? ch : lex_token(file);
+}
+
+void lex_reset(lex_file *file) {
+ file->current = 0;
+ file->last = 0;
+ file->length = file->size;
+ fseek(file->file, 0, SEEK_SET);
+
+ memset(file->peek, 0, sizeof(file->peek ));
+ memset(file->lastok, 0, sizeof(file->lastok));
}
-void lex_reset(struct lex_file *file) {
- file->current = 0;
- file->last = 0;
- file->length = file->size;
- fseek(file->file, 0, SEEK_SET);
-
- memset(file->peek, 0, sizeof(file->peek ));
- memset(file->lastok, 0, sizeof(file->lastok));
+void lex_parse(lex_file *file) {
+ if (!file) return;
+ parse_gen(file); /* run parser */
}
-int lex_debug(struct lex_file *file) {
- int list_do = 0;
- int list_else = 0;
- int list_if = 0;
- int list_while = 0;
- int list_break = 0;
- int list_continue = 0;
- int list_return = 0;
- int list_goto = 0;
- int list_for = 0;
- int token = 0;
- printf("===========================\nTOKENS: \n===========================\n");
- while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0) {
- if (token != -1) {
- switch (token) {
- case 0: list_do ++; break;
- case 1: list_else ++; break;
- case 2: list_if ++; break;
- case 3: list_while ++; break;
- case 4: list_break ++; break;
- case 5: list_continue++; break;
- case 6: list_return ++; break;
- case 7: list_goto ++; break;
- case 8: list_for ++; break;
- }
- }
- if (token >= 33 && token <= 126)
- putchar(token);
- }
- printf("\n===========================\nBRANCHES \n===========================\n");
- printf("\t if % 8d\n", list_if);
- printf("\t else % 8d\n", list_else);
- printf("===========================\nLOOPS \n===========================\n");
- printf("\t for % 8d\n", list_for);
- printf("\t while % 8d\n", list_while);
- printf("\t do % 8d\n", list_do);
- printf("===========================\nSTATEMENTS \n===========================\n");
- printf("\t break % 8d\n", list_break);
- printf("\t continue % 8d\n", list_continue);
- printf("\t return % 8d\n", list_return);
- printf("\t goto % 8d\n", list_goto);
- printf("===========================\nIDENTIFIERS\n===========================\n");
- lex_reset(file);
- while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0)
- if (token == LEX_IDENT)
- printf("%s ", file->lastok);
- lex_reset(file);
- return 1;
+/*
+ * Include a file into the lexer / parsing process: This really
+ * should check if names are the same to prevent endless include
+ * recrusion.
+ */
+lex_file *lex_include(lex_file *lex, const char *file) {
+ lex_file *set = NULL;
+
+ util_strrq(file);
+ if (strncmp(lex->name, file, strlen(lex->name)) == 0) {
+ error(lex, ERROR_LEX, "Source file cannot include itself\n");
+ exit (-1);
+ }
+ lex_init(file, &set);
+
+ return set;
}