X-Git-Url: https://git.xonotic.org/?p=xonotic%2Fgmqcc.git;a=blobdiff_plain;f=lex.c;h=fa09f27042391d7d7fd8d013331a8e3192f20a34;hp=9296158fd196cd5390e47fb60d7e1d096a936e7f;hb=206952b920856ef53b953d3d477f00869b7191fd;hpb=a440c1410bad2a06b6a9385094a3dae0142f9395 diff --git a/lex.c b/lex.c index 9296158..fa09f27 100644 --- a/lex.c +++ b/lex.c @@ -1,6 +1,6 @@ /* - * Copyright (C) 2012 - * Dale Weiler + * Copyright (C) 2012 + * Dale Weiler * * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in @@ -20,11 +20,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include -#include -#include -#include -#include #include "gmqcc.h" /* @@ -32,51 +27,48 @@ * than keyword lexing. */ static const char *const lex_keywords[] = { - "do", "else", "if", "while", - "break", "continue", "return", "goto", - "for", "typedef", - - /* types */ - "int", - "void", - "string", - "float", - "vector", - "entity", + "do", "else", "if", "while", + "break", "continue", "return", "goto", + "for", "typedef" }; -struct lex_file *lex_open(FILE *fp) { - struct lex_file *lex = mem_a(sizeof(struct lex_file)); - if (lex) { - lex->file = fp; - fseek(lex->file, 0, SEEK_END); - lex->length = ftell(lex->file); - lex->size = lex->length; /* copy, this is never changed */ - fseek(lex->file, 0, SEEK_SET); - lex->last = 0; - - memset(lex->peek, 0, sizeof(lex->peek)); - } - return lex; +void lex_init(const char *file, lex_file **set) { + lex_file *lex = mem_a(sizeof(lex_file)); + if (!lex) + return; + + lex->file = fopen(file, "r"); + if (!lex->file) { + mem_d(lex); + return; + } + + fseek(lex->file, 0, SEEK_END); + lex->length = ftell(lex->file); + lex->size = lex->length; /* copy, this is never changed */ + fseek(lex->file, 0, SEEK_SET); + lex->last = 0; + lex->line = 1; + + memset(lex->peek, 0, sizeof(lex->peek)); + *set = lex; } -int lex_close(struct lex_file *file) { - int ret = -1; - if (file) { - ret = fclose(file->file); - mem_d(file); - } - return ret; +void lex_close(lex_file *file) { + if (!file) return; + + fclose(file->file); /* may already be closed */ + mem_d (file); } -static void lex_addch(int ch, struct lex_file *file) { - if (file->current < sizeof(file->lastok)-1) - file->lastok[file->current++] = (char)ch; - if (file->current == sizeof(file->lastok)-1) - file->lastok[file->current] = (char)'\0'; +static void lex_addch(int ch, lex_file *file) { + if (file->current < sizeof(file->lastok)-1) + file->lastok[file->current++] = (char)ch; + if (file->current == sizeof(file->lastok)-1) + file->lastok[file->current] = (char)'\0'; } -static inline void lex_clear(struct lex_file *file) { - file->current = 0; +static inline void lex_clear(lex_file *file) { + file->current = 0; } /* @@ -84,283 +76,280 @@ static inline void lex_clear(struct lex_file *file) { * This doesn't play with file streams, the lexer has * it's own internal state for this. */ -static int lex_inget(struct lex_file *file) { - file->length --; - if (file->last > 0) - return file->peek[--file->last]; - return fgetc(file->file); +static int lex_inget(lex_file *file) { + char get; + file->length --; + + if (file->last > 0) { + if ((get = file->peek[--file->last]) == '\n') + file->line ++; + return get; + } + if ((get = fgetc(file->file)) == '\n') + file->line++; + + return get; } -static void lex_unget(int ch, struct lex_file *file) { - if (file->last < sizeof(file->peek)) - file->peek[file->last++] = ch; - file->length ++; +static void lex_unget(int ch, lex_file *file) { + if (file->last < sizeof(file->peek)) { + if (ch == '\n') + file->line --; + file->peek[file->last++] = ch; + } + file->length ++; } /* * This is trigraph and digraph support, a feature not qc compiler * supports. Moving up in this world! */ -static int lex_trigraph(struct lex_file *file) { - int ch; - if ((ch = lex_inget(file)) != '?') { - lex_unget(ch, file); - return '?'; - } - - ch = lex_inget(file); - switch (ch) { - case '(' : return '[' ; - case ')' : return ']' ; - case '/' : return '\\'; - case '\'': return '^' ; - case '<' : return '{' ; - case '>' : return '}' ; - case '!' : return '|' ; - case '-' : return '~' ; - case '=' : return '#' ; - default: - lex_unget('?', file); - lex_unget(ch , file); - return '?'; - } - return '?'; +static int lex_trigraph(lex_file *file) { + int ch; + if ((ch = lex_inget(file)) != '?') { + lex_unget(ch, file); + return '?'; + } + + ch = lex_inget(file); + switch (ch) { + case '(' : return '[' ; + case ')' : return ']' ; + case '/' : return '\\'; + case '\'': return '^' ; + case '<' : return '{' ; + case '>' : return '}' ; + case '!' : return '|' ; + case '-' : return '~' ; + case '=' : return '#' ; + default: + lex_unget('?', file); + lex_unget(ch , file); + return '?'; + } + return '?'; } -static int lex_digraph(struct lex_file *file, int first) { - int ch = lex_inget(file); - switch (first) { - case '<': - if (ch == '%') return '{'; - if (ch == ':') return '['; - break; - case '%': - if (ch == '>') return '}'; - if (ch == ':') return '#'; - break; - case ':': - if (ch == '>') return ']'; - break; - } - - lex_unget(ch, file); - return first; +static int lex_digraph(lex_file *file, int first) { + int ch = lex_inget(file); + switch (first) { + case '<': + if (ch == '%') return '{'; + if (ch == ':') return '['; + break; + case '%': + if (ch == '>') return '}'; + if (ch == ':') return '#'; + break; + case ':': + if (ch == '>') return ']'; + break; + } + + lex_unget(ch, file); + return first; } -static int lex_getch(struct lex_file *file) { - int ch = lex_inget(file); - if (ch == '?') - return lex_trigraph(file); - if (ch == '<' || ch == ':' || ch == '%') - return lex_digraph (file, ch); - - return ch; +static int lex_getch(lex_file *file) { + int ch = lex_inget(file); + if (ch == '?') + return lex_trigraph(file); + if (ch == '<' || ch == ':' || ch == '%') + return lex_digraph(file, ch); + return ch; } -static int lex_get(struct lex_file *file) { - int ch; - if (!isspace(ch = lex_getch(file))) - return ch; - - /* skip over all spaces */ - while (isspace(ch) && ch != '\n') - ch = lex_getch(file); - - if (ch == '\n') { - file->line ++; - return ch; - } - - lex_unget(ch, file); - return ' '; +static int lex_get(lex_file *file) { + int ch; + if (!isspace(ch = lex_getch(file))) + return ch; + + /* skip over all spaces */ + while (isspace(ch) && ch != '\n') + ch = lex_getch(file); + + if (ch == '\n') + return ch; + lex_unget(ch, file); + return ' '; } -static int lex_skipchr(struct lex_file *file) { - int ch; - int it; - - lex_clear(file); - lex_addch('\'', file); - - for (it = 0; it < 2 && ((ch = lex_inget(file)) != '\''); it++) { - lex_addch(ch, file); - - if (ch == '\n') - return ERROR_LEX; - if (ch == '\\') - lex_addch(lex_getch(file), file); - } - lex_addch('\'', file); - lex_addch('\0', file); - - if (it > 2) - return ERROR_LEX; - - return LEX_CHRLIT; +static int lex_skipchr(lex_file *file) { + int ch; + int it; + + lex_clear(file); + lex_addch('\'', file); + + for (it = 0; it < 2 && ((ch = lex_inget(file)) != '\''); it++) { + lex_addch(ch, file); + + if (ch == '\n') + return ERROR_LEX; + if (ch == '\\') + lex_addch(lex_getch(file), file); + } + lex_addch('\'', file); + lex_addch('\0', file); + + if (it > 2) + return ERROR_LEX; + + return LEX_CHRLIT; } -static int lex_skipstr(struct lex_file *file) { - int ch; - lex_clear(file); - lex_addch('"', file); - - while ((ch = lex_getch(file)) != '"') { - if (ch == '\n' || ch == EOF) - return ERROR_LEX; - - lex_addch(ch, file); - if (ch == '\\') - lex_addch(lex_inget(file), file); - } - - lex_addch('"', file); - lex_addch('\0', file); - - return LEX_STRLIT; +static int lex_skipstr(lex_file *file) { + int ch; + lex_clear(file); + lex_addch('"', file); + + while ((ch = lex_getch(file)) != '"') { + if (ch == '\n' || ch == EOF) + return ERROR_LEX; + + lex_addch(ch, file); + if (ch == '\\') + lex_addch(lex_inget(file), file); + } + + lex_addch('"', file); + lex_addch('\0', file); + + return LEX_STRLIT; } -static int lex_skipcmt(struct lex_file *file) { - int ch; - lex_clear(file); - ch = lex_getch(file); - - if (ch == '/') { - lex_addch('/', file); - lex_addch('/', file); - - while ((ch = lex_getch(file)) != '\n') { - if (ch == '\\') { - lex_addch(ch, file); - lex_addch(lex_getch(file), file); - } else { - lex_addch(ch, file); - } - } - lex_addch('\0', file); - return LEX_COMMENT; - } - - if (ch != '*') { - lex_unget(ch, file); - return '/'; - } - - lex_addch('/', file); - - /* hate this */ - do { - lex_addch(ch, file); - while ((ch = lex_getch(file)) != '*') { - if (ch == EOF) - return error(ERROR_LEX, "malformatted comment at line %d", file->line); - else - lex_addch(ch, file); - } - lex_addch(ch, file); - } while ((ch = lex_getch(file)) != '/'); - - lex_addch('/', file); - lex_addch('\0', file); - - return LEX_COMMENT; +static int lex_skipcmt(lex_file *file) { + int ch; + lex_clear(file); + ch = lex_getch(file); + + if (ch == '/') { + lex_addch('/', file); + lex_addch('/', file); + + while ((ch = lex_getch(file)) != '\n') { + if (ch == '\\') { + lex_addch(ch, file); + lex_addch(lex_getch(file), file); + } else { + lex_addch(ch, file); + } + } + lex_addch('\0', file); + return LEX_COMMENT; + } + + if (ch != '*') { + lex_unget(ch, file); + return '/'; + } + + lex_addch('/', file); + + /* hate this */ + do { + lex_addch(ch, file); + while ((ch = lex_getch(file)) != '*') { + if (ch == EOF) + return error(file, ERROR_LEX, "malformatted comment"); + else + lex_addch(ch, file); + } + lex_addch(ch, file); + } while ((ch = lex_getch(file)) != '/'); + + lex_addch('/', file); + lex_addch('\0', file); + + return LEX_COMMENT; } -static int lex_getsource(struct lex_file *file) { - int ch = lex_get(file); - - /* skip char/string/comment */ - switch (ch) { - case '\'': return lex_skipchr(file); - case '"': return lex_skipstr(file); - case '/': return lex_skipcmt(file); - default: return ch; - } +static int lex_getsource(lex_file *file) { + int ch = lex_get(file); + + /* skip char/string/comment */ + switch (ch) { + case '\'': return lex_skipchr(file); + case '"': return lex_skipstr(file); + case '/': return lex_skipcmt(file); + default: + return ch; + } +} + +int lex_token(lex_file *file) { + int ch = lex_getsource(file); + int it; + + /* valid identifier */ + if (ch > 0 && (ch == '_' || isalpha(ch))) { + lex_clear(file); + + while (ch > 0 && (ch == '_' || isalpha(ch))) { + lex_addch(ch, file); + ch = lex_getsource(file); + } + lex_unget(ch, file); + lex_addch('\0', file); + + /* look inside the table for a keyword .. */ + for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++) + if (!strncmp(file->lastok, lex_keywords[it], strlen(lex_keywords[it]))) + return it; + + /* try a type? */ + #define TEST_TYPE(X) \ + do { \ + if (!strncmp(X, "float", sizeof("float"))) \ + return TOKEN_FLOAT; \ + if (!strncmp(X, "vector", sizeof("vector"))) \ + return TOKEN_VECTOR; \ + if (!strncmp(X, "string", sizeof("string"))) \ + return TOKEN_STRING; \ + if (!strncmp(X, "entity", sizeof("entity"))) \ + return TOKEN_ENTITY; \ + if (!strncmp(X, "void" , sizeof("void"))) \ + return TOKEN_VOID; \ + } while(0) + + TEST_TYPE(file->lastok); + + /* try the hashtable for typedefs? */ + if (typedef_find(file->lastok)) + TEST_TYPE(typedef_find(file->lastok)->name); + + #undef TEST_TYPE + return LEX_IDENT; + } + return ch; } -int lex_token(struct lex_file *file) { - int ch = lex_getsource(file); - int it; - - /* valid identifier */ - if (ch > 0 && (ch == '_' || isalpha(ch))) { - lex_clear(file); - while (ch > 0 && (isalpha(ch) || ch == '_')) { - lex_addch(ch, file); - ch = lex_getsource(file); - } - lex_unget(ch, file); - lex_addch('\0', file); - - /* look inside the table for a keyword .. */ - for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++) - if (!strncmp(file->lastok, lex_keywords[it], sizeof(lex_keywords[it]))) - return it; - - /* try the hashtable for typedefs? */ - if (typedef_find(file->lastok)) - for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++) - if (!strncmp(typedef_find(file->lastok)->name, lex_keywords[it], sizeof(lex_keywords[it]))) - return it; - - return LEX_IDENT; - } - return ch; +void lex_reset(lex_file *file) { + file->current = 0; + file->last = 0; + file->length = file->size; + fseek(file->file, 0, SEEK_SET); + + memset(file->peek, 0, sizeof(file->peek )); + memset(file->lastok, 0, sizeof(file->lastok)); } -void lex_reset(struct lex_file *file) { - file->current = 0; - file->last = 0; - file->length = file->size; - fseek(file->file, 0, SEEK_SET); - - memset(file->peek, 0, sizeof(file->peek )); - memset(file->lastok, 0, sizeof(file->lastok)); +void lex_parse(lex_file *file) { + if (!file) return; + parse_gen(file); /* run parser */ } -int lex_debug(struct lex_file *file) { - int list_do = 0; - int list_else = 0; - int list_if = 0; - int list_while = 0; - int list_break = 0; - int list_continue = 0; - int list_return = 0; - int list_goto = 0; - int list_for = 0; - int token = 0; - printf("===========================\nTOKENS: \n===========================\n"); - while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0) { - if (token != -1) { - switch (token) { - case 0: list_do ++; break; - case 1: list_else ++; break; - case 2: list_if ++; break; - case 3: list_while ++; break; - case 4: list_break ++; break; - case 5: list_continue++; break; - case 6: list_return ++; break; - case 7: list_goto ++; break; - case 8: list_for ++; break; - } - } - if (token >= 33 && token <= 126) - putchar(token); - } - printf("\n===========================\nBRANCHES \n===========================\n"); - printf("\t if % 8d\n", list_if); - printf("\t else % 8d\n", list_else); - printf("===========================\nLOOPS \n===========================\n"); - printf("\t for % 8d\n", list_for); - printf("\t while % 8d\n", list_while); - printf("\t do % 8d\n", list_do); - printf("===========================\nSTATEMENTS \n===========================\n"); - printf("\t break % 8d\n", list_break); - printf("\t continue % 8d\n", list_continue); - printf("\t return % 8d\n", list_return); - printf("\t goto % 8d\n", list_goto); - printf("===========================\nIDENTIFIERS\n===========================\n"); - lex_reset(file); - while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0) - if (token == LEX_IDENT) - printf("%s ", file->lastok); - fputc('\n', stdout); - lex_reset(file); - return 1; +/* + * Include a file into the lexer / parsing process: This really + * should check if names are the same to prevent endless include + * recrusion. + */ +lex_file *lex_include(lex_file *lex, const char *file) { + util_strrq(file); + if (strncmp(lex->name, file, strlen(lex->name)) == 0) { + error(lex, ERROR_LEX, "Source file cannot include itself\n"); + exit (-1); + } + + lex_file *set = NULL; + lex_init(file, &set); + + return set; }