X-Git-Url: https://git.xonotic.org/?a=blobdiff_plain;f=lexer.c;h=1d1c42f3b61fc442a23b2b080aa23f180cf07605;hb=6c0b4a46fe1939a18ce851d63066f0f05e90a5d8;hp=b3243f071e0c2d316186034bbe5f2f8e68013e63;hpb=5d23fc5f5f196ba6669497f0411b324dbd4b6808;p=xonotic%2Fgmqcc.git diff --git a/lexer.c b/lexer.c index b3243f0..1d1c42f 100644 --- a/lexer.c +++ b/lexer.c @@ -6,19 +6,14 @@ #include "gmqcc.h" #include "lexer.h" -MEM_VEC_FUNCTIONS(token, char, value) -MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames) - -VECTOR_MAKE(char*, lex_filenames); +char* *lex_filenames; void lexerror(lex_file *lex, const char *fmt, ...) { va_list ap; - parser->errors++; - va_start(ap, fmt); - vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap); + con_vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap); va_end(ap); } @@ -34,12 +29,14 @@ bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...) lvl = LVL_ERROR; va_start(ap, fmt); - vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap); + con_vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap); va_end(ap); return opts_werror; } + +#if 0 token* token_new() { token *tok = (token*)mem_a(sizeof(token)); @@ -115,6 +112,22 @@ token* token_copy_all(const token *cp) return out; } +#else +static void lex_token_new(lex_file *lex) +{ +#if 0 + if (lex->tok) + token_delete(lex->tok); + lex->tok = token_new(); +#else + if (lex->tok.value) + vec_shrinkto(lex->tok.value, 0); + lex->tok.constval.t = 0; + lex->tok.ctx.line = lex->sline; + lex->tok.ctx.file = lex->name; +#endif +} +#endif lex_file* lex_open(const char *file) { @@ -142,7 +155,34 @@ lex_file* lex_open(const char *file) lex->peekpos = 0; lex->eof = false; - lex_filenames_add(lex->name); + vec_push(lex_filenames, lex->name); + return lex; +} + +lex_file* lex_open_string(const char *str, size_t len, const char *name) +{ + lex_file *lex; + + lex = (lex_file*)mem_a(sizeof(*lex)); + if (!lex) { + lexerror(NULL, "out of memory\n"); + return NULL; + } + + memset(lex, 0, sizeof(*lex)); + + lex->file = NULL; + lex->open_string = str; + lex->open_string_length = len; + lex->open_string_pos = 0; + + lex->name = util_strdup(name ? name : ""); + lex->line = 1; /* we start counting at 1 */ + + lex->peekpos = 0; + lex->eof = false; + + vec_push(lex_filenames, lex->name); return lex; } @@ -150,34 +190,96 @@ lex_file* lex_open(const char *file) void lex_cleanup(void) { size_t i; - for (i = 0; i < lex_filenames_elements; ++i) - mem_d(lex_filenames_data[i]); - mem_d(lex_filenames_data); + for (i = 0; i < vec_size(lex_filenames); ++i) + mem_d(lex_filenames[i]); + vec_free(lex_filenames); } void lex_close(lex_file *lex) { size_t i; - for (i = 0; i < lex->frames_count; ++i) + for (i = 0; i < vec_size(lex->frames); ++i) mem_d(lex->frames[i].name); - MEM_VECTOR_CLEAR(lex, frames); + vec_free(lex->frames); if (lex->modelname) - mem_d(lex->modelname); + vec_free(lex->modelname); if (lex->file) fclose(lex->file); +#if 0 if (lex->tok) token_delete(lex->tok); +#else + vec_free(lex->tok.value); +#endif /* mem_d(lex->name); collected in lex_filenames */ mem_d(lex); } +static int lex_fgetc(lex_file *lex) +{ + if (lex->file) + return fgetc(lex->file); + if (lex->open_string) { + if (lex->open_string_pos >= lex->open_string_length) + return EOF; + return lex->open_string[lex->open_string_pos++]; + } + return EOF; +} + /* Get or put-back data * The following to functions do NOT understand what kind of data they * are working on. * The are merely wrapping get/put in order to count line numbers. */ +static void lex_ungetch(lex_file *lex, int ch); +static int lex_try_trigraph(lex_file *lex, int old) +{ + int c2, c3; + c2 = lex_fgetc(lex); + if (c2 != '?') { + lex_ungetch(lex, c2); + return old; + } + + c3 = lex_fgetc(lex); + switch (c3) { + case '=': return '#'; + case '/': return '\\'; + case '\'': return '^'; + case '(': return '['; + case ')': return ']'; + case '!': return '|'; + case '<': return '{'; + case '>': return '}'; + case '-': return '~'; + default: + lex_ungetch(lex, c3); + lex_ungetch(lex, c2); + return old; + } +} + +static int lex_try_digraph(lex_file *lex, int ch) +{ + int c2; + c2 = lex_fgetc(lex); + if (ch == '<' && c2 == ':') + return '['; + else if (ch == ':' && c2 == '>') + return ']'; + else if (ch == '<' && c2 == '%') + return '{'; + else if (ch == '%' && c2 == '>') + return '}'; + else if (ch == '%' && c2 == ':') + return '#'; + lex_ungetch(lex, c2); + return ch; +} + static int lex_getch(lex_file *lex) { int ch; @@ -189,9 +291,13 @@ static int lex_getch(lex_file *lex) return lex->peek[lex->peekpos]; } - ch = fgetc(lex->file); + ch = lex_fgetc(lex); if (ch == '\n') lex->line++; + else if (ch == '?') + return lex_try_trigraph(lex, ch); + else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%')) + return lex_try_digraph(lex, ch); return ch; } @@ -225,6 +331,19 @@ static bool isxdigit_only(int ch) return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); } +/* Append a character to the token buffer */ +static void lex_tokench(lex_file *lex, int ch) +{ + vec_push(lex->tok.value, ch); +} + +/* Append a trailing null-byte */ +static void lex_endtoken(lex_file *lex) +{ + vec_push(lex->tok.value, 0); + vec_shrinkby(lex->tok.value, 1); +} + /* Skip whitespace and comments and return the first * non-white character. * As this makes use of the above getch() ungetch() functions, @@ -260,11 +379,29 @@ printf( "line one\n" static int lex_skipwhite(lex_file *lex) { int ch = 0; + bool haswhite = false; do { ch = lex_getch(lex); - while (ch != EOF && isspace(ch)) ch = lex_getch(lex); + while (ch != EOF && isspace(ch)) { + if (lex->flags.preprocessing) { + if (ch == '\n') { + /* end-of-line */ + /* see if there was whitespace first */ + if (haswhite) { /* (vec_size(lex->tok.value)) { */ + lex_ungetch(lex, ch); + lex_endtoken(lex); + return TOKEN_WHITE; + } + /* otherwise return EOL */ + return TOKEN_EOL; + } + haswhite = true; + lex_tokench(lex, ch); + } + ch = lex_getch(lex); + } if (ch == '/') { ch = lex_getch(lex); @@ -273,39 +410,51 @@ static int lex_skipwhite(lex_file *lex) /* one line comment */ ch = lex_getch(lex); - /* check for special: '/', '/', '*', '/' */ - if (ch == '*') { - ch = lex_getch(lex); - if (ch == '/') { - ch = ' '; - continue; - } + if (lex->flags.preprocessing) { + haswhite = true; + lex_tokench(lex, '/'); + lex_tokench(lex, '/'); } while (ch != EOF && ch != '\n') { + if (lex->flags.preprocessing) + lex_tokench(lex, ch); ch = lex_getch(lex); } + if (lex->flags.preprocessing) { + lex_ungetch(lex, '\n'); + lex_endtoken(lex); + return TOKEN_WHITE; + } continue; } if (ch == '*') { /* multiline comment */ + if (lex->flags.preprocessing) { + haswhite = true; + lex_tokench(lex, '/'); + lex_tokench(lex, '*'); + } + while (ch != EOF) { ch = lex_getch(lex); if (ch == '*') { ch = lex_getch(lex); if (ch == '/') { - ch = lex_getch(lex); + if (lex->flags.preprocessing) { + lex_tokench(lex, '*'); + lex_tokench(lex, '/'); + } break; } } + if (lex->flags.preprocessing) { + lex_tokench(lex, ch); + } } - if (ch == '/') /* allow *//* direct following comment */ - { - lex_ungetch(lex, ch); - ch = ' '; /* cause TRUE in the isspace check */ - } + ch = ' '; /* cause TRUE in the isspace check */ continue; } /* Otherwise roll back to the slash and break out of the loop */ @@ -315,28 +464,12 @@ static int lex_skipwhite(lex_file *lex) } } while (ch != EOF && isspace(ch)); - return ch; -} - -/* Append a character to the token buffer */ -static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch) -{ - if (!token_value_add(lex->tok, ch)) { - lexerror(lex, "out of memory"); - return false; + if (haswhite) { + lex_endtoken(lex); + lex_ungetch(lex, ch); + return TOKEN_WHITE; } - return true; -} - -/* Append a trailing null-byte */ -static bool GMQCC_WARN lex_endtoken(lex_file *lex) -{ - if (!token_value_add(lex->tok, 0)) { - lexerror(lex, "out of memory"); - return false; - } - lex->tok->value_count--; - return true; + return ch; } /* Get a token */ @@ -347,8 +480,7 @@ static bool GMQCC_WARN lex_finish_ident(lex_file *lex) ch = lex_getch(lex); while (ch != EOF && isident(ch)) { - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, ch); ch = lex_getch(lex); } @@ -363,9 +495,7 @@ static int lex_parse_frame(lex_file *lex) { int ch; - if (lex->tok) - token_delete(lex->tok); - lex->tok = token_new(); + lex_token_new(lex); ch = lex_getch(lex); while (ch != EOF && ch != '\n' && isspace(ch)) @@ -379,12 +509,10 @@ static int lex_parse_frame(lex_file *lex) return -1; } - if (!lex_tokench(lex, ch)) - return -1; + lex_tokench(lex, ch); if (!lex_finish_ident(lex)) return -1; - if (!lex_endtoken(lex)) - return -1; + lex_endtoken(lex); return 0; } @@ -402,22 +530,21 @@ static bool lex_finish_frames(lex_file *lex) if (rc < 0) /* error */ return false; - for (i = 0; i < lex->frames_count; ++i) { - if (!strcmp(lex->tok->value, lex->frames[i].name)) { + for (i = 0; i < vec_size(lex->frames); ++i) { + if (!strcmp(lex->tok.value, lex->frames[i].name)) { lex->frames[i].value = lex->framevalue++; - if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok->value)) + if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value)) return false; - continue; + break; } } + if (i < vec_size(lex->frames)) + continue; m.value = lex->framevalue++; - m.name = lex->tok->value; - lex->tok->value = NULL; - if (!lex_file_frames_add(lex, m)) { - lexerror(lex, "out of memory"); - return false; - } + m.name = util_strdup(lex->tok.value); + vec_shrinkto(lex->tok.value, 0); + vec_push(lex->frames, m); } while (true); } @@ -431,12 +558,12 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote) if (ch == quote) return TOKEN_STRINGCONST; - if (ch == '\\') { + if (!lex->flags.preprocessing && ch == '\\') { ch = lex_getch(lex); if (ch == EOF) { lexerror(lex, "unexpected end of file"); lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */ - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); } switch (ch) { @@ -451,19 +578,17 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote) default: lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch); /* so we just add the character plus backslash no matter what it actually is */ - if (!lex_tokench(lex, '\\')) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, '\\'); } /* add the character finally */ - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, ch); } - else if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + else + lex_tokench(lex, ch); } lexerror(lex, "unexpected end of file within string constant"); lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */ - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); } static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) @@ -473,10 +598,9 @@ static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) int ch = lastch; /* parse a number... */ - lex->tok->ttype = TOKEN_INTCONST; + lex->tok.ttype = TOKEN_INTCONST; - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, ch); ch = lex_getch(lex); if (ch != '.' && !isdigit(ch)) @@ -485,11 +609,10 @@ static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) { /* end of the number or EOF */ lex_ungetch(lex, ch); - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_endtoken(lex); - lex->tok->constval.i = lastch - '0'; - return lex->tok->ttype; + lex->tok.constval.i = lastch - '0'; + return lex->tok.ttype; } ishex = true; @@ -499,13 +622,11 @@ static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) if (ch != '.') { - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, ch); ch = lex_getch(lex); while (isdigit(ch) || (ishex && isxdigit_only(ch))) { - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, ch); ch = lex_getch(lex); } } @@ -513,61 +634,75 @@ static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) if (ch == '.' && !ishex) { /* Allow floating comma in non-hex mode */ - lex->tok->ttype = TOKEN_FLOATCONST; - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex->tok.ttype = TOKEN_FLOATCONST; + lex_tokench(lex, ch); /* continue digits-only */ ch = lex_getch(lex); while (isdigit(ch)) { - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, ch); ch = lex_getch(lex); } } /* put back the last character */ /* but do not put back the trailing 'f' or a float */ - if (lex->tok->ttype == TOKEN_FLOATCONST && ch == 'f') + if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f') ch = lex_getch(lex); /* generally we don't want words to follow numbers: */ if (isident(ch)) { lexerror(lex, "unexpected trailing characters after number"); - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); } lex_ungetch(lex, ch); - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - if (lex->tok->ttype == TOKEN_FLOATCONST) - lex->tok->constval.f = strtod(lex->tok->value, NULL); + lex_endtoken(lex); + if (lex->tok.ttype == TOKEN_FLOATCONST) + lex->tok.constval.f = strtod(lex->tok.value, NULL); else - lex->tok->constval.i = strtol(lex->tok->value, NULL, 0); - return lex->tok->ttype; + lex->tok.constval.i = strtol(lex->tok.value, NULL, 0); + return lex->tok.ttype; } int lex_do(lex_file *lex) { int ch, nextch; - if (lex->tok) - token_delete(lex->tok); - lex->tok = token_new(); + lex_token_new(lex); +#if 0 if (!lex->tok) return TOKEN_FATAL; +#endif + + while (true) { + ch = lex_skipwhite(lex); + if (!lex->flags.mergelines || ch != '\\') + break; + ch = lex_getch(lex); + if (ch != '\n') { + lex_ungetch(lex, ch); + ch = '\\'; + break; + } + /* we reached a linemerge */ + continue; + } - ch = lex_skipwhite(lex); lex->sline = lex->line; - lex->tok->ctx.line = lex->sline; - lex->tok->ctx.file = lex->name; + lex->tok.ctx.line = lex->sline; + lex->tok.ctx.file = lex->name; + + if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) { + return (lex->tok.ttype = ch); + } if (lex->eof) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); if (ch == EOF) { lex->eof = true; - return (lex->tok->ttype = TOKEN_EOF); + return (lex->tok.ttype = TOKEN_EOF); } /* modelgen / spiritgen commands */ @@ -580,14 +715,12 @@ int lex_do(lex_file *lex) lexerror(lex, "hanging '$' modelgen/spritegen command line"); return lex_do(lex); } - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, ch); if (!lex_finish_ident(lex)) - return (lex->tok->ttype = TOKEN_ERROR); - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_ERROR); + lex_endtoken(lex); /* skip the known commands */ - v = lex->tok->value; + v = lex->tok.value; if (!strcmp(v, "frame") || !strcmp(v, "framesave")) { @@ -597,7 +730,7 @@ int lex_do(lex_file *lex) * which the parser is unaware of */ if (!lex_finish_frames(lex)) - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); return lex_do(lex); } @@ -612,16 +745,14 @@ int lex_do(lex_file *lex) return lex_do(lex); } - token_delete(lex->tok); - lex->tok = token_new(); - lex->tok->ttype = lex_finish_digit(lex, ch); - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - if (lex->tok->ttype != TOKEN_INTCONST) { + lex_token_new(lex); + lex->tok.ttype = lex_finish_digit(lex, ch); + lex_endtoken(lex); + if (lex->tok.ttype != TOKEN_INTCONST) { lexerror(lex, "$framevalue requires an integer parameter"); return lex_do(lex); } - lex->framevalue = lex->tok->constval.i; + lex->framevalue = lex->tok.constval.i; return lex_do(lex); } @@ -629,8 +760,7 @@ int lex_do(lex_file *lex) { int rc; - token_delete(lex->tok); - lex->tok = token_new(); + lex_token_new(lex); rc = lex_parse_frame(lex); @@ -639,10 +769,10 @@ int lex_do(lex_file *lex) return lex_do(lex); } if (rc < 0) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); - v = lex->tok->value; - for (frame = 0; frame < lex->frames_count; ++frame) { + v = lex->tok.value; + for (frame = 0; frame < vec_size(lex->frames); ++frame) { if (!strcmp(v, lex->frames[frame].name)) { lex->framevalue = lex->frames[frame].value; return lex_do(lex); @@ -656,46 +786,36 @@ int lex_do(lex_file *lex) { int rc; - token_delete(lex->tok); - lex->tok = token_new(); + lex_token_new(lex); rc = lex_parse_frame(lex); if (rc > 0) { - lexerror(lex, "$framerestore requires a framename parameter"); + lexerror(lex, "$modelname requires a parameter"); return lex_do(lex); } if (rc < 0) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); - v = lex->tok->value; + v = lex->tok.value; if (lex->modelname) { frame_macro m; m.value = lex->framevalue; m.name = lex->modelname; lex->modelname = NULL; - if (!lex_file_frames_add(lex, m)) { - lexerror(lex, "out of memory"); - return (lex->tok->ttype = TOKEN_FATAL); - } - } - lex->modelname = lex->tok->value; - lex->tok->value = NULL; - for (frame = 0; frame < lex->frames_count; ++frame) { - if (!strcmp(v, lex->frames[frame].name)) { - lex->framevalue = lex->frames[frame].value; - break; - } + vec_push(lex->frames, m); } + lex->modelname = lex->tok.value; + lex->tok.value = NULL; return lex_do(lex); } if (!strcmp(v, "flush")) { size_t frame; - for (frame = 0; frame < lex->frames_count; ++frame) + for (frame = 0; frame < vec_size(lex->frames); ++frame) mem_d(lex->frames[frame].name); - MEM_VECTOR_CLEAR(lex, frames); + vec_free(lex->frames); /* skip line (fteqcc does it too) */ ch = lex_getch(lex); while (ch != EOF && ch != '\n') @@ -717,10 +837,10 @@ int lex_do(lex_file *lex) return lex_do(lex); } - for (frame = 0; frame < lex->frames_count; ++frame) { + for (frame = 0; frame < vec_size(lex->frames); ++frame) { if (!strcmp(v, lex->frames[frame].name)) { - lex->tok->constval.i = lex->frames[frame].value; - return (lex->tok->ttype = TOKEN_INTCONST); + lex->tok.constval.i = lex->frames[frame].value; + return (lex->tok.ttype = TOKEN_INTCONST); } } @@ -731,30 +851,24 @@ int lex_do(lex_file *lex) /* single-character tokens */ switch (ch) { + case '[': case '(': - if (!lex_tokench(lex, ch) || - !lex_endtoken(lex)) - { - return (lex->tok->ttype = TOKEN_FATAL); - } + lex_tokench(lex, ch); + lex_endtoken(lex); if (lex->flags.noops) - return (lex->tok->ttype = ch); + return (lex->tok.ttype = ch); else - return (lex->tok->ttype = TOKEN_OPERATOR); + return (lex->tok.ttype = TOKEN_OPERATOR); case ')': case ';': case '{': case '}': - case '[': case ']': case '#': - if (!lex_tokench(lex, ch) || - !lex_endtoken(lex)) - { - return (lex->tok->ttype = TOKEN_FATAL); - } - return (lex->tok->ttype = ch); + lex_tokench(lex, ch); + lex_endtoken(lex); + return (lex->tok.ttype = ch); default: break; } @@ -779,55 +893,43 @@ int lex_do(lex_file *lex) case '~': case ',': case '!': - if (!lex_tokench(lex, ch) || - !lex_endtoken(lex)) - { - return (lex->tok->ttype = TOKEN_FATAL); - } - return (lex->tok->ttype = ch); + lex_tokench(lex, ch); + lex_endtoken(lex); + return (lex->tok.ttype = ch); default: break; } if (ch == '.') { - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, ch); /* peak ahead once */ nextch = lex_getch(lex); if (nextch != '.') { lex_ungetch(lex, nextch); - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return (lex->tok->ttype = ch); + lex_endtoken(lex); + return (lex->tok.ttype = ch); } /* peak ahead again */ nextch = lex_getch(lex); if (nextch != '.') { lex_ungetch(lex, nextch); lex_ungetch(lex, nextch); - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return (lex->tok->ttype = ch); + lex_endtoken(lex); + return (lex->tok.ttype = ch); } /* fill the token to be "..." */ - if (!lex_tokench(lex, ch) || - !lex_tokench(lex, ch) || - !lex_endtoken(lex)) - { - return (lex->tok->ttype = TOKEN_FATAL); - } - return (lex->tok->ttype = TOKEN_DOTS); + lex_tokench(lex, ch); + lex_tokench(lex, ch); + lex_endtoken(lex); + return (lex->tok.ttype = TOKEN_DOTS); } } if (ch == ',' || ch == '.') { - if (!lex_tokench(lex, ch) || - !lex_endtoken(lex)) - { - return (lex->tok->ttype = TOKEN_FATAL); - } - return (lex->tok->ttype = TOKEN_OPERATOR); + lex_tokench(lex, ch); + lex_endtoken(lex); + return (lex->tok.ttype = TOKEN_OPERATOR); } if (ch == '+' || ch == '-' || /* ++, --, +=, -= and -> as well! */ @@ -835,86 +937,74 @@ int lex_do(lex_file *lex) ch == '=' || ch == '!' || /* ==, != */ ch == '&' || ch == '|') /* &&, ||, &=, |= */ { - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, ch); nextch = lex_getch(lex); if (nextch == ch || nextch == '=') { - if (!lex_tokench(lex, nextch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, nextch); } else if (ch == '-' && nextch == '>') { - if (!lex_tokench(lex, nextch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, nextch); } else lex_ungetch(lex, nextch); - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return (lex->tok->ttype = TOKEN_OPERATOR); + lex_endtoken(lex); + return (lex->tok.ttype = TOKEN_OPERATOR); } /* if (ch == '^' || ch == '~' || ch == '!') { - if (!lex_tokench(lex, ch) || - !lex_endtoken(lex)) - { - return (lex->tok->ttype = TOKEN_FATAL); - } - return (lex->tok->ttype = TOKEN_OPERATOR); + lex_tokench(lex, ch); + lex_endtoken(lex); + return (lex->tok.ttype = TOKEN_OPERATOR); } */ if (ch == '*' || ch == '/') /* *=, /= */ { - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, ch); nextch = lex_getch(lex); if (nextch == '=') { - if (!lex_tokench(lex, nextch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, nextch); } else lex_ungetch(lex, nextch); - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return (lex->tok->ttype = TOKEN_OPERATOR); + lex_endtoken(lex); + return (lex->tok.ttype = TOKEN_OPERATOR); } if (isident_start(ch)) { const char *v; - if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + lex_tokench(lex, ch); if (!lex_finish_ident(lex)) { /* error? */ - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); } - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - lex->tok->ttype = TOKEN_IDENT; + lex_endtoken(lex); + lex->tok.ttype = TOKEN_IDENT; - v = lex->tok->value; + v = lex->tok.value; if (!strcmp(v, "void")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_VOID; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_VOID; } else if (!strcmp(v, "int")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_INTEGER; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_INTEGER; } else if (!strcmp(v, "float")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_FLOAT; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_FLOAT; } else if (!strcmp(v, "string")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_STRING; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_STRING; } else if (!strcmp(v, "entity")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_ENTITY; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_ENTITY; } else if (!strcmp(v, "vector")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_VECTOR; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_VECTOR; } else if (!strcmp(v, "for") || !strcmp(v, "while") || !strcmp(v, "do") || @@ -922,16 +1012,37 @@ int lex_do(lex_file *lex) !strcmp(v, "else") || !strcmp(v, "local") || !strcmp(v, "return") || + !strcmp(v, "not") || !strcmp(v, "const")) - lex->tok->ttype = TOKEN_KEYWORD; + { + lex->tok.ttype = TOKEN_KEYWORD; + } + else if (opts_standard != COMPILER_QCC) + { + /* other standards reserve these keywords */ + if (!strcmp(v, "switch") || + !strcmp(v, "struct") || + !strcmp(v, "union") || + !strcmp(v, "break") || + !strcmp(v, "continue") || + !strcmp(v, "var")) + { + lex->tok.ttype = TOKEN_KEYWORD; + } + } - return lex->tok->ttype; + return lex->tok.ttype; } if (ch == '"') { - lex->tok->ttype = lex_finish_string(lex, '"'); - while (lex->tok->ttype == TOKEN_STRINGCONST) + lex->flags.nodigraphs = true; + if (lex->flags.preprocessing) + lex_tokench(lex, ch); + lex->tok.ttype = lex_finish_string(lex, '"'); + if (lex->flags.preprocessing) + lex_tokench(lex, ch); + while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST) { /* Allow c style "string" "continuation" */ ch = lex_skipwhite(lex); @@ -940,11 +1051,11 @@ int lex_do(lex_file *lex) break; } - lex->tok->ttype = lex_finish_string(lex, '"'); + lex->tok.ttype = lex_finish_string(lex, '"'); } - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return lex->tok->ttype; + lex->flags.nodigraphs = false; + lex_endtoken(lex); + return lex->tok.ttype; } if (ch == '\'') @@ -954,33 +1065,36 @@ int lex_do(lex_file *lex) * Likewise actual unescaping has to be done by the parser. * The difference is we don't allow 'char' 'continuation'. */ - lex->tok->ttype = lex_finish_string(lex, '\''); - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); + if (lex->flags.preprocessing) + lex_tokench(lex, ch); + lex->tok.ttype = lex_finish_string(lex, '\''); + if (lex->flags.preprocessing) + lex_tokench(lex, ch); + lex_endtoken(lex); /* It's a vector if we can successfully scan 3 floats */ #ifdef WIN32 - if (sscanf_s(lex->tok->value, " %f %f %f ", - &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3) + if (sscanf_s(lex->tok.value, " %f %f %f ", + &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3) #else - if (sscanf(lex->tok->value, " %f %f %f ", - &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3) + if (sscanf(lex->tok.value, " %f %f %f ", + &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3) #endif - { - lex->tok->ttype = TOKEN_VECTORCONST; - } - return lex->tok->ttype; + { + lex->tok.ttype = TOKEN_VECTORCONST; + } + + return lex->tok.ttype; } if (isdigit(ch)) { - lex->tok->ttype = lex_finish_digit(lex, ch); - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return lex->tok->ttype; + lex->tok.ttype = lex_finish_digit(lex, ch); + lex_endtoken(lex); + return lex->tok.ttype; } lexerror(lex, "unknown token"); - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); }