X-Git-Url: https://git.xonotic.org/?a=blobdiff_plain;f=lexer.c;h=64caad686c6ea35f43a729136c53ef9ae836a642;hb=01b619d53d3aef62234a6c56ee7fedd294ee7820;hp=080969250af38dab471e465ac4dad1ed922e7a8d;hpb=4f758284a1773aa1de081fe2801d7ad9f778cbe6;p=xonotic%2Fgmqcc.git diff --git a/lexer.c b/lexer.c index 0809692..64caad6 100644 --- a/lexer.c +++ b/lexer.c @@ -38,6 +38,8 @@ bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...) return opts_werror; } + +#if 0 token* token_new() { token *tok = (token*)mem_a(sizeof(token)); @@ -113,6 +115,21 @@ token* token_copy_all(const token *cp) return out; } +#else +static void lex_token_new(lex_file *lex) +{ +#if 0 + if (lex->tok) + token_delete(lex->tok); + lex->tok = token_new(); +#else + lex->tok.value_count = 0; + lex->tok.constval.t = 0; + lex->tok.ctx.line = lex->sline; + lex->tok.ctx.file = lex->name; +#endif +} +#endif lex_file* lex_open(const char *file) { @@ -145,6 +162,34 @@ lex_file* lex_open(const char *file) return lex; } +lex_file* lex_open_string(const char *str, size_t len, const char *name) +{ + lex_file *lex; + + lex = (lex_file*)mem_a(sizeof(*lex)); + if (!lex) { + lexerror(NULL, "out of memory\n"); + return NULL; + } + + memset(lex, 0, sizeof(*lex)); + + lex->file = NULL; + lex->open_string = str; + lex->open_string_length = len; + lex->open_string_pos = 0; + + lex->name = util_strdup(name ? name : ""); + lex->line = 1; /* we start counting at 1 */ + + lex->peekpos = 0; + lex->eof = false; + + lex_filenames_add(lex->name); + + return lex; +} + void lex_cleanup(void) { size_t i; @@ -165,17 +210,79 @@ void lex_close(lex_file *lex) if (lex->file) fclose(lex->file); +#if 0 if (lex->tok) token_delete(lex->tok); +#else + MEM_VECTOR_CLEAR(&(lex->tok), value); +#endif /* mem_d(lex->name); collected in lex_filenames */ mem_d(lex); } +static int lex_fgetc(lex_file *lex) +{ + if (lex->file) + return fgetc(lex->file); + if (lex->open_string) { + if (lex->open_string_pos >= lex->open_string_length) + return EOF; + return lex->open_string[lex->open_string_pos++]; + } + return EOF; +} + /* Get or put-back data * The following to functions do NOT understand what kind of data they * are working on. * The are merely wrapping get/put in order to count line numbers. */ +static void lex_ungetch(lex_file *lex, int ch); +static int lex_try_trigraph(lex_file *lex, int old) +{ + int c2, c3; + c2 = lex_fgetc(lex); + if (c2 != '?') { + lex_ungetch(lex, c2); + return old; + } + + c3 = lex_fgetc(lex); + switch (c3) { + case '=': return '#'; + case '/': return '\\'; + case '\'': return '^'; + case '(': return '['; + case ')': return ']'; + case '!': return '|'; + case '<': return '{'; + case '>': return '}'; + case '-': return '~'; + default: + lex_ungetch(lex, c3); + lex_ungetch(lex, c2); + return old; + } +} + +static int lex_try_digraph(lex_file *lex, int ch) +{ + int c2; + c2 = lex_fgetc(lex); + if (ch == '<' && c2 == ':') + return '['; + else if (ch == ':' && c2 == '>') + return ']'; + else if (ch == '<' && c2 == '%') + return '{'; + else if (ch == '%' && c2 == '>') + return '}'; + else if (ch == '%' && c2 == ':') + return '#'; + lex_ungetch(lex, c2); + return ch; +} + static int lex_getch(lex_file *lex) { int ch; @@ -187,9 +294,13 @@ static int lex_getch(lex_file *lex) return lex->peek[lex->peekpos]; } - ch = fgetc(lex->file); + ch = lex_fgetc(lex); if (ch == '\n') lex->line++; + else if (ch == '?') + return lex_try_trigraph(lex, ch); + else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%')) + return lex_try_digraph(lex, ch); return ch; } @@ -223,6 +334,27 @@ static bool isxdigit_only(int ch) return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); } +/* Append a character to the token buffer */ +static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch) +{ + if (!token_value_add(&lex->tok, ch)) { + lexerror(lex, "out of memory"); + return false; + } + return true; +} + +/* Append a trailing null-byte */ +static bool GMQCC_WARN lex_endtoken(lex_file *lex) +{ + if (!token_value_add(&lex->tok, 0)) { + lexerror(lex, "out of memory"); + return false; + } + lex->tok.value_count--; + return true; +} + /* Skip whitespace and comments and return the first * non-white character. * As this makes use of the above getch() ungetch() functions, @@ -258,11 +390,31 @@ printf( "line one\n" static int lex_skipwhite(lex_file *lex) { int ch = 0; + bool haswhite = false; do { ch = lex_getch(lex); - while (ch != EOF && isspace(ch)) ch = lex_getch(lex); + while (ch != EOF && isspace(ch)) { + if (lex->flags.preprocessing) { + if (ch == '\n') { + /* end-of-line */ + /* see if there was whitespace first */ + if (haswhite) { /* (lex->tok.value_count) { */ + lex_ungetch(lex, ch); + if (!lex_endtoken(lex)) + return TOKEN_FATAL; + return TOKEN_WHITE; + } + /* otherwise return EOL */ + return TOKEN_EOL; + } + haswhite = true; + if (!lex_tokench(lex, ch)) + return TOKEN_FATAL; + } + ch = lex_getch(lex); + } if (ch == '/') { ch = lex_getch(lex); @@ -271,39 +423,62 @@ static int lex_skipwhite(lex_file *lex) /* one line comment */ ch = lex_getch(lex); - /* check for special: '/', '/', '*', '/' */ - if (ch == '*') { - ch = lex_getch(lex); - if (ch == '/') { - ch = ' '; - continue; + if (lex->flags.preprocessing) { + haswhite = true; + if (!lex_tokench(lex, '/') || + !lex_tokench(lex, '/')) + { + return TOKEN_FATAL; } } while (ch != EOF && ch != '\n') { + if (lex->flags.preprocessing && !lex_tokench(lex, ch)) + return TOKEN_FATAL; ch = lex_getch(lex); } + if (lex->flags.preprocessing) { + lex_ungetch(lex, '\n'); + if (!lex_endtoken(lex)) + return TOKEN_FATAL; + return TOKEN_WHITE; + } continue; } if (ch == '*') { /* multiline comment */ + if (lex->flags.preprocessing) { + haswhite = true; + if (!lex_tokench(lex, '/') || + !lex_tokench(lex, '*')) + { + return TOKEN_FATAL; + } + } + while (ch != EOF) { ch = lex_getch(lex); if (ch == '*') { ch = lex_getch(lex); if (ch == '/') { - ch = lex_getch(lex); + if (lex->flags.preprocessing) { + if (!lex_tokench(lex, '*') || + !lex_tokench(lex, '/')) + { + return TOKEN_FATAL; + } + } break; } } + if (lex->flags.preprocessing) { + if (!lex_tokench(lex, ch)) + return TOKEN_FATAL; + } } - if (ch == '/') /* allow *//* direct following comment */ - { - lex_ungetch(lex, ch); - ch = ' '; /* cause TRUE in the isspace check */ - } + ch = ' '; /* cause TRUE in the isspace check */ continue; } /* Otherwise roll back to the slash and break out of the loop */ @@ -313,28 +488,13 @@ static int lex_skipwhite(lex_file *lex) } } while (ch != EOF && isspace(ch)); - return ch; -} - -/* Append a character to the token buffer */ -static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch) -{ - if (!token_value_add(lex->tok, ch)) { - lexerror(lex, "out of memory"); - return false; - } - return true; -} - -/* Append a trailing null-byte */ -static bool GMQCC_WARN lex_endtoken(lex_file *lex) -{ - if (!token_value_add(lex->tok, 0)) { - lexerror(lex, "out of memory"); - return false; + if (haswhite) { + if (!lex_endtoken(lex)) + return TOKEN_FATAL; + lex_ungetch(lex, ch); + return TOKEN_WHITE; } - lex->tok->value_count--; - return true; + return ch; } /* Get a token */ @@ -346,7 +506,7 @@ static bool GMQCC_WARN lex_finish_ident(lex_file *lex) while (ch != EOF && isident(ch)) { if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); ch = lex_getch(lex); } @@ -361,9 +521,7 @@ static int lex_parse_frame(lex_file *lex) { int ch; - if (lex->tok) - token_delete(lex->tok); - lex->tok = token_new(); + lex_token_new(lex); ch = lex_getch(lex); while (ch != EOF && ch != '\n' && isspace(ch)) @@ -401,17 +559,20 @@ static bool lex_finish_frames(lex_file *lex) return false; for (i = 0; i < lex->frames_count; ++i) { - if (!strcmp(lex->tok->value, lex->frames[i].name)) { + if (!strcmp(lex->tok.value, lex->frames[i].name)) { lex->frames[i].value = lex->framevalue++; - if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok->value)) + if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value)) return false; - continue; + break; } } + if (i < lex->frames_count) + continue; m.value = lex->framevalue++; - m.name = lex->tok->value; - lex->tok->value = NULL; + m.name = lex->tok.value; + lex->tok.value = NULL; + lex->tok.value_alloc = lex->tok.value_count = 0; if (!lex_file_frames_add(lex, m)) { lexerror(lex, "out of memory"); return false; @@ -429,12 +590,12 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote) if (ch == quote) return TOKEN_STRINGCONST; - if (ch == '\\') { + if (!lex->flags.preprocessing && ch == '\\') { ch = lex_getch(lex); if (ch == EOF) { lexerror(lex, "unexpected end of file"); lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */ - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); } switch (ch) { @@ -450,18 +611,18 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote) lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch); /* so we just add the character plus backslash no matter what it actually is */ if (!lex_tokench(lex, '\\')) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } /* add the character finally */ if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } else if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } lexerror(lex, "unexpected end of file within string constant"); lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */ - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); } static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) @@ -471,10 +632,10 @@ static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) int ch = lastch; /* parse a number... */ - lex->tok->ttype = TOKEN_INTCONST; + lex->tok.ttype = TOKEN_INTCONST; if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); ch = lex_getch(lex); if (ch != '.' && !isdigit(ch)) @@ -484,10 +645,10 @@ static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) /* end of the number or EOF */ lex_ungetch(lex, ch); if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); - lex->tok->constval.i = lastch - '0'; - return lex->tok->ttype; + lex->tok.constval.i = lastch - '0'; + return lex->tok.ttype; } ishex = true; @@ -498,12 +659,12 @@ static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) if (ch != '.') { if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); ch = lex_getch(lex); while (isdigit(ch) || (ishex && isxdigit_only(ch))) { if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); ch = lex_getch(lex); } } @@ -511,61 +672,65 @@ static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) if (ch == '.' && !ishex) { /* Allow floating comma in non-hex mode */ - lex->tok->ttype = TOKEN_FLOATCONST; + lex->tok.ttype = TOKEN_FLOATCONST; if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); /* continue digits-only */ ch = lex_getch(lex); while (isdigit(ch)) { if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); ch = lex_getch(lex); } } /* put back the last character */ /* but do not put back the trailing 'f' or a float */ - if (lex->tok->ttype == TOKEN_FLOATCONST && ch == 'f') + if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f') ch = lex_getch(lex); /* generally we don't want words to follow numbers: */ if (isident(ch)) { lexerror(lex, "unexpected trailing characters after number"); - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); } lex_ungetch(lex, ch); if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - if (lex->tok->ttype == TOKEN_FLOATCONST) - lex->tok->constval.f = strtod(lex->tok->value, NULL); + return (lex->tok.ttype = TOKEN_FATAL); + if (lex->tok.ttype == TOKEN_FLOATCONST) + lex->tok.constval.f = strtod(lex->tok.value, NULL); else - lex->tok->constval.i = strtol(lex->tok->value, NULL, 0); - return lex->tok->ttype; + lex->tok.constval.i = strtol(lex->tok.value, NULL, 0); + return lex->tok.ttype; } int lex_do(lex_file *lex) { int ch, nextch; - if (lex->tok) - token_delete(lex->tok); - lex->tok = token_new(); + lex_token_new(lex); +#if 0 if (!lex->tok) return TOKEN_FATAL; +#endif ch = lex_skipwhite(lex); lex->sline = lex->line; - lex->tok->ctx.line = lex->sline; - lex->tok->ctx.file = lex->name; + lex->tok.ctx.line = lex->sline; + lex->tok.ctx.file = lex->name; + + if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) { + return (lex->tok.ttype = ch); + } if (lex->eof) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); if (ch == EOF) { lex->eof = true; - return (lex->tok->ttype = TOKEN_EOF); + return (lex->tok.ttype = TOKEN_EOF); } /* modelgen / spiritgen commands */ @@ -579,13 +744,13 @@ int lex_do(lex_file *lex) return lex_do(lex); } if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); if (!lex_finish_ident(lex)) - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); /* skip the known commands */ - v = lex->tok->value; + v = lex->tok.value; if (!strcmp(v, "frame") || !strcmp(v, "framesave")) { @@ -595,7 +760,7 @@ int lex_do(lex_file *lex) * which the parser is unaware of */ if (!lex_finish_frames(lex)) - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); return lex_do(lex); } @@ -610,16 +775,15 @@ int lex_do(lex_file *lex) return lex_do(lex); } - token_delete(lex->tok); - lex->tok = token_new(); - lex->tok->ttype = lex_finish_digit(lex, ch); + lex_token_new(lex); + lex->tok.ttype = lex_finish_digit(lex, ch); if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - if (lex->tok->ttype != TOKEN_INTCONST) { + return (lex->tok.ttype = TOKEN_FATAL); + if (lex->tok.ttype != TOKEN_INTCONST) { lexerror(lex, "$framevalue requires an integer parameter"); return lex_do(lex); } - lex->framevalue = lex->tok->constval.i; + lex->framevalue = lex->tok.constval.i; return lex_do(lex); } @@ -627,8 +791,7 @@ int lex_do(lex_file *lex) { int rc; - token_delete(lex->tok); - lex->tok = token_new(); + lex_token_new(lex); rc = lex_parse_frame(lex); @@ -637,9 +800,9 @@ int lex_do(lex_file *lex) return lex_do(lex); } if (rc < 0) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); - v = lex->tok->value; + v = lex->tok.value; for (frame = 0; frame < lex->frames_count; ++frame) { if (!strcmp(v, lex->frames[frame].name)) { lex->framevalue = lex->frames[frame].value; @@ -654,8 +817,7 @@ int lex_do(lex_file *lex) { int rc; - token_delete(lex->tok); - lex->tok = token_new(); + lex_token_new(lex); rc = lex_parse_frame(lex); @@ -664,9 +826,9 @@ int lex_do(lex_file *lex) return lex_do(lex); } if (rc < 0) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); - v = lex->tok->value; + v = lex->tok.value; if (lex->modelname) { frame_macro m; m.value = lex->framevalue; @@ -674,11 +836,12 @@ int lex_do(lex_file *lex) lex->modelname = NULL; if (!lex_file_frames_add(lex, m)) { lexerror(lex, "out of memory"); - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } } - lex->modelname = lex->tok->value; - lex->tok->value = NULL; + lex->modelname = lex->tok.value; + lex->tok.value = NULL; + lex->tok.value_alloc = lex->tok.value_count = 0; for (frame = 0; frame < lex->frames_count; ++frame) { if (!strcmp(v, lex->frames[frame].name)) { lex->framevalue = lex->frames[frame].value; @@ -717,8 +880,8 @@ int lex_do(lex_file *lex) for (frame = 0; frame < lex->frames_count; ++frame) { if (!strcmp(v, lex->frames[frame].name)) { - lex->tok->constval.i = lex->frames[frame].value; - return (lex->tok->ttype = TOKEN_INTCONST); + lex->tok.constval.i = lex->frames[frame].value; + return (lex->tok.ttype = TOKEN_INTCONST); } } @@ -733,12 +896,12 @@ int lex_do(lex_file *lex) if (!lex_tokench(lex, ch) || !lex_endtoken(lex)) { - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } if (lex->flags.noops) - return (lex->tok->ttype = ch); + return (lex->tok.ttype = ch); else - return (lex->tok->ttype = TOKEN_OPERATOR); + return (lex->tok.ttype = TOKEN_OPERATOR); case ')': case ';': case '{': @@ -750,9 +913,9 @@ int lex_do(lex_file *lex) if (!lex_tokench(lex, ch) || !lex_endtoken(lex)) { - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } - return (lex->tok->ttype = ch); + return (lex->tok.ttype = ch); default: break; } @@ -780,9 +943,9 @@ int lex_do(lex_file *lex) if (!lex_tokench(lex, ch) || !lex_endtoken(lex)) { - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } - return (lex->tok->ttype = ch); + return (lex->tok.ttype = ch); default: break; } @@ -790,14 +953,14 @@ int lex_do(lex_file *lex) if (ch == '.') { if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); /* peak ahead once */ nextch = lex_getch(lex); if (nextch != '.') { lex_ungetch(lex, nextch); if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return (lex->tok->ttype = ch); + return (lex->tok.ttype = TOKEN_FATAL); + return (lex->tok.ttype = ch); } /* peak ahead again */ nextch = lex_getch(lex); @@ -805,17 +968,17 @@ int lex_do(lex_file *lex) lex_ungetch(lex, nextch); lex_ungetch(lex, nextch); if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return (lex->tok->ttype = ch); + return (lex->tok.ttype = TOKEN_FATAL); + return (lex->tok.ttype = ch); } /* fill the token to be "..." */ if (!lex_tokench(lex, ch) || !lex_tokench(lex, ch) || !lex_endtoken(lex)) { - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } - return (lex->tok->ttype = TOKEN_DOTS); + return (lex->tok.ttype = TOKEN_DOTS); } } @@ -823,9 +986,9 @@ int lex_do(lex_file *lex) if (!lex_tokench(lex, ch) || !lex_endtoken(lex)) { - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } - return (lex->tok->ttype = TOKEN_OPERATOR); + return (lex->tok.ttype = TOKEN_OPERATOR); } if (ch == '+' || ch == '-' || /* ++, --, +=, -= and -> as well! */ @@ -834,21 +997,21 @@ int lex_do(lex_file *lex) ch == '&' || ch == '|') /* &&, ||, &=, |= */ { if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); nextch = lex_getch(lex); if (nextch == ch || nextch == '=') { if (!lex_tokench(lex, nextch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } else if (ch == '-' && nextch == '>') { if (!lex_tokench(lex, nextch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } else lex_ungetch(lex, nextch); if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return (lex->tok->ttype = TOKEN_OPERATOR); + return (lex->tok.ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_OPERATOR); } /* @@ -857,27 +1020,27 @@ int lex_do(lex_file *lex) if (!lex_tokench(lex, ch) || !lex_endtoken(lex)) { - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } - return (lex->tok->ttype = TOKEN_OPERATOR); + return (lex->tok.ttype = TOKEN_OPERATOR); } */ if (ch == '*' || ch == '/') /* *=, /= */ { if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); nextch = lex_getch(lex); if (nextch == '=') { if (!lex_tokench(lex, nextch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); } else lex_ungetch(lex, nextch); if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return (lex->tok->ttype = TOKEN_OPERATOR); + return (lex->tok.ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_OPERATOR); } if (isident_start(ch)) @@ -885,34 +1048,34 @@ int lex_do(lex_file *lex) const char *v; if (!lex_tokench(lex, ch)) - return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok.ttype = TOKEN_FATAL); if (!lex_finish_ident(lex)) { /* error? */ - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); } if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - lex->tok->ttype = TOKEN_IDENT; + return (lex->tok.ttype = TOKEN_FATAL); + lex->tok.ttype = TOKEN_IDENT; - v = lex->tok->value; + v = lex->tok.value; if (!strcmp(v, "void")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_VOID; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_VOID; } else if (!strcmp(v, "int")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_INTEGER; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_INTEGER; } else if (!strcmp(v, "float")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_FLOAT; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_FLOAT; } else if (!strcmp(v, "string")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_STRING; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_STRING; } else if (!strcmp(v, "entity")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_ENTITY; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_ENTITY; } else if (!strcmp(v, "vector")) { - lex->tok->ttype = TOKEN_TYPENAME; - lex->tok->constval.t = TYPE_VECTOR; + lex->tok.ttype = TOKEN_TYPENAME; + lex->tok.constval.t = TYPE_VECTOR; } else if (!strcmp(v, "for") || !strcmp(v, "while") || !strcmp(v, "do") || @@ -921,15 +1084,34 @@ int lex_do(lex_file *lex) !strcmp(v, "local") || !strcmp(v, "return") || !strcmp(v, "const")) - lex->tok->ttype = TOKEN_KEYWORD; + { + lex->tok.ttype = TOKEN_KEYWORD; + } + else if (opts_standard != COMPILER_QCC) + { + /* other standards reserve these keywords */ + if (!strcmp(v, "switch") || + !strcmp(v, "struct") || + !strcmp(v, "union") || + !strcmp(v, "break") || + !strcmp(v, "continue")) + { + lex->tok.ttype = TOKEN_KEYWORD; + } + } - return lex->tok->ttype; + return lex->tok.ttype; } if (ch == '"') { - lex->tok->ttype = lex_finish_string(lex, '"'); - while (lex->tok->ttype == TOKEN_STRINGCONST) + lex->flags.nodigraphs = true; + if (lex->flags.preprocessing && !lex_tokench(lex, ch)) + return TOKEN_FATAL; + lex->tok.ttype = lex_finish_string(lex, '"'); + if (lex->flags.preprocessing && !lex_tokench(lex, ch)) + return TOKEN_FATAL; + while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST) { /* Allow c style "string" "continuation" */ ch = lex_skipwhite(lex); @@ -938,11 +1120,12 @@ int lex_do(lex_file *lex) break; } - lex->tok->ttype = lex_finish_string(lex, '"'); + lex->tok.ttype = lex_finish_string(lex, '"'); } + lex->flags.nodigraphs = false; if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return lex->tok->ttype; + return (lex->tok.ttype = TOKEN_FATAL); + return lex->tok.ttype; } if (ch == '\'') @@ -952,33 +1135,38 @@ int lex_do(lex_file *lex) * Likewise actual unescaping has to be done by the parser. * The difference is we don't allow 'char' 'continuation'. */ - lex->tok->ttype = lex_finish_string(lex, '\''); - if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); + if (lex->flags.preprocessing && !lex_tokench(lex, ch)) + return TOKEN_FATAL; + lex->tok.ttype = lex_finish_string(lex, '\''); + if (lex->flags.preprocessing && !lex_tokench(lex, ch)) + return TOKEN_FATAL; + if (!lex_endtoken(lex)) + return (lex->tok.ttype = TOKEN_FATAL); /* It's a vector if we can successfully scan 3 floats */ #ifdef WIN32 - if (sscanf_s(lex->tok->value, " %f %f %f ", - &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3) + if (sscanf_s(lex->tok.value, " %f %f %f ", + &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3) #else - if (sscanf(lex->tok->value, " %f %f %f ", - &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3) + if (sscanf(lex->tok.value, " %f %f %f ", + &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3) #endif - { - lex->tok->ttype = TOKEN_VECTORCONST; - } - return lex->tok->ttype; + { + lex->tok.ttype = TOKEN_VECTORCONST; + } + + return lex->tok.ttype; } if (isdigit(ch)) { - lex->tok->ttype = lex_finish_digit(lex, ch); + lex->tok.ttype = lex_finish_digit(lex, ch); if (!lex_endtoken(lex)) - return (lex->tok->ttype = TOKEN_FATAL); - return lex->tok->ttype; + return (lex->tok.ttype = TOKEN_FATAL); + return lex->tok.ttype; } lexerror(lex, "unknown token"); - return (lex->tok->ttype = TOKEN_ERROR); + return (lex->tok.ttype = TOKEN_ERROR); }