X-Git-Url: https://git.xonotic.org/?a=blobdiff_plain;f=lexer.c;h=08a63d624bb7cfc258c9e654aadae2cefb95ed05;hb=b219d4b468e5393d693a6c37b6748f1a1579f9e6;hp=373009394e811a47f0c31ccc944d92b8867c3ad9;hpb=e6cebc3ea35b3e96282e88f2792b8a2df68a868d;p=xonotic%2Fgmqcc.git diff --git a/lexer.c b/lexer.c index 3730093..08a63d6 100644 --- a/lexer.c +++ b/lexer.c @@ -225,6 +225,24 @@ static int lex_try_trigraph(lex_file *lex, int old) } } +static int lex_try_digraph(lex_file *lex, int ch) +{ + int c2; + c2 = fgetc(lex->file); + if (ch == '<' && c2 == ':') + return '['; + else if (ch == ':' && c2 == '>') + return ']'; + else if (ch == '<' && c2 == '%') + return '{'; + else if (ch == '%' && c2 == '>') + return '}'; + else if (ch == '%' && c2 == ':') + return '#'; + lex_ungetch(lex, c2); + return ch; +} + static int lex_getch(lex_file *lex) { int ch; @@ -241,6 +259,8 @@ static int lex_getch(lex_file *lex) lex->line++; else if (ch == '?') return lex_try_trigraph(lex, ch); + else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%')) + return lex_try_digraph(lex, ch); return ch; } @@ -274,6 +294,27 @@ static bool isxdigit_only(int ch) return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); } +/* Append a character to the token buffer */ +static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch) +{ + if (!token_value_add(&lex->tok, ch)) { + lexerror(lex, "out of memory"); + return false; + } + return true; +} + +/* Append a trailing null-byte */ +static bool GMQCC_WARN lex_endtoken(lex_file *lex) +{ + if (!token_value_add(&lex->tok, 0)) { + lexerror(lex, "out of memory"); + return false; + } + lex->tok.value_count--; + return true; +} + /* Skip whitespace and comments and return the first * non-white character. * As this makes use of the above getch() ungetch() functions, @@ -309,52 +350,95 @@ printf( "line one\n" static int lex_skipwhite(lex_file *lex) { int ch = 0; + bool haswhite = false; do { ch = lex_getch(lex); - while (ch != EOF && isspace(ch)) ch = lex_getch(lex); + while (ch != EOF && isspace(ch)) { + if (lex->flags.preprocessing) { + if (ch == '\n') { + /* end-of-line */ + /* see if there was whitespace first */ + if (haswhite) { /* (lex->tok.value_count) { */ + lex_ungetch(lex, ch); + if (!lex_endtoken(lex)) + return TOKEN_FATAL; + return TOKEN_WHITE; + } + /* otherwise return EOL */ + return TOKEN_EOL; + } + haswhite = true; + if (!lex_tokench(lex, ch)) + return TOKEN_FATAL; + } + ch = lex_getch(lex); + } if (ch == '/') { ch = lex_getch(lex); if (ch == '/') { /* one line comment */ + haswhite = true; ch = lex_getch(lex); - /* check for special: '/', '/', '*', '/' */ - if (ch == '*') { - ch = lex_getch(lex); - if (ch == '/') { - ch = ' '; - continue; + if (lex->flags.preprocessing) { + if (!lex_tokench(lex, '/') || + !lex_tokench(lex, '/')) + { + return TOKEN_FATAL; } } while (ch != EOF && ch != '\n') { + if (lex->flags.preprocessing && !lex_tokench(lex, ch)) + return TOKEN_FATAL; ch = lex_getch(lex); } + if (lex->flags.preprocessing) { + lex_ungetch(lex, '\n'); + if (!lex_endtoken(lex)) + return TOKEN_FATAL; + return TOKEN_WHITE; + } continue; } if (ch == '*') { /* multiline comment */ + haswhite = true; + if (lex->flags.preprocessing) { + if (!lex_tokench(lex, '/') || + !lex_tokench(lex, '*')) + { + return TOKEN_FATAL; + } + } + while (ch != EOF) { ch = lex_getch(lex); if (ch == '*') { ch = lex_getch(lex); if (ch == '/') { - ch = lex_getch(lex); + if (lex->flags.preprocessing) { + if (!lex_tokench(lex, '*') || + !lex_tokench(lex, '/')) + { + return TOKEN_FATAL; + } + } break; } } + if (lex->flags.preprocessing) { + if (!lex_tokench(lex, ch)) + return TOKEN_FATAL; + } } - if (ch == '/') /* allow *//* direct following comment */ - { - lex_ungetch(lex, ch); - ch = ' '; /* cause TRUE in the isspace check */ - } + ch = ' '; /* cause TRUE in the isspace check */ continue; } /* Otherwise roll back to the slash and break out of the loop */ @@ -364,28 +448,13 @@ static int lex_skipwhite(lex_file *lex) } } while (ch != EOF && isspace(ch)); - return ch; -} - -/* Append a character to the token buffer */ -static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch) -{ - if (!token_value_add(&lex->tok, ch)) { - lexerror(lex, "out of memory"); - return false; - } - return true; -} - -/* Append a trailing null-byte */ -static bool GMQCC_WARN lex_endtoken(lex_file *lex) -{ - if (!token_value_add(&lex->tok, 0)) { - lexerror(lex, "out of memory"); - return false; + if (haswhite) { + if (!lex_endtoken(lex)) + return TOKEN_FATAL; + lex_ungetch(lex, ch); + return TOKEN_WHITE; } - lex->tok.value_count--; - return true; + return ch; } /* Get a token */ @@ -612,6 +681,10 @@ int lex_do(lex_file *lex) lex->tok.ctx.line = lex->sline; lex->tok.ctx.file = lex->name; + if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) { + return (lex->tok.ttype = ch); + } + if (lex->eof) return (lex->tok.ttype = TOKEN_FATAL); @@ -971,13 +1044,28 @@ int lex_do(lex_file *lex) !strcmp(v, "local") || !strcmp(v, "return") || !strcmp(v, "const")) + { lex->tok.ttype = TOKEN_KEYWORD; + } + else if (opts_standard != COMPILER_QCC) + { + /* other standards reserve these keywords */ + if (!strcmp(v, "switch") || + !strcmp(v, "struct") || + !strcmp(v, "union") || + !strcmp(v, "break") || + !strcmp(v, "continue")) + { + lex->tok.ttype = TOKEN_KEYWORD; + } + } return lex->tok.ttype; } if (ch == '"') { + lex->flags.nodigraphs = true; lex->tok.ttype = lex_finish_string(lex, '"'); while (lex->tok.ttype == TOKEN_STRINGCONST) { @@ -990,6 +1078,7 @@ int lex_do(lex_file *lex) lex->tok.ttype = lex_finish_string(lex, '"'); } + lex->flags.nodigraphs = false; if (!lex_endtoken(lex)) return (lex->tok.ttype = TOKEN_FATAL); return lex->tok.ttype;