lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
   9 /*
  10  * List of Keywords
  11  */
  12
  13 /* original */
  14 static const char *keywords_qc[] = {
  15     "for", "do", "while",
  16     "if", "else",
  17     "local",
  18     "return",
  19     "const"
  20 };
  21 static size_t num_keywords_qc = sizeof(keywords_qc) / sizeof(keywords_qc[0]);
  22
  23 /* For fte/gmgqcc */
  24 static const char *keywords_fg[] = {
  25     "var",
  26     "switch", "case", "default",
  27     "struct", "union",
  28     "break", "continue"
  29 };
  30 static size_t num_keywords_fg = sizeof(keywords_fg) / sizeof(keywords_fg[0]);
  31
  32 /*
  33  * Lexer code
  34  */
  35
  36 char* *lex_filenames;
  37
  38 void lexerror(lex_file *lex, const char *fmt, ...)
  39 {
  40     va_list ap;
  41
  42     va_start(ap, fmt);
  43     if (lex)
  44         con_vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  45     else
  46         con_vprintmsg(LVL_ERROR, "", 0, "parse error", fmt, ap);
  47     va_end(ap);
  48 }
  49
  50 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  51 {
  52     va_list ap;
  53     int lvl = LVL_WARNING;
  54
  55     if (!OPTS_WARN(warntype))
  56         return false;
  57
  58     if (opts_werror)
  59         lvl = LVL_ERROR;
  60
  61     va_start(ap, fmt);
  62     con_vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
  63     va_end(ap);
  64
  65     return opts_werror;
  66 }
  67
  68
  69 #if 0
  70 token* token_new()
  71 {
  72     token *tok = (token*)mem_a(sizeof(token));
  73     if (!tok)
  74         return NULL;
  75     memset(tok, 0, sizeof(*tok));
  76     return tok;
  77 }
  78
  79 void token_delete(token *self)
  80 {
  81     if (self->next && self->next->prev == self)
  82         self->next->prev = self->prev;
  83     if (self->prev && self->prev->next == self)
  84         self->prev->next = self->next;
  85     MEM_VECTOR_CLEAR(self, value);
  86     mem_d(self);
  87 }
  88
  89 token* token_copy(const token *cp)
  90 {
  91     token* self = token_new();
  92     if (!self)
  93         return NULL;
  94     /* copy the value */
  95     self->value_alloc = cp->value_count + 1;
  96     self->value_count = cp->value_count;
  97     self->value = (char*)mem_a(self->value_alloc);
  98     if (!self->value) {
  99         mem_d(self);
 100         return NULL;
 101     }
 102     memcpy(self->value, cp->value, cp->value_count);
 103     self->value[self->value_alloc-1] = 0;
 104
 105     /* rest */
 106     self->ctx = cp->ctx;
 107     self->ttype = cp->ttype;
 108     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
 109     return self;
 110 }
 111
 112 void token_delete_all(token *t)
 113 {
 114     token *n;
 115
 116     do {
 117         n = t->next;
 118         token_delete(t);
 119         t = n;
 120     } while(t);
 121 }
 122
 123 token* token_copy_all(const token *cp)
 124 {
 125     token *cur;
 126     token *out;
 127
 128     out = cur = token_copy(cp);
 129     if (!out)
 130         return NULL;
 131
 132     while (cp->next) {
 133         cp = cp->next;
 134         cur->next = token_copy(cp);
 135         if (!cur->next) {
 136             token_delete_all(out);
 137             return NULL;
 138         }
 139         cur->next->prev = cur;
 140         cur = cur->next;
 141     }
 142
 143     return out;
 144 }
 145 #else
 146 static void lex_token_new(lex_file *lex)
 147 {
 148 #if 0
 149     if (lex->tok)
 150         token_delete(lex->tok);
 151     lex->tok = token_new();
 152 #else
 153     if (lex->tok.value)
 154         vec_shrinkto(lex->tok.value, 0);
 155     lex->tok.constval.t  = 0;
 156     lex->tok.ctx.line = lex->sline;
 157     lex->tok.ctx.file = lex->name;
 158 #endif
 159 }
 160 #endif
 161
 162 lex_file* lex_open(const char *file)
 163 {
 164     lex_file *lex;
 165     FILE *in = util_fopen(file, "rb");
 166
 167     if (!in) {
 168         lexerror(NULL, "open failed: '%s'\n", file);
 169         return NULL;
 170     }
 171
 172     lex = (lex_file*)mem_a(sizeof(*lex));
 173     if (!lex) {
 174         fclose(in);
 175         lexerror(NULL, "out of memory\n");
 176         return NULL;
 177     }
 178
 179     memset(lex, 0, sizeof(*lex));
 180
 181     lex->file = in;
 182     lex->name = util_strdup(file);
 183     lex->line = 1; /* we start counting at 1 */
 184
 185     lex->peekpos = 0;
 186     lex->eof = false;
 187
 188     vec_push(lex_filenames, lex->name);
 189     return lex;
 190 }
 191
 192 lex_file* lex_open_string(const char *str, size_t len, const char *name)
 193 {
 194     lex_file *lex;
 195
 196     lex = (lex_file*)mem_a(sizeof(*lex));
 197     if (!lex) {
 198         lexerror(NULL, "out of memory\n");
 199         return NULL;
 200     }
 201
 202     memset(lex, 0, sizeof(*lex));
 203
 204     lex->file = NULL;
 205     lex->open_string        = str;
 206     lex->open_string_length = len;
 207     lex->open_string_pos    = 0;
 208
 209     lex->name = util_strdup(name ? name : "<string-source>");
 210     lex->line = 1; /* we start counting at 1 */
 211
 212     lex->peekpos = 0;
 213     lex->eof = false;
 214
 215     vec_push(lex_filenames, lex->name);
 216
 217     return lex;
 218 }
 219
 220 void lex_cleanup(void)
 221 {
 222     size_t i;
 223     for (i = 0; i < vec_size(lex_filenames); ++i)
 224         mem_d(lex_filenames[i]);
 225     vec_free(lex_filenames);
 226 }
 227
 228 void lex_close(lex_file *lex)
 229 {
 230     size_t i;
 231     for (i = 0; i < vec_size(lex->frames); ++i)
 232         mem_d(lex->frames[i].name);
 233     vec_free(lex->frames);
 234
 235     if (lex->modelname)
 236         vec_free(lex->modelname);
 237
 238     if (lex->file)
 239         fclose(lex->file);
 240 #if 0
 241     if (lex->tok)
 242         token_delete(lex->tok);
 243 #else
 244     vec_free(lex->tok.value);
 245 #endif
 246     /* mem_d(lex->name); collected in lex_filenames */
 247     mem_d(lex);
 248 }
 249
 250 static int lex_fgetc(lex_file *lex)
 251 {
 252     if (lex->file)
 253         return fgetc(lex->file);
 254     if (lex->open_string) {
 255         if (lex->open_string_pos >= lex->open_string_length)
 256             return EOF;
 257         return lex->open_string[lex->open_string_pos++];
 258     }
 259     return EOF;
 260 }
 261
 262 /* Get or put-back data
 263  * The following two functions do NOT understand what kind of data they
 264  * are working on.
 265  * They are merely wrapping get/put in order to count line numbers.
 266  */
 267 static void lex_ungetch(lex_file *lex, int ch);
 268 static int lex_try_trigraph(lex_file *lex, int old)
 269 {
 270     int c2, c3;
 271     c2 = lex_fgetc(lex);
 272     if (c2 != '?') {
 273         lex_ungetch(lex, c2);
 274         return old;
 275     }
 276
 277     c3 = lex_fgetc(lex);
 278     switch (c3) {
 279         case '=': return '#';
 280         case '/': return '\\';
 281         case '\'': return '^';
 282         case '(': return '[';
 283         case ')': return ']';
 284         case '!': return '|';
 285         case '<': return '{';
 286         case '>': return '}';
 287         case '-': return '~';
 288         default:
 289             lex_ungetch(lex, c3);
 290             lex_ungetch(lex, c2);
 291             return old;
 292     }
 293 }
 294
 295 static int lex_try_digraph(lex_file *lex, int ch)
 296 {
 297     int c2;
 298     c2 = lex_fgetc(lex);
 299     if      (ch == '<' && c2 == ':')
 300         return '[';
 301     else if (ch == ':' && c2 == '>')
 302         return ']';
 303     else if (ch == '<' && c2 == '%')
 304         return '{';
 305     else if (ch == '%' && c2 == '>')
 306         return '}';
 307     else if (ch == '%' && c2 == ':')
 308         return '#';
 309     lex_ungetch(lex, c2);
 310     return ch;
 311 }
 312
 313 static int lex_getch(lex_file *lex)
 314 {
 315     int ch;
 316
 317     if (lex->peekpos) {
 318         lex->peekpos--;
 319         if (!lex->push_line && lex->peek[lex->peekpos] == '\n')
 320             lex->line++;
 321         return lex->peek[lex->peekpos];
 322     }
 323
 324     ch = lex_fgetc(lex);
 325     if (!lex->push_line && ch == '\n')
 326         lex->line++;
 327     else if (ch == '?')
 328         return lex_try_trigraph(lex, ch);
 329     else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
 330         return lex_try_digraph(lex, ch);
 331     return ch;
 332 }
 333
 334 static void lex_ungetch(lex_file *lex, int ch)
 335 {
 336     lex->peek[lex->peekpos++] = ch;
 337     if (!lex->push_line && ch == '\n')
 338         lex->line--;
 339 }
 340
 341 /* classify characters
 342  * some additions to the is*() functions of ctype.h
 343  */
 344
 345 /* Idents are alphanumberic, but they start with alpha or _ */
 346 static bool isident_start(int ch)
 347 {
 348     return isalpha(ch) || ch == '_';
 349 }
 350
 351 static bool isident(int ch)
 352 {
 353     return isident_start(ch) || isdigit(ch);
 354 }
 355
 356 /* isxdigit_only is used when we already know it's not a digit
 357  * and want to see if it's a hex digit anyway.
 358  */
 359 static bool isxdigit_only(int ch)
 360 {
 361     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 362 }
 363
 364 /* Append a character to the token buffer */
 365 static void lex_tokench(lex_file *lex, int ch)
 366 {
 367     vec_push(lex->tok.value, ch);
 368 }
 369
 370 /* Append a trailing null-byte */
 371 static void lex_endtoken(lex_file *lex)
 372 {
 373     vec_push(lex->tok.value, 0);
 374     vec_shrinkby(lex->tok.value, 1);
 375 }
 376
 377 static bool lex_try_pragma(lex_file *lex)
 378 {
 379     int ch;
 380     char *pragma  = NULL;
 381     char *command = NULL;
 382     char *param   = NULL;
 383     size_t line;
 384
 385     if (lex->flags.preprocessing)
 386         return false;
 387
 388     line = lex->line;
 389
 390     ch = lex_getch(lex);
 391     if (ch != '#') {
 392         lex_ungetch(lex, ch);
 393         return false;
 394     }
 395
 396     for (ch = lex_getch(lex); vec_size(pragma) < 8 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
 397         vec_push(pragma, ch);
 398     vec_push(pragma, 0);
 399
 400     if (ch != ' ' || strcmp(pragma, "pragma")) {
 401         lex_ungetch(lex, ch);
 402         goto unroll;
 403     }
 404
 405     for (ch = lex_getch(lex); vec_size(command) < 32 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
 406         vec_push(command, ch);
 407     vec_push(command, 0);
 408
 409     if (ch != '(') {
 410         lex_ungetch(lex, ch);
 411         goto unroll;
 412     }
 413
 414     for (ch = lex_getch(lex); vec_size(param) < 32 && ch != ')' && ch != '\n'; ch = lex_getch(lex))
 415         vec_push(param, ch);
 416     vec_push(param, 0);
 417
 418     if (ch != ')') {
 419         lex_ungetch(lex, ch);
 420         goto unroll;
 421     }
 422
 423     if (!strcmp(command, "push")) {
 424         if (!strcmp(param, "line")) {
 425             lex->push_line++;
 426             --line;
 427         }
 428         else
 429             goto unroll;
 430     }
 431     else if (!strcmp(command, "pop")) {
 432         if (!strcmp(param, "line")) {
 433             if (lex->push_line)
 434                 lex->push_line--;
 435             --line;
 436         }
 437         else
 438             goto unroll;
 439     }
 440     else if (!strcmp(command, "file")) {
 441         lex->name = util_strdup(param);
 442         vec_push(lex_filenames, lex->name);
 443     }
 444     else if (!strcmp(command, "line")) {
 445         line = strtol(param, NULL, 0)-1;
 446     }
 447     else
 448         goto unroll;
 449
 450     lex->line = line;
 451     while (ch != '\n' && ch != EOF)
 452         ch = lex_getch(lex);
 453     return true;
 454
 455 unroll:
 456     if (command) {
 457         vec_pop(command);
 458         while (vec_size(command)) {
 459             lex_ungetch(lex, vec_last(command));
 460             vec_pop(command);
 461         }
 462         vec_free(command);
 463     }
 464     if (command) {
 465         vec_pop(command);
 466         while (vec_size(command)) {
 467             lex_ungetch(lex, vec_last(command));
 468             vec_pop(command);
 469         }
 470         vec_free(command);
 471     }
 472     if (pragma) {
 473         vec_pop(pragma);
 474         while (vec_size(pragma)) {
 475             lex_ungetch(lex, vec_last(pragma));
 476             vec_pop(pragma);
 477         }
 478         vec_free(pragma);
 479     }
 480     lex_ungetch(lex, '#');
 481
 482     lex->line = line;
 483     return false;
 484 }
 485
 486 /* Skip whitespace and comments and return the first
 487  * non-white character.
 488  * As this makes use of the above getch() ungetch() functions,
 489  * we don't need to care at all about line numbering anymore.
 490  *
 491  * In theory, this function should only be used at the beginning
 492  * of lexing, or when we *know* the next character is part of the token.
 493  * Otherwise, if the parser throws an error, the linenumber may not be
 494  * the line of the error, but the line of the next token AFTER the error.
 495  *
 496  * This is currently only problematic when using c-like string-continuation,
 497  * since comments and whitespaces are allowed between 2 such strings.
 498  * Example:
 499 printf(   "line one\n"
 500 // A comment
 501           "A continuation of the previous string"
 502 // This line is skipped
 503       , foo);
 504
 505  * In this case, if the parser decides it didn't actually want a string,
 506  * and uses lex->line to print an error, it will show the ', foo);' line's
 507  * line number.
 508  *
 509  * On the other hand, the parser is supposed to remember the line of the next
 510  * token's beginning. In this case we would want skipwhite() to be called
 511  * AFTER reading a token, so that the parser, before reading the NEXT token,
 512  * doesn't store the *comment's* line number, but the actual token's line number.
 513  *
 514  * THIS SOLUTION
 515  *    here is to store the line of the first character after skipping
 516  *    the initial whitespace in lex->sline, this happens in lex_do.
 517  */
 518 static int lex_skipwhite(lex_file *lex)
 519 {
 520     int ch = 0;
 521     bool haswhite = false;
 522
 523     do
 524     {
 525         ch = lex_getch(lex);
 526         while (ch != EOF && isspace(ch)) {
 527             if (ch == '\n') {
 528                 if (lex_try_pragma(lex))
 529                     continue;
 530             }
 531             if (lex->flags.preprocessing) {
 532                 if (ch == '\n') {
 533                     /* end-of-line */
 534                     /* see if there was whitespace first */
 535                     if (haswhite) { /* (vec_size(lex->tok.value)) { */
 536                         lex_ungetch(lex, ch);
 537                         lex_endtoken(lex);
 538                         return TOKEN_WHITE;
 539                     }
 540                     /* otherwise return EOL */
 541                     return TOKEN_EOL;
 542                 }
 543                 haswhite = true;
 544                 lex_tokench(lex, ch);
 545             }
 546             ch = lex_getch(lex);
 547         }
 548
 549         if (ch == '/') {
 550             ch = lex_getch(lex);
 551             if (ch == '/')
 552             {
 553                 /* one line comment */
 554                 ch = lex_getch(lex);
 555
 556                 if (lex->flags.preprocessing) {
 557                     haswhite = true;
 558                     /*
 559                     lex_tokench(lex, '/');
 560                     lex_tokench(lex, '/');
 561                     */
 562                     lex_tokench(lex, ' ');
 563                     lex_tokench(lex, ' ');
 564                 }
 565
 566                 while (ch != EOF && ch != '\n') {
 567                     if (lex->flags.preprocessing)
 568                         lex_tokench(lex, ' '); /* ch); */
 569                     ch = lex_getch(lex);
 570                 }
 571                 if (lex->flags.preprocessing) {
 572                     lex_ungetch(lex, '\n');
 573                     lex_endtoken(lex);
 574                     return TOKEN_WHITE;
 575                 }
 576                 continue;
 577             }
 578             if (ch == '*')
 579             {
 580                 /* multiline comment */
 581                 if (lex->flags.preprocessing) {
 582                     haswhite = true;
 583                     /*
 584                     lex_tokench(lex, '/');
 585                     lex_tokench(lex, '*');
 586                     */
 587                     lex_tokench(lex, ' ');
 588                     lex_tokench(lex, ' ');
 589                 }
 590
 591                 while (ch != EOF)
 592                 {
 593                     ch = lex_getch(lex);
 594                     if (ch == '*') {
 595                         ch = lex_getch(lex);
 596                         if (ch == '/') {
 597                             if (lex->flags.preprocessing) {
 598                                 /*
 599                                 lex_tokench(lex, '*');
 600                                 lex_tokench(lex, '/');
 601                                 */
 602                                 lex_tokench(lex, ' ');
 603                                 lex_tokench(lex, ' ');
 604                             }
 605                             break;
 606                         }
 607                         lex_ungetch(lex, ch);
 608                     }
 609                     if (lex->flags.preprocessing) {
 610                         lex_tokench(lex, ' '); /* ch); */
 611                     }
 612                 }
 613                 ch = ' '; /* cause TRUE in the isspace check */
 614                 continue;
 615             }
 616             /* Otherwise roll back to the slash and break out of the loop */
 617             lex_ungetch(lex, ch);
 618             ch = '/';
 619             break;
 620         }
 621     } while (ch != EOF && isspace(ch));
 622
 623     if (haswhite) {
 624         lex_endtoken(lex);
 625         lex_ungetch(lex, ch);
 626         return TOKEN_WHITE;
 627     }
 628     return ch;
 629 }
 630
 631 /* Get a token */
 632 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 633 {
 634     int ch;
 635
 636     ch = lex_getch(lex);
 637     while (ch != EOF && isident(ch))
 638     {
 639         lex_tokench(lex, ch);
 640         ch = lex_getch(lex);
 641     }
 642
 643     /* last ch was not an ident ch: */
 644     lex_ungetch(lex, ch);
 645
 646     return true;
 647 }
 648
 649 /* read one ident for the frame list */
 650 static int lex_parse_frame(lex_file *lex)
 651 {
 652     int ch;
 653
 654     lex_token_new(lex);
 655
 656     ch = lex_getch(lex);
 657     while (ch != EOF && ch != '\n' && isspace(ch))
 658         ch = lex_getch(lex);
 659
 660     if (ch == '\n')
 661         return 1;
 662
 663     if (!isident_start(ch)) {
 664         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 665         return -1;
 666     }
 667
 668     lex_tokench(lex, ch);
 669     if (!lex_finish_ident(lex))
 670         return -1;
 671     lex_endtoken(lex);
 672     return 0;
 673 }
 674
 675 /* read a list of $frames */
 676 static bool lex_finish_frames(lex_file *lex)
 677 {
 678     do {
 679         size_t i;
 680         int    rc;
 681         frame_macro m;
 682
 683         rc = lex_parse_frame(lex);
 684         if (rc > 0) /* end of line */
 685             return true;
 686         if (rc < 0) /* error */
 687             return false;
 688
 689         for (i = 0; i < vec_size(lex->frames); ++i) {
 690             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 691                 lex->frames[i].value = lex->framevalue++;
 692                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 693                     return false;
 694                 break;
 695             }
 696         }
 697         if (i < vec_size(lex->frames))
 698             continue;
 699
 700         m.value = lex->framevalue++;
 701         m.name = util_strdup(lex->tok.value);
 702         vec_shrinkto(lex->tok.value, 0);
 703         vec_push(lex->frames, m);
 704     } while (true);
 705 }
 706
 707 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 708 {
 709     int ch = 0;
 710
 711     while (ch != EOF)
 712     {
 713         ch = lex_getch(lex);
 714         if (ch == quote)
 715             return TOKEN_STRINGCONST;
 716
 717         if (lex->flags.preprocessing && ch == '\\') {
 718             lex_tokench(lex, ch);
 719             ch = lex_getch(lex);
 720             if (ch == EOF) {
 721                 lexerror(lex, "unexpected end of file");
 722                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 723                 return (lex->tok.ttype = TOKEN_ERROR);
 724             }
 725             lex_tokench(lex, ch);
 726         }
 727         else if (ch == '\\') {
 728             ch = lex_getch(lex);
 729             if (ch == EOF) {
 730                 lexerror(lex, "unexpected end of file");
 731                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 732                 return (lex->tok.ttype = TOKEN_ERROR);
 733             }
 734
 735             switch (ch) {
 736             case '\\': break;
 737             case '\'': break;
 738             case '"':  break;
 739             case 'a':  ch = '\a'; break;
 740             case 'b':  ch = '\b'; break;
 741             case 'r':  ch = '\r'; break;
 742             case 'n':  ch = '\n'; break;
 743             case 't':  ch = '\t'; break;
 744             case 'f':  ch = '\f'; break;
 745             case 'v':  ch = '\v'; break;
 746             default:
 747                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 748                 /* so we just add the character plus backslash no matter what it actually is */
 749                 lex_tokench(lex, '\\');
 750             }
 751             /* add the character finally */
 752             lex_tokench(lex, ch);
 753         }
 754         else
 755             lex_tokench(lex, ch);
 756     }
 757     lexerror(lex, "unexpected end of file within string constant");
 758     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 759     return (lex->tok.ttype = TOKEN_ERROR);
 760 }
 761
 762 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 763 {
 764     bool ishex = false;
 765
 766     int  ch = lastch;
 767
 768     /* parse a number... */
 769     lex->tok.ttype = TOKEN_INTCONST;
 770
 771     lex_tokench(lex, ch);
 772
 773     ch = lex_getch(lex);
 774     if (ch != '.' && !isdigit(ch))
 775     {
 776         if (lastch != '0' || ch != 'x')
 777         {
 778             /* end of the number or EOF */
 779             lex_ungetch(lex, ch);
 780             lex_endtoken(lex);
 781
 782             lex->tok.constval.i = lastch - '0';
 783             return lex->tok.ttype;
 784         }
 785
 786         ishex = true;
 787     }
 788
 789     /* EOF would have been caught above */
 790
 791     if (ch != '.')
 792     {
 793         lex_tokench(lex, ch);
 794         ch = lex_getch(lex);
 795         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 796         {
 797             lex_tokench(lex, ch);
 798             ch = lex_getch(lex);
 799         }
 800     }
 801     /* NOT else, '.' can come from above as well */
 802     if (ch == '.' && !ishex)
 803     {
 804         /* Allow floating comma in non-hex mode */
 805         lex->tok.ttype = TOKEN_FLOATCONST;
 806         lex_tokench(lex, ch);
 807
 808         /* continue digits-only */
 809         ch = lex_getch(lex);
 810         while (isdigit(ch))
 811         {
 812             lex_tokench(lex, ch);
 813             ch = lex_getch(lex);
 814         }
 815     }
 816     /* put back the last character */
 817     /* but do not put back the trailing 'f' or a float */
 818     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 819         ch = lex_getch(lex);
 820
 821     /* generally we don't want words to follow numbers: */
 822     if (isident(ch)) {
 823         lexerror(lex, "unexpected trailing characters after number");
 824         return (lex->tok.ttype = TOKEN_ERROR);
 825     }
 826     lex_ungetch(lex, ch);
 827
 828     lex_endtoken(lex);
 829     if (lex->tok.ttype == TOKEN_FLOATCONST)
 830         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 831     else
 832         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 833     return lex->tok.ttype;
 834 }
 835
 836 int lex_do(lex_file *lex)
 837 {
 838     int ch, nextch, thirdch;
 839
 840     lex_token_new(lex);
 841 #if 0
 842     if (!lex->tok)
 843         return TOKEN_FATAL;
 844 #endif
 845
 846     while (true) {
 847         ch = lex_skipwhite(lex);
 848         if (!lex->flags.mergelines || ch != '\\')
 849             break;
 850         ch = lex_getch(lex);
 851         if (ch != '\n') {
 852             lex_ungetch(lex, ch);
 853             ch = '\\';
 854             break;
 855         }
 856         /* we reached a linemerge */
 857         lex_tokench(lex, '\n');
 858         continue;
 859     }
 860
 861     lex->sline = lex->line;
 862     lex->tok.ctx.line = lex->sline;
 863     lex->tok.ctx.file = lex->name;
 864
 865     if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
 866         return (lex->tok.ttype = ch);
 867     }
 868
 869     if (lex->eof)
 870         return (lex->tok.ttype = TOKEN_FATAL);
 871
 872     if (ch == EOF) {
 873         lex->eof = true;
 874         return (lex->tok.ttype = TOKEN_EOF);
 875     }
 876
 877     /* modelgen / spiritgen commands */
 878     if (ch == '$') {
 879         const char *v;
 880         size_t frame;
 881
 882         ch = lex_getch(lex);
 883         if (!isident_start(ch)) {
 884             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 885             return lex_do(lex);
 886         }
 887         lex_tokench(lex, ch);
 888         if (!lex_finish_ident(lex))
 889             return (lex->tok.ttype = TOKEN_ERROR);
 890         lex_endtoken(lex);
 891         /* skip the known commands */
 892         v = lex->tok.value;
 893
 894         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 895         {
 896             /* frame/framesave command works like an enum
 897              * similar to fteqcc we handle this in the lexer.
 898              * The reason for this is that it is sensitive to newlines,
 899              * which the parser is unaware of
 900              */
 901             if (!lex_finish_frames(lex))
 902                  return (lex->tok.ttype = TOKEN_ERROR);
 903             return lex_do(lex);
 904         }
 905
 906         if (!strcmp(v, "framevalue"))
 907         {
 908             ch = lex_getch(lex);
 909             while (ch != EOF && isspace(ch) && ch != '\n')
 910                 ch = lex_getch(lex);
 911
 912             if (!isdigit(ch)) {
 913                 lexerror(lex, "$framevalue requires an integer parameter");
 914                 return lex_do(lex);
 915             }
 916
 917             lex_token_new(lex);
 918             lex->tok.ttype = lex_finish_digit(lex, ch);
 919             lex_endtoken(lex);
 920             if (lex->tok.ttype != TOKEN_INTCONST) {
 921                 lexerror(lex, "$framevalue requires an integer parameter");
 922                 return lex_do(lex);
 923             }
 924             lex->framevalue = lex->tok.constval.i;
 925             return lex_do(lex);
 926         }
 927
 928         if (!strcmp(v, "framerestore"))
 929         {
 930             int rc;
 931
 932             lex_token_new(lex);
 933
 934             rc = lex_parse_frame(lex);
 935
 936             if (rc > 0) {
 937                 lexerror(lex, "$framerestore requires a framename parameter");
 938                 return lex_do(lex);
 939             }
 940             if (rc < 0)
 941                 return (lex->tok.ttype = TOKEN_FATAL);
 942
 943             v = lex->tok.value;
 944             for (frame = 0; frame < vec_size(lex->frames); ++frame) {
 945                 if (!strcmp(v, lex->frames[frame].name)) {
 946                     lex->framevalue = lex->frames[frame].value;
 947                     return lex_do(lex);
 948                 }
 949             }
 950             lexerror(lex, "unknown framename `%s`", v);
 951             return lex_do(lex);
 952         }
 953
 954         if (!strcmp(v, "modelname"))
 955         {
 956             int rc;
 957
 958             lex_token_new(lex);
 959
 960             rc = lex_parse_frame(lex);
 961
 962             if (rc > 0) {
 963                 lexerror(lex, "$modelname requires a parameter");
 964                 return lex_do(lex);
 965             }
 966             if (rc < 0)
 967                 return (lex->tok.ttype = TOKEN_FATAL);
 968
 969             v = lex->tok.value;
 970             if (lex->modelname) {
 971                 frame_macro m;
 972                 m.value = lex->framevalue;
 973                 m.name = lex->modelname;
 974                 lex->modelname = NULL;
 975                 vec_push(lex->frames, m);
 976             }
 977             lex->modelname = lex->tok.value;
 978             lex->tok.value = NULL;
 979             return lex_do(lex);
 980         }
 981
 982         if (!strcmp(v, "flush"))
 983         {
 984             size_t fi;
 985             for (fi = 0; fi < vec_size(lex->frames); ++fi)
 986                 mem_d(lex->frames[fi].name);
 987             vec_free(lex->frames);
 988             /* skip line (fteqcc does it too) */
 989             ch = lex_getch(lex);
 990             while (ch != EOF && ch != '\n')
 991                 ch = lex_getch(lex);
 992             return lex_do(lex);
 993         }
 994
 995         if (!strcmp(v, "cd") ||
 996             !strcmp(v, "origin") ||
 997             !strcmp(v, "base") ||
 998             !strcmp(v, "flags") ||
 999             !strcmp(v, "scale") ||
1000             !strcmp(v, "skin"))
1001         {
1002             /* skip line */
1003             ch = lex_getch(lex);
1004             while (ch != EOF && ch != '\n')
1005                 ch = lex_getch(lex);
1006             return lex_do(lex);
1007         }
1008
1009         for (frame = 0; frame < vec_size(lex->frames); ++frame) {
1010             if (!strcmp(v, lex->frames[frame].name)) {
1011                 lex->tok.constval.i = lex->frames[frame].value;
1012                 return (lex->tok.ttype = TOKEN_INTCONST);
1013             }
1014         }
1015
1016         lexerror(lex, "invalid frame macro");
1017         return lex_do(lex);
1018     }
1019
1020     /* single-character tokens */
1021     switch (ch)
1022     {
1023         case '[':
1024         case '(':
1025         case ':':
1026         case '?':
1027             lex_tokench(lex, ch);
1028             lex_endtoken(lex);
1029             if (lex->flags.noops)
1030                 return (lex->tok.ttype = ch);
1031             else
1032                 return (lex->tok.ttype = TOKEN_OPERATOR);
1033         case ')':
1034         case ';':
1035         case '{':
1036         case '}':
1037         case ']':
1038
1039         case '#':
1040             lex_tokench(lex, ch);
1041             lex_endtoken(lex);
1042             return (lex->tok.ttype = ch);
1043         default:
1044             break;
1045     }
1046
1047     if (lex->flags.noops)
1048     {
1049         /* Detect characters early which are normally
1050          * operators OR PART of an operator.
1051          */
1052         switch (ch)
1053         {
1054             case '+':
1055             case '-':
1056             case '*':
1057             case '/':
1058             case '<':
1059             case '>':
1060             case '=':
1061             case '&':
1062             case '|':
1063             case '^':
1064             case '~':
1065             case ',':
1066             case '!':
1067                 lex_tokench(lex, ch);
1068                 lex_endtoken(lex);
1069                 return (lex->tok.ttype = ch);
1070             default:
1071                 break;
1072         }
1073
1074         if (ch == '.')
1075         {
1076             lex_tokench(lex, ch);
1077             /* peak ahead once */
1078             nextch = lex_getch(lex);
1079             if (nextch != '.') {
1080                 lex_ungetch(lex, nextch);
1081                 lex_endtoken(lex);
1082                 return (lex->tok.ttype = ch);
1083             }
1084             /* peak ahead again */
1085             nextch = lex_getch(lex);
1086             if (nextch != '.') {
1087                 lex_ungetch(lex, nextch);
1088                 lex_ungetch(lex, nextch);
1089                 lex_endtoken(lex);
1090                 return (lex->tok.ttype = ch);
1091             }
1092             /* fill the token to be "..." */
1093             lex_tokench(lex, ch);
1094             lex_tokench(lex, ch);
1095             lex_endtoken(lex);
1096             return (lex->tok.ttype = TOKEN_DOTS);
1097         }
1098     }
1099
1100     if (ch == ',' || ch == '.') {
1101         lex_tokench(lex, ch);
1102         lex_endtoken(lex);
1103         return (lex->tok.ttype = TOKEN_OPERATOR);
1104     }
1105
1106     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
1107         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
1108         ch == '=' || ch == '!' || /* ==, != */
1109         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
1110     {
1111         lex_tokench(lex, ch);
1112
1113         nextch = lex_getch(lex);
1114         if (nextch == ch || nextch == '=') {
1115             lex_tokench(lex, nextch);
1116         } else if (ch == '-' && nextch == '>') {
1117             lex_tokench(lex, nextch);
1118         } else if (ch == '&' && nextch == '~') {
1119             thirdch = lex_getch(lex);
1120             if (thirdch != '=') {
1121                 lex_ungetch(lex, thirdch);
1122                 lex_ungetch(lex, nextch);
1123             }
1124             else {
1125                 lex_tokench(lex, nextch);
1126                 lex_tokench(lex, thirdch);
1127             }
1128         } else
1129             lex_ungetch(lex, nextch);
1130
1131         lex_endtoken(lex);
1132         return (lex->tok.ttype = TOKEN_OPERATOR);
1133     }
1134
1135     /*
1136     if (ch == '^' || ch == '~' || ch == '!')
1137     {
1138         lex_tokench(lex, ch);
1139         lex_endtoken(lex);
1140         return (lex->tok.ttype = TOKEN_OPERATOR);
1141     }
1142     */
1143
1144     if (ch == '*' || ch == '/') /* *=, /= */
1145     {
1146         lex_tokench(lex, ch);
1147
1148         nextch = lex_getch(lex);
1149         if (nextch == '=') {
1150             lex_tokench(lex, nextch);
1151         } else
1152             lex_ungetch(lex, nextch);
1153
1154         lex_endtoken(lex);
1155         return (lex->tok.ttype = TOKEN_OPERATOR);
1156     }
1157
1158     if (isident_start(ch))
1159     {
1160         const char *v;
1161
1162         lex_tokench(lex, ch);
1163         if (!lex_finish_ident(lex)) {
1164             /* error? */
1165             return (lex->tok.ttype = TOKEN_ERROR);
1166         }
1167         lex_endtoken(lex);
1168         lex->tok.ttype = TOKEN_IDENT;
1169
1170         v = lex->tok.value;
1171         if (!strcmp(v, "void")) {
1172             lex->tok.ttype = TOKEN_TYPENAME;
1173             lex->tok.constval.t = TYPE_VOID;
1174         } else if (!strcmp(v, "int")) {
1175             lex->tok.ttype = TOKEN_TYPENAME;
1176             lex->tok.constval.t = TYPE_INTEGER;
1177         } else if (!strcmp(v, "float")) {
1178             lex->tok.ttype = TOKEN_TYPENAME;
1179             lex->tok.constval.t = TYPE_FLOAT;
1180         } else if (!strcmp(v, "string")) {
1181             lex->tok.ttype = TOKEN_TYPENAME;
1182             lex->tok.constval.t = TYPE_STRING;
1183         } else if (!strcmp(v, "entity")) {
1184             lex->tok.ttype = TOKEN_TYPENAME;
1185             lex->tok.constval.t = TYPE_ENTITY;
1186         } else if (!strcmp(v, "vector")) {
1187             lex->tok.ttype = TOKEN_TYPENAME;
1188             lex->tok.constval.t = TYPE_VECTOR;
1189         } else {
1190             size_t kw;
1191             for (kw = 0; kw < num_keywords_qc; ++kw) {
1192                 if (!strcmp(v, keywords_qc[kw]))
1193                     return (lex->tok.ttype = TOKEN_KEYWORD);
1194             }
1195             if (opts_standard != COMPILER_QCC) {
1196                 for (kw = 0; kw < num_keywords_fg; ++kw) {
1197                     if (!strcmp(v, keywords_fg[kw]))
1198                         return (lex->tok.ttype = TOKEN_KEYWORD);
1199                 }
1200             }
1201         }
1202
1203         return lex->tok.ttype;
1204     }
1205
1206     if (ch == '"')
1207     {
1208         lex->flags.nodigraphs = true;
1209         if (lex->flags.preprocessing)
1210             lex_tokench(lex, ch);
1211         lex->tok.ttype = lex_finish_string(lex, '"');
1212         if (lex->flags.preprocessing)
1213             lex_tokench(lex, ch);
1214         while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
1215         {
1216             /* Allow c style "string" "continuation" */
1217             ch = lex_skipwhite(lex);
1218             if (ch != '"') {
1219                 lex_ungetch(lex, ch);
1220                 break;
1221             }
1222
1223             lex->tok.ttype = lex_finish_string(lex, '"');
1224         }
1225         lex->flags.nodigraphs = false;
1226         lex_endtoken(lex);
1227         return lex->tok.ttype;
1228     }
1229
1230     if (ch == '\'')
1231     {
1232         /* we parse character constants like string,
1233          * but return TOKEN_CHARCONST, or a vector type if it fits...
1234          * Likewise actual unescaping has to be done by the parser.
1235          * The difference is we don't allow 'char' 'continuation'.
1236          */
1237         if (lex->flags.preprocessing)
1238             lex_tokench(lex, ch);
1239         lex->tok.ttype = lex_finish_string(lex, '\'');
1240         if (lex->flags.preprocessing)
1241             lex_tokench(lex, ch);
1242         lex_endtoken(lex);
1243
1244          /* It's a vector if we can successfully scan 3 floats */
1245 #ifdef WIN32
1246         if (sscanf_s(lex->tok.value, " %f %f %f ",
1247                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1248 #else
1249         if (sscanf(lex->tok.value, " %f %f %f ",
1250                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1251 #endif
1252
1253         {
1254              lex->tok.ttype = TOKEN_VECTORCONST;
1255         }
1256
1257         return lex->tok.ttype;
1258     }
1259
1260     if (isdigit(ch))
1261     {
1262         lex->tok.ttype = lex_finish_digit(lex, ch);
1263         lex_endtoken(lex);
1264         return lex->tok.ttype;
1265     }
1266
1267     lexerror(lex, "unknown token");
1268     return (lex->tok.ttype = TOKEN_ERROR);
1269 }