lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
   9 /*
  10  * List of Keywords
  11  */
  12
  13 /* original */
  14 static const char *keywords_qc[] = {
  15     "for", "do", "while",
  16     "if", "else",
  17     "local",
  18     "return",
  19     "const"
  20 };
  21 static size_t num_keywords_qc = sizeof(keywords_qc) / sizeof(keywords_qc[0]);
  22
  23 /* For fte/gmgqcc */
  24 static const char *keywords_fg[] = {
  25     "var",
  26     "switch", "case", "default",
  27     "struct", "union",
  28     "break", "continue"
  29 };
  30 static size_t num_keywords_fg = sizeof(keywords_fg) / sizeof(keywords_fg[0]);
  31
  32 /*
  33  * Lexer code
  34  */
  35
  36 char* *lex_filenames;
  37
  38 void lexerror(lex_file *lex, const char *fmt, ...)
  39 {
  40     va_list ap;
  41
  42     va_start(ap, fmt);
  43     if (lex)
  44         con_vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  45     else
  46         con_vprintmsg(LVL_ERROR, "", 0, "parse error", fmt, ap);
  47     va_end(ap);
  48 }
  49
  50 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  51 {
  52     va_list ap;
  53     int lvl = LVL_WARNING;
  54
  55     if (!OPTS_WARN(warntype))
  56         return false;
  57
  58     if (opts_werror)
  59         lvl = LVL_ERROR;
  60
  61     va_start(ap, fmt);
  62     con_vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
  63     va_end(ap);
  64
  65     return opts_werror;
  66 }
  67
  68
  69 #if 0
  70 token* token_new()
  71 {
  72     token *tok = (token*)mem_a(sizeof(token));
  73     if (!tok)
  74         return NULL;
  75     memset(tok, 0, sizeof(*tok));
  76     return tok;
  77 }
  78
  79 void token_delete(token *self)
  80 {
  81     if (self->next && self->next->prev == self)
  82         self->next->prev = self->prev;
  83     if (self->prev && self->prev->next == self)
  84         self->prev->next = self->next;
  85     MEM_VECTOR_CLEAR(self, value);
  86     mem_d(self);
  87 }
  88
  89 token* token_copy(const token *cp)
  90 {
  91     token* self = token_new();
  92     if (!self)
  93         return NULL;
  94     /* copy the value */
  95     self->value_alloc = cp->value_count + 1;
  96     self->value_count = cp->value_count;
  97     self->value = (char*)mem_a(self->value_alloc);
  98     if (!self->value) {
  99         mem_d(self);
 100         return NULL;
 101     }
 102     memcpy(self->value, cp->value, cp->value_count);
 103     self->value[self->value_alloc-1] = 0;
 104
 105     /* rest */
 106     self->ctx = cp->ctx;
 107     self->ttype = cp->ttype;
 108     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
 109     return self;
 110 }
 111
 112 void token_delete_all(token *t)
 113 {
 114     token *n;
 115
 116     do {
 117         n = t->next;
 118         token_delete(t);
 119         t = n;
 120     } while(t);
 121 }
 122
 123 token* token_copy_all(const token *cp)
 124 {
 125     token *cur;
 126     token *out;
 127
 128     out = cur = token_copy(cp);
 129     if (!out)
 130         return NULL;
 131
 132     while (cp->next) {
 133         cp = cp->next;
 134         cur->next = token_copy(cp);
 135         if (!cur->next) {
 136             token_delete_all(out);
 137             return NULL;
 138         }
 139         cur->next->prev = cur;
 140         cur = cur->next;
 141     }
 142
 143     return out;
 144 }
 145 #else
 146 static void lex_token_new(lex_file *lex)
 147 {
 148 #if 0
 149     if (lex->tok)
 150         token_delete(lex->tok);
 151     lex->tok = token_new();
 152 #else
 153     if (lex->tok.value)
 154         vec_shrinkto(lex->tok.value, 0);
 155     lex->tok.constval.t  = 0;
 156     lex->tok.ctx.line = lex->sline;
 157     lex->tok.ctx.file = lex->name;
 158 #endif
 159 }
 160 #endif
 161
 162 lex_file* lex_open(const char *file)
 163 {
 164     lex_file *lex;
 165     FILE *in = util_fopen(file, "rb");
 166
 167     if (!in) {
 168         lexerror(NULL, "open failed: '%s'\n", file);
 169         return NULL;
 170     }
 171
 172     lex = (lex_file*)mem_a(sizeof(*lex));
 173     if (!lex) {
 174         fclose(in);
 175         lexerror(NULL, "out of memory\n");
 176         return NULL;
 177     }
 178
 179     memset(lex, 0, sizeof(*lex));
 180
 181     lex->file = in;
 182     lex->name = util_strdup(file);
 183     lex->line = 1; /* we start counting at 1 */
 184
 185     lex->peekpos = 0;
 186     lex->eof = false;
 187
 188     vec_push(lex_filenames, lex->name);
 189     return lex;
 190 }
 191
 192 lex_file* lex_open_string(const char *str, size_t len, const char *name)
 193 {
 194     lex_file *lex;
 195
 196     lex = (lex_file*)mem_a(sizeof(*lex));
 197     if (!lex) {
 198         lexerror(NULL, "out of memory\n");
 199         return NULL;
 200     }
 201
 202     memset(lex, 0, sizeof(*lex));
 203
 204     lex->file = NULL;
 205     lex->open_string        = str;
 206     lex->open_string_length = len;
 207     lex->open_string_pos    = 0;
 208
 209     lex->name = util_strdup(name ? name : "<string-source>");
 210     lex->line = 1; /* we start counting at 1 */
 211
 212     lex->peekpos = 0;
 213     lex->eof = false;
 214
 215     vec_push(lex_filenames, lex->name);
 216
 217     return lex;
 218 }
 219
 220 void lex_cleanup(void)
 221 {
 222     size_t i;
 223     for (i = 0; i < vec_size(lex_filenames); ++i)
 224         mem_d(lex_filenames[i]);
 225     vec_free(lex_filenames);
 226 }
 227
 228 void lex_close(lex_file *lex)
 229 {
 230     size_t i;
 231     for (i = 0; i < vec_size(lex->frames); ++i)
 232         mem_d(lex->frames[i].name);
 233     vec_free(lex->frames);
 234
 235     if (lex->modelname)
 236         vec_free(lex->modelname);
 237
 238     if (lex->file)
 239         fclose(lex->file);
 240 #if 0
 241     if (lex->tok)
 242         token_delete(lex->tok);
 243 #else
 244     vec_free(lex->tok.value);
 245 #endif
 246     /* mem_d(lex->name); collected in lex_filenames */
 247     mem_d(lex);
 248 }
 249
 250 static int lex_fgetc(lex_file *lex)
 251 {
 252     if (lex->file)
 253         return fgetc(lex->file);
 254     if (lex->open_string) {
 255         if (lex->open_string_pos >= lex->open_string_length)
 256             return EOF;
 257         return lex->open_string[lex->open_string_pos++];
 258     }
 259     return EOF;
 260 }
 261
  262 /* Get or put-back data
  263  * The following two functions do NOT understand what kind of data they
  264  * are working on.
  265  * They are merely wrapping get/put in order to count line numbers.
  266  */
 267 static void lex_ungetch(lex_file *lex, int ch);
 268 static int lex_try_trigraph(lex_file *lex, int old)
 269 {
 270     int c2, c3;
 271     c2 = lex_fgetc(lex);
 272     if (c2 != '?') {
 273         lex_ungetch(lex, c2);
 274         return old;
 275     }
 276
 277     c3 = lex_fgetc(lex);
 278     switch (c3) {
 279         case '=': return '#';
 280         case '/': return '\\';
 281         case '\'': return '^';
 282         case '(': return '[';
 283         case ')': return ']';
 284         case '!': return '|';
 285         case '<': return '{';
 286         case '>': return '}';
 287         case '-': return '~';
 288         default:
 289             lex_ungetch(lex, c3);
 290             lex_ungetch(lex, c2);
 291             return old;
 292     }
 293 }
 294
 295 static int lex_try_digraph(lex_file *lex, int ch)
 296 {
 297     int c2;
 298     c2 = lex_fgetc(lex);
 299     if      (ch == '<' && c2 == ':')
 300         return '[';
 301     else if (ch == ':' && c2 == '>')
 302         return ']';
 303     else if (ch == '<' && c2 == '%')
 304         return '{';
 305     else if (ch == '%' && c2 == '>')
 306         return '}';
 307     else if (ch == '%' && c2 == ':')
 308         return '#';
 309     lex_ungetch(lex, c2);
 310     return ch;
 311 }
 312
 313 static int lex_getch(lex_file *lex)
 314 {
 315     int ch;
 316
 317     if (lex->peekpos) {
 318         lex->peekpos--;
 319         if (!lex->push_line && lex->peek[lex->peekpos] == '\n')
 320             lex->line++;
 321         return lex->peek[lex->peekpos];
 322     }
 323
 324     ch = lex_fgetc(lex);
 325     if (!lex->push_line && ch == '\n')
 326         lex->line++;
 327     else if (ch == '?')
 328         return lex_try_trigraph(lex, ch);
 329     else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
 330         return lex_try_digraph(lex, ch);
 331     return ch;
 332 }
 333
 334 static void lex_ungetch(lex_file *lex, int ch)
 335 {
 336     lex->peek[lex->peekpos++] = ch;
 337     if (!lex->push_line && ch == '\n')
 338         lex->line--;
 339 }
 340
 341 /* classify characters
 342  * some additions to the is*() functions of ctype.h
 343  */
 344
 345 /* Idents are alphanumberic, but they start with alpha or _ */
 346 static bool isident_start(int ch)
 347 {
 348     return isalpha(ch) || ch == '_';
 349 }
 350
 351 static bool isident(int ch)
 352 {
 353     return isident_start(ch) || isdigit(ch);
 354 }
 355
 356 /* isxdigit_only is used when we already know it's not a digit
 357  * and want to see if it's a hex digit anyway.
 358  */
 359 static bool isxdigit_only(int ch)
 360 {
 361     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 362 }
 363
 364 /* Append a character to the token buffer */
 365 static void lex_tokench(lex_file *lex, int ch)
 366 {
 367     vec_push(lex->tok.value, ch);
 368 }
 369
 370 /* Append a trailing null-byte */
 371 static void lex_endtoken(lex_file *lex)
 372 {
 373     vec_push(lex->tok.value, 0);
 374     vec_shrinkby(lex->tok.value, 1);
 375 }
 376
 377 static bool lex_try_pragma(lex_file *lex)
 378 {
 379     int ch;
 380     char *pragma  = NULL;
 381     char *command = NULL;
 382     char *param   = NULL;
 383     size_t line;
 384
 385     if (lex->flags.preprocessing)
 386         return false;
 387
 388     line = lex->line;
 389
 390     ch = lex_getch(lex);
 391     if (ch != '#') {
 392         lex_ungetch(lex, ch);
 393         return false;
 394     }
 395
 396     for (ch = lex_getch(lex); vec_size(pragma) < 8 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
 397         vec_push(pragma, ch);
 398     vec_push(pragma, 0);
 399
 400     if (ch != ' ' || strcmp(pragma, "pragma")) {
 401         lex_ungetch(lex, ch);
 402         goto unroll;
 403     }
 404
 405     for (ch = lex_getch(lex); vec_size(command) < 32 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
 406         vec_push(command, ch);
 407     vec_push(command, 0);
 408
 409     if (ch != '(') {
 410         lex_ungetch(lex, ch);
 411         goto unroll;
 412     }
 413
 414     for (ch = lex_getch(lex); vec_size(param) < 32 && ch != ')' && ch != '\n'; ch = lex_getch(lex))
 415         vec_push(param, ch);
 416     vec_push(param, 0);
 417
 418     if (ch != ')') {
 419         lex_ungetch(lex, ch);
 420         goto unroll;
 421     }
 422
 423     if (!strcmp(command, "push")) {
 424         if (!strcmp(param, "line")) {
 425             lex->push_line++;
 426             --line;
 427         }
 428         else
 429             goto unroll;
 430     }
 431     else if (!strcmp(command, "pop")) {
 432         if (!strcmp(param, "line")) {
 433             if (lex->push_line)
 434                 lex->push_line--;
 435             --line;
 436         }
 437         else
 438             goto unroll;
 439     }
 440     else if (!strcmp(command, "file")) {
 441         lex->name = util_strdup(param);
 442         vec_push(lex_filenames, lex->name);
 443     }
 444     else if (!strcmp(command, "line")) {
 445         line = strtol(param, NULL, 0)-1;
 446     }
 447     else
 448         goto unroll;
 449
 450     lex->line = line;
 451     while (ch != '\n' && ch != EOF)
 452         ch = lex_getch(lex);
 453     return true;
 454
 455 unroll:
 456     if (command) {
 457         vec_pop(command);
 458         while (vec_size(command)) {
 459             lex_ungetch(lex, vec_last(command));
 460             vec_pop(command);
 461         }
 462         vec_free(command);
 463     }
 464     if (command) {
 465         vec_pop(command);
 466         while (vec_size(command)) {
 467             lex_ungetch(lex, vec_last(command));
 468             vec_pop(command);
 469         }
 470         vec_free(command);
 471     }
 472     if (pragma) {
 473         vec_pop(pragma);
 474         while (vec_size(pragma)) {
 475             lex_ungetch(lex, vec_last(pragma));
 476             vec_pop(pragma);
 477         }
 478         vec_free(pragma);
 479     }
 480     lex_ungetch(lex, '#');
 481
 482     lex->line = line;
 483     return false;
 484 }
 485
 486 /* Skip whitespace and comments and return the first
 487  * non-white character.
 488  * As this makes use of the above getch() ungetch() functions,
 489  * we don't need to care at all about line numbering anymore.
 490  *
 491  * In theory, this function should only be used at the beginning
 492  * of lexing, or when we *know* the next character is part of the token.
 493  * Otherwise, if the parser throws an error, the linenumber may not be
 494  * the line of the error, but the line of the next token AFTER the error.
 495  *
 496  * This is currently only problematic when using c-like string-continuation,
 497  * since comments and whitespaces are allowed between 2 such strings.
 498  * Example:
 499 printf(   "line one\n"
 500 // A comment
 501           "A continuation of the previous string"
 502 // This line is skipped
 503       , foo);
 504
  505  * In this case, if the parser decides it didn't actually want a string,
 506  * and uses lex->line to print an error, it will show the ', foo);' line's
 507  * linenumber.
 508  *
 509  * On the other hand, the parser is supposed to remember the line of the next
 510  * token's beginning. In this case we would want skipwhite() to be called
 511  * AFTER reading a token, so that the parser, before reading the NEXT token,
  512  * doesn't store the *comment's* linenumber, but the actual token's linenumber.
 513  *
 514  * THIS SOLUTION
 515  *    here is to store the line of the first character after skipping
 516  *    the initial whitespace in lex->sline, this happens in lex_do.
 517  */
 518 static int lex_skipwhite(lex_file *lex)
 519 {
 520     int ch = 0;
 521     bool haswhite = false;
 522
 523     do
 524     {
 525         ch = lex_getch(lex);
 526         while (ch != EOF && isspace(ch)) {
 527             if (ch == '\n') {
 528                 if (lex_try_pragma(lex))
 529                     continue;
 530             }
 531             if (lex->flags.preprocessing) {
 532                 if (ch == '\n') {
 533                     /* end-of-line */
 534                     /* see if there was whitespace first */
 535                     if (haswhite) { /* (vec_size(lex->tok.value)) { */
 536                         lex_ungetch(lex, ch);
 537                         lex_endtoken(lex);
 538                         return TOKEN_WHITE;
 539                     }
 540                     /* otherwise return EOL */
 541                     return TOKEN_EOL;
 542                 }
 543                 haswhite = true;
 544                 lex_tokench(lex, ch);
 545             }
 546             ch = lex_getch(lex);
 547         }
 548
 549         if (ch == '/') {
 550             ch = lex_getch(lex);
 551             if (ch == '/')
 552             {
 553                 /* one line comment */
 554                 ch = lex_getch(lex);
 555
 556                 if (lex->flags.preprocessing) {
 557                     haswhite = true;
 558                     /*
 559                     lex_tokench(lex, '/');
 560                     lex_tokench(lex, '/');
 561                     */
 562                     lex_tokench(lex, ' ');
 563                     lex_tokench(lex, ' ');
 564                 }
 565
 566                 while (ch != EOF && ch != '\n') {
 567                     if (lex->flags.preprocessing)
 568                         lex_tokench(lex, ' '); /* ch); */
 569                     ch = lex_getch(lex);
 570                 }
 571                 if (lex->flags.preprocessing) {
 572                     lex_ungetch(lex, '\n');
 573                     lex_endtoken(lex);
 574                     return TOKEN_WHITE;
 575                 }
 576                 continue;
 577             }
 578             if (ch == '*')
 579             {
 580                 /* multiline comment */
 581                 if (lex->flags.preprocessing) {
 582                     haswhite = true;
 583                     /*
 584                     lex_tokench(lex, '/');
 585                     lex_tokench(lex, '*');
 586                     */
 587                     lex_tokench(lex, ' ');
 588                     lex_tokench(lex, ' ');
 589                 }
 590
 591                 while (ch != EOF)
 592                 {
 593                     ch = lex_getch(lex);
 594                     if (ch == '*') {
 595                         ch = lex_getch(lex);
 596                         if (ch == '/') {
 597                             if (lex->flags.preprocessing) {
 598                                 /*
 599                                 lex_tokench(lex, '*');
 600                                 lex_tokench(lex, '/');
 601                                 */
 602                                 lex_tokench(lex, ' ');
 603                                 lex_tokench(lex, ' ');
 604                             }
 605                             break;
 606                         }
 607                     }
 608                     if (lex->flags.preprocessing) {
 609                         lex_tokench(lex, ' '); /* ch); */
 610                     }
 611                 }
 612                 ch = ' '; /* cause TRUE in the isspace check */
 613                 continue;
 614             }
 615             /* Otherwise roll back to the slash and break out of the loop */
 616             lex_ungetch(lex, ch);
 617             ch = '/';
 618             break;
 619         }
 620     } while (ch != EOF && isspace(ch));
 621
 622     if (haswhite) {
 623         lex_endtoken(lex);
 624         lex_ungetch(lex, ch);
 625         return TOKEN_WHITE;
 626     }
 627     return ch;
 628 }
 629
 630 /* Get a token */
 631 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 632 {
 633     int ch;
 634
 635     ch = lex_getch(lex);
 636     while (ch != EOF && isident(ch))
 637     {
 638         lex_tokench(lex, ch);
 639         ch = lex_getch(lex);
 640     }
 641
 642     /* last ch was not an ident ch: */
 643     lex_ungetch(lex, ch);
 644
 645     return true;
 646 }
 647
 648 /* read one ident for the frame list */
 649 static int lex_parse_frame(lex_file *lex)
 650 {
 651     int ch;
 652
 653     lex_token_new(lex);
 654
 655     ch = lex_getch(lex);
 656     while (ch != EOF && ch != '\n' && isspace(ch))
 657         ch = lex_getch(lex);
 658
 659     if (ch == '\n')
 660         return 1;
 661
 662     if (!isident_start(ch)) {
 663         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 664         return -1;
 665     }
 666
 667     lex_tokench(lex, ch);
 668     if (!lex_finish_ident(lex))
 669         return -1;
 670     lex_endtoken(lex);
 671     return 0;
 672 }
 673
 674 /* read a list of $frames */
 675 static bool lex_finish_frames(lex_file *lex)
 676 {
 677     do {
 678         size_t i;
 679         int    rc;
 680         frame_macro m;
 681
 682         rc = lex_parse_frame(lex);
 683         if (rc > 0) /* end of line */
 684             return true;
 685         if (rc < 0) /* error */
 686             return false;
 687
 688         for (i = 0; i < vec_size(lex->frames); ++i) {
 689             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 690                 lex->frames[i].value = lex->framevalue++;
 691                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 692                     return false;
 693                 break;
 694             }
 695         }
 696         if (i < vec_size(lex->frames))
 697             continue;
 698
 699         m.value = lex->framevalue++;
 700         m.name = util_strdup(lex->tok.value);
 701         vec_shrinkto(lex->tok.value, 0);
 702         vec_push(lex->frames, m);
 703     } while (true);
 704 }
 705
 706 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 707 {
 708     int ch = 0;
 709
 710     while (ch != EOF)
 711     {
 712         ch = lex_getch(lex);
 713         if (ch == quote)
 714             return TOKEN_STRINGCONST;
 715
 716         if (lex->flags.preprocessing && ch == '\\') {
 717             lex_tokench(lex, ch);
 718             ch = lex_getch(lex);
 719             if (ch == EOF) {
 720                 lexerror(lex, "unexpected end of file");
 721                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 722                 return (lex->tok.ttype = TOKEN_ERROR);
 723             }
 724             lex_tokench(lex, ch);
 725         }
 726         else if (ch == '\\') {
 727             ch = lex_getch(lex);
 728             if (ch == EOF) {
 729                 lexerror(lex, "unexpected end of file");
 730                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 731                 return (lex->tok.ttype = TOKEN_ERROR);
 732             }
 733
 734             switch (ch) {
 735             case '\\': break;
 736             case '\'': break;
 737             case '"':  break;
 738             case 'a':  ch = '\a'; break;
 739             case 'b':  ch = '\b'; break;
 740             case 'r':  ch = '\r'; break;
 741             case 'n':  ch = '\n'; break;
 742             case 't':  ch = '\t'; break;
 743             case 'f':  ch = '\f'; break;
 744             case 'v':  ch = '\v'; break;
 745             default:
 746                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 747                 /* so we just add the character plus backslash no matter what it actually is */
 748                 lex_tokench(lex, '\\');
 749             }
 750             /* add the character finally */
 751             lex_tokench(lex, ch);
 752         }
 753         else
 754             lex_tokench(lex, ch);
 755     }
 756     lexerror(lex, "unexpected end of file within string constant");
 757     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 758     return (lex->tok.ttype = TOKEN_ERROR);
 759 }
 760
 761 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 762 {
 763     bool ishex = false;
 764
 765     int  ch = lastch;
 766
 767     /* parse a number... */
 768     lex->tok.ttype = TOKEN_INTCONST;
 769
 770     lex_tokench(lex, ch);
 771
 772     ch = lex_getch(lex);
 773     if (ch != '.' && !isdigit(ch))
 774     {
 775         if (lastch != '0' || ch != 'x')
 776         {
 777             /* end of the number or EOF */
 778             lex_ungetch(lex, ch);
 779             lex_endtoken(lex);
 780
 781             lex->tok.constval.i = lastch - '0';
 782             return lex->tok.ttype;
 783         }
 784
 785         ishex = true;
 786     }
 787
 788     /* EOF would have been caught above */
 789
 790     if (ch != '.')
 791     {
 792         lex_tokench(lex, ch);
 793         ch = lex_getch(lex);
 794         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 795         {
 796             lex_tokench(lex, ch);
 797             ch = lex_getch(lex);
 798         }
 799     }
 800     /* NOT else, '.' can come from above as well */
 801     if (ch == '.' && !ishex)
 802     {
 803         /* Allow floating comma in non-hex mode */
 804         lex->tok.ttype = TOKEN_FLOATCONST;
 805         lex_tokench(lex, ch);
 806
 807         /* continue digits-only */
 808         ch = lex_getch(lex);
 809         while (isdigit(ch))
 810         {
 811             lex_tokench(lex, ch);
 812             ch = lex_getch(lex);
 813         }
 814     }
 815     /* put back the last character */
 816     /* but do not put back the trailing 'f' or a float */
 817     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 818         ch = lex_getch(lex);
 819
 820     /* generally we don't want words to follow numbers: */
 821     if (isident(ch)) {
 822         lexerror(lex, "unexpected trailing characters after number");
 823         return (lex->tok.ttype = TOKEN_ERROR);
 824     }
 825     lex_ungetch(lex, ch);
 826
 827     lex_endtoken(lex);
 828     if (lex->tok.ttype == TOKEN_FLOATCONST)
 829         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 830     else
 831         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 832     return lex->tok.ttype;
 833 }
 834
 835 int lex_do(lex_file *lex)
 836 {
 837     int ch, nextch;
 838
 839     lex_token_new(lex);
 840 #if 0
 841     if (!lex->tok)
 842         return TOKEN_FATAL;
 843 #endif
 844
 845     while (true) {
 846         ch = lex_skipwhite(lex);
 847         if (!lex->flags.mergelines || ch != '\\')
 848             break;
 849         ch = lex_getch(lex);
 850         if (ch != '\n') {
 851             lex_ungetch(lex, ch);
 852             ch = '\\';
 853             break;
 854         }
 855         /* we reached a linemerge */
 856         lex_tokench(lex, '\n');
 857         continue;
 858     }
 859
 860     lex->sline = lex->line;
 861     lex->tok.ctx.line = lex->sline;
 862     lex->tok.ctx.file = lex->name;
 863
 864     if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
 865         return (lex->tok.ttype = ch);
 866     }
 867
 868     if (lex->eof)
 869         return (lex->tok.ttype = TOKEN_FATAL);
 870
 871     if (ch == EOF) {
 872         lex->eof = true;
 873         return (lex->tok.ttype = TOKEN_EOF);
 874     }
 875
 876     /* modelgen / spiritgen commands */
 877     if (ch == '$') {
 878         const char *v;
 879         size_t frame;
 880
 881         ch = lex_getch(lex);
 882         if (!isident_start(ch)) {
 883             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 884             return lex_do(lex);
 885         }
 886         lex_tokench(lex, ch);
 887         if (!lex_finish_ident(lex))
 888             return (lex->tok.ttype = TOKEN_ERROR);
 889         lex_endtoken(lex);
 890         /* skip the known commands */
 891         v = lex->tok.value;
 892
 893         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 894         {
 895             /* frame/framesave command works like an enum
 896              * similar to fteqcc we handle this in the lexer.
 897              * The reason for this is that it is sensitive to newlines,
 898              * which the parser is unaware of
 899              */
 900             if (!lex_finish_frames(lex))
 901                  return (lex->tok.ttype = TOKEN_ERROR);
 902             return lex_do(lex);
 903         }
 904
 905         if (!strcmp(v, "framevalue"))
 906         {
 907             ch = lex_getch(lex);
 908             while (ch != EOF && isspace(ch) && ch != '\n')
 909                 ch = lex_getch(lex);
 910
 911             if (!isdigit(ch)) {
 912                 lexerror(lex, "$framevalue requires an integer parameter");
 913                 return lex_do(lex);
 914             }
 915
 916             lex_token_new(lex);
 917             lex->tok.ttype = lex_finish_digit(lex, ch);
 918             lex_endtoken(lex);
 919             if (lex->tok.ttype != TOKEN_INTCONST) {
 920                 lexerror(lex, "$framevalue requires an integer parameter");
 921                 return lex_do(lex);
 922             }
 923             lex->framevalue = lex->tok.constval.i;
 924             return lex_do(lex);
 925         }
 926
 927         if (!strcmp(v, "framerestore"))
 928         {
 929             int rc;
 930
 931             lex_token_new(lex);
 932
 933             rc = lex_parse_frame(lex);
 934
 935             if (rc > 0) {
 936                 lexerror(lex, "$framerestore requires a framename parameter");
 937                 return lex_do(lex);
 938             }
 939             if (rc < 0)
 940                 return (lex->tok.ttype = TOKEN_FATAL);
 941
 942             v = lex->tok.value;
 943             for (frame = 0; frame < vec_size(lex->frames); ++frame) {
 944                 if (!strcmp(v, lex->frames[frame].name)) {
 945                     lex->framevalue = lex->frames[frame].value;
 946                     return lex_do(lex);
 947                 }
 948             }
 949             lexerror(lex, "unknown framename `%s`", v);
 950             return lex_do(lex);
 951         }
 952
 953         if (!strcmp(v, "modelname"))
 954         {
 955             int rc;
 956
 957             lex_token_new(lex);
 958
 959             rc = lex_parse_frame(lex);
 960
 961             if (rc > 0) {
 962                 lexerror(lex, "$modelname requires a parameter");
 963                 return lex_do(lex);
 964             }
 965             if (rc < 0)
 966                 return (lex->tok.ttype = TOKEN_FATAL);
 967
 968             v = lex->tok.value;
 969             if (lex->modelname) {
 970                 frame_macro m;
 971                 m.value = lex->framevalue;
 972                 m.name = lex->modelname;
 973                 lex->modelname = NULL;
 974                 vec_push(lex->frames, m);
 975             }
 976             lex->modelname = lex->tok.value;
 977             lex->tok.value = NULL;
 978             return lex_do(lex);
 979         }
 980
 981         if (!strcmp(v, "flush"))
 982         {
 983             size_t frame;
 984             for (frame = 0; frame < vec_size(lex->frames); ++frame)
 985                 mem_d(lex->frames[frame].name);
 986             vec_free(lex->frames);
 987             /* skip line (fteqcc does it too) */
 988             ch = lex_getch(lex);
 989             while (ch != EOF && ch != '\n')
 990                 ch = lex_getch(lex);
 991             return lex_do(lex);
 992         }
 993
 994         if (!strcmp(v, "cd") ||
 995             !strcmp(v, "origin") ||
 996             !strcmp(v, "base") ||
 997             !strcmp(v, "flags") ||
 998             !strcmp(v, "scale") ||
 999             !strcmp(v, "skin"))
1000         {
1001             /* skip line */
1002             ch = lex_getch(lex);
1003             while (ch != EOF && ch != '\n')
1004                 ch = lex_getch(lex);
1005             return lex_do(lex);
1006         }
1007
1008         for (frame = 0; frame < vec_size(lex->frames); ++frame) {
1009             if (!strcmp(v, lex->frames[frame].name)) {
1010                 lex->tok.constval.i = lex->frames[frame].value;
1011                 return (lex->tok.ttype = TOKEN_INTCONST);
1012             }
1013         }
1014
1015         lexerror(lex, "invalid frame macro");
1016         return lex_do(lex);
1017     }
1018
1019     /* single-character tokens */
1020     switch (ch)
1021     {
1022         case '[':
1023         case '(':
1024         case ':':
1025             lex_tokench(lex, ch);
1026             lex_endtoken(lex);
1027             if (lex->flags.noops)
1028                 return (lex->tok.ttype = ch);
1029             else
1030                 return (lex->tok.ttype = TOKEN_OPERATOR);
1031         case ')':
1032         case ';':
1033         case '{':
1034         case '}':
1035         case ']':
1036
1037         case '#':
1038             lex_tokench(lex, ch);
1039             lex_endtoken(lex);
1040             return (lex->tok.ttype = ch);
1041         default:
1042             break;
1043     }
1044
1045     if (lex->flags.noops)
1046     {
1047         /* Detect characters early which are normally
1048          * operators OR PART of an operator.
1049          */
1050         switch (ch)
1051         {
1052             case '+':
1053             case '-':
1054             case '*':
1055             case '/':
1056             case '<':
1057             case '>':
1058             case '=':
1059             case '&':
1060             case '|':
1061             case '^':
1062             case '~':
1063             case ',':
1064             case '!':
1065                 lex_tokench(lex, ch);
1066                 lex_endtoken(lex);
1067                 return (lex->tok.ttype = ch);
1068             default:
1069                 break;
1070         }
1071
1072         if (ch == '.')
1073         {
1074             lex_tokench(lex, ch);
1075             /* peek ahead once */
1076             nextch = lex_getch(lex);
1077             if (nextch != '.') {
1078                 lex_ungetch(lex, nextch);
1079                 lex_endtoken(lex);
1080                 return (lex->tok.ttype = ch);
1081             }
1082             /* peek ahead again */
1083             nextch = lex_getch(lex);
1084             if (nextch != '.') {
1085                 lex_ungetch(lex, nextch);
1086                 lex_ungetch(lex, nextch);
1087                 lex_endtoken(lex);
1088                 return (lex->tok.ttype = ch);
1089             }
1090             /* fill the token to be "..." */
1091             lex_tokench(lex, ch);
1092             lex_tokench(lex, ch);
1093             lex_endtoken(lex);
1094             return (lex->tok.ttype = TOKEN_DOTS);
1095         }
1096     }
1097
1098     if (ch == ',' || ch == '.') {
1099         lex_tokench(lex, ch);
1100         lex_endtoken(lex);
1101         return (lex->tok.ttype = TOKEN_OPERATOR);
1102     }
1103
1104     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
1105         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
1106         ch == '=' || ch == '!' || /* ==, != */
1107         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
1108     {
1109         lex_tokench(lex, ch);
1110
1111         nextch = lex_getch(lex);
1112         if (nextch == ch || nextch == '=') {
1113             lex_tokench(lex, nextch);
1114         } else if (ch == '-' && nextch == '>') {
1115             lex_tokench(lex, nextch);
1116         } else
1117             lex_ungetch(lex, nextch);
1118
1119         lex_endtoken(lex);
1120         return (lex->tok.ttype = TOKEN_OPERATOR);
1121     }
1122
1123     /*
1124     if (ch == '^' || ch == '~' || ch == '!')
1125     {
1126         lex_tokench(lex, ch);
1127         lex_endtoken(lex);
1128         return (lex->tok.ttype = TOKEN_OPERATOR);
1129     }
1130     */
1131
1132     if (ch == '*' || ch == '/') /* *=, /= */
1133     {
1134         lex_tokench(lex, ch);
1135
1136         nextch = lex_getch(lex);
1137         if (nextch == '=') {
1138             lex_tokench(lex, nextch);
1139         } else
1140             lex_ungetch(lex, nextch);
1141
1142         lex_endtoken(lex);
1143         return (lex->tok.ttype = TOKEN_OPERATOR);
1144     }
1145
1146     if (isident_start(ch))
1147     {
1148         const char *v;
1149
1150         lex_tokench(lex, ch);
1151         if (!lex_finish_ident(lex)) {
1152             /* error? */
1153             return (lex->tok.ttype = TOKEN_ERROR);
1154         }
1155         lex_endtoken(lex);
1156         lex->tok.ttype = TOKEN_IDENT;
1157
1158         v = lex->tok.value;
1159         if (!strcmp(v, "void")) {
1160             lex->tok.ttype = TOKEN_TYPENAME;
1161             lex->tok.constval.t = TYPE_VOID;
1162         } else if (!strcmp(v, "int")) {
1163             lex->tok.ttype = TOKEN_TYPENAME;
1164             lex->tok.constval.t = TYPE_INTEGER;
1165         } else if (!strcmp(v, "float")) {
1166             lex->tok.ttype = TOKEN_TYPENAME;
1167             lex->tok.constval.t = TYPE_FLOAT;
1168         } else if (!strcmp(v, "string")) {
1169             lex->tok.ttype = TOKEN_TYPENAME;
1170             lex->tok.constval.t = TYPE_STRING;
1171         } else if (!strcmp(v, "entity")) {
1172             lex->tok.ttype = TOKEN_TYPENAME;
1173             lex->tok.constval.t = TYPE_ENTITY;
1174         } else if (!strcmp(v, "vector")) {
1175             lex->tok.ttype = TOKEN_TYPENAME;
1176             lex->tok.constval.t = TYPE_VECTOR;
1177         } else {
1178             size_t kw;
1179             for (kw = 0; kw < num_keywords_qc; ++kw) {
1180                 if (!strcmp(v, keywords_qc[kw]))
1181                     return (lex->tok.ttype = TOKEN_KEYWORD);
1182             }
1183             if (opts_standard != COMPILER_QCC) {
1184                 for (kw = 0; kw < num_keywords_fg; ++kw) {
1185                     if (!strcmp(v, keywords_fg[kw]))
1186                         return (lex->tok.ttype = TOKEN_KEYWORD);
1187                 }
1188             }
1189         }
1190
1191         return lex->tok.ttype;
1192     }
1193
1194     if (ch == '"')
1195     {
1196         lex->flags.nodigraphs = true;
1197         if (lex->flags.preprocessing)
1198             lex_tokench(lex, ch);
1199         lex->tok.ttype = lex_finish_string(lex, '"');
1200         if (lex->flags.preprocessing)
1201             lex_tokench(lex, ch);
1202         while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
1203         {
1204             /* Allow c style "string" "continuation" */
1205             ch = lex_skipwhite(lex);
1206             if (ch != '"') {
1207                 lex_ungetch(lex, ch);
1208                 break;
1209             }
1210
1211             lex->tok.ttype = lex_finish_string(lex, '"');
1212         }
1213         lex->flags.nodigraphs = false;
1214         lex_endtoken(lex);
1215         return lex->tok.ttype;
1216     }
1217
1218     if (ch == '\'')
1219     {
1220         /* we parse character constants like string,
1221          * but return TOKEN_CHARCONST, or a vector type if it fits...
1222          * Likewise actual unescaping has to be done by the parser.
1223          * The difference is we don't allow 'char' 'continuation'.
1224          */
1225         if (lex->flags.preprocessing)
1226             lex_tokench(lex, ch);
1227         lex->tok.ttype = lex_finish_string(lex, '\'');
1228         if (lex->flags.preprocessing)
1229             lex_tokench(lex, ch);
1230         lex_endtoken(lex);
1231
1232          /* It's a vector if we can successfully scan 3 floats */
1233 #ifdef WIN32
1234         if (sscanf_s(lex->tok.value, " %f %f %f ",
1235                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1236 #else
1237         if (sscanf(lex->tok.value, " %f %f %f ",
1238                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1239 #endif
1240
1241         {
1242              lex->tok.ttype = TOKEN_VECTORCONST;
1243         }
1244
1245         return lex->tok.ttype;
1246     }
1247
1248     if (isdigit(ch))
1249     {
1250         lex->tok.ttype = lex_finish_digit(lex, ch);
1251         lex_endtoken(lex);
1252         return lex->tok.ttype;
1253     }
1254
1255     lexerror(lex, "unknown token");
1256     return (lex->tok.ttype = TOKEN_ERROR);
1257 }