lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
/* Vector of every name string handed to a lex_file (lex_open,
 * lex_open_string and the `file` pragma push onto it). lex_cleanup()
 * releases all entries, which is why lex_close() leaves lex->name alone.
 */
char* *lex_filenames;
  10
  11 void lexerror(lex_file *lex, const char *fmt, ...)
  12 {
  13         va_list ap;
  14
  15         va_start(ap, fmt);
  16         if (lex)
  17         con_vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  18     else
  19         con_vprintmsg(LVL_ERROR, "", 0, "parse error", fmt, ap);
  20         va_end(ap);
  21 }
  22
  23 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  24 {
  25         va_list ap;
  26         int lvl = LVL_WARNING;
  27
  28     if (!OPTS_WARN(warntype))
  29         return false;
  30
  31     if (opts_werror)
  32             lvl = LVL_ERROR;
  33
  34         va_start(ap, fmt);
  35     con_vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
  36         va_end(ap);
  37
  38         return opts_werror;
  39 }
  40
  41
  42 #if 0
  43 token* token_new()
  44 {
  45     token *tok = (token*)mem_a(sizeof(token));
  46     if (!tok)
  47         return NULL;
  48     memset(tok, 0, sizeof(*tok));
  49     return tok;
  50 }
  51
  52 void token_delete(token *self)
  53 {
  54     if (self->next && self->next->prev == self)
  55         self->next->prev = self->prev;
  56     if (self->prev && self->prev->next == self)
  57         self->prev->next = self->next;
  58     MEM_VECTOR_CLEAR(self, value);
  59     mem_d(self);
  60 }
  61
  62 token* token_copy(const token *cp)
  63 {
  64     token* self = token_new();
  65     if (!self)
  66         return NULL;
  67     /* copy the value */
  68     self->value_alloc = cp->value_count + 1;
  69     self->value_count = cp->value_count;
  70     self->value = (char*)mem_a(self->value_alloc);
  71     if (!self->value) {
  72         mem_d(self);
  73         return NULL;
  74     }
  75     memcpy(self->value, cp->value, cp->value_count);
  76     self->value[self->value_alloc-1] = 0;
  77
  78     /* rest */
  79     self->ctx = cp->ctx;
  80     self->ttype = cp->ttype;
  81     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  82     return self;
  83 }
  84
  85 void token_delete_all(token *t)
  86 {
  87     token *n;
  88
  89     do {
  90         n = t->next;
  91         token_delete(t);
  92         t = n;
  93     } while(t);
  94 }
  95
  96 token* token_copy_all(const token *cp)
  97 {
  98     token *cur;
  99     token *out;
 100
 101     out = cur = token_copy(cp);
 102     if (!out)
 103         return NULL;
 104
 105     while (cp->next) {
 106         cp = cp->next;
 107         cur->next = token_copy(cp);
 108         if (!cur->next) {
 109             token_delete_all(out);
 110             return NULL;
 111         }
 112         cur->next->prev = cur;
 113         cur = cur->next;
 114     }
 115
 116     return out;
 117 }
 118 #else
 119 static void lex_token_new(lex_file *lex)
 120 {
 121 #if 0
 122     if (lex->tok)
 123         token_delete(lex->tok);
 124     lex->tok = token_new();
 125 #else
 126     if (lex->tok.value)
 127         vec_shrinkto(lex->tok.value, 0);
 128     lex->tok.constval.t  = 0;
 129     lex->tok.ctx.line = lex->sline;
 130     lex->tok.ctx.file = lex->name;
 131 #endif
 132 }
 133 #endif
 134
 135 lex_file* lex_open(const char *file)
 136 {
 137     lex_file *lex;
 138     FILE *in = util_fopen(file, "rb");
 139
 140     if (!in) {
 141         lexerror(NULL, "open failed: '%s'\n", file);
 142         return NULL;
 143     }
 144
 145     lex = (lex_file*)mem_a(sizeof(*lex));
 146     if (!lex) {
 147         fclose(in);
 148         lexerror(NULL, "out of memory\n");
 149         return NULL;
 150     }
 151
 152     memset(lex, 0, sizeof(*lex));
 153
 154     lex->file = in;
 155     lex->name = util_strdup(file);
 156     lex->line = 1; /* we start counting at 1 */
 157
 158     lex->peekpos = 0;
 159     lex->eof = false;
 160
 161     vec_push(lex_filenames, lex->name);
 162     return lex;
 163 }
 164
 165 lex_file* lex_open_string(const char *str, size_t len, const char *name)
 166 {
 167     lex_file *lex;
 168
 169     lex = (lex_file*)mem_a(sizeof(*lex));
 170     if (!lex) {
 171         lexerror(NULL, "out of memory\n");
 172         return NULL;
 173     }
 174
 175     memset(lex, 0, sizeof(*lex));
 176
 177     lex->file = NULL;
 178     lex->open_string        = str;
 179     lex->open_string_length = len;
 180     lex->open_string_pos    = 0;
 181
 182     lex->name = util_strdup(name ? name : "<string-source>");
 183     lex->line = 1; /* we start counting at 1 */
 184
 185     lex->peekpos = 0;
 186     lex->eof = false;
 187
 188     vec_push(lex_filenames, lex->name);
 189
 190     return lex;
 191 }
 192
 193 void lex_cleanup(void)
 194 {
 195     size_t i;
 196     for (i = 0; i < vec_size(lex_filenames); ++i)
 197         mem_d(lex_filenames[i]);
 198     vec_free(lex_filenames);
 199 }
 200
 201 void lex_close(lex_file *lex)
 202 {
 203     size_t i;
 204     for (i = 0; i < vec_size(lex->frames); ++i)
 205         mem_d(lex->frames[i].name);
 206     vec_free(lex->frames);
 207
 208     if (lex->modelname)
 209         vec_free(lex->modelname);
 210
 211     if (lex->file)
 212         fclose(lex->file);
 213 #if 0
 214     if (lex->tok)
 215         token_delete(lex->tok);
 216 #else
 217     vec_free(lex->tok.value);
 218 #endif
 219     /* mem_d(lex->name); collected in lex_filenames */
 220     mem_d(lex);
 221 }
 222
 223 static int lex_fgetc(lex_file *lex)
 224 {
 225     if (lex->file)
 226         return fgetc(lex->file);
 227     if (lex->open_string) {
 228         if (lex->open_string_pos >= lex->open_string_length)
 229             return EOF;
 230         return lex->open_string[lex->open_string_pos++];
 231     }
 232     return EOF;
 233 }
 234
/* Get or put-back data
 * The following two functions do NOT understand what kind of data they
 * are working on.
 * They are merely wrapping get/put in order to count line numbers.
 */
 240 static void lex_ungetch(lex_file *lex, int ch);
 241 static int lex_try_trigraph(lex_file *lex, int old)
 242 {
 243     int c2, c3;
 244     c2 = lex_fgetc(lex);
 245     if (c2 != '?') {
 246         lex_ungetch(lex, c2);
 247         return old;
 248     }
 249
 250     c3 = lex_fgetc(lex);
 251     switch (c3) {
 252         case '=': return '#';
 253         case '/': return '\\';
 254         case '\'': return '^';
 255         case '(': return '[';
 256         case ')': return ']';
 257         case '!': return '|';
 258         case '<': return '{';
 259         case '>': return '}';
 260         case '-': return '~';
 261         default:
 262             lex_ungetch(lex, c3);
 263             lex_ungetch(lex, c2);
 264             return old;
 265     }
 266 }
 267
 268 static int lex_try_digraph(lex_file *lex, int ch)
 269 {
 270     int c2;
 271     c2 = lex_fgetc(lex);
 272     if      (ch == '<' && c2 == ':')
 273         return '[';
 274     else if (ch == ':' && c2 == '>')
 275         return ']';
 276     else if (ch == '<' && c2 == '%')
 277         return '{';
 278     else if (ch == '%' && c2 == '>')
 279         return '}';
 280     else if (ch == '%' && c2 == ':')
 281         return '#';
 282     lex_ungetch(lex, c2);
 283     return ch;
 284 }
 285
 286 static int lex_getch(lex_file *lex)
 287 {
 288     int ch;
 289
 290     if (lex->peekpos) {
 291         lex->peekpos--;
 292         if (!lex->push_line && lex->peek[lex->peekpos] == '\n')
 293             lex->line++;
 294         return lex->peek[lex->peekpos];
 295     }
 296
 297     ch = lex_fgetc(lex);
 298     if (!lex->push_line && ch == '\n')
 299         lex->line++;
 300     else if (ch == '?')
 301         return lex_try_trigraph(lex, ch);
 302     else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
 303         return lex_try_digraph(lex, ch);
 304     return ch;
 305 }
 306
 307 static void lex_ungetch(lex_file *lex, int ch)
 308 {
 309     lex->peek[lex->peekpos++] = ch;
 310     if (!lex->push_line && ch == '\n')
 311         lex->line--;
 312 }
 313
 314 /* classify characters
 315  * some additions to the is*() functions of ctype.h
 316  */
 317
 318 /* Idents are alphanumberic, but they start with alpha or _ */
 319 static bool isident_start(int ch)
 320 {
 321     return isalpha(ch) || ch == '_';
 322 }
 323
 324 static bool isident(int ch)
 325 {
 326     return isident_start(ch) || isdigit(ch);
 327 }
 328
 329 /* isxdigit_only is used when we already know it's not a digit
 330  * and want to see if it's a hex digit anyway.
 331  */
 332 static bool isxdigit_only(int ch)
 333 {
 334     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 335 }
 336
/* Append one character to the value buffer of the token being built
 * (lex->tok.value). No terminator is written; see lex_endtoken().
 */
static void lex_tokench(lex_file *lex, int ch)
{
    vec_push(lex->tok.value, ch);
}
 342
/* Terminate the token text with a null byte so it can be used as a C string.
 * The byte is pushed and the vector is then shrunk by one again, so the
 * terminator does not count towards the token's size.
 * NOTE(review): this relies on vec_shrinkby() only lowering the element
 * count and leaving the stored byte in place - confirm against the vec
 * macros.
 */
static void lex_endtoken(lex_file *lex)
{
    vec_push(lex->tok.value, 0);
    vec_shrinkby(lex->tok.value, 1);
}
 349
 350 static bool lex_try_pragma(lex_file *lex)
 351 {
 352     int ch;
 353     char *pragma  = NULL;
 354     char *command = NULL;
 355     char *param   = NULL;
 356     size_t line;
 357
 358     if (lex->flags.preprocessing)
 359         return false;
 360
 361     line = lex->line;
 362
 363     ch = lex_getch(lex);
 364     if (ch != '#') {
 365         lex_ungetch(lex, ch);
 366         return false;
 367     }
 368
 369     for (ch = lex_getch(lex); vec_size(pragma) < 8 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
 370         vec_push(pragma, ch);
 371     vec_push(pragma, 0);
 372
 373     if (ch != ' ' || strcmp(pragma, "pragma")) {
 374         lex_ungetch(lex, ch);
 375         goto unroll;
 376     }
 377
 378     for (ch = lex_getch(lex); vec_size(command) < 32 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
 379         vec_push(command, ch);
 380     vec_push(command, 0);
 381
 382     if (ch != '(') {
 383         lex_ungetch(lex, ch);
 384         goto unroll;
 385     }
 386
 387     for (ch = lex_getch(lex); vec_size(param) < 32 && ch != ')' && ch != '\n'; ch = lex_getch(lex))
 388         vec_push(param, ch);
 389     vec_push(param, 0);
 390
 391     if (ch != ')') {
 392         lex_ungetch(lex, ch);
 393         goto unroll;
 394     }
 395
 396     if (!strcmp(command, "push")) {
 397         if (!strcmp(param, "line")) {
 398             lex->push_line++;
 399             --line;
 400         }
 401         else
 402             goto unroll;
 403     }
 404     else if (!strcmp(command, "pop")) {
 405         if (!strcmp(param, "line")) {
 406             if (lex->push_line)
 407                 lex->push_line--;
 408             --line;
 409         }
 410         else
 411             goto unroll;
 412     }
 413     else if (!strcmp(command, "file")) {
 414         lex->name = util_strdup(param);
 415         vec_push(lex_filenames, lex->name);
 416     }
 417     else if (!strcmp(command, "line")) {
 418         line = strtol(param, NULL, 0)-1;
 419     }
 420     else
 421         goto unroll;
 422
 423     lex->line = line;
 424     while (ch != '\n')
 425         ch = lex_getch(lex);
 426     return true;
 427
 428 unroll:
 429     if (command) {
 430         vec_pop(command);
 431         while (vec_size(command)) {
 432             lex_ungetch(lex, vec_last(command));
 433             vec_pop(command);
 434         }
 435         vec_free(command);
 436     }
 437     if (command) {
 438         vec_pop(command);
 439         while (vec_size(command)) {
 440             lex_ungetch(lex, vec_last(command));
 441             vec_pop(command);
 442         }
 443         vec_free(command);
 444     }
 445     if (pragma) {
 446         vec_pop(pragma);
 447         while (vec_size(pragma)) {
 448             lex_ungetch(lex, vec_last(pragma));
 449             vec_pop(pragma);
 450         }
 451         vec_free(pragma);
 452     }
 453     lex_ungetch(lex, '#');
 454
 455     lex->line = line;
 456     return false;
 457 }
 458
 459 /* Skip whitespace and comments and return the first
 460  * non-white character.
 461  * As this makes use of the above getch() ungetch() functions,
 462  * we don't need to care at all about line numbering anymore.
 463  *
 464  * In theory, this function should only be used at the beginning
 465  * of lexing, or when we *know* the next character is part of the token.
 466  * Otherwise, if the parser throws an error, the linenumber may not be
 467  * the line of the error, but the line of the next token AFTER the error.
 468  *
 469  * This is currently only problematic when using c-like string-continuation,
 470  * since comments and whitespaces are allowed between 2 such strings.
 471  * Example:
 472 printf(   "line one\n"
 473 // A comment
 474           "A continuation of the previous string"
 475 // This line is skipped
 476       , foo);
 477
 478  * In this case, if the parse decides it didn't actually want a string,
 479  * and uses lex->line to print an error, it will show the ', foo);' line's
 480  * linenumber.
 481  *
 482  * On the other hand, the parser is supposed to remember the line of the next
 483  * token's beginning. In this case we would want skipwhite() to be called
 484  * AFTER reading a token, so that the parser, before reading the NEXT token,
 * doesn't store the *comment's* linenumber, but the actual token's linenumber.
 486  *
 487  * THIS SOLUTION
 488  *    here is to store the line of the first character after skipping
 489  *    the initial whitespace in lex->sline, this happens in lex_do.
 490  */
 491 static int lex_skipwhite(lex_file *lex)
 492 {
 493     int ch = 0;
 494     bool haswhite = false;
 495
 496     do
 497     {
 498         ch = lex_getch(lex);
 499         while (ch != EOF && isspace(ch)) {
 500             if (ch == '\n') {
 501                 if (lex_try_pragma(lex)) {
 502                     ch = lex_getch(lex);
 503                     continue;
 504                 }
 505             }
 506             if (lex->flags.preprocessing) {
 507                 if (ch == '\n') {
 508                     /* end-of-line */
 509                     /* see if there was whitespace first */
 510                     if (haswhite) { /* (vec_size(lex->tok.value)) { */
 511                         lex_ungetch(lex, ch);
 512                         lex_endtoken(lex);
 513                         return TOKEN_WHITE;
 514                     }
 515                     /* otherwise return EOL */
 516                     return TOKEN_EOL;
 517                 }
 518                 haswhite = true;
 519                 lex_tokench(lex, ch);
 520             }
 521             ch = lex_getch(lex);
 522         }
 523
 524         if (ch == '/') {
 525             ch = lex_getch(lex);
 526             if (ch == '/')
 527             {
 528                 /* one line comment */
 529                 ch = lex_getch(lex);
 530
 531                 if (lex->flags.preprocessing) {
 532                     haswhite = true;
 533                     /*
 534                     lex_tokench(lex, '/');
 535                     lex_tokench(lex, '/');
 536                     */
 537                     lex_tokench(lex, ' ');
 538                     lex_tokench(lex, ' ');
 539                 }
 540
 541                 while (ch != EOF && ch != '\n') {
 542                     if (lex->flags.preprocessing)
 543                         lex_tokench(lex, ' '); /* ch); */
 544                     ch = lex_getch(lex);
 545                 }
 546                 if (lex->flags.preprocessing) {
 547                     lex_ungetch(lex, '\n');
 548                     lex_endtoken(lex);
 549                     return TOKEN_WHITE;
 550                 }
 551                 continue;
 552             }
 553             if (ch == '*')
 554             {
 555                 /* multiline comment */
 556                 if (lex->flags.preprocessing) {
 557                     haswhite = true;
 558                     /*
 559                     lex_tokench(lex, '/');
 560                     lex_tokench(lex, '*');
 561                     */
 562                     lex_tokench(lex, ' ');
 563                     lex_tokench(lex, ' ');
 564                 }
 565
 566                 while (ch != EOF)
 567                 {
 568                     ch = lex_getch(lex);
 569                     if (ch == '*') {
 570                         ch = lex_getch(lex);
 571                         if (ch == '/') {
 572                             if (lex->flags.preprocessing) {
 573                                 /*
 574                                 lex_tokench(lex, '*');
 575                                 lex_tokench(lex, '/');
 576                                 */
 577                                 lex_tokench(lex, ' ');
 578                                 lex_tokench(lex, ' ');
 579                             }
 580                             break;
 581                         }
 582                     }
 583                     if (lex->flags.preprocessing) {
 584                         lex_tokench(lex, ' '); /* ch); */
 585                     }
 586                 }
 587                 ch = ' '; /* cause TRUE in the isspace check */
 588                 continue;
 589             }
 590             /* Otherwise roll back to the slash and break out of the loop */
 591             lex_ungetch(lex, ch);
 592             ch = '/';
 593             break;
 594         }
 595     } while (ch != EOF && isspace(ch));
 596
 597     if (haswhite) {
 598         lex_endtoken(lex);
 599         lex_ungetch(lex, ch);
 600         return TOKEN_WHITE;
 601     }
 602     return ch;
 603 }
 604
 605 /* Get a token */
 606 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 607 {
 608     int ch;
 609
 610     ch = lex_getch(lex);
 611     while (ch != EOF && isident(ch))
 612     {
 613         lex_tokench(lex, ch);
 614         ch = lex_getch(lex);
 615     }
 616
 617     /* last ch was not an ident ch: */
 618     lex_ungetch(lex, ch);
 619
 620     return true;
 621 }
 622
 623 /* read one ident for the frame list */
 624 static int lex_parse_frame(lex_file *lex)
 625 {
 626     int ch;
 627
 628     lex_token_new(lex);
 629
 630     ch = lex_getch(lex);
 631     while (ch != EOF && ch != '\n' && isspace(ch))
 632         ch = lex_getch(lex);
 633
 634     if (ch == '\n')
 635         return 1;
 636
 637     if (!isident_start(ch)) {
 638         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 639         return -1;
 640     }
 641
 642     lex_tokench(lex, ch);
 643     if (!lex_finish_ident(lex))
 644         return -1;
 645     lex_endtoken(lex);
 646     return 0;
 647 }
 648
 649 /* read a list of $frames */
 650 static bool lex_finish_frames(lex_file *lex)
 651 {
 652     do {
 653         size_t i;
 654         int    rc;
 655         frame_macro m;
 656
 657         rc = lex_parse_frame(lex);
 658         if (rc > 0) /* end of line */
 659             return true;
 660         if (rc < 0) /* error */
 661             return false;
 662
 663         for (i = 0; i < vec_size(lex->frames); ++i) {
 664             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 665                 lex->frames[i].value = lex->framevalue++;
 666                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 667                     return false;
 668                 break;
 669             }
 670         }
 671         if (i < vec_size(lex->frames))
 672             continue;
 673
 674         m.value = lex->framevalue++;
 675         m.name = util_strdup(lex->tok.value);
 676         vec_shrinkto(lex->tok.value, 0);
 677         vec_push(lex->frames, m);
 678     } while (true);
 679 }
 680
 681 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 682 {
 683     int ch = 0;
 684
 685     while (ch != EOF)
 686     {
 687         ch = lex_getch(lex);
 688         if (ch == quote)
 689             return TOKEN_STRINGCONST;
 690
 691         if (lex->flags.preprocessing && ch == '\\') {
 692             lex_tokench(lex, ch);
 693             ch = lex_getch(lex);
 694             if (ch == EOF) {
 695                 lexerror(lex, "unexpected end of file");
 696                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 697                 return (lex->tok.ttype = TOKEN_ERROR);
 698             }
 699             lex_tokench(lex, ch);
 700         }
 701         else if (ch == '\\') {
 702             ch = lex_getch(lex);
 703             if (ch == EOF) {
 704                 lexerror(lex, "unexpected end of file");
 705                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 706                 return (lex->tok.ttype = TOKEN_ERROR);
 707             }
 708
 709             switch (ch) {
 710             case '\\': break;
 711             case '\'': break;
 712             case '"':  break;
 713             case 'a':  ch = '\a'; break;
 714             case 'b':  ch = '\b'; break;
 715             case 'r':  ch = '\r'; break;
 716             case 'n':  ch = '\n'; break;
 717             case 't':  ch = '\t'; break;
 718             case 'f':  ch = '\f'; break;
 719             case 'v':  ch = '\v'; break;
 720             default:
 721                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 722                 /* so we just add the character plus backslash no matter what it actually is */
 723                 lex_tokench(lex, '\\');
 724             }
 725             /* add the character finally */
 726             lex_tokench(lex, ch);
 727         }
 728         else
 729             lex_tokench(lex, ch);
 730     }
 731     lexerror(lex, "unexpected end of file within string constant");
 732     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 733     return (lex->tok.ttype = TOKEN_ERROR);
 734 }
 735
 736 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 737 {
 738     bool ishex = false;
 739
 740     int  ch = lastch;
 741
 742     /* parse a number... */
 743     lex->tok.ttype = TOKEN_INTCONST;
 744
 745     lex_tokench(lex, ch);
 746
 747     ch = lex_getch(lex);
 748     if (ch != '.' && !isdigit(ch))
 749     {
 750         if (lastch != '0' || ch != 'x')
 751         {
 752             /* end of the number or EOF */
 753             lex_ungetch(lex, ch);
 754             lex_endtoken(lex);
 755
 756             lex->tok.constval.i = lastch - '0';
 757             return lex->tok.ttype;
 758         }
 759
 760         ishex = true;
 761     }
 762
 763     /* EOF would have been caught above */
 764
 765     if (ch != '.')
 766     {
 767         lex_tokench(lex, ch);
 768         ch = lex_getch(lex);
 769         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 770         {
 771             lex_tokench(lex, ch);
 772             ch = lex_getch(lex);
 773         }
 774     }
 775     /* NOT else, '.' can come from above as well */
 776     if (ch == '.' && !ishex)
 777     {
 778         /* Allow floating comma in non-hex mode */
 779         lex->tok.ttype = TOKEN_FLOATCONST;
 780         lex_tokench(lex, ch);
 781
 782         /* continue digits-only */
 783         ch = lex_getch(lex);
 784         while (isdigit(ch))
 785         {
 786             lex_tokench(lex, ch);
 787             ch = lex_getch(lex);
 788         }
 789     }
 790     /* put back the last character */
 791     /* but do not put back the trailing 'f' or a float */
 792     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 793         ch = lex_getch(lex);
 794
 795     /* generally we don't want words to follow numbers: */
 796     if (isident(ch)) {
 797         lexerror(lex, "unexpected trailing characters after number");
 798         return (lex->tok.ttype = TOKEN_ERROR);
 799     }
 800     lex_ungetch(lex, ch);
 801
 802     lex_endtoken(lex);
 803     if (lex->tok.ttype == TOKEN_FLOATCONST)
 804         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 805     else
 806         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 807     return lex->tok.ttype;
 808 }
 809
 810 int lex_do(lex_file *lex)
 811 {
 812     int ch, nextch;
 813
 814     lex_token_new(lex);
 815 #if 0
 816     if (!lex->tok)
 817         return TOKEN_FATAL;
 818 #endif
 819
 820     while (true) {
 821         ch = lex_skipwhite(lex);
 822         if (!lex->flags.mergelines || ch != '\\')
 823             break;
 824         ch = lex_getch(lex);
 825         if (ch != '\n') {
 826             lex_ungetch(lex, ch);
 827             ch = '\\';
 828             break;
 829         }
 830         /* we reached a linemerge */
 831         lex_tokench(lex, '\n');
 832         continue;
 833     }
 834
 835     lex->sline = lex->line;
 836     lex->tok.ctx.line = lex->sline;
 837     lex->tok.ctx.file = lex->name;
 838
 839     if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
 840         return (lex->tok.ttype = ch);
 841     }
 842
 843     if (lex->eof)
 844         return (lex->tok.ttype = TOKEN_FATAL);
 845
 846     if (ch == EOF) {
 847         lex->eof = true;
 848         return (lex->tok.ttype = TOKEN_EOF);
 849     }
 850
 851     /* modelgen / spiritgen commands */
 852     if (ch == '$') {
 853         const char *v;
 854         size_t frame;
 855
 856         ch = lex_getch(lex);
 857         if (!isident_start(ch)) {
 858             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 859             return lex_do(lex);
 860         }
 861         lex_tokench(lex, ch);
 862         if (!lex_finish_ident(lex))
 863             return (lex->tok.ttype = TOKEN_ERROR);
 864         lex_endtoken(lex);
 865         /* skip the known commands */
 866         v = lex->tok.value;
 867
 868         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 869         {
 870             /* frame/framesave command works like an enum
 871              * similar to fteqcc we handle this in the lexer.
 872              * The reason for this is that it is sensitive to newlines,
 873              * which the parser is unaware of
 874              */
 875             if (!lex_finish_frames(lex))
 876                  return (lex->tok.ttype = TOKEN_ERROR);
 877             return lex_do(lex);
 878         }
 879
 880         if (!strcmp(v, "framevalue"))
 881         {
 882             ch = lex_getch(lex);
 883             while (ch != EOF && isspace(ch) && ch != '\n')
 884                 ch = lex_getch(lex);
 885
 886             if (!isdigit(ch)) {
 887                 lexerror(lex, "$framevalue requires an integer parameter");
 888                 return lex_do(lex);
 889             }
 890
 891             lex_token_new(lex);
 892             lex->tok.ttype = lex_finish_digit(lex, ch);
 893             lex_endtoken(lex);
 894             if (lex->tok.ttype != TOKEN_INTCONST) {
 895                 lexerror(lex, "$framevalue requires an integer parameter");
 896                 return lex_do(lex);
 897             }
 898             lex->framevalue = lex->tok.constval.i;
 899             return lex_do(lex);
 900         }
 901
 902         if (!strcmp(v, "framerestore"))
 903         {
 904             int rc;
 905
 906             lex_token_new(lex);
 907
 908             rc = lex_parse_frame(lex);
 909
 910             if (rc > 0) {
 911                 lexerror(lex, "$framerestore requires a framename parameter");
 912                 return lex_do(lex);
 913             }
 914             if (rc < 0)
 915                 return (lex->tok.ttype = TOKEN_FATAL);
 916
 917             v = lex->tok.value;
 918             for (frame = 0; frame < vec_size(lex->frames); ++frame) {
 919                 if (!strcmp(v, lex->frames[frame].name)) {
 920                     lex->framevalue = lex->frames[frame].value;
 921                     return lex_do(lex);
 922                 }
 923             }
 924             lexerror(lex, "unknown framename `%s`", v);
 925             return lex_do(lex);
 926         }
 927
 928         if (!strcmp(v, "modelname"))
 929         {
 930             int rc;
 931
 932             lex_token_new(lex);
 933
 934             rc = lex_parse_frame(lex);
 935
 936             if (rc > 0) {
 937                 lexerror(lex, "$modelname requires a parameter");
 938                 return lex_do(lex);
 939             }
 940             if (rc < 0)
 941                 return (lex->tok.ttype = TOKEN_FATAL);
 942
 943             v = lex->tok.value;
 944             if (lex->modelname) {
 945                 frame_macro m;
 946                 m.value = lex->framevalue;
 947                 m.name = lex->modelname;
 948                 lex->modelname = NULL;
 949                 vec_push(lex->frames, m);
 950             }
 951             lex->modelname = lex->tok.value;
 952             lex->tok.value = NULL;
 953             return lex_do(lex);
 954         }
 955
 956         if (!strcmp(v, "flush"))
 957         {
 958             size_t frame;
 959             for (frame = 0; frame < vec_size(lex->frames); ++frame)
 960                 mem_d(lex->frames[frame].name);
 961             vec_free(lex->frames);
 962             /* skip line (fteqcc does it too) */
 963             ch = lex_getch(lex);
 964             while (ch != EOF && ch != '\n')
 965                 ch = lex_getch(lex);
 966             return lex_do(lex);
 967         }
 968
 969         if (!strcmp(v, "cd") ||
 970             !strcmp(v, "origin") ||
 971             !strcmp(v, "base") ||
 972             !strcmp(v, "flags") ||
 973             !strcmp(v, "scale") ||
 974             !strcmp(v, "skin"))
 975         {
 976             /* skip line */
 977             ch = lex_getch(lex);
 978             while (ch != EOF && ch != '\n')
 979                 ch = lex_getch(lex);
 980             return lex_do(lex);
 981         }
 982
 983         for (frame = 0; frame < vec_size(lex->frames); ++frame) {
 984             if (!strcmp(v, lex->frames[frame].name)) {
 985                 lex->tok.constval.i = lex->frames[frame].value;
 986                 return (lex->tok.ttype = TOKEN_INTCONST);
 987             }
 988         }
 989
 990         lexerror(lex, "invalid frame macro");
 991         return lex_do(lex);
 992     }
 993
 994     /* single-character tokens */
 995     switch (ch)
 996     {
 997         case '[':
 998         case '(':
 999             lex_tokench(lex, ch);
1000             lex_endtoken(lex);
1001             if (lex->flags.noops)
1002                 return (lex->tok.ttype = ch);
1003             else
1004                 return (lex->tok.ttype = TOKEN_OPERATOR);
1005         case ')':
1006         case ';':
1007         case ':':
1008         case '{':
1009         case '}':
1010         case ']':
1011
1012         case '#':
1013             lex_tokench(lex, ch);
1014             lex_endtoken(lex);
1015             return (lex->tok.ttype = ch);
1016         default:
1017             break;
1018     }
1019
1020     if (lex->flags.noops)
1021     {
1022         /* Detect characters early which are normally
1023          * operators OR PART of an operator.
1024          */
1025         switch (ch)
1026         {
1027             case '+':
1028             case '-':
1029             case '*':
1030             case '/':
1031             case '<':
1032             case '>':
1033             case '=':
1034             case '&':
1035             case '|':
1036             case '^':
1037             case '~':
1038             case ',':
1039             case '!':
1040                 lex_tokench(lex, ch);
1041                 lex_endtoken(lex);
1042                 return (lex->tok.ttype = ch);
1043             default:
1044                 break;
1045         }
1046
1047         if (ch == '.')
1048         {
1049             lex_tokench(lex, ch);
1050             /* peak ahead once */
1051             nextch = lex_getch(lex);
1052             if (nextch != '.') {
1053                 lex_ungetch(lex, nextch);
1054                 lex_endtoken(lex);
1055                 return (lex->tok.ttype = ch);
1056             }
1057             /* peak ahead again */
1058             nextch = lex_getch(lex);
1059             if (nextch != '.') {
1060                 lex_ungetch(lex, nextch);
1061                 lex_ungetch(lex, nextch);
1062                 lex_endtoken(lex);
1063                 return (lex->tok.ttype = ch);
1064             }
1065             /* fill the token to be "..." */
1066             lex_tokench(lex, ch);
1067             lex_tokench(lex, ch);
1068             lex_endtoken(lex);
1069             return (lex->tok.ttype = TOKEN_DOTS);
1070         }
1071     }
1072
1073     if (ch == ',' || ch == '.') {
1074         lex_tokench(lex, ch);
1075         lex_endtoken(lex);
1076         return (lex->tok.ttype = TOKEN_OPERATOR);
1077     }
1078
1079     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
1080         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
1081         ch == '=' || ch == '!' || /* ==, != */
1082         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
1083     {
1084         lex_tokench(lex, ch);
1085
1086         nextch = lex_getch(lex);
1087         if (nextch == ch || nextch == '=') {
1088             lex_tokench(lex, nextch);
1089         } else if (ch == '-' && nextch == '>') {
1090             lex_tokench(lex, nextch);
1091         } else
1092             lex_ungetch(lex, nextch);
1093
1094         lex_endtoken(lex);
1095         return (lex->tok.ttype = TOKEN_OPERATOR);
1096     }
1097
1098     /*
1099     if (ch == '^' || ch == '~' || ch == '!')
1100     {
1101         lex_tokench(lex, ch);
1102         lex_endtoken(lex);
1103         return (lex->tok.ttype = TOKEN_OPERATOR);
1104     }
1105     */
1106
1107     if (ch == '*' || ch == '/') /* *=, /= */
1108     {
1109         lex_tokench(lex, ch);
1110
1111         nextch = lex_getch(lex);
1112         if (nextch == '=') {
1113             lex_tokench(lex, nextch);
1114         } else
1115             lex_ungetch(lex, nextch);
1116
1117         lex_endtoken(lex);
1118         return (lex->tok.ttype = TOKEN_OPERATOR);
1119     }
1120
1121     if (isident_start(ch))
1122     {
1123         const char *v;
1124
1125         lex_tokench(lex, ch);
1126         if (!lex_finish_ident(lex)) {
1127             /* error? */
1128             return (lex->tok.ttype = TOKEN_ERROR);
1129         }
1130         lex_endtoken(lex);
1131         lex->tok.ttype = TOKEN_IDENT;
1132
1133         v = lex->tok.value;
1134         if (!strcmp(v, "void")) {
1135             lex->tok.ttype = TOKEN_TYPENAME;
1136             lex->tok.constval.t = TYPE_VOID;
1137         } else if (!strcmp(v, "int")) {
1138             lex->tok.ttype = TOKEN_TYPENAME;
1139             lex->tok.constval.t = TYPE_INTEGER;
1140         } else if (!strcmp(v, "float")) {
1141             lex->tok.ttype = TOKEN_TYPENAME;
1142             lex->tok.constval.t = TYPE_FLOAT;
1143         } else if (!strcmp(v, "string")) {
1144             lex->tok.ttype = TOKEN_TYPENAME;
1145             lex->tok.constval.t = TYPE_STRING;
1146         } else if (!strcmp(v, "entity")) {
1147             lex->tok.ttype = TOKEN_TYPENAME;
1148             lex->tok.constval.t = TYPE_ENTITY;
1149         } else if (!strcmp(v, "vector")) {
1150             lex->tok.ttype = TOKEN_TYPENAME;
1151             lex->tok.constval.t = TYPE_VECTOR;
1152         } else if (!strcmp(v, "for")  ||
1153                  !strcmp(v, "while")  ||
1154                  !strcmp(v, "do")     ||
1155                  !strcmp(v, "if")     ||
1156                  !strcmp(v, "else")   ||
1157                  !strcmp(v, "local")  ||
1158                  !strcmp(v, "return") ||
1159                  !strcmp(v, "not")    ||
1160                  !strcmp(v, "const"))
1161         {
1162             lex->tok.ttype = TOKEN_KEYWORD;
1163         }
1164         else if (opts_standard != COMPILER_QCC)
1165         {
1166             /* other standards reserve these keywords */
1167             if (!strcmp(v, "switch") ||
1168                 !strcmp(v, "struct") ||
1169                 !strcmp(v, "union")  ||
1170                 !strcmp(v, "break")  ||
1171                 !strcmp(v, "continue") ||
1172                 !strcmp(v, "var"))
1173             {
1174                 lex->tok.ttype = TOKEN_KEYWORD;
1175             }
1176         }
1177
1178         return lex->tok.ttype;
1179     }
1180
1181     if (ch == '"')
1182     {
1183         lex->flags.nodigraphs = true;
1184         if (lex->flags.preprocessing)
1185             lex_tokench(lex, ch);
1186         lex->tok.ttype = lex_finish_string(lex, '"');
1187         if (lex->flags.preprocessing)
1188             lex_tokench(lex, ch);
1189         while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
1190         {
1191             /* Allow c style "string" "continuation" */
1192             ch = lex_skipwhite(lex);
1193             if (ch != '"') {
1194                 lex_ungetch(lex, ch);
1195                 break;
1196             }
1197
1198             lex->tok.ttype = lex_finish_string(lex, '"');
1199         }
1200         lex->flags.nodigraphs = false;
1201         lex_endtoken(lex);
1202         return lex->tok.ttype;
1203     }
1204
1205     if (ch == '\'')
1206     {
1207         /* we parse character constants like string,
1208          * but return TOKEN_CHARCONST, or a vector type if it fits...
1209          * Likewise actual unescaping has to be done by the parser.
1210          * The difference is we don't allow 'char' 'continuation'.
1211          */
1212         if (lex->flags.preprocessing)
1213             lex_tokench(lex, ch);
1214         lex->tok.ttype = lex_finish_string(lex, '\'');
1215         if (lex->flags.preprocessing)
1216             lex_tokench(lex, ch);
1217         lex_endtoken(lex);
1218
1219          /* It's a vector if we can successfully scan 3 floats */
1220 #ifdef WIN32
1221         if (sscanf_s(lex->tok.value, " %f %f %f ",
1222                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1223 #else
1224         if (sscanf(lex->tok.value, " %f %f %f ",
1225                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1226 #endif
1227
1228         {
1229              lex->tok.ttype = TOKEN_VECTORCONST;
1230         }
1231
1232         return lex->tok.ttype;
1233     }
1234
1235     if (isdigit(ch))
1236     {
1237         lex->tok.ttype = lex_finish_digit(lex, ch);
1238         lex_endtoken(lex);
1239         return lex->tok.ttype;
1240     }
1241
1242     lexerror(lex, "unknown token");
1243     return (lex->tok.ttype = TOKEN_ERROR);
1244 }