lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
   9 MEM_VEC_FUNCTIONS(token, char, value) /* presumably generates the helpers for token's `value` char vector (token_value_add is called further down) - confirm against the macro definition */
  10 MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames) /* presumably generates the helpers for lex_file's `frames` vector (lex_file_frames_add is called further down) - confirm */
  11
  12 VECTOR_MAKE(char*, lex_filenames); /* presumably defines lex_filenames_data, lex_filenames_elements and lex_filenames_add, which lex_open and lex_cleanup use - confirm */
  13
  14 void lexerror(lex_file *lex, const char *fmt, ...)
  15 {
  16         va_list ap;
  17
  18         va_start(ap, fmt);
  19     vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  20         va_end(ap);
  21 }
  22
  23 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  24 {
  25         va_list ap;
  26         int lvl = LVL_WARNING;
  27
  28     if (!OPTS_WARN(warntype))
  29         return false;
  30
  31     if (opts_werror)
  32             lvl = LVL_ERROR;
  33
  34         va_start(ap, fmt);
  35     vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
  36         va_end(ap);
  37
  38         return opts_werror;
  39 }
  40
  41
  42 #if 0
  43 token* token_new()
  44 {
  45     token *tok = (token*)mem_a(sizeof(token));
  46     if (!tok)
  47         return NULL;
  48     memset(tok, 0, sizeof(*tok));
  49     return tok;
  50 }
  51
  52 void token_delete(token *self)
  53 {
  54     if (self->next && self->next->prev == self)
  55         self->next->prev = self->prev;
  56     if (self->prev && self->prev->next == self)
  57         self->prev->next = self->next;
  58     MEM_VECTOR_CLEAR(self, value);
  59     mem_d(self);
  60 }
  61
  62 token* token_copy(const token *cp)
  63 {
  64     token* self = token_new();
  65     if (!self)
  66         return NULL;
  67     /* copy the value */
  68     self->value_alloc = cp->value_count + 1;
  69     self->value_count = cp->value_count;
  70     self->value = (char*)mem_a(self->value_alloc);
  71     if (!self->value) {
  72         mem_d(self);
  73         return NULL;
  74     }
  75     memcpy(self->value, cp->value, cp->value_count);
  76     self->value[self->value_alloc-1] = 0;
  77
  78     /* rest */
  79     self->ctx = cp->ctx;
  80     self->ttype = cp->ttype;
  81     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  82     return self;
  83 }
  84
  85 void token_delete_all(token *t)
  86 {
  87     token *n;
  88
  89     do {
  90         n = t->next;
  91         token_delete(t);
  92         t = n;
  93     } while(t);
  94 }
  95
  96 token* token_copy_all(const token *cp)
  97 {
  98     token *cur;
  99     token *out;
 100
 101     out = cur = token_copy(cp);
 102     if (!out)
 103         return NULL;
 104
 105     while (cp->next) {
 106         cp = cp->next;
 107         cur->next = token_copy(cp);
 108         if (!cur->next) {
 109             token_delete_all(out);
 110             return NULL;
 111         }
 112         cur->next->prev = cur;
 113         cur = cur->next;
 114     }
 115
 116     return out;
 117 }
 118 #else
 119 static void lex_token_new(lex_file *lex)
 120 {
 121 #if 0
 122     if (lex->tok)
 123         token_delete(lex->tok);
 124     lex->tok = token_new();
 125 #else
 126     lex->tok.value_count = 0;
 127     lex->tok.constval.t  = 0;
 128     lex->tok.ctx.line = lex->sline;
 129     lex->tok.ctx.file = lex->name;
 130 #endif
 131 }
 132 #endif
 133
 134 lex_file* lex_open(const char *file)
 135 {
 136     lex_file *lex;
 137     FILE *in = util_fopen(file, "rb");
 138
 139     if (!in) {
 140         lexerror(NULL, "open failed: '%s'\n", file);
 141         return NULL;
 142     }
 143
 144     lex = (lex_file*)mem_a(sizeof(*lex));
 145     if (!lex) {
 146         fclose(in);
 147         lexerror(NULL, "out of memory\n");
 148         return NULL;
 149     }
 150
 151     memset(lex, 0, sizeof(*lex));
 152
 153     lex->file = in;
 154     lex->name = util_strdup(file);
 155     lex->line = 1; /* we start counting at 1 */
 156
 157     lex->peekpos = 0;
 158     lex->eof = false;
 159
 160     lex_filenames_add(lex->name);
 161
 162     return lex;
 163 }
 164
 165 void lex_cleanup(void)
 166 {
 167     size_t i;
 168     for (i = 0; i < lex_filenames_elements; ++i)
 169         mem_d(lex_filenames_data[i]);
 170     mem_d(lex_filenames_data);
 171 }
 172
 173 void lex_close(lex_file *lex)
 174 {
 175     size_t i;
 176     for (i = 0; i < lex->frames_count; ++i)
 177         mem_d(lex->frames[i].name);
 178     MEM_VECTOR_CLEAR(lex, frames);
 179
 180     if (lex->modelname)
 181         mem_d(lex->modelname);
 182
 183     if (lex->file)
 184         fclose(lex->file);
 185 #if 0
 186     if (lex->tok)
 187         token_delete(lex->tok);
 188 #else
 189     MEM_VECTOR_CLEAR(&(lex->tok), value);
 190 #endif
 191     /* mem_d(lex->name); collected in lex_filenames */
 192     mem_d(lex);
 193 }
 194
 195 /* Get or put-back data
 196  * The following two functions do NOT understand what kind of data they
 197  * are working on.
 198  * They are merely wrapping get/put in order to count line numbers.
 199  */
 200 static void lex_ungetch(lex_file *lex, int ch);
 201 static int lex_try_trigraph(lex_file *lex, int old)
 202 {
 203     int c2, c3;
 204     c2 = fgetc(lex->file);
 205     if (c2 != '?') {
 206         lex_ungetch(lex, c2);
 207         return old;
 208     }
 209
 210     c3 = fgetc(lex->file);
 211     switch (c3) {
 212         case '=': return '#';
 213         case '/': return '\\';
 214         case '\'': return '^';
 215         case '(': return '[';
 216         case ')': return ']';
 217         case '!': return '|';
 218         case '<': return '{';
 219         case '>': return '}';
 220         case '-': return '~';
 221         default:
 222             lex_ungetch(lex, c3);
 223             lex_ungetch(lex, c2);
 224             return old;
 225     }
 226 }
 227
 228 static int lex_try_digraph(lex_file *lex, int ch)
 229 {
 230     int c2;
 231     c2 = fgetc(lex->file);
 232     if      (ch == '<' && c2 == ':')
 233         return '[';
 234     else if (ch == ':' && c2 == '>')
 235         return ']';
 236     else if (ch == '<' && c2 == '%')
 237         return '{';
 238     else if (ch == '%' && c2 == '>')
 239         return '}';
 240     else if (ch == '%' && c2 == ':')
 241         return '#';
 242     lex_ungetch(lex, c2);
 243     return ch;
 244 }
 245
 246 static int lex_getch(lex_file *lex)
 247 {
 248     int ch;
 249
 250     if (lex->peekpos) {
 251         lex->peekpos--;
 252         if (lex->peek[lex->peekpos] == '\n')
 253             lex->line++;
 254         return lex->peek[lex->peekpos];
 255     }
 256
 257     ch = fgetc(lex->file);
 258     if (ch == '\n')
 259         lex->line++;
 260     else if (ch == '?')
 261         return lex_try_trigraph(lex, ch);
 262     else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
 263         return lex_try_digraph(lex, ch);
 264     return ch;
 265 }
 266
 267 static void lex_ungetch(lex_file *lex, int ch)
 268 {
 269     lex->peek[lex->peekpos++] = ch;
 270     if (ch == '\n')
 271         lex->line--;
 272 }
 273
 274 /* classify characters
 275  * some additions to the is*() functions of ctype.h
 276  */
 277
 278 /* Idents are alphanumberic, but they start with alpha or _ */
 279 static bool isident_start(int ch)
 280 {
 281     return isalpha(ch) || ch == '_';
 282 }
 283
 284 static bool isident(int ch)
 285 {
 286     return isident_start(ch) || isdigit(ch);
 287 }
 288
 289 /* isxdigit_only is used when we already know it's not a digit
 290  * and want to see if it's a hex digit anyway.
 291  */
 292 static bool isxdigit_only(int ch)
 293 {
 294     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 295 }
 296
 297 /* Append a character to the token buffer */
 298 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
 299 {
 300     if (!token_value_add(&lex->tok, ch)) {
 301         lexerror(lex, "out of memory");
 302         return false;
 303     }
 304     return true;
 305 }
 306
 307 /* Append a trailing null-byte */
 308 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
 309 {
 310     if (!token_value_add(&lex->tok, 0)) {
 311         lexerror(lex, "out of memory");
 312         return false;
 313     }
 314     lex->tok.value_count--;
 315     return true;
 316 }
 317
 318 /* Skip whitespace and comments and return the first
 319  * non-white character.
 320  * As this makes use of the above getch() ungetch() functions,
 321  * we don't need to care at all about line numbering anymore.
 322  *
 323  * In theory, this function should only be used at the beginning
 324  * of lexing, or when we *know* the next character is part of the token.
 325  * Otherwise, if the parser throws an error, the linenumber may not be
 326  * the line of the error, but the line of the next token AFTER the error.
 327  *
 328  * This is currently only problematic when using c-like string-continuation,
 329  * since comments and whitespaces are allowed between 2 such strings.
 330  * Example:
 331 printf(   "line one\n"
 332 // A comment
 333           "A continuation of the previous string"
 334 // This line is skipped
 335       , foo);
 336
 337  * In this case, if the parser decides it didn't actually want a string,
 338  * and uses lex->line to print an error, it will show the ', foo);' line's
 339  * linenumber.
 340  *
 341  * On the other hand, the parser is supposed to remember the line of the next
 342  * token's beginning. In this case we would want skipwhite() to be called
 343  * AFTER reading a token, so that the parser, before reading the NEXT token,
 344  * doesn't store the *comment's* linenumber, but the actual token's linenumber.
 345  *
 346  * THIS SOLUTION
 347  *    here is to store the line of the first character after skipping
 348  *    the initial whitespace in lex->sline, this happens in lex_do.
 349  */
 350 static int lex_skipwhite(lex_file *lex)
 351 {
 352     int ch = 0;
 353     bool haswhite = false;
 354
 355     do
 356     {
 357         ch = lex_getch(lex);
 358         while (ch != EOF && isspace(ch)) {
 359             if (lex->flags.preprocessing) {
 360                 if (ch == '\n') {
 361                     /* end-of-line */
 362                     /* see if there was whitespace first */
 363                     if (haswhite) { /* (lex->tok.value_count) { */
 364                         lex_ungetch(lex, ch);
 365                         if (!lex_endtoken(lex))
 366                             return TOKEN_FATAL;
 367                         return TOKEN_WHITE;
 368                     }
 369                     /* otherwise return EOL */
 370                     return TOKEN_EOL;
 371                 }
 372                 haswhite = true;
 373                 if (!lex_tokench(lex, ch))
 374                     return TOKEN_FATAL;
 375             }
 376             ch = lex_getch(lex);
 377         }
 378
 379         if (ch == '/') {
 380             ch = lex_getch(lex);
 381             if (ch == '/')
 382             {
 383                 /* one line comment */
 384                 ch = lex_getch(lex);
 385
 386                 if (lex->flags.preprocessing) {
 387                     haswhite = true;
 388                     if (!lex_tokench(lex, '/') ||
 389                         !lex_tokench(lex, '/'))
 390                     {
 391                         return TOKEN_FATAL;
 392                     }
 393                 }
 394
 395                 while (ch != EOF && ch != '\n') {
 396                     if (lex->flags.preprocessing && !lex_tokench(lex, ch))
 397                         return TOKEN_FATAL;
 398                     ch = lex_getch(lex);
 399                 }
 400                 if (lex->flags.preprocessing) {
 401                     lex_ungetch(lex, '\n');
 402                     if (!lex_endtoken(lex))
 403                         return TOKEN_FATAL;
 404                     return TOKEN_WHITE;
 405                 }
 406                 continue;
 407             }
 408             if (ch == '*')
 409             {
 410                 /* multiline comment */
 411                 if (lex->flags.preprocessing) {
 412                     haswhite = true;
 413                     if (!lex_tokench(lex, '/') ||
 414                         !lex_tokench(lex, '*'))
 415                     {
 416                         return TOKEN_FATAL;
 417                     }
 418                 }
 419
 420                 while (ch != EOF)
 421                 {
 422                     ch = lex_getch(lex);
 423                     if (ch == '*') {
 424                         ch = lex_getch(lex);
 425                         if (ch == '/') {
 426                             if (lex->flags.preprocessing) {
 427                                 if (!lex_tokench(lex, '*') ||
 428                                     !lex_tokench(lex, '/'))
 429                                 {
 430                                     return TOKEN_FATAL;
 431                                 }
 432                             }
 433                             break;
 434                         }
 435                     }
 436                     if (lex->flags.preprocessing) {
 437                         if (!lex_tokench(lex, ch))
 438                             return TOKEN_FATAL;
 439                     }
 440                 }
 441                 ch = ' '; /* cause TRUE in the isspace check */
 442                 continue;
 443             }
 444             /* Otherwise roll back to the slash and break out of the loop */
 445             lex_ungetch(lex, ch);
 446             ch = '/';
 447             break;
 448         }
 449     } while (ch != EOF && isspace(ch));
 450
 451     if (haswhite) {
 452         if (!lex_endtoken(lex))
 453             return TOKEN_FATAL;
 454         lex_ungetch(lex, ch);
 455         return TOKEN_WHITE;
 456     }
 457     return ch;
 458 }
 459
 460 /* Get a token */
 461 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 462 {
 463     int ch;
 464
 465     ch = lex_getch(lex);
 466     while (ch != EOF && isident(ch))
 467     {
 468         if (!lex_tokench(lex, ch))
 469             return (lex->tok.ttype = TOKEN_FATAL);
 470         ch = lex_getch(lex);
 471     }
 472
 473     /* last ch was not an ident ch: */
 474     lex_ungetch(lex, ch);
 475
 476     return true;
 477 }
 478
 479 /* read one ident for the frame list */
 480 static int lex_parse_frame(lex_file *lex)
 481 {
 482     int ch;
 483
 484     lex_token_new(lex);
 485
 486     ch = lex_getch(lex);
 487     while (ch != EOF && ch != '\n' && isspace(ch))
 488         ch = lex_getch(lex);
 489
 490     if (ch == '\n')
 491         return 1;
 492
 493     if (!isident_start(ch)) {
 494         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 495         return -1;
 496     }
 497
 498     if (!lex_tokench(lex, ch))
 499         return -1;
 500     if (!lex_finish_ident(lex))
 501         return -1;
 502     if (!lex_endtoken(lex))
 503         return -1;
 504     return 0;
 505 }
 506
 507 /* read a list of $frames */
 508 static bool lex_finish_frames(lex_file *lex)
 509 {
 510     do {
 511         size_t i;
 512         int    rc;
 513         frame_macro m;
 514
 515         rc = lex_parse_frame(lex);
 516         if (rc > 0) /* end of line */
 517             return true;
 518         if (rc < 0) /* error */
 519             return false;
 520
 521         for (i = 0; i < lex->frames_count; ++i) {
 522             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 523                 lex->frames[i].value = lex->framevalue++;
 524                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 525                     return false;
 526                 break;
 527             }
 528         }
 529         if (i < lex->frames_count)
 530             continue;
 531
 532         m.value = lex->framevalue++;
 533         m.name = lex->tok.value;
 534         lex->tok.value = NULL;
 535         lex->tok.value_alloc = lex->tok.value_count = 0;
 536         if (!lex_file_frames_add(lex, m)) {
 537             lexerror(lex, "out of memory");
 538             return false;
 539         }
 540     } while (true);
 541 }
 542
 543 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 544 {
 545     int ch = 0;
 546
 547     while (ch != EOF)
 548     {
 549         ch = lex_getch(lex);
 550         if (ch == quote)
 551             return TOKEN_STRINGCONST;
 552
 553         if (!lex->flags.preprocessing && ch == '\\') {
 554             ch = lex_getch(lex);
 555             if (ch == EOF) {
 556                 lexerror(lex, "unexpected end of file");
 557                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 558                 return (lex->tok.ttype = TOKEN_ERROR);
 559             }
 560
 561             switch (ch) {
 562             case '\\': break;
 563             case 'a':  ch = '\a'; break;
 564             case 'b':  ch = '\b'; break;
 565             case 'r':  ch = '\r'; break;
 566             case 'n':  ch = '\n'; break;
 567             case 't':  ch = '\t'; break;
 568             case 'f':  ch = '\f'; break;
 569             case 'v':  ch = '\v'; break;
 570             default:
 571                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 572                 /* so we just add the character plus backslash no matter what it actually is */
 573                 if (!lex_tokench(lex, '\\'))
 574                     return (lex->tok.ttype = TOKEN_FATAL);
 575             }
 576             /* add the character finally */
 577             if (!lex_tokench(lex, ch))
 578                 return (lex->tok.ttype = TOKEN_FATAL);
 579         }
 580         else if (!lex_tokench(lex, ch))
 581             return (lex->tok.ttype = TOKEN_FATAL);
 582     }
 583     lexerror(lex, "unexpected end of file within string constant");
 584     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 585     return (lex->tok.ttype = TOKEN_ERROR);
 586 }
 587
 588 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 589 {
 590     bool ishex = false;
 591
 592     int  ch = lastch;
 593
 594     /* parse a number... */
 595     lex->tok.ttype = TOKEN_INTCONST;
 596
 597     if (!lex_tokench(lex, ch))
 598         return (lex->tok.ttype = TOKEN_FATAL);
 599
 600     ch = lex_getch(lex);
 601     if (ch != '.' && !isdigit(ch))
 602     {
 603         if (lastch != '0' || ch != 'x')
 604         {
 605             /* end of the number or EOF */
 606             lex_ungetch(lex, ch);
 607             if (!lex_endtoken(lex))
 608                 return (lex->tok.ttype = TOKEN_FATAL);
 609
 610             lex->tok.constval.i = lastch - '0';
 611             return lex->tok.ttype;
 612         }
 613
 614         ishex = true;
 615     }
 616
 617     /* EOF would have been caught above */
 618
 619     if (ch != '.')
 620     {
 621         if (!lex_tokench(lex, ch))
 622             return (lex->tok.ttype = TOKEN_FATAL);
 623         ch = lex_getch(lex);
 624         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 625         {
 626             if (!lex_tokench(lex, ch))
 627                 return (lex->tok.ttype = TOKEN_FATAL);
 628             ch = lex_getch(lex);
 629         }
 630     }
 631     /* NOT else, '.' can come from above as well */
 632     if (ch == '.' && !ishex)
 633     {
 634         /* Allow floating comma in non-hex mode */
 635         lex->tok.ttype = TOKEN_FLOATCONST;
 636         if (!lex_tokench(lex, ch))
 637             return (lex->tok.ttype = TOKEN_FATAL);
 638
 639         /* continue digits-only */
 640         ch = lex_getch(lex);
 641         while (isdigit(ch))
 642         {
 643             if (!lex_tokench(lex, ch))
 644                 return (lex->tok.ttype = TOKEN_FATAL);
 645             ch = lex_getch(lex);
 646         }
 647     }
 648     /* put back the last character */
 649     /* but do not put back the trailing 'f' or a float */
 650     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 651         ch = lex_getch(lex);
 652
 653     /* generally we don't want words to follow numbers: */
 654     if (isident(ch)) {
 655         lexerror(lex, "unexpected trailing characters after number");
 656         return (lex->tok.ttype = TOKEN_ERROR);
 657     }
 658     lex_ungetch(lex, ch);
 659
 660     if (!lex_endtoken(lex))
 661         return (lex->tok.ttype = TOKEN_FATAL);
 662     if (lex->tok.ttype == TOKEN_FLOATCONST)
 663         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 664     else
 665         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 666     return lex->tok.ttype;
 667 }
 668
 669 int lex_do(lex_file *lex)
 670 {
 671     int ch, nextch;
 672
 673     lex_token_new(lex);
 674 #if 0
 675     if (!lex->tok)
 676         return TOKEN_FATAL;
 677 #endif
 678
 679     ch = lex_skipwhite(lex);
 680     lex->sline = lex->line;
 681     lex->tok.ctx.line = lex->sline;
 682     lex->tok.ctx.file = lex->name;
 683
 684     if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
 685         return (lex->tok.ttype = ch);
 686     }
 687
 688     if (lex->eof)
 689         return (lex->tok.ttype = TOKEN_FATAL);
 690
 691     if (ch == EOF) {
 692         lex->eof = true;
 693         return (lex->tok.ttype = TOKEN_EOF);
 694     }
 695
 696     /* modelgen / spiritgen commands */
 697     if (ch == '$') {
 698         const char *v;
 699         size_t frame;
 700
 701         ch = lex_getch(lex);
 702         if (!isident_start(ch)) {
 703             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 704             return lex_do(lex);
 705         }
 706         if (!lex_tokench(lex, ch))
 707             return (lex->tok.ttype = TOKEN_FATAL);
 708         if (!lex_finish_ident(lex))
 709             return (lex->tok.ttype = TOKEN_ERROR);
 710         if (!lex_endtoken(lex))
 711             return (lex->tok.ttype = TOKEN_FATAL);
 712         /* skip the known commands */
 713         v = lex->tok.value;
 714
 715         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 716         {
 717             /* frame/framesave command works like an enum
 718              * similar to fteqcc we handle this in the lexer.
 719              * The reason for this is that it is sensitive to newlines,
 720              * which the parser is unaware of
 721              */
 722             if (!lex_finish_frames(lex))
 723                  return (lex->tok.ttype = TOKEN_ERROR);
 724             return lex_do(lex);
 725         }
 726
 727         if (!strcmp(v, "framevalue"))
 728         {
 729             ch = lex_getch(lex);
 730             while (ch != EOF && isspace(ch) && ch != '\n')
 731                 ch = lex_getch(lex);
 732
 733             if (!isdigit(ch)) {
 734                 lexerror(lex, "$framevalue requires an integer parameter");
 735                 return lex_do(lex);
 736             }
 737
 738             lex_token_new(lex);
 739             lex->tok.ttype = lex_finish_digit(lex, ch);
 740             if (!lex_endtoken(lex))
 741                 return (lex->tok.ttype = TOKEN_FATAL);
 742             if (lex->tok.ttype != TOKEN_INTCONST) {
 743                 lexerror(lex, "$framevalue requires an integer parameter");
 744                 return lex_do(lex);
 745             }
 746             lex->framevalue = lex->tok.constval.i;
 747             return lex_do(lex);
 748         }
 749
 750         if (!strcmp(v, "framerestore"))
 751         {
 752             int rc;
 753
 754             lex_token_new(lex);
 755
 756             rc = lex_parse_frame(lex);
 757
 758             if (rc > 0) {
 759                 lexerror(lex, "$framerestore requires a framename parameter");
 760                 return lex_do(lex);
 761             }
 762             if (rc < 0)
 763                 return (lex->tok.ttype = TOKEN_FATAL);
 764
 765             v = lex->tok.value;
 766             for (frame = 0; frame < lex->frames_count; ++frame) {
 767                 if (!strcmp(v, lex->frames[frame].name)) {
 768                     lex->framevalue = lex->frames[frame].value;
 769                     return lex_do(lex);
 770                 }
 771             }
 772             lexerror(lex, "unknown framename `%s`", v);
 773             return lex_do(lex);
 774         }
 775
 776         if (!strcmp(v, "modelname"))
 777         {
 778             int rc;
 779
 780             lex_token_new(lex);
 781
 782             rc = lex_parse_frame(lex);
 783
 784             if (rc > 0) {
 785                 lexerror(lex, "$framerestore requires a framename parameter");
 786                 return lex_do(lex);
 787             }
 788             if (rc < 0)
 789                 return (lex->tok.ttype = TOKEN_FATAL);
 790
 791             v = lex->tok.value;
 792             if (lex->modelname) {
 793                 frame_macro m;
 794                 m.value = lex->framevalue;
 795                 m.name = lex->modelname;
 796                 lex->modelname = NULL;
 797                 if (!lex_file_frames_add(lex, m)) {
 798                     lexerror(lex, "out of memory");
 799                     return (lex->tok.ttype = TOKEN_FATAL);
 800                 }
 801             }
 802             lex->modelname = lex->tok.value;
 803             lex->tok.value = NULL;
 804             lex->tok.value_alloc = lex->tok.value_count = 0;
 805             for (frame = 0; frame < lex->frames_count; ++frame) {
 806                 if (!strcmp(v, lex->frames[frame].name)) {
 807                     lex->framevalue = lex->frames[frame].value;
 808                     break;
 809                 }
 810             }
 811             return lex_do(lex);
 812         }
 813
 814         if (!strcmp(v, "flush"))
 815         {
 816             size_t frame;
 817             for (frame = 0; frame < lex->frames_count; ++frame)
 818                 mem_d(lex->frames[frame].name);
 819             MEM_VECTOR_CLEAR(lex, frames);
 820             /* skip line (fteqcc does it too) */
 821             ch = lex_getch(lex);
 822             while (ch != EOF && ch != '\n')
 823                 ch = lex_getch(lex);
 824             return lex_do(lex);
 825         }
 826
 827         if (!strcmp(v, "cd") ||
 828             !strcmp(v, "origin") ||
 829             !strcmp(v, "base") ||
 830             !strcmp(v, "flags") ||
 831             !strcmp(v, "scale") ||
 832             !strcmp(v, "skin"))
 833         {
 834             /* skip line */
 835             ch = lex_getch(lex);
 836             while (ch != EOF && ch != '\n')
 837                 ch = lex_getch(lex);
 838             return lex_do(lex);
 839         }
 840
 841         for (frame = 0; frame < lex->frames_count; ++frame) {
 842             if (!strcmp(v, lex->frames[frame].name)) {
 843                 lex->tok.constval.i = lex->frames[frame].value;
 844                 return (lex->tok.ttype = TOKEN_INTCONST);
 845             }
 846         }
 847
 848         lexerror(lex, "invalid frame macro");
 849         return lex_do(lex);
 850     }
 851
 852     /* single-character tokens */
 853     switch (ch)
 854     {
 855         case '(':
 856             if (!lex_tokench(lex, ch) ||
 857                 !lex_endtoken(lex))
 858             {
 859                 return (lex->tok.ttype = TOKEN_FATAL);
 860             }
 861             if (lex->flags.noops)
 862                 return (lex->tok.ttype = ch);
 863             else
 864                 return (lex->tok.ttype = TOKEN_OPERATOR);
 865         case ')':
 866         case ';':
 867         case '{':
 868         case '}':
 869         case '[':
 870             if (!lex_tokench(lex, ch) ||
 871                 !lex_endtoken(lex))
 872             {
 873                 return (lex->tok.ttype = TOKEN_FATAL);
 874             }
 875             if (!lex->flags.noops)
 876                 return (lex->tok.ttype = TOKEN_OPERATOR);
 877         case ']':
 878
 879         case '#':
 880             if (!lex_tokench(lex, ch) ||
 881                 !lex_endtoken(lex))
 882             {
 883                 return (lex->tok.ttype = TOKEN_FATAL);
 884             }
 885             return (lex->tok.ttype = ch);
 886         default:
 887             break;
 888     }
 889
 890     if (lex->flags.noops)
 891     {
 892         /* Detect characters early which are normally
 893          * operators OR PART of an operator.
 894          */
 895         switch (ch)
 896         {
 897             case '+':
 898             case '-':
 899             case '*':
 900             case '/':
 901             case '<':
 902             case '>':
 903             case '=':
 904             case '&':
 905             case '|':
 906             case '^':
 907             case '~':
 908             case ',':
 909             case '!':
 910                 if (!lex_tokench(lex, ch) ||
 911                     !lex_endtoken(lex))
 912                 {
 913                     return (lex->tok.ttype = TOKEN_FATAL);
 914                 }
 915                 return (lex->tok.ttype = ch);
 916             default:
 917                 break;
 918         }
 919
 920         if (ch == '.')
 921         {
 922             if (!lex_tokench(lex, ch))
 923                 return (lex->tok.ttype = TOKEN_FATAL);
 924             /* peak ahead once */
 925             nextch = lex_getch(lex);
 926             if (nextch != '.') {
 927                 lex_ungetch(lex, nextch);
 928                 if (!lex_endtoken(lex))
 929                     return (lex->tok.ttype = TOKEN_FATAL);
 930                 return (lex->tok.ttype = ch);
 931             }
 932             /* peak ahead again */
 933             nextch = lex_getch(lex);
 934             if (nextch != '.') {
 935                 lex_ungetch(lex, nextch);
 936                 lex_ungetch(lex, nextch);
 937                 if (!lex_endtoken(lex))
 938                     return (lex->tok.ttype = TOKEN_FATAL);
 939                 return (lex->tok.ttype = ch);
 940             }
 941             /* fill the token to be "..." */
 942             if (!lex_tokench(lex, ch) ||
 943                 !lex_tokench(lex, ch) ||
 944                 !lex_endtoken(lex))
 945             {
 946                 return (lex->tok.ttype = TOKEN_FATAL);
 947             }
 948             return (lex->tok.ttype = TOKEN_DOTS);
 949         }
 950     }
 951
 952     if (ch == ',' || ch == '.') {
 953         if (!lex_tokench(lex, ch) ||
 954             !lex_endtoken(lex))
 955         {
 956             return (lex->tok.ttype = TOKEN_FATAL);
 957         }
 958         return (lex->tok.ttype = TOKEN_OPERATOR);
 959     }
 960
 961     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 962         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 963         ch == '=' || ch == '!' || /* ==, != */
 964         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 965     {
 966         if (!lex_tokench(lex, ch))
 967             return (lex->tok.ttype = TOKEN_FATAL);
 968
 969         nextch = lex_getch(lex);
 970         if (nextch == ch || nextch == '=') {
 971             if (!lex_tokench(lex, nextch))
 972                 return (lex->tok.ttype = TOKEN_FATAL);
 973         } else if (ch == '-' && nextch == '>') {
 974             if (!lex_tokench(lex, nextch))
 975                 return (lex->tok.ttype = TOKEN_FATAL);
 976         } else
 977             lex_ungetch(lex, nextch);
 978
 979         if (!lex_endtoken(lex))
 980             return (lex->tok.ttype = TOKEN_FATAL);
 981         return (lex->tok.ttype = TOKEN_OPERATOR);
 982     }
 983
 984     /*
 985     if (ch == '^' || ch == '~' || ch == '!')
 986     {
 987         if (!lex_tokench(lex, ch) ||
 988             !lex_endtoken(lex))
 989         {
 990             return (lex->tok.ttype = TOKEN_FATAL);
 991         }
 992         return (lex->tok.ttype = TOKEN_OPERATOR);
 993     }
 994     */
 995
 996     if (ch == '*' || ch == '/') /* *=, /= */
 997     {
 998         if (!lex_tokench(lex, ch))
 999             return (lex->tok.ttype = TOKEN_FATAL);
1000
1001         nextch = lex_getch(lex);
1002         if (nextch == '=') {
1003             if (!lex_tokench(lex, nextch))
1004                 return (lex->tok.ttype = TOKEN_FATAL);
1005         } else
1006             lex_ungetch(lex, nextch);
1007
1008         if (!lex_endtoken(lex))
1009             return (lex->tok.ttype = TOKEN_FATAL);
1010         return (lex->tok.ttype = TOKEN_OPERATOR);
1011     }
1012
1013     if (isident_start(ch))
1014     {
1015         const char *v;
1016
1017         if (!lex_tokench(lex, ch))
1018             return (lex->tok.ttype = TOKEN_FATAL);
1019         if (!lex_finish_ident(lex)) {
1020             /* error? */
1021             return (lex->tok.ttype = TOKEN_ERROR);
1022         }
1023         if (!lex_endtoken(lex))
1024             return (lex->tok.ttype = TOKEN_FATAL);
1025         lex->tok.ttype = TOKEN_IDENT;
1026
1027         v = lex->tok.value;
1028         if (!strcmp(v, "void")) {
1029             lex->tok.ttype = TOKEN_TYPENAME;
1030             lex->tok.constval.t = TYPE_VOID;
1031         } else if (!strcmp(v, "int")) {
1032             lex->tok.ttype = TOKEN_TYPENAME;
1033             lex->tok.constval.t = TYPE_INTEGER;
1034         } else if (!strcmp(v, "float")) {
1035             lex->tok.ttype = TOKEN_TYPENAME;
1036             lex->tok.constval.t = TYPE_FLOAT;
1037         } else if (!strcmp(v, "string")) {
1038             lex->tok.ttype = TOKEN_TYPENAME;
1039             lex->tok.constval.t = TYPE_STRING;
1040         } else if (!strcmp(v, "entity")) {
1041             lex->tok.ttype = TOKEN_TYPENAME;
1042             lex->tok.constval.t = TYPE_ENTITY;
1043         } else if (!strcmp(v, "vector")) {
1044             lex->tok.ttype = TOKEN_TYPENAME;
1045             lex->tok.constval.t = TYPE_VECTOR;
1046         } else if (!strcmp(v, "for")  ||
1047                  !strcmp(v, "while")  ||
1048                  !strcmp(v, "do")     ||
1049                  !strcmp(v, "if")     ||
1050                  !strcmp(v, "else")   ||
1051                  !strcmp(v, "local")  ||
1052                  !strcmp(v, "return") ||
1053                  !strcmp(v, "const"))
1054         {
1055             lex->tok.ttype = TOKEN_KEYWORD;
1056         }
1057         else if (opts_standard != COMPILER_QCC)
1058         {
1059             /* other standards reserve these keywords */
1060             if (!strcmp(v, "switch") ||
1061                 !strcmp(v, "struct") ||
1062                 !strcmp(v, "union")  ||
1063                 !strcmp(v, "break")  ||
1064                 !strcmp(v, "continue") ||
1065                 !strcmp(v, "var"))
1066             {
1067                 lex->tok.ttype = TOKEN_KEYWORD;
1068             }
1069         }
1070
1071         return lex->tok.ttype;
1072     }
1073
1074     if (ch == '"')
1075     {
1076         lex->flags.nodigraphs = true;
1077         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1078             return TOKEN_FATAL;
1079         lex->tok.ttype = lex_finish_string(lex, '"');
1080         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1081             return TOKEN_FATAL;
1082         while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
1083         {
1084             /* Allow c style "string" "continuation" */
1085             ch = lex_skipwhite(lex);
1086             if (ch != '"') {
1087                 lex_ungetch(lex, ch);
1088                 break;
1089             }
1090
1091             lex->tok.ttype = lex_finish_string(lex, '"');
1092         }
1093         lex->flags.nodigraphs = false;
1094         if (!lex_endtoken(lex))
1095             return (lex->tok.ttype = TOKEN_FATAL);
1096         return lex->tok.ttype;
1097     }
1098
1099     if (ch == '\'')
1100     {
1101         /* we parse character constants like string,
1102          * but return TOKEN_CHARCONST, or a vector type if it fits...
1103          * Likewise actual unescaping has to be done by the parser.
1104          * The difference is we don't allow 'char' 'continuation'.
1105          */
1106         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1107             return TOKEN_FATAL;
1108         lex->tok.ttype = lex_finish_string(lex, '\'');
1109         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1110             return TOKEN_FATAL;
1111         if (!lex_endtoken(lex))
1112             return (lex->tok.ttype = TOKEN_FATAL);
1113
1114          /* It's a vector if we can successfully scan 3 floats */
1115 #ifdef WIN32
1116         if (sscanf_s(lex->tok.value, " %f %f %f ",
1117                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1118 #else
1119         if (sscanf(lex->tok.value, " %f %f %f ",
1120                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1121 #endif
1122
1123         {
1124              lex->tok.ttype = TOKEN_VECTORCONST;
1125         }
1126
1127         return lex->tok.ttype;
1128     }
1129
1130     if (isdigit(ch))
1131     {
1132         lex->tok.ttype = lex_finish_digit(lex, ch);
1133         if (!lex_endtoken(lex))
1134             return (lex->tok.ttype = TOKEN_FATAL);
1135         return lex->tok.ttype;
1136     }
1137
1138     lexerror(lex, "unknown token");
1139     return (lex->tok.ttype = TOKEN_ERROR);
1140 }