lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
/* Instantiate the vector helpers for token's 'value' member and for
 * lex_file's 'frames' member. This file relies on the resulting
 * token_value_add() to append characters to the current token and on
 * lex_file_frames_add() to record frame macros.
 * NOTE(review): the complete set of generated functions is determined by
 * the MEM_VEC_FUNCTIONS macro, which is not defined in this file - confirm
 * against its definition.
 */
MEM_VEC_FUNCTIONS(token, char, value)
MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames)

/* File-global list of the name strings handed to every lexer created by
 * lex_open() / lex_open_string(). lex_close() deliberately leaves the names
 * alone; they are released together in lex_cleanup().
 */
VECTOR_MAKE(char*, lex_filenames);
  13
  14 void lexerror(lex_file *lex, const char *fmt, ...)
  15 {
  16         va_list ap;
  17
  18         va_start(ap, fmt);
  19     vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  20         va_end(ap);
  21 }
  22
  23 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  24 {
  25         va_list ap;
  26         int lvl = LVL_WARNING;
  27
  28     if (!OPTS_WARN(warntype))
  29         return false;
  30
  31     if (opts_werror)
  32             lvl = LVL_ERROR;
  33
  34         va_start(ap, fmt);
  35     vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
  36         va_end(ap);
  37
  38         return opts_werror;
  39 }
  40
  41
  42 #if 0
  43 token* token_new()
  44 {
  45     token *tok = (token*)mem_a(sizeof(token));
  46     if (!tok)
  47         return NULL;
  48     memset(tok, 0, sizeof(*tok));
  49     return tok;
  50 }
  51
  52 void token_delete(token *self)
  53 {
  54     if (self->next && self->next->prev == self)
  55         self->next->prev = self->prev;
  56     if (self->prev && self->prev->next == self)
  57         self->prev->next = self->next;
  58     MEM_VECTOR_CLEAR(self, value);
  59     mem_d(self);
  60 }
  61
  62 token* token_copy(const token *cp)
  63 {
  64     token* self = token_new();
  65     if (!self)
  66         return NULL;
  67     /* copy the value */
  68     self->value_alloc = cp->value_count + 1;
  69     self->value_count = cp->value_count;
  70     self->value = (char*)mem_a(self->value_alloc);
  71     if (!self->value) {
  72         mem_d(self);
  73         return NULL;
  74     }
  75     memcpy(self->value, cp->value, cp->value_count);
  76     self->value[self->value_alloc-1] = 0;
  77
  78     /* rest */
  79     self->ctx = cp->ctx;
  80     self->ttype = cp->ttype;
  81     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  82     return self;
  83 }
  84
  85 void token_delete_all(token *t)
  86 {
  87     token *n;
  88
  89     do {
  90         n = t->next;
  91         token_delete(t);
  92         t = n;
  93     } while(t);
  94 }
  95
  96 token* token_copy_all(const token *cp)
  97 {
  98     token *cur;
  99     token *out;
 100
 101     out = cur = token_copy(cp);
 102     if (!out)
 103         return NULL;
 104
 105     while (cp->next) {
 106         cp = cp->next;
 107         cur->next = token_copy(cp);
 108         if (!cur->next) {
 109             token_delete_all(out);
 110             return NULL;
 111         }
 112         cur->next->prev = cur;
 113         cur = cur->next;
 114     }
 115
 116     return out;
 117 }
 118 #else
 119 static void lex_token_new(lex_file *lex)
 120 {
 121 #if 0
 122     if (lex->tok)
 123         token_delete(lex->tok);
 124     lex->tok = token_new();
 125 #else
 126     lex->tok.value_count = 0;
 127     lex->tok.constval.t  = 0;
 128     lex->tok.ctx.line = lex->sline;
 129     lex->tok.ctx.file = lex->name;
 130 #endif
 131 }
 132 #endif
 133
 134 lex_file* lex_open(const char *file)
 135 {
 136     lex_file *lex;
 137     FILE *in = util_fopen(file, "rb");
 138
 139     if (!in) {
 140         lexerror(NULL, "open failed: '%s'\n", file);
 141         return NULL;
 142     }
 143
 144     lex = (lex_file*)mem_a(sizeof(*lex));
 145     if (!lex) {
 146         fclose(in);
 147         lexerror(NULL, "out of memory\n");
 148         return NULL;
 149     }
 150
 151     memset(lex, 0, sizeof(*lex));
 152
 153     lex->file = in;
 154     lex->name = util_strdup(file);
 155     lex->line = 1; /* we start counting at 1 */
 156
 157     lex->peekpos = 0;
 158     lex->eof = false;
 159
 160     lex_filenames_add(lex->name);
 161
 162     return lex;
 163 }
 164
 165 lex_file* lex_open_string(const char *str, size_t len, const char *name)
 166 {
 167     lex_file *lex;
 168
 169     lex = (lex_file*)mem_a(sizeof(*lex));
 170     if (!lex) {
 171         lexerror(NULL, "out of memory\n");
 172         return NULL;
 173     }
 174
 175     memset(lex, 0, sizeof(*lex));
 176
 177     lex->file = NULL;
 178     lex->open_string        = str;
 179     lex->open_string_length = len;
 180     lex->open_string_pos    = 0;
 181
 182     lex->name = util_strdup(name ? name : "<string-source>");
 183     lex->line = 1; /* we start counting at 1 */
 184
 185     lex->peekpos = 0;
 186     lex->eof = false;
 187
 188     lex_filenames_add(lex->name);
 189
 190     return lex;
 191 }
 192
 193 void lex_cleanup(void)
 194 {
 195     size_t i;
 196     for (i = 0; i < lex_filenames_elements; ++i)
 197         mem_d(lex_filenames_data[i]);
 198     mem_d(lex_filenames_data);
 199 }
 200
 201 void lex_close(lex_file *lex)
 202 {
 203     size_t i;
 204     for (i = 0; i < lex->frames_count; ++i)
 205         mem_d(lex->frames[i].name);
 206     MEM_VECTOR_CLEAR(lex, frames);
 207
 208     if (lex->modelname)
 209         mem_d(lex->modelname);
 210
 211     if (lex->file)
 212         fclose(lex->file);
 213 #if 0
 214     if (lex->tok)
 215         token_delete(lex->tok);
 216 #else
 217     MEM_VECTOR_CLEAR(&(lex->tok), value);
 218 #endif
 219     /* mem_d(lex->name); collected in lex_filenames */
 220     mem_d(lex);
 221 }
 222
 223 static int lex_fgetc(lex_file *lex)
 224 {
 225     if (lex->file)
 226         return fgetc(lex->file);
 227     if (lex->open_string) {
 228         if (lex->open_string_pos >= lex->open_string_length)
 229             return EOF;
 230         return lex->open_string[lex->open_string_pos++];
 231     }
 232     return EOF;
 233 }
 234
/* Get or put-back data
 * The following two functions do NOT understand what kind of data they
 * are working on.
 * They are merely wrapping get/put in order to count line numbers.
 */
 240 static void lex_ungetch(lex_file *lex, int ch);
 241 static int lex_try_trigraph(lex_file *lex, int old)
 242 {
 243     int c2, c3;
 244     c2 = lex_fgetc(lex);
 245     if (c2 != '?') {
 246         lex_ungetch(lex, c2);
 247         return old;
 248     }
 249
 250     c3 = lex_fgetc(lex);
 251     switch (c3) {
 252         case '=': return '#';
 253         case '/': return '\\';
 254         case '\'': return '^';
 255         case '(': return '[';
 256         case ')': return ']';
 257         case '!': return '|';
 258         case '<': return '{';
 259         case '>': return '}';
 260         case '-': return '~';
 261         default:
 262             lex_ungetch(lex, c3);
 263             lex_ungetch(lex, c2);
 264             return old;
 265     }
 266 }
 267
 268 static int lex_try_digraph(lex_file *lex, int ch)
 269 {
 270     int c2;
 271     c2 = lex_fgetc(lex);
 272     if      (ch == '<' && c2 == ':')
 273         return '[';
 274     else if (ch == ':' && c2 == '>')
 275         return ']';
 276     else if (ch == '<' && c2 == '%')
 277         return '{';
 278     else if (ch == '%' && c2 == '>')
 279         return '}';
 280     else if (ch == '%' && c2 == ':')
 281         return '#';
 282     lex_ungetch(lex, c2);
 283     return ch;
 284 }
 285
 286 static int lex_getch(lex_file *lex)
 287 {
 288     int ch;
 289
 290     if (lex->peekpos) {
 291         lex->peekpos--;
 292         if (lex->peek[lex->peekpos] == '\n')
 293             lex->line++;
 294         return lex->peek[lex->peekpos];
 295     }
 296
 297     ch = lex_fgetc(lex);
 298     if (ch == '\n')
 299         lex->line++;
 300     else if (ch == '?')
 301         return lex_try_trigraph(lex, ch);
 302     else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
 303         return lex_try_digraph(lex, ch);
 304     return ch;
 305 }
 306
 307 static void lex_ungetch(lex_file *lex, int ch)
 308 {
 309     lex->peek[lex->peekpos++] = ch;
 310     if (ch == '\n')
 311         lex->line--;
 312 }
 313
 314 /* classify characters
 315  * some additions to the is*() functions of ctype.h
 316  */
 317
 318 /* Idents are alphanumberic, but they start with alpha or _ */
 319 static bool isident_start(int ch)
 320 {
 321     return isalpha(ch) || ch == '_';
 322 }
 323
 324 static bool isident(int ch)
 325 {
 326     return isident_start(ch) || isdigit(ch);
 327 }
 328
 329 /* isxdigit_only is used when we already know it's not a digit
 330  * and want to see if it's a hex digit anyway.
 331  */
 332 static bool isxdigit_only(int ch)
 333 {
 334     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 335 }
 336
 337 /* Append a character to the token buffer */
 338 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
 339 {
 340     if (!token_value_add(&lex->tok, ch)) {
 341         lexerror(lex, "out of memory");
 342         return false;
 343     }
 344     return true;
 345 }
 346
 347 /* Append a trailing null-byte */
 348 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
 349 {
 350     if (!token_value_add(&lex->tok, 0)) {
 351         lexerror(lex, "out of memory");
 352         return false;
 353     }
 354     lex->tok.value_count--;
 355     return true;
 356 }
 357
 358 /* Skip whitespace and comments and return the first
 359  * non-white character.
 360  * As this makes use of the above getch() ungetch() functions,
 361  * we don't need to care at all about line numbering anymore.
 362  *
 363  * In theory, this function should only be used at the beginning
 364  * of lexing, or when we *know* the next character is part of the token.
 365  * Otherwise, if the parser throws an error, the linenumber may not be
 366  * the line of the error, but the line of the next token AFTER the error.
 367  *
 368  * This is currently only problematic when using c-like string-continuation,
 369  * since comments and whitespaces are allowed between 2 such strings.
 370  * Example:
 371 printf(   "line one\n"
 372 // A comment
 373           "A continuation of the previous string"
 374 // This line is skipped
 375       , foo);
 376
 * In this case, if the parser decides it didn't actually want a string,
 * and uses lex->line to print an error, it will show the ', foo);' line's
 * line number.
 *
 * On the other hand, the parser is supposed to remember the line of the next
 * token's beginning. In this case we would want skipwhite() to be called
 * AFTER reading a token, so that the parser, before reading the NEXT token,
 * doesn't store the *comment's* line number, but the actual token's line number.
 385  *
 386  * THIS SOLUTION
 387  *    here is to store the line of the first character after skipping
 388  *    the initial whitespace in lex->sline, this happens in lex_do.
 389  */
 390 static int lex_skipwhite(lex_file *lex)
 391 {
 392     int ch = 0;
 393     bool haswhite = false;
 394
 395     do
 396     {
 397         ch = lex_getch(lex);
 398         while (ch != EOF && isspace(ch)) {
 399             if (lex->flags.preprocessing) {
 400                 if (ch == '\n') {
 401                     /* end-of-line */
 402                     /* see if there was whitespace first */
 403                     if (haswhite) { /* (lex->tok.value_count) { */
 404                         lex_ungetch(lex, ch);
 405                         if (!lex_endtoken(lex))
 406                             return TOKEN_FATAL;
 407                         return TOKEN_WHITE;
 408                     }
 409                     /* otherwise return EOL */
 410                     return TOKEN_EOL;
 411                 }
 412                 haswhite = true;
 413                 if (!lex_tokench(lex, ch))
 414                     return TOKEN_FATAL;
 415             }
 416             ch = lex_getch(lex);
 417         }
 418
 419         if (ch == '/') {
 420             ch = lex_getch(lex);
 421             if (ch == '/')
 422             {
 423                 /* one line comment */
 424                 ch = lex_getch(lex);
 425
 426                 if (lex->flags.preprocessing) {
 427                     haswhite = true;
 428                     if (!lex_tokench(lex, '/') ||
 429                         !lex_tokench(lex, '/'))
 430                     {
 431                         return TOKEN_FATAL;
 432                     }
 433                 }
 434
 435                 while (ch != EOF && ch != '\n') {
 436                     if (lex->flags.preprocessing && !lex_tokench(lex, ch))
 437                         return TOKEN_FATAL;
 438                     ch = lex_getch(lex);
 439                 }
 440                 if (lex->flags.preprocessing) {
 441                     lex_ungetch(lex, '\n');
 442                     if (!lex_endtoken(lex))
 443                         return TOKEN_FATAL;
 444                     return TOKEN_WHITE;
 445                 }
 446                 continue;
 447             }
 448             if (ch == '*')
 449             {
 450                 /* multiline comment */
 451                 if (lex->flags.preprocessing) {
 452                     haswhite = true;
 453                     if (!lex_tokench(lex, '/') ||
 454                         !lex_tokench(lex, '*'))
 455                     {
 456                         return TOKEN_FATAL;
 457                     }
 458                 }
 459
 460                 while (ch != EOF)
 461                 {
 462                     ch = lex_getch(lex);
 463                     if (ch == '*') {
 464                         ch = lex_getch(lex);
 465                         if (ch == '/') {
 466                             if (lex->flags.preprocessing) {
 467                                 if (!lex_tokench(lex, '*') ||
 468                                     !lex_tokench(lex, '/'))
 469                                 {
 470                                     return TOKEN_FATAL;
 471                                 }
 472                             }
 473                             break;
 474                         }
 475                     }
 476                     if (lex->flags.preprocessing) {
 477                         if (!lex_tokench(lex, ch))
 478                             return TOKEN_FATAL;
 479                     }
 480                 }
 481                 ch = ' '; /* cause TRUE in the isspace check */
 482                 continue;
 483             }
 484             /* Otherwise roll back to the slash and break out of the loop */
 485             lex_ungetch(lex, ch);
 486             ch = '/';
 487             break;
 488         }
 489     } while (ch != EOF && isspace(ch));
 490
 491     if (haswhite) {
 492         if (!lex_endtoken(lex))
 493             return TOKEN_FATAL;
 494         lex_ungetch(lex, ch);
 495         return TOKEN_WHITE;
 496     }
 497     return ch;
 498 }
 499
 500 /* Get a token */
 501 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 502 {
 503     int ch;
 504
 505     ch = lex_getch(lex);
 506     while (ch != EOF && isident(ch))
 507     {
 508         if (!lex_tokench(lex, ch))
 509             return (lex->tok.ttype = TOKEN_FATAL);
 510         ch = lex_getch(lex);
 511     }
 512
 513     /* last ch was not an ident ch: */
 514     lex_ungetch(lex, ch);
 515
 516     return true;
 517 }
 518
 519 /* read one ident for the frame list */
 520 static int lex_parse_frame(lex_file *lex)
 521 {
 522     int ch;
 523
 524     lex_token_new(lex);
 525
 526     ch = lex_getch(lex);
 527     while (ch != EOF && ch != '\n' && isspace(ch))
 528         ch = lex_getch(lex);
 529
 530     if (ch == '\n')
 531         return 1;
 532
 533     if (!isident_start(ch)) {
 534         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 535         return -1;
 536     }
 537
 538     if (!lex_tokench(lex, ch))
 539         return -1;
 540     if (!lex_finish_ident(lex))
 541         return -1;
 542     if (!lex_endtoken(lex))
 543         return -1;
 544     return 0;
 545 }
 546
 547 /* read a list of $frames */
 548 static bool lex_finish_frames(lex_file *lex)
 549 {
 550     do {
 551         size_t i;
 552         int    rc;
 553         frame_macro m;
 554
 555         rc = lex_parse_frame(lex);
 556         if (rc > 0) /* end of line */
 557             return true;
 558         if (rc < 0) /* error */
 559             return false;
 560
 561         for (i = 0; i < lex->frames_count; ++i) {
 562             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 563                 lex->frames[i].value = lex->framevalue++;
 564                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 565                     return false;
 566                 break;
 567             }
 568         }
 569         if (i < lex->frames_count)
 570             continue;
 571
 572         m.value = lex->framevalue++;
 573         m.name = lex->tok.value;
 574         lex->tok.value = NULL;
 575         lex->tok.value_alloc = lex->tok.value_count = 0;
 576         if (!lex_file_frames_add(lex, m)) {
 577             lexerror(lex, "out of memory");
 578             return false;
 579         }
 580     } while (true);
 581 }
 582
 583 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 584 {
 585     int ch = 0;
 586
 587     while (ch != EOF)
 588     {
 589         ch = lex_getch(lex);
 590         if (ch == quote)
 591             return TOKEN_STRINGCONST;
 592
 593         if (!lex->flags.preprocessing && ch == '\\') {
 594             ch = lex_getch(lex);
 595             if (ch == EOF) {
 596                 lexerror(lex, "unexpected end of file");
 597                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 598                 return (lex->tok.ttype = TOKEN_ERROR);
 599             }
 600
 601             switch (ch) {
 602             case '\\': break;
 603             case 'a':  ch = '\a'; break;
 604             case 'b':  ch = '\b'; break;
 605             case 'r':  ch = '\r'; break;
 606             case 'n':  ch = '\n'; break;
 607             case 't':  ch = '\t'; break;
 608             case 'f':  ch = '\f'; break;
 609             case 'v':  ch = '\v'; break;
 610             default:
 611                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 612                 /* so we just add the character plus backslash no matter what it actually is */
 613                 if (!lex_tokench(lex, '\\'))
 614                     return (lex->tok.ttype = TOKEN_FATAL);
 615             }
 616             /* add the character finally */
 617             if (!lex_tokench(lex, ch))
 618                 return (lex->tok.ttype = TOKEN_FATAL);
 619         }
 620         else if (!lex_tokench(lex, ch))
 621             return (lex->tok.ttype = TOKEN_FATAL);
 622     }
 623     lexerror(lex, "unexpected end of file within string constant");
 624     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 625     return (lex->tok.ttype = TOKEN_ERROR);
 626 }
 627
 628 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 629 {
 630     bool ishex = false;
 631
 632     int  ch = lastch;
 633
 634     /* parse a number... */
 635     lex->tok.ttype = TOKEN_INTCONST;
 636
 637     if (!lex_tokench(lex, ch))
 638         return (lex->tok.ttype = TOKEN_FATAL);
 639
 640     ch = lex_getch(lex);
 641     if (ch != '.' && !isdigit(ch))
 642     {
 643         if (lastch != '0' || ch != 'x')
 644         {
 645             /* end of the number or EOF */
 646             lex_ungetch(lex, ch);
 647             if (!lex_endtoken(lex))
 648                 return (lex->tok.ttype = TOKEN_FATAL);
 649
 650             lex->tok.constval.i = lastch - '0';
 651             return lex->tok.ttype;
 652         }
 653
 654         ishex = true;
 655     }
 656
 657     /* EOF would have been caught above */
 658
 659     if (ch != '.')
 660     {
 661         if (!lex_tokench(lex, ch))
 662             return (lex->tok.ttype = TOKEN_FATAL);
 663         ch = lex_getch(lex);
 664         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 665         {
 666             if (!lex_tokench(lex, ch))
 667                 return (lex->tok.ttype = TOKEN_FATAL);
 668             ch = lex_getch(lex);
 669         }
 670     }
 671     /* NOT else, '.' can come from above as well */
 672     if (ch == '.' && !ishex)
 673     {
 674         /* Allow floating comma in non-hex mode */
 675         lex->tok.ttype = TOKEN_FLOATCONST;
 676         if (!lex_tokench(lex, ch))
 677             return (lex->tok.ttype = TOKEN_FATAL);
 678
 679         /* continue digits-only */
 680         ch = lex_getch(lex);
 681         while (isdigit(ch))
 682         {
 683             if (!lex_tokench(lex, ch))
 684                 return (lex->tok.ttype = TOKEN_FATAL);
 685             ch = lex_getch(lex);
 686         }
 687     }
 688     /* put back the last character */
 689     /* but do not put back the trailing 'f' or a float */
 690     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 691         ch = lex_getch(lex);
 692
 693     /* generally we don't want words to follow numbers: */
 694     if (isident(ch)) {
 695         lexerror(lex, "unexpected trailing characters after number");
 696         return (lex->tok.ttype = TOKEN_ERROR);
 697     }
 698     lex_ungetch(lex, ch);
 699
 700     if (!lex_endtoken(lex))
 701         return (lex->tok.ttype = TOKEN_FATAL);
 702     if (lex->tok.ttype == TOKEN_FLOATCONST)
 703         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 704     else
 705         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 706     return lex->tok.ttype;
 707 }
 708
 709 int lex_do(lex_file *lex)
 710 {
 711     int ch, nextch;
 712
 713     lex_token_new(lex);
 714 #if 0
 715     if (!lex->tok)
 716         return TOKEN_FATAL;
 717 #endif
 718
 719     ch = lex_skipwhite(lex);
 720     lex->sline = lex->line;
 721     lex->tok.ctx.line = lex->sline;
 722     lex->tok.ctx.file = lex->name;
 723
 724     if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
 725         return (lex->tok.ttype = ch);
 726     }
 727
 728     if (lex->eof)
 729         return (lex->tok.ttype = TOKEN_FATAL);
 730
 731     if (ch == EOF) {
 732         lex->eof = true;
 733         return (lex->tok.ttype = TOKEN_EOF);
 734     }
 735
 736     /* modelgen / spiritgen commands */
 737     if (ch == '$') {
 738         const char *v;
 739         size_t frame;
 740
 741         ch = lex_getch(lex);
 742         if (!isident_start(ch)) {
 743             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 744             return lex_do(lex);
 745         }
 746         if (!lex_tokench(lex, ch))
 747             return (lex->tok.ttype = TOKEN_FATAL);
 748         if (!lex_finish_ident(lex))
 749             return (lex->tok.ttype = TOKEN_ERROR);
 750         if (!lex_endtoken(lex))
 751             return (lex->tok.ttype = TOKEN_FATAL);
 752         /* skip the known commands */
 753         v = lex->tok.value;
 754
 755         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 756         {
 757             /* frame/framesave command works like an enum
 758              * similar to fteqcc we handle this in the lexer.
 759              * The reason for this is that it is sensitive to newlines,
 760              * which the parser is unaware of
 761              */
 762             if (!lex_finish_frames(lex))
 763                  return (lex->tok.ttype = TOKEN_ERROR);
 764             return lex_do(lex);
 765         }
 766
 767         if (!strcmp(v, "framevalue"))
 768         {
 769             ch = lex_getch(lex);
 770             while (ch != EOF && isspace(ch) && ch != '\n')
 771                 ch = lex_getch(lex);
 772
 773             if (!isdigit(ch)) {
 774                 lexerror(lex, "$framevalue requires an integer parameter");
 775                 return lex_do(lex);
 776             }
 777
 778             lex_token_new(lex);
 779             lex->tok.ttype = lex_finish_digit(lex, ch);
 780             if (!lex_endtoken(lex))
 781                 return (lex->tok.ttype = TOKEN_FATAL);
 782             if (lex->tok.ttype != TOKEN_INTCONST) {
 783                 lexerror(lex, "$framevalue requires an integer parameter");
 784                 return lex_do(lex);
 785             }
 786             lex->framevalue = lex->tok.constval.i;
 787             return lex_do(lex);
 788         }
 789
 790         if (!strcmp(v, "framerestore"))
 791         {
 792             int rc;
 793
 794             lex_token_new(lex);
 795
 796             rc = lex_parse_frame(lex);
 797
 798             if (rc > 0) {
 799                 lexerror(lex, "$framerestore requires a framename parameter");
 800                 return lex_do(lex);
 801             }
 802             if (rc < 0)
 803                 return (lex->tok.ttype = TOKEN_FATAL);
 804
 805             v = lex->tok.value;
 806             for (frame = 0; frame < lex->frames_count; ++frame) {
 807                 if (!strcmp(v, lex->frames[frame].name)) {
 808                     lex->framevalue = lex->frames[frame].value;
 809                     return lex_do(lex);
 810                 }
 811             }
 812             lexerror(lex, "unknown framename `%s`", v);
 813             return lex_do(lex);
 814         }
 815
 816         if (!strcmp(v, "modelname"))
 817         {
 818             int rc;
 819
 820             lex_token_new(lex);
 821
 822             rc = lex_parse_frame(lex);
 823
 824             if (rc > 0) {
 825                 lexerror(lex, "$framerestore requires a framename parameter");
 826                 return lex_do(lex);
 827             }
 828             if (rc < 0)
 829                 return (lex->tok.ttype = TOKEN_FATAL);
 830
 831             v = lex->tok.value;
 832             if (lex->modelname) {
 833                 frame_macro m;
 834                 m.value = lex->framevalue;
 835                 m.name = lex->modelname;
 836                 lex->modelname = NULL;
 837                 if (!lex_file_frames_add(lex, m)) {
 838                     lexerror(lex, "out of memory");
 839                     return (lex->tok.ttype = TOKEN_FATAL);
 840                 }
 841             }
 842             lex->modelname = lex->tok.value;
 843             lex->tok.value = NULL;
 844             lex->tok.value_alloc = lex->tok.value_count = 0;
 845             for (frame = 0; frame < lex->frames_count; ++frame) {
 846                 if (!strcmp(v, lex->frames[frame].name)) {
 847                     lex->framevalue = lex->frames[frame].value;
 848                     break;
 849                 }
 850             }
 851             return lex_do(lex);
 852         }
 853
 854         if (!strcmp(v, "flush"))
 855         {
 856             size_t frame;
 857             for (frame = 0; frame < lex->frames_count; ++frame)
 858                 mem_d(lex->frames[frame].name);
 859             MEM_VECTOR_CLEAR(lex, frames);
 860             /* skip line (fteqcc does it too) */
 861             ch = lex_getch(lex);
 862             while (ch != EOF && ch != '\n')
 863                 ch = lex_getch(lex);
 864             return lex_do(lex);
 865         }
 866
 867         if (!strcmp(v, "cd") ||
 868             !strcmp(v, "origin") ||
 869             !strcmp(v, "base") ||
 870             !strcmp(v, "flags") ||
 871             !strcmp(v, "scale") ||
 872             !strcmp(v, "skin"))
 873         {
 874             /* skip line */
 875             ch = lex_getch(lex);
 876             while (ch != EOF && ch != '\n')
 877                 ch = lex_getch(lex);
 878             return lex_do(lex);
 879         }
 880
 881         for (frame = 0; frame < lex->frames_count; ++frame) {
 882             if (!strcmp(v, lex->frames[frame].name)) {
 883                 lex->tok.constval.i = lex->frames[frame].value;
 884                 return (lex->tok.ttype = TOKEN_INTCONST);
 885             }
 886         }
 887
 888         lexerror(lex, "invalid frame macro");
 889         return lex_do(lex);
 890     }
 891
 892     /* single-character tokens */
 893     switch (ch)
 894     {
 895         case '[':
 896         case '(':
 897             if (!lex_tokench(lex, ch) ||
 898                 !lex_endtoken(lex))
 899             {
 900                 return (lex->tok.ttype = TOKEN_FATAL);
 901             }
 902             if (lex->flags.noops)
 903                 return (lex->tok.ttype = ch);
 904             else
 905                 return (lex->tok.ttype = TOKEN_OPERATOR);
 906         case ')':
 907         case ';':
 908         case '{':
 909         case '}':
 910         case ']':
 911
 912         case '#':
 913             if (!lex_tokench(lex, ch) ||
 914                 !lex_endtoken(lex))
 915             {
 916                 return (lex->tok.ttype = TOKEN_FATAL);
 917             }
 918             return (lex->tok.ttype = ch);
 919         default:
 920             break;
 921     }
 922
 923     if (lex->flags.noops)
 924     {
 925         /* Detect characters early which are normally
 926          * operators OR PART of an operator.
 927          */
 928         switch (ch)
 929         {
 930             case '+':
 931             case '-':
 932             case '*':
 933             case '/':
 934             case '<':
 935             case '>':
 936             case '=':
 937             case '&':
 938             case '|':
 939             case '^':
 940             case '~':
 941             case ',':
 942             case '!':
 943                 if (!lex_tokench(lex, ch) ||
 944                     !lex_endtoken(lex))
 945                 {
 946                     return (lex->tok.ttype = TOKEN_FATAL);
 947                 }
 948                 return (lex->tok.ttype = ch);
 949             default:
 950                 break;
 951         }
 952
 953         if (ch == '.')
 954         {
 955             if (!lex_tokench(lex, ch))
 956                 return (lex->tok.ttype = TOKEN_FATAL);
 957             /* peak ahead once */
 958             nextch = lex_getch(lex);
 959             if (nextch != '.') {
 960                 lex_ungetch(lex, nextch);
 961                 if (!lex_endtoken(lex))
 962                     return (lex->tok.ttype = TOKEN_FATAL);
 963                 return (lex->tok.ttype = ch);
 964             }
 965             /* peak ahead again */
 966             nextch = lex_getch(lex);
 967             if (nextch != '.') {
 968                 lex_ungetch(lex, nextch);
 969                 lex_ungetch(lex, nextch);
 970                 if (!lex_endtoken(lex))
 971                     return (lex->tok.ttype = TOKEN_FATAL);
 972                 return (lex->tok.ttype = ch);
 973             }
 974             /* fill the token to be "..." */
 975             if (!lex_tokench(lex, ch) ||
 976                 !lex_tokench(lex, ch) ||
 977                 !lex_endtoken(lex))
 978             {
 979                 return (lex->tok.ttype = TOKEN_FATAL);
 980             }
 981             return (lex->tok.ttype = TOKEN_DOTS);
 982         }
 983     }
 984
 985     if (ch == ',' || ch == '.') {
 986         if (!lex_tokench(lex, ch) ||
 987             !lex_endtoken(lex))
 988         {
 989             return (lex->tok.ttype = TOKEN_FATAL);
 990         }
 991         return (lex->tok.ttype = TOKEN_OPERATOR);
 992     }
 993
 994     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 995         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 996         ch == '=' || ch == '!' || /* ==, != */
 997         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 998     {
 999         if (!lex_tokench(lex, ch))
1000             return (lex->tok.ttype = TOKEN_FATAL);
1001
1002         nextch = lex_getch(lex);
1003         if (nextch == ch || nextch == '=') {
1004             if (!lex_tokench(lex, nextch))
1005                 return (lex->tok.ttype = TOKEN_FATAL);
1006         } else if (ch == '-' && nextch == '>') {
1007             if (!lex_tokench(lex, nextch))
1008                 return (lex->tok.ttype = TOKEN_FATAL);
1009         } else
1010             lex_ungetch(lex, nextch);
1011
1012         if (!lex_endtoken(lex))
1013             return (lex->tok.ttype = TOKEN_FATAL);
1014         return (lex->tok.ttype = TOKEN_OPERATOR);
1015     }
1016
1017     /*
1018     if (ch == '^' || ch == '~' || ch == '!')
1019     {
1020         if (!lex_tokench(lex, ch) ||
1021             !lex_endtoken(lex))
1022         {
1023             return (lex->tok.ttype = TOKEN_FATAL);
1024         }
1025         return (lex->tok.ttype = TOKEN_OPERATOR);
1026     }
1027     */
1028
1029     if (ch == '*' || ch == '/') /* *=, /= */
1030     {
1031         if (!lex_tokench(lex, ch))
1032             return (lex->tok.ttype = TOKEN_FATAL);
1033
1034         nextch = lex_getch(lex);
1035         if (nextch == '=') {
1036             if (!lex_tokench(lex, nextch))
1037                 return (lex->tok.ttype = TOKEN_FATAL);
1038         } else
1039             lex_ungetch(lex, nextch);
1040
1041         if (!lex_endtoken(lex))
1042             return (lex->tok.ttype = TOKEN_FATAL);
1043         return (lex->tok.ttype = TOKEN_OPERATOR);
1044     }
1045
1046     if (isident_start(ch))
1047     {
1048         const char *v;
1049
1050         if (!lex_tokench(lex, ch))
1051             return (lex->tok.ttype = TOKEN_FATAL);
1052         if (!lex_finish_ident(lex)) {
1053             /* error? */
1054             return (lex->tok.ttype = TOKEN_ERROR);
1055         }
1056         if (!lex_endtoken(lex))
1057             return (lex->tok.ttype = TOKEN_FATAL);
1058         lex->tok.ttype = TOKEN_IDENT;
1059
1060         v = lex->tok.value;
1061         if (!strcmp(v, "void")) {
1062             lex->tok.ttype = TOKEN_TYPENAME;
1063             lex->tok.constval.t = TYPE_VOID;
1064         } else if (!strcmp(v, "int")) {
1065             lex->tok.ttype = TOKEN_TYPENAME;
1066             lex->tok.constval.t = TYPE_INTEGER;
1067         } else if (!strcmp(v, "float")) {
1068             lex->tok.ttype = TOKEN_TYPENAME;
1069             lex->tok.constval.t = TYPE_FLOAT;
1070         } else if (!strcmp(v, "string")) {
1071             lex->tok.ttype = TOKEN_TYPENAME;
1072             lex->tok.constval.t = TYPE_STRING;
1073         } else if (!strcmp(v, "entity")) {
1074             lex->tok.ttype = TOKEN_TYPENAME;
1075             lex->tok.constval.t = TYPE_ENTITY;
1076         } else if (!strcmp(v, "vector")) {
1077             lex->tok.ttype = TOKEN_TYPENAME;
1078             lex->tok.constval.t = TYPE_VECTOR;
1079         } else if (!strcmp(v, "for")  ||
1080                  !strcmp(v, "while")  ||
1081                  !strcmp(v, "do")     ||
1082                  !strcmp(v, "if")     ||
1083                  !strcmp(v, "else")   ||
1084                  !strcmp(v, "local")  ||
1085                  !strcmp(v, "return") ||
1086                  !strcmp(v, "not")    ||
1087                  !strcmp(v, "const"))
1088         {
1089             lex->tok.ttype = TOKEN_KEYWORD;
1090         }
1091         else if (opts_standard != COMPILER_QCC)
1092         {
1093             /* other standards reserve these keywords */
1094             if (!strcmp(v, "switch") ||
1095                 !strcmp(v, "struct") ||
1096                 !strcmp(v, "union")  ||
1097                 !strcmp(v, "break")  ||
1098                 !strcmp(v, "continue") ||
1099                 !strcmp(v, "var"))
1100             {
1101                 lex->tok.ttype = TOKEN_KEYWORD;
1102             }
1103         }
1104
1105         return lex->tok.ttype;
1106     }
1107
1108     if (ch == '"')
1109     {
1110         lex->flags.nodigraphs = true;
1111         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1112             return TOKEN_FATAL;
1113         lex->tok.ttype = lex_finish_string(lex, '"');
1114         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1115             return TOKEN_FATAL;
1116         while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
1117         {
1118             /* Allow c style "string" "continuation" */
1119             ch = lex_skipwhite(lex);
1120             if (ch != '"') {
1121                 lex_ungetch(lex, ch);
1122                 break;
1123             }
1124
1125             lex->tok.ttype = lex_finish_string(lex, '"');
1126         }
1127         lex->flags.nodigraphs = false;
1128         if (!lex_endtoken(lex))
1129             return (lex->tok.ttype = TOKEN_FATAL);
1130         return lex->tok.ttype;
1131     }
1132
1133     if (ch == '\'')
1134     {
1135         /* we parse character constants like string,
1136          * but return TOKEN_CHARCONST, or a vector type if it fits...
1137          * Likewise actual unescaping has to be done by the parser.
1138          * The difference is we don't allow 'char' 'continuation'.
1139          */
1140         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1141             return TOKEN_FATAL;
1142         lex->tok.ttype = lex_finish_string(lex, '\'');
1143         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1144             return TOKEN_FATAL;
1145         if (!lex_endtoken(lex))
1146             return (lex->tok.ttype = TOKEN_FATAL);
1147
1148          /* It's a vector if we can successfully scan 3 floats */
1149 #ifdef WIN32
1150         if (sscanf_s(lex->tok.value, " %f %f %f ",
1151                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1152 #else
1153         if (sscanf(lex->tok.value, " %f %f %f ",
1154                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1155 #endif
1156
1157         {
1158              lex->tok.ttype = TOKEN_VECTORCONST;
1159         }
1160
1161         return lex->tok.ttype;
1162     }
1163
1164     if (isdigit(ch))
1165     {
1166         lex->tok.ttype = lex_finish_digit(lex, ch);
1167         if (!lex_endtoken(lex))
1168             return (lex->tok.ttype = TOKEN_FATAL);
1169         return lex->tok.ttype;
1170     }
1171
1172     lexerror(lex, "unknown token");
1173     return (lex->tok.ttype = TOKEN_ERROR);
1174 }