lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
   9 MEM_VEC_FUNCTIONS(token, char, value)
  10 MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames)
  11
  12 char* *lex_filenames;
  13
  14 void lexerror(lex_file *lex, const char *fmt, ...)
  15 {
  16         va_list ap;
  17
  18         va_start(ap, fmt);
  19     con_vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  20         va_end(ap);
  21 }
  22
  23 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  24 {
  25         va_list ap;
  26         int lvl = LVL_WARNING;
  27
  28     if (!OPTS_WARN(warntype))
  29         return false;
  30
  31     if (opts_werror)
  32             lvl = LVL_ERROR;
  33
  34         va_start(ap, fmt);
  35     con_vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
  36         va_end(ap);
  37
  38         return opts_werror;
  39 }
  40
  41
  42 #if 0
  43 token* token_new()
  44 {
  45     token *tok = (token*)mem_a(sizeof(token));
  46     if (!tok)
  47         return NULL;
  48     memset(tok, 0, sizeof(*tok));
  49     return tok;
  50 }
  51
  52 void token_delete(token *self)
  53 {
  54     if (self->next && self->next->prev == self)
  55         self->next->prev = self->prev;
  56     if (self->prev && self->prev->next == self)
  57         self->prev->next = self->next;
  58     MEM_VECTOR_CLEAR(self, value);
  59     mem_d(self);
  60 }
  61
  62 token* token_copy(const token *cp)
  63 {
  64     token* self = token_new();
  65     if (!self)
  66         return NULL;
  67     /* copy the value */
  68     self->value_alloc = cp->value_count + 1;
  69     self->value_count = cp->value_count;
  70     self->value = (char*)mem_a(self->value_alloc);
  71     if (!self->value) {
  72         mem_d(self);
  73         return NULL;
  74     }
  75     memcpy(self->value, cp->value, cp->value_count);
  76     self->value[self->value_alloc-1] = 0;
  77
  78     /* rest */
  79     self->ctx = cp->ctx;
  80     self->ttype = cp->ttype;
  81     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  82     return self;
  83 }
  84
  85 void token_delete_all(token *t)
  86 {
  87     token *n;
  88
  89     do {
  90         n = t->next;
  91         token_delete(t);
  92         t = n;
  93     } while(t);
  94 }
  95
  96 token* token_copy_all(const token *cp)
  97 {
  98     token *cur;
  99     token *out;
 100
 101     out = cur = token_copy(cp);
 102     if (!out)
 103         return NULL;
 104
 105     while (cp->next) {
 106         cp = cp->next;
 107         cur->next = token_copy(cp);
 108         if (!cur->next) {
 109             token_delete_all(out);
 110             return NULL;
 111         }
 112         cur->next->prev = cur;
 113         cur = cur->next;
 114     }
 115
 116     return out;
 117 }
 118 #else
 119 static void lex_token_new(lex_file *lex)
 120 {
 121 #if 0
 122     if (lex->tok)
 123         token_delete(lex->tok);
 124     lex->tok = token_new();
 125 #else
 126     lex->tok.value_count = 0;
 127     lex->tok.constval.t  = 0;
 128     lex->tok.ctx.line = lex->sline;
 129     lex->tok.ctx.file = lex->name;
 130 #endif
 131 }
 132 #endif
 133
 134 lex_file* lex_open(const char *file)
 135 {
 136     lex_file *lex;
 137     FILE *in = util_fopen(file, "rb");
 138
 139     if (!in) {
 140         lexerror(NULL, "open failed: '%s'\n", file);
 141         return NULL;
 142     }
 143
 144     lex = (lex_file*)mem_a(sizeof(*lex));
 145     if (!lex) {
 146         fclose(in);
 147         lexerror(NULL, "out of memory\n");
 148         return NULL;
 149     }
 150
 151     memset(lex, 0, sizeof(*lex));
 152
 153     lex->file = in;
 154     lex->name = util_strdup(file);
 155     lex->line = 1; /* we start counting at 1 */
 156
 157     lex->peekpos = 0;
 158     lex->eof = false;
 159
 160     vec_push(lex_filenames, lex->name);
 161     return lex;
 162 }
 163
 164 lex_file* lex_open_string(const char *str, size_t len, const char *name)
 165 {
 166     lex_file *lex;
 167
 168     lex = (lex_file*)mem_a(sizeof(*lex));
 169     if (!lex) {
 170         lexerror(NULL, "out of memory\n");
 171         return NULL;
 172     }
 173
 174     memset(lex, 0, sizeof(*lex));
 175
 176     lex->file = NULL;
 177     lex->open_string        = str;
 178     lex->open_string_length = len;
 179     lex->open_string_pos    = 0;
 180
 181     lex->name = util_strdup(name ? name : "<string-source>");
 182     lex->line = 1; /* we start counting at 1 */
 183
 184     lex->peekpos = 0;
 185     lex->eof = false;
 186
 187     vec_push(lex_filenames, lex->name);
 188
 189     return lex;
 190 }
 191
 192 void lex_cleanup(void)
 193 {
 194     size_t i;
 195     for (i = 0; i < vec_size(lex_filenames); ++i)
 196         mem_d(lex_filenames[i]);
 197     vec_free(lex_filenames);
 198 }
 199
 200 void lex_close(lex_file *lex)
 201 {
 202     size_t i;
 203     for (i = 0; i < lex->frames_count; ++i)
 204         mem_d(lex->frames[i].name);
 205     MEM_VECTOR_CLEAR(lex, frames);
 206
 207     if (lex->modelname)
 208         mem_d(lex->modelname);
 209
 210     if (lex->file)
 211         fclose(lex->file);
 212 #if 0
 213     if (lex->tok)
 214         token_delete(lex->tok);
 215 #else
 216     MEM_VECTOR_CLEAR(&(lex->tok), value);
 217 #endif
 218     /* mem_d(lex->name); collected in lex_filenames */
 219     mem_d(lex);
 220 }
 221
 222 static int lex_fgetc(lex_file *lex)
 223 {
 224     if (lex->file)
 225         return fgetc(lex->file);
 226     if (lex->open_string) {
 227         if (lex->open_string_pos >= lex->open_string_length)
 228             return EOF;
 229         return lex->open_string[lex->open_string_pos++];
 230     }
 231     return EOF;
 232 }
 233
/* Get or put-back data
 * The following two functions do NOT understand what kind of data they
 * are working on.
 * They are merely wrapping get/put in order to count line numbers.
 */
 239 static void lex_ungetch(lex_file *lex, int ch);
 240 static int lex_try_trigraph(lex_file *lex, int old)
 241 {
 242     int c2, c3;
 243     c2 = lex_fgetc(lex);
 244     if (c2 != '?') {
 245         lex_ungetch(lex, c2);
 246         return old;
 247     }
 248
 249     c3 = lex_fgetc(lex);
 250     switch (c3) {
 251         case '=': return '#';
 252         case '/': return '\\';
 253         case '\'': return '^';
 254         case '(': return '[';
 255         case ')': return ']';
 256         case '!': return '|';
 257         case '<': return '{';
 258         case '>': return '}';
 259         case '-': return '~';
 260         default:
 261             lex_ungetch(lex, c3);
 262             lex_ungetch(lex, c2);
 263             return old;
 264     }
 265 }
 266
 267 static int lex_try_digraph(lex_file *lex, int ch)
 268 {
 269     int c2;
 270     c2 = lex_fgetc(lex);
 271     if      (ch == '<' && c2 == ':')
 272         return '[';
 273     else if (ch == ':' && c2 == '>')
 274         return ']';
 275     else if (ch == '<' && c2 == '%')
 276         return '{';
 277     else if (ch == '%' && c2 == '>')
 278         return '}';
 279     else if (ch == '%' && c2 == ':')
 280         return '#';
 281     lex_ungetch(lex, c2);
 282     return ch;
 283 }
 284
 285 static int lex_getch(lex_file *lex)
 286 {
 287     int ch;
 288
 289     if (lex->peekpos) {
 290         lex->peekpos--;
 291         if (lex->peek[lex->peekpos] == '\n')
 292             lex->line++;
 293         return lex->peek[lex->peekpos];
 294     }
 295
 296     ch = lex_fgetc(lex);
 297     if (ch == '\n')
 298         lex->line++;
 299     else if (ch == '?')
 300         return lex_try_trigraph(lex, ch);
 301     else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
 302         return lex_try_digraph(lex, ch);
 303     return ch;
 304 }
 305
 306 static void lex_ungetch(lex_file *lex, int ch)
 307 {
 308     lex->peek[lex->peekpos++] = ch;
 309     if (ch == '\n')
 310         lex->line--;
 311 }
 312
 313 /* classify characters
 314  * some additions to the is*() functions of ctype.h
 315  */
 316
 317 /* Idents are alphanumberic, but they start with alpha or _ */
 318 static bool isident_start(int ch)
 319 {
 320     return isalpha(ch) || ch == '_';
 321 }
 322
 323 static bool isident(int ch)
 324 {
 325     return isident_start(ch) || isdigit(ch);
 326 }
 327
 328 /* isxdigit_only is used when we already know it's not a digit
 329  * and want to see if it's a hex digit anyway.
 330  */
 331 static bool isxdigit_only(int ch)
 332 {
 333     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 334 }
 335
 336 /* Append a character to the token buffer */
 337 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
 338 {
 339     if (!token_value_add(&lex->tok, ch)) {
 340         lexerror(lex, "out of memory");
 341         return false;
 342     }
 343     return true;
 344 }
 345
 346 /* Append a trailing null-byte */
 347 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
 348 {
 349     if (!token_value_add(&lex->tok, 0)) {
 350         lexerror(lex, "out of memory");
 351         return false;
 352     }
 353     lex->tok.value_count--;
 354     return true;
 355 }
 356
 357 /* Skip whitespace and comments and return the first
 358  * non-white character.
 359  * As this makes use of the above getch() ungetch() functions,
 360  * we don't need to care at all about line numbering anymore.
 361  *
 362  * In theory, this function should only be used at the beginning
 363  * of lexing, or when we *know* the next character is part of the token.
 364  * Otherwise, if the parser throws an error, the linenumber may not be
 365  * the line of the error, but the line of the next token AFTER the error.
 366  *
 367  * This is currently only problematic when using c-like string-continuation,
 368  * since comments and whitespaces are allowed between 2 such strings.
 369  * Example:
 370 printf(   "line one\n"
 371 // A comment
 372           "A continuation of the previous string"
 373 // This line is skipped
 374       , foo);
 375
 * In this case, if the parser decides it didn't actually want a string,
 377  * and uses lex->line to print an error, it will show the ', foo);' line's
 378  * linenumber.
 379  *
 380  * On the other hand, the parser is supposed to remember the line of the next
 381  * token's beginning. In this case we would want skipwhite() to be called
 382  * AFTER reading a token, so that the parser, before reading the NEXT token,
 * doesn't store the *comment's* linenumber, but the actual token's linenumber.
 384  *
 385  * THIS SOLUTION
 386  *    here is to store the line of the first character after skipping
 387  *    the initial whitespace in lex->sline, this happens in lex_do.
 388  */
 389 static int lex_skipwhite(lex_file *lex)
 390 {
 391     int ch = 0;
 392     bool haswhite = false;
 393
 394     do
 395     {
 396         ch = lex_getch(lex);
 397         while (ch != EOF && isspace(ch)) {
 398             if (lex->flags.preprocessing) {
 399                 if (ch == '\n') {
 400                     /* end-of-line */
 401                     /* see if there was whitespace first */
 402                     if (haswhite) { /* (lex->tok.value_count) { */
 403                         lex_ungetch(lex, ch);
 404                         if (!lex_endtoken(lex))
 405                             return TOKEN_FATAL;
 406                         return TOKEN_WHITE;
 407                     }
 408                     /* otherwise return EOL */
 409                     return TOKEN_EOL;
 410                 }
 411                 haswhite = true;
 412                 if (!lex_tokench(lex, ch))
 413                     return TOKEN_FATAL;
 414             }
 415             ch = lex_getch(lex);
 416         }
 417
 418         if (ch == '/') {
 419             ch = lex_getch(lex);
 420             if (ch == '/')
 421             {
 422                 /* one line comment */
 423                 ch = lex_getch(lex);
 424
 425                 if (lex->flags.preprocessing) {
 426                     haswhite = true;
 427                     if (!lex_tokench(lex, '/') ||
 428                         !lex_tokench(lex, '/'))
 429                     {
 430                         return TOKEN_FATAL;
 431                     }
 432                 }
 433
 434                 while (ch != EOF && ch != '\n') {
 435                     if (lex->flags.preprocessing && !lex_tokench(lex, ch))
 436                         return TOKEN_FATAL;
 437                     ch = lex_getch(lex);
 438                 }
 439                 if (lex->flags.preprocessing) {
 440                     lex_ungetch(lex, '\n');
 441                     if (!lex_endtoken(lex))
 442                         return TOKEN_FATAL;
 443                     return TOKEN_WHITE;
 444                 }
 445                 continue;
 446             }
 447             if (ch == '*')
 448             {
 449                 /* multiline comment */
 450                 if (lex->flags.preprocessing) {
 451                     haswhite = true;
 452                     if (!lex_tokench(lex, '/') ||
 453                         !lex_tokench(lex, '*'))
 454                     {
 455                         return TOKEN_FATAL;
 456                     }
 457                 }
 458
 459                 while (ch != EOF)
 460                 {
 461                     ch = lex_getch(lex);
 462                     if (ch == '*') {
 463                         ch = lex_getch(lex);
 464                         if (ch == '/') {
 465                             if (lex->flags.preprocessing) {
 466                                 if (!lex_tokench(lex, '*') ||
 467                                     !lex_tokench(lex, '/'))
 468                                 {
 469                                     return TOKEN_FATAL;
 470                                 }
 471                             }
 472                             break;
 473                         }
 474                     }
 475                     if (lex->flags.preprocessing) {
 476                         if (!lex_tokench(lex, ch))
 477                             return TOKEN_FATAL;
 478                     }
 479                 }
 480                 ch = ' '; /* cause TRUE in the isspace check */
 481                 continue;
 482             }
 483             /* Otherwise roll back to the slash and break out of the loop */
 484             lex_ungetch(lex, ch);
 485             ch = '/';
 486             break;
 487         }
 488     } while (ch != EOF && isspace(ch));
 489
 490     if (haswhite) {
 491         if (!lex_endtoken(lex))
 492             return TOKEN_FATAL;
 493         lex_ungetch(lex, ch);
 494         return TOKEN_WHITE;
 495     }
 496     return ch;
 497 }
 498
 499 /* Get a token */
 500 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 501 {
 502     int ch;
 503
 504     ch = lex_getch(lex);
 505     while (ch != EOF && isident(ch))
 506     {
 507         if (!lex_tokench(lex, ch))
 508             return (lex->tok.ttype = TOKEN_FATAL);
 509         ch = lex_getch(lex);
 510     }
 511
 512     /* last ch was not an ident ch: */
 513     lex_ungetch(lex, ch);
 514
 515     return true;
 516 }
 517
 518 /* read one ident for the frame list */
 519 static int lex_parse_frame(lex_file *lex)
 520 {
 521     int ch;
 522
 523     lex_token_new(lex);
 524
 525     ch = lex_getch(lex);
 526     while (ch != EOF && ch != '\n' && isspace(ch))
 527         ch = lex_getch(lex);
 528
 529     if (ch == '\n')
 530         return 1;
 531
 532     if (!isident_start(ch)) {
 533         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 534         return -1;
 535     }
 536
 537     if (!lex_tokench(lex, ch))
 538         return -1;
 539     if (!lex_finish_ident(lex))
 540         return -1;
 541     if (!lex_endtoken(lex))
 542         return -1;
 543     return 0;
 544 }
 545
 546 /* read a list of $frames */
 547 static bool lex_finish_frames(lex_file *lex)
 548 {
 549     do {
 550         size_t i;
 551         int    rc;
 552         frame_macro m;
 553
 554         rc = lex_parse_frame(lex);
 555         if (rc > 0) /* end of line */
 556             return true;
 557         if (rc < 0) /* error */
 558             return false;
 559
 560         for (i = 0; i < lex->frames_count; ++i) {
 561             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 562                 lex->frames[i].value = lex->framevalue++;
 563                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 564                     return false;
 565                 break;
 566             }
 567         }
 568         if (i < lex->frames_count)
 569             continue;
 570
 571         m.value = lex->framevalue++;
 572         m.name = lex->tok.value;
 573         lex->tok.value = NULL;
 574         lex->tok.value_alloc = lex->tok.value_count = 0;
 575         if (!lex_file_frames_add(lex, m)) {
 576             lexerror(lex, "out of memory");
 577             return false;
 578         }
 579     } while (true);
 580 }
 581
 582 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 583 {
 584     int ch = 0;
 585
 586     while (ch != EOF)
 587     {
 588         ch = lex_getch(lex);
 589         if (ch == quote)
 590             return TOKEN_STRINGCONST;
 591
 592         if (!lex->flags.preprocessing && ch == '\\') {
 593             ch = lex_getch(lex);
 594             if (ch == EOF) {
 595                 lexerror(lex, "unexpected end of file");
 596                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 597                 return (lex->tok.ttype = TOKEN_ERROR);
 598             }
 599
 600             switch (ch) {
 601             case '\\': break;
 602             case 'a':  ch = '\a'; break;
 603             case 'b':  ch = '\b'; break;
 604             case 'r':  ch = '\r'; break;
 605             case 'n':  ch = '\n'; break;
 606             case 't':  ch = '\t'; break;
 607             case 'f':  ch = '\f'; break;
 608             case 'v':  ch = '\v'; break;
 609             default:
 610                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 611                 /* so we just add the character plus backslash no matter what it actually is */
 612                 if (!lex_tokench(lex, '\\'))
 613                     return (lex->tok.ttype = TOKEN_FATAL);
 614             }
 615             /* add the character finally */
 616             if (!lex_tokench(lex, ch))
 617                 return (lex->tok.ttype = TOKEN_FATAL);
 618         }
 619         else if (!lex_tokench(lex, ch))
 620             return (lex->tok.ttype = TOKEN_FATAL);
 621     }
 622     lexerror(lex, "unexpected end of file within string constant");
 623     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 624     return (lex->tok.ttype = TOKEN_ERROR);
 625 }
 626
 627 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 628 {
 629     bool ishex = false;
 630
 631     int  ch = lastch;
 632
 633     /* parse a number... */
 634     lex->tok.ttype = TOKEN_INTCONST;
 635
 636     if (!lex_tokench(lex, ch))
 637         return (lex->tok.ttype = TOKEN_FATAL);
 638
 639     ch = lex_getch(lex);
 640     if (ch != '.' && !isdigit(ch))
 641     {
 642         if (lastch != '0' || ch != 'x')
 643         {
 644             /* end of the number or EOF */
 645             lex_ungetch(lex, ch);
 646             if (!lex_endtoken(lex))
 647                 return (lex->tok.ttype = TOKEN_FATAL);
 648
 649             lex->tok.constval.i = lastch - '0';
 650             return lex->tok.ttype;
 651         }
 652
 653         ishex = true;
 654     }
 655
 656     /* EOF would have been caught above */
 657
 658     if (ch != '.')
 659     {
 660         if (!lex_tokench(lex, ch))
 661             return (lex->tok.ttype = TOKEN_FATAL);
 662         ch = lex_getch(lex);
 663         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 664         {
 665             if (!lex_tokench(lex, ch))
 666                 return (lex->tok.ttype = TOKEN_FATAL);
 667             ch = lex_getch(lex);
 668         }
 669     }
 670     /* NOT else, '.' can come from above as well */
 671     if (ch == '.' && !ishex)
 672     {
 673         /* Allow floating comma in non-hex mode */
 674         lex->tok.ttype = TOKEN_FLOATCONST;
 675         if (!lex_tokench(lex, ch))
 676             return (lex->tok.ttype = TOKEN_FATAL);
 677
 678         /* continue digits-only */
 679         ch = lex_getch(lex);
 680         while (isdigit(ch))
 681         {
 682             if (!lex_tokench(lex, ch))
 683                 return (lex->tok.ttype = TOKEN_FATAL);
 684             ch = lex_getch(lex);
 685         }
 686     }
 687     /* put back the last character */
 688     /* but do not put back the trailing 'f' or a float */
 689     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 690         ch = lex_getch(lex);
 691
 692     /* generally we don't want words to follow numbers: */
 693     if (isident(ch)) {
 694         lexerror(lex, "unexpected trailing characters after number");
 695         return (lex->tok.ttype = TOKEN_ERROR);
 696     }
 697     lex_ungetch(lex, ch);
 698
 699     if (!lex_endtoken(lex))
 700         return (lex->tok.ttype = TOKEN_FATAL);
 701     if (lex->tok.ttype == TOKEN_FLOATCONST)
 702         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 703     else
 704         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 705     return lex->tok.ttype;
 706 }
 707
 708 int lex_do(lex_file *lex)
 709 {
 710     int ch, nextch;
 711
 712     lex_token_new(lex);
 713 #if 0
 714     if (!lex->tok)
 715         return TOKEN_FATAL;
 716 #endif
 717
 718     ch = lex_skipwhite(lex);
 719     lex->sline = lex->line;
 720     lex->tok.ctx.line = lex->sline;
 721     lex->tok.ctx.file = lex->name;
 722
 723     if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
 724         return (lex->tok.ttype = ch);
 725     }
 726
 727     if (lex->eof)
 728         return (lex->tok.ttype = TOKEN_FATAL);
 729
 730     if (ch == EOF) {
 731         lex->eof = true;
 732         return (lex->tok.ttype = TOKEN_EOF);
 733     }
 734
 735     /* modelgen / spiritgen commands */
 736     if (ch == '$') {
 737         const char *v;
 738         size_t frame;
 739
 740         ch = lex_getch(lex);
 741         if (!isident_start(ch)) {
 742             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 743             return lex_do(lex);
 744         }
 745         if (!lex_tokench(lex, ch))
 746             return (lex->tok.ttype = TOKEN_FATAL);
 747         if (!lex_finish_ident(lex))
 748             return (lex->tok.ttype = TOKEN_ERROR);
 749         if (!lex_endtoken(lex))
 750             return (lex->tok.ttype = TOKEN_FATAL);
 751         /* skip the known commands */
 752         v = lex->tok.value;
 753
 754         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 755         {
 756             /* frame/framesave command works like an enum
 757              * similar to fteqcc we handle this in the lexer.
 758              * The reason for this is that it is sensitive to newlines,
 759              * which the parser is unaware of
 760              */
 761             if (!lex_finish_frames(lex))
 762                  return (lex->tok.ttype = TOKEN_ERROR);
 763             return lex_do(lex);
 764         }
 765
 766         if (!strcmp(v, "framevalue"))
 767         {
 768             ch = lex_getch(lex);
 769             while (ch != EOF && isspace(ch) && ch != '\n')
 770                 ch = lex_getch(lex);
 771
 772             if (!isdigit(ch)) {
 773                 lexerror(lex, "$framevalue requires an integer parameter");
 774                 return lex_do(lex);
 775             }
 776
 777             lex_token_new(lex);
 778             lex->tok.ttype = lex_finish_digit(lex, ch);
 779             if (!lex_endtoken(lex))
 780                 return (lex->tok.ttype = TOKEN_FATAL);
 781             if (lex->tok.ttype != TOKEN_INTCONST) {
 782                 lexerror(lex, "$framevalue requires an integer parameter");
 783                 return lex_do(lex);
 784             }
 785             lex->framevalue = lex->tok.constval.i;
 786             return lex_do(lex);
 787         }
 788
 789         if (!strcmp(v, "framerestore"))
 790         {
 791             int rc;
 792
 793             lex_token_new(lex);
 794
 795             rc = lex_parse_frame(lex);
 796
 797             if (rc > 0) {
 798                 lexerror(lex, "$framerestore requires a framename parameter");
 799                 return lex_do(lex);
 800             }
 801             if (rc < 0)
 802                 return (lex->tok.ttype = TOKEN_FATAL);
 803
 804             v = lex->tok.value;
 805             for (frame = 0; frame < lex->frames_count; ++frame) {
 806                 if (!strcmp(v, lex->frames[frame].name)) {
 807                     lex->framevalue = lex->frames[frame].value;
 808                     return lex_do(lex);
 809                 }
 810             }
 811             lexerror(lex, "unknown framename `%s`", v);
 812             return lex_do(lex);
 813         }
 814
 815         if (!strcmp(v, "modelname"))
 816         {
 817             int rc;
 818
 819             lex_token_new(lex);
 820
 821             rc = lex_parse_frame(lex);
 822
 823             if (rc > 0) {
 824                 lexerror(lex, "$framerestore requires a framename parameter");
 825                 return lex_do(lex);
 826             }
 827             if (rc < 0)
 828                 return (lex->tok.ttype = TOKEN_FATAL);
 829
 830             v = lex->tok.value;
 831             if (lex->modelname) {
 832                 frame_macro m;
 833                 m.value = lex->framevalue;
 834                 m.name = lex->modelname;
 835                 lex->modelname = NULL;
 836                 if (!lex_file_frames_add(lex, m)) {
 837                     lexerror(lex, "out of memory");
 838                     return (lex->tok.ttype = TOKEN_FATAL);
 839                 }
 840             }
 841             lex->modelname = lex->tok.value;
 842             lex->tok.value = NULL;
 843             lex->tok.value_alloc = lex->tok.value_count = 0;
 844             for (frame = 0; frame < lex->frames_count; ++frame) {
 845                 if (!strcmp(v, lex->frames[frame].name)) {
 846                     lex->framevalue = lex->frames[frame].value;
 847                     break;
 848                 }
 849             }
 850             return lex_do(lex);
 851         }
 852
 853         if (!strcmp(v, "flush"))
 854         {
 855             size_t frame;
 856             for (frame = 0; frame < lex->frames_count; ++frame)
 857                 mem_d(lex->frames[frame].name);
 858             MEM_VECTOR_CLEAR(lex, frames);
 859             /* skip line (fteqcc does it too) */
 860             ch = lex_getch(lex);
 861             while (ch != EOF && ch != '\n')
 862                 ch = lex_getch(lex);
 863             return lex_do(lex);
 864         }
 865
 866         if (!strcmp(v, "cd") ||
 867             !strcmp(v, "origin") ||
 868             !strcmp(v, "base") ||
 869             !strcmp(v, "flags") ||
 870             !strcmp(v, "scale") ||
 871             !strcmp(v, "skin"))
 872         {
 873             /* skip line */
 874             ch = lex_getch(lex);
 875             while (ch != EOF && ch != '\n')
 876                 ch = lex_getch(lex);
 877             return lex_do(lex);
 878         }
 879
 880         for (frame = 0; frame < lex->frames_count; ++frame) {
 881             if (!strcmp(v, lex->frames[frame].name)) {
 882                 lex->tok.constval.i = lex->frames[frame].value;
 883                 return (lex->tok.ttype = TOKEN_INTCONST);
 884             }
 885         }
 886
 887         lexerror(lex, "invalid frame macro");
 888         return lex_do(lex);
 889     }
 890
 891     /* single-character tokens */
 892     switch (ch)
 893     {
 894         case '[':
 895         case '(':
 896             if (!lex_tokench(lex, ch) ||
 897                 !lex_endtoken(lex))
 898             {
 899                 return (lex->tok.ttype = TOKEN_FATAL);
 900             }
 901             if (lex->flags.noops)
 902                 return (lex->tok.ttype = ch);
 903             else
 904                 return (lex->tok.ttype = TOKEN_OPERATOR);
 905         case ')':
 906         case ';':
 907         case '{':
 908         case '}':
 909         case ']':
 910
 911         case '#':
 912             if (!lex_tokench(lex, ch) ||
 913                 !lex_endtoken(lex))
 914             {
 915                 return (lex->tok.ttype = TOKEN_FATAL);
 916             }
 917             return (lex->tok.ttype = ch);
 918         default:
 919             break;
 920     }
 921
 922     if (lex->flags.noops)
 923     {
 924         /* Detect characters early which are normally
 925          * operators OR PART of an operator.
 926          */
 927         switch (ch)
 928         {
 929             case '+':
 930             case '-':
 931             case '*':
 932             case '/':
 933             case '<':
 934             case '>':
 935             case '=':
 936             case '&':
 937             case '|':
 938             case '^':
 939             case '~':
 940             case ',':
 941             case '!':
 942                 if (!lex_tokench(lex, ch) ||
 943                     !lex_endtoken(lex))
 944                 {
 945                     return (lex->tok.ttype = TOKEN_FATAL);
 946                 }
 947                 return (lex->tok.ttype = ch);
 948             default:
 949                 break;
 950         }
 951
 952         if (ch == '.')
 953         {
 954             if (!lex_tokench(lex, ch))
 955                 return (lex->tok.ttype = TOKEN_FATAL);
 956             /* peak ahead once */
 957             nextch = lex_getch(lex);
 958             if (nextch != '.') {
 959                 lex_ungetch(lex, nextch);
 960                 if (!lex_endtoken(lex))
 961                     return (lex->tok.ttype = TOKEN_FATAL);
 962                 return (lex->tok.ttype = ch);
 963             }
 964             /* peak ahead again */
 965             nextch = lex_getch(lex);
 966             if (nextch != '.') {
 967                 lex_ungetch(lex, nextch);
 968                 lex_ungetch(lex, nextch);
 969                 if (!lex_endtoken(lex))
 970                     return (lex->tok.ttype = TOKEN_FATAL);
 971                 return (lex->tok.ttype = ch);
 972             }
 973             /* fill the token to be "..." */
 974             if (!lex_tokench(lex, ch) ||
 975                 !lex_tokench(lex, ch) ||
 976                 !lex_endtoken(lex))
 977             {
 978                 return (lex->tok.ttype = TOKEN_FATAL);
 979             }
 980             return (lex->tok.ttype = TOKEN_DOTS);
 981         }
 982     }
 983
 984     if (ch == ',' || ch == '.') {
 985         if (!lex_tokench(lex, ch) ||
 986             !lex_endtoken(lex))
 987         {
 988             return (lex->tok.ttype = TOKEN_FATAL);
 989         }
 990         return (lex->tok.ttype = TOKEN_OPERATOR);
 991     }
 992
 993     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 994         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 995         ch == '=' || ch == '!' || /* ==, != */
 996         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 997     {
 998         if (!lex_tokench(lex, ch))
 999             return (lex->tok.ttype = TOKEN_FATAL);
1000
1001         nextch = lex_getch(lex);
1002         if (nextch == ch || nextch == '=') {
1003             if (!lex_tokench(lex, nextch))
1004                 return (lex->tok.ttype = TOKEN_FATAL);
1005         } else if (ch == '-' && nextch == '>') {
1006             if (!lex_tokench(lex, nextch))
1007                 return (lex->tok.ttype = TOKEN_FATAL);
1008         } else
1009             lex_ungetch(lex, nextch);
1010
1011         if (!lex_endtoken(lex))
1012             return (lex->tok.ttype = TOKEN_FATAL);
1013         return (lex->tok.ttype = TOKEN_OPERATOR);
1014     }
1015
1016     /*
1017     if (ch == '^' || ch == '~' || ch == '!')
1018     {
1019         if (!lex_tokench(lex, ch) ||
1020             !lex_endtoken(lex))
1021         {
1022             return (lex->tok.ttype = TOKEN_FATAL);
1023         }
1024         return (lex->tok.ttype = TOKEN_OPERATOR);
1025     }
1026     */
1027
1028     if (ch == '*' || ch == '/') /* *=, /= */
1029     {
1030         if (!lex_tokench(lex, ch))
1031             return (lex->tok.ttype = TOKEN_FATAL);
1032
1033         nextch = lex_getch(lex);
1034         if (nextch == '=') {
1035             if (!lex_tokench(lex, nextch))
1036                 return (lex->tok.ttype = TOKEN_FATAL);
1037         } else
1038             lex_ungetch(lex, nextch);
1039
1040         if (!lex_endtoken(lex))
1041             return (lex->tok.ttype = TOKEN_FATAL);
1042         return (lex->tok.ttype = TOKEN_OPERATOR);
1043     }
1044
1045     if (isident_start(ch))
1046     {
1047         const char *v;
1048
1049         if (!lex_tokench(lex, ch))
1050             return (lex->tok.ttype = TOKEN_FATAL);
1051         if (!lex_finish_ident(lex)) {
1052             /* error? */
1053             return (lex->tok.ttype = TOKEN_ERROR);
1054         }
1055         if (!lex_endtoken(lex))
1056             return (lex->tok.ttype = TOKEN_FATAL);
1057         lex->tok.ttype = TOKEN_IDENT;
1058
1059         v = lex->tok.value;
1060         if (!strcmp(v, "void")) {
1061             lex->tok.ttype = TOKEN_TYPENAME;
1062             lex->tok.constval.t = TYPE_VOID;
1063         } else if (!strcmp(v, "int")) {
1064             lex->tok.ttype = TOKEN_TYPENAME;
1065             lex->tok.constval.t = TYPE_INTEGER;
1066         } else if (!strcmp(v, "float")) {
1067             lex->tok.ttype = TOKEN_TYPENAME;
1068             lex->tok.constval.t = TYPE_FLOAT;
1069         } else if (!strcmp(v, "string")) {
1070             lex->tok.ttype = TOKEN_TYPENAME;
1071             lex->tok.constval.t = TYPE_STRING;
1072         } else if (!strcmp(v, "entity")) {
1073             lex->tok.ttype = TOKEN_TYPENAME;
1074             lex->tok.constval.t = TYPE_ENTITY;
1075         } else if (!strcmp(v, "vector")) {
1076             lex->tok.ttype = TOKEN_TYPENAME;
1077             lex->tok.constval.t = TYPE_VECTOR;
1078         } else if (!strcmp(v, "for")  ||
1079                  !strcmp(v, "while")  ||
1080                  !strcmp(v, "do")     ||
1081                  !strcmp(v, "if")     ||
1082                  !strcmp(v, "else")   ||
1083                  !strcmp(v, "local")  ||
1084                  !strcmp(v, "return") ||
1085                  !strcmp(v, "not")    ||
1086                  !strcmp(v, "const"))
1087         {
1088             lex->tok.ttype = TOKEN_KEYWORD;
1089         }
1090         else if (opts_standard != COMPILER_QCC)
1091         {
1092             /* other standards reserve these keywords */
1093             if (!strcmp(v, "switch") ||
1094                 !strcmp(v, "struct") ||
1095                 !strcmp(v, "union")  ||
1096                 !strcmp(v, "break")  ||
1097                 !strcmp(v, "continue") ||
1098                 !strcmp(v, "var"))
1099             {
1100                 lex->tok.ttype = TOKEN_KEYWORD;
1101             }
1102         }
1103
1104         return lex->tok.ttype;
1105     }
1106
1107     if (ch == '"')
1108     {
1109         lex->flags.nodigraphs = true;
1110         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1111             return TOKEN_FATAL;
1112         lex->tok.ttype = lex_finish_string(lex, '"');
1113         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1114             return TOKEN_FATAL;
1115         while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
1116         {
1117             /* Allow c style "string" "continuation" */
1118             ch = lex_skipwhite(lex);
1119             if (ch != '"') {
1120                 lex_ungetch(lex, ch);
1121                 break;
1122             }
1123
1124             lex->tok.ttype = lex_finish_string(lex, '"');
1125         }
1126         lex->flags.nodigraphs = false;
1127         if (!lex_endtoken(lex))
1128             return (lex->tok.ttype = TOKEN_FATAL);
1129         return lex->tok.ttype;
1130     }
1131
1132     if (ch == '\'')
1133     {
1134         /* we parse character constants like string,
1135          * but return TOKEN_CHARCONST, or a vector type if it fits...
1136          * Likewise actual unescaping has to be done by the parser.
1137          * The difference is we don't allow 'char' 'continuation'.
1138          */
1139         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1140             return TOKEN_FATAL;
1141         lex->tok.ttype = lex_finish_string(lex, '\'');
1142         if (lex->flags.preprocessing && !lex_tokench(lex, ch))
1143             return TOKEN_FATAL;
1144         if (!lex_endtoken(lex))
1145             return (lex->tok.ttype = TOKEN_FATAL);
1146
1147          /* It's a vector if we can successfully scan 3 floats */
1148 #ifdef WIN32
1149         if (sscanf_s(lex->tok.value, " %f %f %f ",
1150                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1151 #else
1152         if (sscanf(lex->tok.value, " %f %f %f ",
1153                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1154 #endif
1155
1156         {
1157              lex->tok.ttype = TOKEN_VECTORCONST;
1158         }
1159
1160         return lex->tok.ttype;
1161     }
1162
1163     if (isdigit(ch))
1164     {
1165         lex->tok.ttype = lex_finish_digit(lex, ch);
1166         if (!lex_endtoken(lex))
1167             return (lex->tok.ttype = TOKEN_FATAL);
1168         return lex->tok.ttype;
1169     }
1170
1171     lexerror(lex, "unknown token");
1172     return (lex->tok.ttype = TOKEN_ERROR);
1173 }