lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
   /* Instantiate the vector helpers for the `value` member of `token`.
    * NOTE(review): the macro is defined elsewhere; presumably this is what
    * provides token_value_add(), which this file calls - confirm.
    */
   9 MEM_VEC_FUNCTIONS(token, char, value)
   /* Same for the `frames` member of `lex_file`; presumably the source of
    * lex_file_frames_add(), which this file calls - confirm.
    */
  10 MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames)
  11
   /* Global list of file names handed out by lex_open() and released by
    * lex_cleanup(). NOTE(review): presumably defines lex_filenames_data,
    * lex_filenames_elements and lex_filenames_add() - confirm.
    */
  12 VECTOR_MAKE(char*, lex_filenames);
  13
  14 void lexerror(lex_file *lex, const char *fmt, ...)
  15 {
  16         va_list ap;
  17
  18         va_start(ap, fmt);
  19     vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  20         va_end(ap);
  21 }
  22
  23 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  24 {
  25         va_list ap;
  26         int lvl = LVL_WARNING;
  27
  28     if (!OPTS_WARN(warntype))
  29         return false;
  30
  31     if (opts_werror)
  32             lvl = LVL_ERROR;
  33
  34         va_start(ap, fmt);
  35     vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
  36         va_end(ap);
  37
  38         return opts_werror;
  39 }
  40
  41
  42 #if 0
  43 token* token_new()
  44 {
  45     token *tok = (token*)mem_a(sizeof(token));
  46     if (!tok)
  47         return NULL;
  48     memset(tok, 0, sizeof(*tok));
  49     return tok;
  50 }
  51
  52 void token_delete(token *self)
  53 {
  54     if (self->next && self->next->prev == self)
  55         self->next->prev = self->prev;
  56     if (self->prev && self->prev->next == self)
  57         self->prev->next = self->next;
  58     MEM_VECTOR_CLEAR(self, value);
  59     mem_d(self);
  60 }
  61
  62 token* token_copy(const token *cp)
  63 {
  64     token* self = token_new();
  65     if (!self)
  66         return NULL;
  67     /* copy the value */
  68     self->value_alloc = cp->value_count + 1;
  69     self->value_count = cp->value_count;
  70     self->value = (char*)mem_a(self->value_alloc);
  71     if (!self->value) {
  72         mem_d(self);
  73         return NULL;
  74     }
  75     memcpy(self->value, cp->value, cp->value_count);
  76     self->value[self->value_alloc-1] = 0;
  77
  78     /* rest */
  79     self->ctx = cp->ctx;
  80     self->ttype = cp->ttype;
  81     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  82     return self;
  83 }
  84
  85 void token_delete_all(token *t)
  86 {
  87     token *n;
  88
  89     do {
  90         n = t->next;
  91         token_delete(t);
  92         t = n;
  93     } while(t);
  94 }
  95
  96 token* token_copy_all(const token *cp)
  97 {
  98     token *cur;
  99     token *out;
 100
 101     out = cur = token_copy(cp);
 102     if (!out)
 103         return NULL;
 104
 105     while (cp->next) {
 106         cp = cp->next;
 107         cur->next = token_copy(cp);
 108         if (!cur->next) {
 109             token_delete_all(out);
 110             return NULL;
 111         }
 112         cur->next->prev = cur;
 113         cur = cur->next;
 114     }
 115
 116     return out;
 117 }
 118 #else
 119 static void lex_token_new(lex_file *lex)
 120 {
 121 #if 0
 122     if (lex->tok)
 123         token_delete(lex->tok);
 124     lex->tok = token_new();
 125 #else
 126     lex->tok.value_count = 0;
 127     lex->tok.constval.t  = 0;
 128     lex->tok.ctx.line = lex->sline;
 129     lex->tok.ctx.file = lex->name;
 130 #endif
 131 }
 132 #endif
 133
 134 lex_file* lex_open(const char *file)
 135 {
 136     lex_file *lex;
 137     FILE *in = util_fopen(file, "rb");
 138
 139     if (!in) {
 140         lexerror(NULL, "open failed: '%s'\n", file);
 141         return NULL;
 142     }
 143
 144     lex = (lex_file*)mem_a(sizeof(*lex));
 145     if (!lex) {
 146         fclose(in);
 147         lexerror(NULL, "out of memory\n");
 148         return NULL;
 149     }
 150
 151     memset(lex, 0, sizeof(*lex));
 152
 153     lex->file = in;
 154     lex->name = util_strdup(file);
 155     lex->line = 1; /* we start counting at 1 */
 156
 157     lex->peekpos = 0;
 158     lex->eof = false;
 159
 160     lex_filenames_add(lex->name);
 161
 162     return lex;
 163 }
 164
 165 void lex_cleanup(void)
 166 {
 167     size_t i;
 168     for (i = 0; i < lex_filenames_elements; ++i)
 169         mem_d(lex_filenames_data[i]);
 170     mem_d(lex_filenames_data);
 171 }
 172
 173 void lex_close(lex_file *lex)
 174 {
 175     size_t i;
 176     for (i = 0; i < lex->frames_count; ++i)
 177         mem_d(lex->frames[i].name);
 178     MEM_VECTOR_CLEAR(lex, frames);
 179
 180     if (lex->modelname)
 181         mem_d(lex->modelname);
 182
 183     if (lex->file)
 184         fclose(lex->file);
 185 #if 0
 186     if (lex->tok)
 187         token_delete(lex->tok);
 188 #else
 189     MEM_VECTOR_CLEAR(&(lex->tok), value);
 190 #endif
 191     /* mem_d(lex->name); collected in lex_filenames */
 192     mem_d(lex);
 193 }
 194
/* Get or put-back data
 * The following two functions do NOT understand what kind of data they
 * are working on.
 * They merely wrap get/put in order to count line numbers.
 */
 200 static void lex_ungetch(lex_file *lex, int ch);
 201 static int lex_try_trigraph(lex_file *lex, int old)
 202 {
 203     int c2, c3;
 204     c2 = fgetc(lex->file);
 205     if (c2 != '?') {
 206         lex_ungetch(lex, c2);
 207         return old;
 208     }
 209
 210     c3 = fgetc(lex->file);
 211     switch (c3) {
 212         case '=': return '#';
 213         case '/': return '\\';
 214         case '\'': return '^';
 215         case '(': return '[';
 216         case ')': return ']';
 217         case '!': return '|';
 218         case '<': return '{';
 219         case '>': return '}';
 220         case '-': return '~';
 221         default:
 222             lex_ungetch(lex, c3);
 223             lex_ungetch(lex, c2);
 224             return old;
 225     }
 226 }
 227
 228 static int lex_try_digraph(lex_file *lex, int ch)
 229 {
 230     int c2;
 231     c2 = fgetc(lex->file);
 232     if      (ch == '<' && c2 == ':')
 233         return '[';
 234     else if (ch == ':' && c2 == '>')
 235         return ']';
 236     else if (ch == '<' && c2 == '%')
 237         return '{';
 238     else if (ch == '%' && c2 == '>')
 239         return '}';
 240     else if (ch == '%' && c2 == ':')
 241         return '#';
 242     lex_ungetch(lex, c2);
 243     return ch;
 244 }
 245
 246 static int lex_getch(lex_file *lex)
 247 {
 248     int ch;
 249
 250     if (lex->peekpos) {
 251         lex->peekpos--;
 252         if (lex->peek[lex->peekpos] == '\n')
 253             lex->line++;
 254         return lex->peek[lex->peekpos];
 255     }
 256
 257     ch = fgetc(lex->file);
 258     if (ch == '\n')
 259         lex->line++;
 260     else if (ch == '?')
 261         return lex_try_trigraph(lex, ch);
 262     else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
 263         return lex_try_digraph(lex, ch);
 264     return ch;
 265 }
 266
 267 static void lex_ungetch(lex_file *lex, int ch)
 268 {
 269     lex->peek[lex->peekpos++] = ch;
 270     if (ch == '\n')
 271         lex->line--;
 272 }
 273
 274 /* classify characters
 275  * some additions to the is*() functions of ctype.h
 276  */
 277
 278 /* Idents are alphanumberic, but they start with alpha or _ */
 279 static bool isident_start(int ch)
 280 {
 281     return isalpha(ch) || ch == '_';
 282 }
 283
 284 static bool isident(int ch)
 285 {
 286     return isident_start(ch) || isdigit(ch);
 287 }
 288
 289 /* isxdigit_only is used when we already know it's not a digit
 290  * and want to see if it's a hex digit anyway.
 291  */
 292 static bool isxdigit_only(int ch)
 293 {
 294     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 295 }
 296
 297 /* Append a character to the token buffer */
 298 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
 299 {
 300     if (!token_value_add(&lex->tok, ch)) {
 301         lexerror(lex, "out of memory");
 302         return false;
 303     }
 304     return true;
 305 }
 306
 307 /* Append a trailing null-byte */
 308 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
 309 {
 310     if (!token_value_add(&lex->tok, 0)) {
 311         lexerror(lex, "out of memory");
 312         return false;
 313     }
 314     lex->tok.value_count--;
 315     return true;
 316 }
 317
 318 /* Skip whitespace and comments and return the first
 319  * non-white character.
 320  * As this makes use of the above getch() ungetch() functions,
 321  * we don't need to care at all about line numbering anymore.
 322  *
 323  * In theory, this function should only be used at the beginning
 324  * of lexing, or when we *know* the next character is part of the token.
 325  * Otherwise, if the parser throws an error, the linenumber may not be
 326  * the line of the error, but the line of the next token AFTER the error.
 327  *
 328  * This is currently only problematic when using c-like string-continuation,
 329  * since comments and whitespaces are allowed between 2 such strings.
 330  * Example:
 331 printf(   "line one\n"
 332 // A comment
 333           "A continuation of the previous string"
 334 // This line is skipped
 335       , foo);
 336
 337  * In this case, if the parse decides it didn't actually want a string,
 338  * and uses lex->line to print an error, it will show the ', foo);' line's
 339  * linenumber.
 340  *
 341  * On the other hand, the parser is supposed to remember the line of the next
 342  * token's beginning. In this case we would want skipwhite() to be called
 343  * AFTER reading a token, so that the parser, before reading the NEXT token,
 344  * doesn't store teh *comment's* linenumber, but the actual token's linenumber.
 345  *
 346  * THIS SOLUTION
 347  *    here is to store the line of the first character after skipping
 348  *    the initial whitespace in lex->sline, this happens in lex_do.
 349  */
 350 static int lex_skipwhite(lex_file *lex)
 351 {
 352     int ch = 0;
 353     bool haswhite = false;
 354
 355     do
 356     {
 357         ch = lex_getch(lex);
 358         while (ch != EOF && isspace(ch)) {
 359             if (lex->flags.preprocessing) {
 360                 if (ch == '\n') {
 361                     /* end-of-line */
 362                     /* see if there was whitespace first */
 363                     if (haswhite) { /* (lex->tok.value_count) { */
 364                         lex_ungetch(lex, ch);
 365                         if (!lex_endtoken(lex))
 366                             return TOKEN_FATAL;
 367                         return TOKEN_WHITE;
 368                     }
 369                     /* otherwise return EOL */
 370                     return TOKEN_EOL;
 371                 }
 372                 haswhite = true;
 373                 if (!lex_tokench(lex, ch))
 374                     return TOKEN_FATAL;
 375             }
 376             ch = lex_getch(lex);
 377         }
 378
 379         if (ch == '/') {
 380             ch = lex_getch(lex);
 381             if (ch == '/')
 382             {
 383                 /* one line comment */
 384                 haswhite = true;
 385                 ch = lex_getch(lex);
 386
 387                 if (lex->flags.preprocessing) {
 388                     if (!lex_tokench(lex, ' ') ||
 389                         !lex_tokench(lex, ' '))
 390                     {
 391                         return TOKEN_FATAL;
 392                     }
 393                 }
 394
 395                 while (ch != EOF && ch != '\n') {
 396                     ch = lex_getch(lex);
 397                     if (lex->flags.preprocessing && !lex_tokench(lex, ' '))
 398                         return TOKEN_FATAL;
 399                 }
 400                 if (lex->flags.preprocessing) {
 401                     lex_ungetch(lex, '\n');
 402                     if (!lex_endtoken(lex))
 403                         return TOKEN_FATAL;
 404                     return TOKEN_WHITE;
 405                 }
 406                 continue;
 407             }
 408             if (ch == '*')
 409             {
 410                 /* multiline comment */
 411                 haswhite = true;
 412                 if (lex->flags.preprocessing) {
 413                     if (!lex_tokench(lex, ' ') ||
 414                         !lex_tokench(lex, ' '))
 415                     {
 416                         return TOKEN_FATAL;
 417                     }
 418                 }
 419
 420                 while (ch != EOF)
 421                 {
 422                     ch = lex_getch(lex);
 423                     if (ch == '*') {
 424                         ch = lex_getch(lex);
 425                         if (ch == '/') {
 426                             if (lex->flags.preprocessing) {
 427                                 if (!lex_tokench(lex, ' ') ||
 428                                     !lex_tokench(lex, ' '))
 429                                 {
 430                                     return TOKEN_FATAL;
 431                                 }
 432                             }
 433                             break;
 434                         }
 435                     }
 436                     if (lex->flags.preprocessing) {
 437                         if (ch != '\n')
 438                             ch = ' ';
 439                         if (!lex_tokench(lex, ch))
 440                             return TOKEN_FATAL;
 441                     }
 442                 }
 443                 ch = ' '; /* cause TRUE in the isspace check */
 444                 continue;
 445             }
 446             /* Otherwise roll back to the slash and break out of the loop */
 447             lex_ungetch(lex, ch);
 448             ch = '/';
 449             break;
 450         }
 451     } while (ch != EOF && isspace(ch));
 452
 453     if (haswhite) {
 454         if (!lex_endtoken(lex))
 455             return TOKEN_FATAL;
 456         lex_ungetch(lex, ch);
 457         return TOKEN_WHITE;
 458     }
 459     return ch;
 460 }
 461
 462 /* Get a token */
 463 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 464 {
 465     int ch;
 466
 467     ch = lex_getch(lex);
 468     while (ch != EOF && isident(ch))
 469     {
 470         if (!lex_tokench(lex, ch))
 471             return (lex->tok.ttype = TOKEN_FATAL);
 472         ch = lex_getch(lex);
 473     }
 474
 475     /* last ch was not an ident ch: */
 476     lex_ungetch(lex, ch);
 477
 478     return true;
 479 }
 480
 481 /* read one ident for the frame list */
 482 static int lex_parse_frame(lex_file *lex)
 483 {
 484     int ch;
 485
 486     lex_token_new(lex);
 487
 488     ch = lex_getch(lex);
 489     while (ch != EOF && ch != '\n' && isspace(ch))
 490         ch = lex_getch(lex);
 491
 492     if (ch == '\n')
 493         return 1;
 494
 495     if (!isident_start(ch)) {
 496         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 497         return -1;
 498     }
 499
 500     if (!lex_tokench(lex, ch))
 501         return -1;
 502     if (!lex_finish_ident(lex))
 503         return -1;
 504     if (!lex_endtoken(lex))
 505         return -1;
 506     return 0;
 507 }
 508
 509 /* read a list of $frames */
 510 static bool lex_finish_frames(lex_file *lex)
 511 {
 512     do {
 513         size_t i;
 514         int    rc;
 515         frame_macro m;
 516
 517         rc = lex_parse_frame(lex);
 518         if (rc > 0) /* end of line */
 519             return true;
 520         if (rc < 0) /* error */
 521             return false;
 522
 523         for (i = 0; i < lex->frames_count; ++i) {
 524             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 525                 lex->frames[i].value = lex->framevalue++;
 526                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 527                     return false;
 528                 break;
 529             }
 530         }
 531         if (i < lex->frames_count)
 532             continue;
 533
 534         m.value = lex->framevalue++;
 535         m.name = lex->tok.value;
 536         lex->tok.value = NULL;
 537         lex->tok.value_alloc = lex->tok.value_count = 0;
 538         if (!lex_file_frames_add(lex, m)) {
 539             lexerror(lex, "out of memory");
 540             return false;
 541         }
 542     } while (true);
 543 }
 544
 545 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 546 {
 547     int ch = 0;
 548
 549     while (ch != EOF)
 550     {
 551         ch = lex_getch(lex);
 552         if (ch == quote)
 553             return TOKEN_STRINGCONST;
 554
 555         if (ch == '\\') {
 556             ch = lex_getch(lex);
 557             if (ch == EOF) {
 558                 lexerror(lex, "unexpected end of file");
 559                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 560                 return (lex->tok.ttype = TOKEN_ERROR);
 561             }
 562
 563             switch (ch) {
 564             case '\\': break;
 565             case 'a':  ch = '\a'; break;
 566             case 'b':  ch = '\b'; break;
 567             case 'r':  ch = '\r'; break;
 568             case 'n':  ch = '\n'; break;
 569             case 't':  ch = '\t'; break;
 570             case 'f':  ch = '\f'; break;
 571             case 'v':  ch = '\v'; break;
 572             default:
 573                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 574                 /* so we just add the character plus backslash no matter what it actually is */
 575                 if (!lex_tokench(lex, '\\'))
 576                     return (lex->tok.ttype = TOKEN_FATAL);
 577             }
 578             /* add the character finally */
 579             if (!lex_tokench(lex, ch))
 580                 return (lex->tok.ttype = TOKEN_FATAL);
 581         }
 582         else if (!lex_tokench(lex, ch))
 583             return (lex->tok.ttype = TOKEN_FATAL);
 584     }
 585     lexerror(lex, "unexpected end of file within string constant");
 586     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 587     return (lex->tok.ttype = TOKEN_ERROR);
 588 }
 589
 590 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 591 {
 592     bool ishex = false;
 593
 594     int  ch = lastch;
 595
 596     /* parse a number... */
 597     lex->tok.ttype = TOKEN_INTCONST;
 598
 599     if (!lex_tokench(lex, ch))
 600         return (lex->tok.ttype = TOKEN_FATAL);
 601
 602     ch = lex_getch(lex);
 603     if (ch != '.' && !isdigit(ch))
 604     {
 605         if (lastch != '0' || ch != 'x')
 606         {
 607             /* end of the number or EOF */
 608             lex_ungetch(lex, ch);
 609             if (!lex_endtoken(lex))
 610                 return (lex->tok.ttype = TOKEN_FATAL);
 611
 612             lex->tok.constval.i = lastch - '0';
 613             return lex->tok.ttype;
 614         }
 615
 616         ishex = true;
 617     }
 618
 619     /* EOF would have been caught above */
 620
 621     if (ch != '.')
 622     {
 623         if (!lex_tokench(lex, ch))
 624             return (lex->tok.ttype = TOKEN_FATAL);
 625         ch = lex_getch(lex);
 626         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 627         {
 628             if (!lex_tokench(lex, ch))
 629                 return (lex->tok.ttype = TOKEN_FATAL);
 630             ch = lex_getch(lex);
 631         }
 632     }
 633     /* NOT else, '.' can come from above as well */
 634     if (ch == '.' && !ishex)
 635     {
 636         /* Allow floating comma in non-hex mode */
 637         lex->tok.ttype = TOKEN_FLOATCONST;
 638         if (!lex_tokench(lex, ch))
 639             return (lex->tok.ttype = TOKEN_FATAL);
 640
 641         /* continue digits-only */
 642         ch = lex_getch(lex);
 643         while (isdigit(ch))
 644         {
 645             if (!lex_tokench(lex, ch))
 646                 return (lex->tok.ttype = TOKEN_FATAL);
 647             ch = lex_getch(lex);
 648         }
 649     }
 650     /* put back the last character */
 651     /* but do not put back the trailing 'f' or a float */
 652     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 653         ch = lex_getch(lex);
 654
 655     /* generally we don't want words to follow numbers: */
 656     if (isident(ch)) {
 657         lexerror(lex, "unexpected trailing characters after number");
 658         return (lex->tok.ttype = TOKEN_ERROR);
 659     }
 660     lex_ungetch(lex, ch);
 661
 662     if (!lex_endtoken(lex))
 663         return (lex->tok.ttype = TOKEN_FATAL);
 664     if (lex->tok.ttype == TOKEN_FLOATCONST)
 665         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 666     else
 667         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 668     return lex->tok.ttype;
 669 }
 670
 671 int lex_do(lex_file *lex)
 672 {
 673     int ch, nextch;
 674
 675     lex_token_new(lex);
 676 #if 0
 677     if (!lex->tok)
 678         return TOKEN_FATAL;
 679 #endif
 680
 681     ch = lex_skipwhite(lex);
 682     lex->sline = lex->line;
 683     lex->tok.ctx.line = lex->sline;
 684     lex->tok.ctx.file = lex->name;
 685
 686     if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
 687         return (lex->tok.ttype = ch);
 688     }
 689
 690     if (lex->eof)
 691         return (lex->tok.ttype = TOKEN_FATAL);
 692
 693     if (ch == EOF) {
 694         lex->eof = true;
 695         return (lex->tok.ttype = TOKEN_EOF);
 696     }
 697
 698     /* modelgen / spiritgen commands */
 699     if (ch == '$') {
 700         const char *v;
 701         size_t frame;
 702
 703         ch = lex_getch(lex);
 704         if (!isident_start(ch)) {
 705             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 706             return lex_do(lex);
 707         }
 708         if (!lex_tokench(lex, ch))
 709             return (lex->tok.ttype = TOKEN_FATAL);
 710         if (!lex_finish_ident(lex))
 711             return (lex->tok.ttype = TOKEN_ERROR);
 712         if (!lex_endtoken(lex))
 713             return (lex->tok.ttype = TOKEN_FATAL);
 714         /* skip the known commands */
 715         v = lex->tok.value;
 716
 717         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 718         {
 719             /* frame/framesave command works like an enum
 720              * similar to fteqcc we handle this in the lexer.
 721              * The reason for this is that it is sensitive to newlines,
 722              * which the parser is unaware of
 723              */
 724             if (!lex_finish_frames(lex))
 725                  return (lex->tok.ttype = TOKEN_ERROR);
 726             return lex_do(lex);
 727         }
 728
 729         if (!strcmp(v, "framevalue"))
 730         {
 731             ch = lex_getch(lex);
 732             while (ch != EOF && isspace(ch) && ch != '\n')
 733                 ch = lex_getch(lex);
 734
 735             if (!isdigit(ch)) {
 736                 lexerror(lex, "$framevalue requires an integer parameter");
 737                 return lex_do(lex);
 738             }
 739
 740             lex_token_new(lex);
 741             lex->tok.ttype = lex_finish_digit(lex, ch);
 742             if (!lex_endtoken(lex))
 743                 return (lex->tok.ttype = TOKEN_FATAL);
 744             if (lex->tok.ttype != TOKEN_INTCONST) {
 745                 lexerror(lex, "$framevalue requires an integer parameter");
 746                 return lex_do(lex);
 747             }
 748             lex->framevalue = lex->tok.constval.i;
 749             return lex_do(lex);
 750         }
 751
 752         if (!strcmp(v, "framerestore"))
 753         {
 754             int rc;
 755
 756             lex_token_new(lex);
 757
 758             rc = lex_parse_frame(lex);
 759
 760             if (rc > 0) {
 761                 lexerror(lex, "$framerestore requires a framename parameter");
 762                 return lex_do(lex);
 763             }
 764             if (rc < 0)
 765                 return (lex->tok.ttype = TOKEN_FATAL);
 766
 767             v = lex->tok.value;
 768             for (frame = 0; frame < lex->frames_count; ++frame) {
 769                 if (!strcmp(v, lex->frames[frame].name)) {
 770                     lex->framevalue = lex->frames[frame].value;
 771                     return lex_do(lex);
 772                 }
 773             }
 774             lexerror(lex, "unknown framename `%s`", v);
 775             return lex_do(lex);
 776         }
 777
 778         if (!strcmp(v, "modelname"))
 779         {
 780             int rc;
 781
 782             lex_token_new(lex);
 783
 784             rc = lex_parse_frame(lex);
 785
 786             if (rc > 0) {
 787                 lexerror(lex, "$framerestore requires a framename parameter");
 788                 return lex_do(lex);
 789             }
 790             if (rc < 0)
 791                 return (lex->tok.ttype = TOKEN_FATAL);
 792
 793             v = lex->tok.value;
 794             if (lex->modelname) {
 795                 frame_macro m;
 796                 m.value = lex->framevalue;
 797                 m.name = lex->modelname;
 798                 lex->modelname = NULL;
 799                 if (!lex_file_frames_add(lex, m)) {
 800                     lexerror(lex, "out of memory");
 801                     return (lex->tok.ttype = TOKEN_FATAL);
 802                 }
 803             }
 804             lex->modelname = lex->tok.value;
 805             lex->tok.value = NULL;
 806             lex->tok.value_alloc = lex->tok.value_count = 0;
 807             for (frame = 0; frame < lex->frames_count; ++frame) {
 808                 if (!strcmp(v, lex->frames[frame].name)) {
 809                     lex->framevalue = lex->frames[frame].value;
 810                     break;
 811                 }
 812             }
 813             return lex_do(lex);
 814         }
 815
 816         if (!strcmp(v, "flush"))
 817         {
 818             size_t frame;
 819             for (frame = 0; frame < lex->frames_count; ++frame)
 820                 mem_d(lex->frames[frame].name);
 821             MEM_VECTOR_CLEAR(lex, frames);
 822             /* skip line (fteqcc does it too) */
 823             ch = lex_getch(lex);
 824             while (ch != EOF && ch != '\n')
 825                 ch = lex_getch(lex);
 826             return lex_do(lex);
 827         }
 828
 829         if (!strcmp(v, "cd") ||
 830             !strcmp(v, "origin") ||
 831             !strcmp(v, "base") ||
 832             !strcmp(v, "flags") ||
 833             !strcmp(v, "scale") ||
 834             !strcmp(v, "skin"))
 835         {
 836             /* skip line */
 837             ch = lex_getch(lex);
 838             while (ch != EOF && ch != '\n')
 839                 ch = lex_getch(lex);
 840             return lex_do(lex);
 841         }
 842
 843         for (frame = 0; frame < lex->frames_count; ++frame) {
 844             if (!strcmp(v, lex->frames[frame].name)) {
 845                 lex->tok.constval.i = lex->frames[frame].value;
 846                 return (lex->tok.ttype = TOKEN_INTCONST);
 847             }
 848         }
 849
 850         lexerror(lex, "invalid frame macro");
 851         return lex_do(lex);
 852     }
 853
 854     /* single-character tokens */
 855     switch (ch)
 856     {
 857         case '(':
 858             if (!lex_tokench(lex, ch) ||
 859                 !lex_endtoken(lex))
 860             {
 861                 return (lex->tok.ttype = TOKEN_FATAL);
 862             }
 863             if (lex->flags.noops)
 864                 return (lex->tok.ttype = ch);
 865             else
 866                 return (lex->tok.ttype = TOKEN_OPERATOR);
 867         case ')':
 868         case ';':
 869         case '{':
 870         case '}':
 871         case '[':
 872         case ']':
 873
 874         case '#':
 875             if (!lex_tokench(lex, ch) ||
 876                 !lex_endtoken(lex))
 877             {
 878                 return (lex->tok.ttype = TOKEN_FATAL);
 879             }
 880             return (lex->tok.ttype = ch);
 881         default:
 882             break;
 883     }
 884
 885     if (lex->flags.noops)
 886     {
 887         /* Detect characters early which are normally
 888          * operators OR PART of an operator.
 889          */
 890         switch (ch)
 891         {
 892             case '+':
 893             case '-':
 894             case '*':
 895             case '/':
 896             case '<':
 897             case '>':
 898             case '=':
 899             case '&':
 900             case '|':
 901             case '^':
 902             case '~':
 903             case ',':
 904             case '!':
 905                 if (!lex_tokench(lex, ch) ||
 906                     !lex_endtoken(lex))
 907                 {
 908                     return (lex->tok.ttype = TOKEN_FATAL);
 909                 }
 910                 return (lex->tok.ttype = ch);
 911             default:
 912                 break;
 913         }
 914
 915         if (ch == '.')
 916         {
 917             if (!lex_tokench(lex, ch))
 918                 return (lex->tok.ttype = TOKEN_FATAL);
 919             /* peak ahead once */
 920             nextch = lex_getch(lex);
 921             if (nextch != '.') {
 922                 lex_ungetch(lex, nextch);
 923                 if (!lex_endtoken(lex))
 924                     return (lex->tok.ttype = TOKEN_FATAL);
 925                 return (lex->tok.ttype = ch);
 926             }
 927             /* peak ahead again */
 928             nextch = lex_getch(lex);
 929             if (nextch != '.') {
 930                 lex_ungetch(lex, nextch);
 931                 lex_ungetch(lex, nextch);
 932                 if (!lex_endtoken(lex))
 933                     return (lex->tok.ttype = TOKEN_FATAL);
 934                 return (lex->tok.ttype = ch);
 935             }
 936             /* fill the token to be "..." */
 937             if (!lex_tokench(lex, ch) ||
 938                 !lex_tokench(lex, ch) ||
 939                 !lex_endtoken(lex))
 940             {
 941                 return (lex->tok.ttype = TOKEN_FATAL);
 942             }
 943             return (lex->tok.ttype = TOKEN_DOTS);
 944         }
 945     }
 946
 947     if (ch == ',' || ch == '.') {
 948         if (!lex_tokench(lex, ch) ||
 949             !lex_endtoken(lex))
 950         {
 951             return (lex->tok.ttype = TOKEN_FATAL);
 952         }
 953         return (lex->tok.ttype = TOKEN_OPERATOR);
 954     }
 955
 956     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 957         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 958         ch == '=' || ch == '!' || /* ==, != */
 959         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 960     {
 961         if (!lex_tokench(lex, ch))
 962             return (lex->tok.ttype = TOKEN_FATAL);
 963
 964         nextch = lex_getch(lex);
 965         if (nextch == ch || nextch == '=') {
 966             if (!lex_tokench(lex, nextch))
 967                 return (lex->tok.ttype = TOKEN_FATAL);
 968         } else if (ch == '-' && nextch == '>') {
 969             if (!lex_tokench(lex, nextch))
 970                 return (lex->tok.ttype = TOKEN_FATAL);
 971         } else
 972             lex_ungetch(lex, nextch);
 973
 974         if (!lex_endtoken(lex))
 975             return (lex->tok.ttype = TOKEN_FATAL);
 976         return (lex->tok.ttype = TOKEN_OPERATOR);
 977     }
 978
 979     /*
 980     if (ch == '^' || ch == '~' || ch == '!')
 981     {
 982         if (!lex_tokench(lex, ch) ||
 983             !lex_endtoken(lex))
 984         {
 985             return (lex->tok.ttype = TOKEN_FATAL);
 986         }
 987         return (lex->tok.ttype = TOKEN_OPERATOR);
 988     }
 989     */
 990
 991     if (ch == '*' || ch == '/') /* *=, /= */
 992     {
 993         if (!lex_tokench(lex, ch))
 994             return (lex->tok.ttype = TOKEN_FATAL);
 995
 996         nextch = lex_getch(lex);
 997         if (nextch == '=') {
 998             if (!lex_tokench(lex, nextch))
 999                 return (lex->tok.ttype = TOKEN_FATAL);
1000         } else
1001             lex_ungetch(lex, nextch);
1002
1003         if (!lex_endtoken(lex))
1004             return (lex->tok.ttype = TOKEN_FATAL);
1005         return (lex->tok.ttype = TOKEN_OPERATOR);
1006     }
1007
1008     if (isident_start(ch))
1009     {
1010         const char *v;
1011
1012         if (!lex_tokench(lex, ch))
1013             return (lex->tok.ttype = TOKEN_FATAL);
1014         if (!lex_finish_ident(lex)) {
1015             /* error? */
1016             return (lex->tok.ttype = TOKEN_ERROR);
1017         }
1018         if (!lex_endtoken(lex))
1019             return (lex->tok.ttype = TOKEN_FATAL);
1020         lex->tok.ttype = TOKEN_IDENT;
1021
1022         v = lex->tok.value;
1023         if (!strcmp(v, "void")) {
1024             lex->tok.ttype = TOKEN_TYPENAME;
1025             lex->tok.constval.t = TYPE_VOID;
1026         } else if (!strcmp(v, "int")) {
1027             lex->tok.ttype = TOKEN_TYPENAME;
1028             lex->tok.constval.t = TYPE_INTEGER;
1029         } else if (!strcmp(v, "float")) {
1030             lex->tok.ttype = TOKEN_TYPENAME;
1031             lex->tok.constval.t = TYPE_FLOAT;
1032         } else if (!strcmp(v, "string")) {
1033             lex->tok.ttype = TOKEN_TYPENAME;
1034             lex->tok.constval.t = TYPE_STRING;
1035         } else if (!strcmp(v, "entity")) {
1036             lex->tok.ttype = TOKEN_TYPENAME;
1037             lex->tok.constval.t = TYPE_ENTITY;
1038         } else if (!strcmp(v, "vector")) {
1039             lex->tok.ttype = TOKEN_TYPENAME;
1040             lex->tok.constval.t = TYPE_VECTOR;
1041         } else if (!strcmp(v, "for")  ||
1042                  !strcmp(v, "while")  ||
1043                  !strcmp(v, "do")     ||
1044                  !strcmp(v, "if")     ||
1045                  !strcmp(v, "else")   ||
1046                  !strcmp(v, "local")  ||
1047                  !strcmp(v, "return") ||
1048                  !strcmp(v, "const"))
1049         {
1050             lex->tok.ttype = TOKEN_KEYWORD;
1051         }
1052         else if (opts_standard != COMPILER_QCC)
1053         {
1054             /* other standards reserve these keywords */
1055             if (!strcmp(v, "switch") ||
1056                 !strcmp(v, "struct") ||
1057                 !strcmp(v, "union")  ||
1058                 !strcmp(v, "break")  ||
1059                 !strcmp(v, "continue"))
1060             {
1061                 lex->tok.ttype = TOKEN_KEYWORD;
1062             }
1063         }
1064
1065         return lex->tok.ttype;
1066     }
1067
1068     if (ch == '"')
1069     {
1070         lex->flags.nodigraphs = true;
1071         lex->tok.ttype = lex_finish_string(lex, '"');
1072         while (lex->tok.ttype == TOKEN_STRINGCONST)
1073         {
1074             /* Allow c style "string" "continuation" */
1075             ch = lex_skipwhite(lex);
1076             if (ch != '"') {
1077                 lex_ungetch(lex, ch);
1078                 break;
1079             }
1080
1081             lex->tok.ttype = lex_finish_string(lex, '"');
1082         }
1083         lex->flags.nodigraphs = false;
1084         if (!lex_endtoken(lex))
1085             return (lex->tok.ttype = TOKEN_FATAL);
1086         return lex->tok.ttype;
1087     }
1088
1089     if (ch == '\'')
1090     {
1091         /* we parse character constants like string,
1092          * but return TOKEN_CHARCONST, or a vector type if it fits...
1093          * Likewise actual unescaping has to be done by the parser.
1094          * The difference is we don't allow 'char' 'continuation'.
1095          */
1096          lex->tok.ttype = lex_finish_string(lex, '\'');
1097          if (!lex_endtoken(lex))
1098               return (lex->tok.ttype = TOKEN_FATAL);
1099
1100          /* It's a vector if we can successfully scan 3 floats */
1101 #ifdef WIN32
1102          if (sscanf_s(lex->tok.value, " %f %f %f ",
1103                     &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1104 #else
1105          if (sscanf(lex->tok.value, " %f %f %f ",
1106                     &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1107 #endif
1108          {
1109               lex->tok.ttype = TOKEN_VECTORCONST;
1110          }
1111
1112          return lex->tok.ttype;
1113     }
1114
1115     if (isdigit(ch))
1116     {
1117         lex->tok.ttype = lex_finish_digit(lex, ch);
1118         if (!lex_endtoken(lex))
1119             return (lex->tok.ttype = TOKEN_FATAL);
1120         return lex->tok.ttype;
1121     }
1122
1123     lexerror(lex, "unknown token");
1124     return (lex->tok.ttype = TOKEN_ERROR);
1125 }