lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
/* Vector helpers for the lexer's growable members.
 * NOTE(review): the macros are defined outside this file; presumably the
 * first expands to the token_value_* helpers (token_value_add() is used
 * when appending to the token text) and the second to the lex_file_frames_*
 * helpers (lex_file_frames_add() is used for $frame macros) - confirm.
 */
   9 MEM_VEC_FUNCTIONS(token, char, value)
  10 MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames)
  11
/* Global list of file names handed out by lex_open(); this file uses
 * lex_filenames_add(), lex_filenames_data and lex_filenames_elements.
 */
  12 VECTOR_MAKE(char*, lex_filenames);
  13
  14 void lexerror(lex_file *lex, const char *fmt, ...)
  15 {
  16         va_list ap;
  17
  18         va_start(ap, fmt);
  19     vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  20         va_end(ap);
  21 }
  22
  23 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  24 {
  25         va_list ap;
  26         int lvl = LVL_WARNING;
  27
  28     if (!OPTS_WARN(warntype))
  29         return false;
  30
  31     if (opts_werror)
  32             lvl = LVL_ERROR;
  33
  34         va_start(ap, fmt);
  35     vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
  36         va_end(ap);
  37
  38         return opts_werror;
  39 }
  40
  41
  42 #if 0
  43 token* token_new()
  44 {
  45     token *tok = (token*)mem_a(sizeof(token));
  46     if (!tok)
  47         return NULL;
  48     memset(tok, 0, sizeof(*tok));
  49     return tok;
  50 }
  51
  52 void token_delete(token *self)
  53 {
  54     if (self->next && self->next->prev == self)
  55         self->next->prev = self->prev;
  56     if (self->prev && self->prev->next == self)
  57         self->prev->next = self->next;
  58     MEM_VECTOR_CLEAR(self, value);
  59     mem_d(self);
  60 }
  61
  62 token* token_copy(const token *cp)
  63 {
  64     token* self = token_new();
  65     if (!self)
  66         return NULL;
  67     /* copy the value */
  68     self->value_alloc = cp->value_count + 1;
  69     self->value_count = cp->value_count;
  70     self->value = (char*)mem_a(self->value_alloc);
  71     if (!self->value) {
  72         mem_d(self);
  73         return NULL;
  74     }
  75     memcpy(self->value, cp->value, cp->value_count);
  76     self->value[self->value_alloc-1] = 0;
  77
  78     /* rest */
  79     self->ctx = cp->ctx;
  80     self->ttype = cp->ttype;
  81     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  82     return self;
  83 }
  84
  85 void token_delete_all(token *t)
  86 {
  87     token *n;
  88
  89     do {
  90         n = t->next;
  91         token_delete(t);
  92         t = n;
  93     } while(t);
  94 }
  95
  96 token* token_copy_all(const token *cp)
  97 {
  98     token *cur;
  99     token *out;
 100
 101     out = cur = token_copy(cp);
 102     if (!out)
 103         return NULL;
 104
 105     while (cp->next) {
 106         cp = cp->next;
 107         cur->next = token_copy(cp);
 108         if (!cur->next) {
 109             token_delete_all(out);
 110             return NULL;
 111         }
 112         cur->next->prev = cur;
 113         cur = cur->next;
 114     }
 115
 116     return out;
 117 }
 118 #else
 119 static void lex_token_new(lex_file *lex)
 120 {
 121 #if 0
 122     if (lex->tok)
 123         token_delete(lex->tok);
 124     lex->tok = token_new();
 125 #else
 126     lex->tok.value_count = 0;
 127     lex->tok.constval.t  = 0;
 128     lex->tok.ctx.line = lex->sline;
 129     lex->tok.ctx.file = lex->name;
 130 #endif
 131 }
 132 #endif
 133
 134 lex_file* lex_open(const char *file)
 135 {
 136     lex_file *lex;
 137     FILE *in = util_fopen(file, "rb");
 138
 139     if (!in) {
 140         lexerror(NULL, "open failed: '%s'\n", file);
 141         return NULL;
 142     }
 143
 144     lex = (lex_file*)mem_a(sizeof(*lex));
 145     if (!lex) {
 146         fclose(in);
 147         lexerror(NULL, "out of memory\n");
 148         return NULL;
 149     }
 150
 151     memset(lex, 0, sizeof(*lex));
 152
 153     lex->file = in;
 154     lex->name = util_strdup(file);
 155     lex->line = 1; /* we start counting at 1 */
 156
 157     lex->peekpos = 0;
 158     lex->eof = false;
 159
 160     lex_filenames_add(lex->name);
 161
 162     return lex;
 163 }
 164
 165 void lex_cleanup(void)
 166 {
 167     size_t i;
 168     for (i = 0; i < lex_filenames_elements; ++i)
 169         mem_d(lex_filenames_data[i]);
 170     mem_d(lex_filenames_data);
 171 }
 172
 173 void lex_close(lex_file *lex)
 174 {
 175     size_t i;
 176     for (i = 0; i < lex->frames_count; ++i)
 177         mem_d(lex->frames[i].name);
 178     MEM_VECTOR_CLEAR(lex, frames);
 179
 180     if (lex->modelname)
 181         mem_d(lex->modelname);
 182
 183     if (lex->file)
 184         fclose(lex->file);
 185 #if 0
 186     if (lex->tok)
 187         token_delete(lex->tok);
 188 #else
 189     MEM_VECTOR_CLEAR(&(lex->tok), value);
 190 #endif
 191     /* mem_d(lex->name); collected in lex_filenames */
 192     mem_d(lex);
 193 }
 194
 195 /* Get or put-back data
 196  * The following to functions do NOT understand what kind of data they
 197  * are working on.
 198  * The are merely wrapping get/put in order to count line numbers.
 199  */
 200 static int lex_getch(lex_file *lex)
 201 {
 202     int ch;
 203
 204     if (lex->peekpos) {
 205         lex->peekpos--;
 206         if (lex->peek[lex->peekpos] == '\n')
 207             lex->line++;
 208         return lex->peek[lex->peekpos];
 209     }
 210
 211     ch = fgetc(lex->file);
 212     if (ch == '\n')
 213         lex->line++;
 214     return ch;
 215 }
 216
 217 static void lex_ungetch(lex_file *lex, int ch)
 218 {
 219     lex->peek[lex->peekpos++] = ch;
 220     if (ch == '\n')
 221         lex->line--;
 222 }
 223
 224 /* classify characters
 225  * some additions to the is*() functions of ctype.h
 226  */
 227
 228 /* Idents are alphanumberic, but they start with alpha or _ */
 229 static bool isident_start(int ch)
 230 {
 231     return isalpha(ch) || ch == '_';
 232 }
 233
 234 static bool isident(int ch)
 235 {
 236     return isident_start(ch) || isdigit(ch);
 237 }
 238
 239 /* isxdigit_only is used when we already know it's not a digit
 240  * and want to see if it's a hex digit anyway.
 241  */
 242 static bool isxdigit_only(int ch)
 243 {
 244     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 245 }
 246
 247 /* Skip whitespace and comments and return the first
 248  * non-white character.
 249  * As this makes use of the above getch() ungetch() functions,
 250  * we don't need to care at all about line numbering anymore.
 251  *
 252  * In theory, this function should only be used at the beginning
 253  * of lexing, or when we *know* the next character is part of the token.
 254  * Otherwise, if the parser throws an error, the linenumber may not be
 255  * the line of the error, but the line of the next token AFTER the error.
 256  *
 257  * This is currently only problematic when using c-like string-continuation,
 258  * since comments and whitespaces are allowed between 2 such strings.
 259  * Example:
 260 printf(   "line one\n"
 261 // A comment
 262           "A continuation of the previous string"
 263 // This line is skipped
 264       , foo);
 265
 266  * In this case, if the parse decides it didn't actually want a string,
 267  * and uses lex->line to print an error, it will show the ', foo);' line's
 268  * linenumber.
 269  *
 270  * On the other hand, the parser is supposed to remember the line of the next
 271  * token's beginning. In this case we would want skipwhite() to be called
 272  * AFTER reading a token, so that the parser, before reading the NEXT token,
 273  * doesn't store teh *comment's* linenumber, but the actual token's linenumber.
 274  *
 275  * THIS SOLUTION
 276  *    here is to store the line of the first character after skipping
 277  *    the initial whitespace in lex->sline, this happens in lex_do.
 278  */
 279 static int lex_skipwhite(lex_file *lex)
 280 {
 281     int ch = 0;
 282
 283     do
 284     {
 285         ch = lex_getch(lex);
 286         while (ch != EOF && isspace(ch)) ch = lex_getch(lex);
 287
 288         if (ch == '/') {
 289             ch = lex_getch(lex);
 290             if (ch == '/')
 291             {
 292                 /* one line comment */
 293                 ch = lex_getch(lex);
 294
 295                 /* check for special: '/', '/', '*', '/' */
 296                 if (ch == '*') {
 297                     ch = lex_getch(lex);
 298                     if (ch == '/') {
 299                         ch = ' ';
 300                         continue;
 301                     }
 302                 }
 303
 304                 while (ch != EOF && ch != '\n') {
 305                     ch = lex_getch(lex);
 306                 }
 307                 continue;
 308             }
 309             if (ch == '*')
 310             {
 311                 /* multiline comment */
 312                 while (ch != EOF)
 313                 {
 314                     ch = lex_getch(lex);
 315                     if (ch == '*') {
 316                         ch = lex_getch(lex);
 317                         if (ch == '/') {
 318                             ch = lex_getch(lex);
 319                             break;
 320                         }
 321                     }
 322                 }
 323                 if (ch == '/') /* allow *//* direct following comment */
 324                 {
 325                     lex_ungetch(lex, ch);
 326                     ch = ' '; /* cause TRUE in the isspace check */
 327                 }
 328                 continue;
 329             }
 330             /* Otherwise roll back to the slash and break out of the loop */
 331             lex_ungetch(lex, ch);
 332             ch = '/';
 333             break;
 334         }
 335     } while (ch != EOF && isspace(ch));
 336
 337     return ch;
 338 }
 339
 340 /* Append a character to the token buffer */
 341 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
 342 {
 343     if (!token_value_add(&lex->tok, ch)) {
 344         lexerror(lex, "out of memory");
 345         return false;
 346     }
 347     return true;
 348 }
 349
 350 /* Append a trailing null-byte */
 351 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
 352 {
 353     if (!token_value_add(&lex->tok, 0)) {
 354         lexerror(lex, "out of memory");
 355         return false;
 356     }
 357     lex->tok.value_count--;
 358     return true;
 359 }
 360
 361 /* Get a token */
 362 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 363 {
 364     int ch;
 365
 366     ch = lex_getch(lex);
 367     while (ch != EOF && isident(ch))
 368     {
 369         if (!lex_tokench(lex, ch))
 370             return (lex->tok.ttype = TOKEN_FATAL);
 371         ch = lex_getch(lex);
 372     }
 373
 374     /* last ch was not an ident ch: */
 375     lex_ungetch(lex, ch);
 376
 377     return true;
 378 }
 379
 380 /* read one ident for the frame list */
 381 static int lex_parse_frame(lex_file *lex)
 382 {
 383     int ch;
 384
 385     lex_token_new(lex);
 386
 387     ch = lex_getch(lex);
 388     while (ch != EOF && ch != '\n' && isspace(ch))
 389         ch = lex_getch(lex);
 390
 391     if (ch == '\n')
 392         return 1;
 393
 394     if (!isident_start(ch)) {
 395         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 396         return -1;
 397     }
 398
 399     if (!lex_tokench(lex, ch))
 400         return -1;
 401     if (!lex_finish_ident(lex))
 402         return -1;
 403     if (!lex_endtoken(lex))
 404         return -1;
 405     return 0;
 406 }
 407
 408 /* read a list of $frames */
 409 static bool lex_finish_frames(lex_file *lex)
 410 {
 411     do {
 412         size_t i;
 413         int    rc;
 414         frame_macro m;
 415
 416         rc = lex_parse_frame(lex);
 417         if (rc > 0) /* end of line */
 418             return true;
 419         if (rc < 0) /* error */
 420             return false;
 421
 422         for (i = 0; i < lex->frames_count; ++i) {
 423             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 424                 lex->frames[i].value = lex->framevalue++;
 425                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 426                     return false;
 427                 break;
 428             }
 429         }
 430         if (i < lex->frames_count)
 431             continue;
 432
 433         m.value = lex->framevalue++;
 434         m.name = lex->tok.value;
 435         lex->tok.value = NULL;
 436         lex->tok.value_alloc = lex->tok.value_count = 0;
 437         if (!lex_file_frames_add(lex, m)) {
 438             lexerror(lex, "out of memory");
 439             return false;
 440         }
 441     } while (true);
 442 }
 443
 444 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 445 {
 446     int ch = 0;
 447
 448     while (ch != EOF)
 449     {
 450         ch = lex_getch(lex);
 451         if (ch == quote)
 452             return TOKEN_STRINGCONST;
 453
 454         if (ch == '\\') {
 455             ch = lex_getch(lex);
 456             if (ch == EOF) {
 457                 lexerror(lex, "unexpected end of file");
 458                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 459                 return (lex->tok.ttype = TOKEN_ERROR);
 460             }
 461
 462             switch (ch) {
 463             case '\\': break;
 464             case 'a':  ch = '\a'; break;
 465             case 'b':  ch = '\b'; break;
 466             case 'r':  ch = '\r'; break;
 467             case 'n':  ch = '\n'; break;
 468             case 't':  ch = '\t'; break;
 469             case 'f':  ch = '\f'; break;
 470             case 'v':  ch = '\v'; break;
 471             default:
 472                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 473                 /* so we just add the character plus backslash no matter what it actually is */
 474                 if (!lex_tokench(lex, '\\'))
 475                     return (lex->tok.ttype = TOKEN_FATAL);
 476             }
 477             /* add the character finally */
 478             if (!lex_tokench(lex, ch))
 479                 return (lex->tok.ttype = TOKEN_FATAL);
 480         }
 481         else if (!lex_tokench(lex, ch))
 482             return (lex->tok.ttype = TOKEN_FATAL);
 483     }
 484     lexerror(lex, "unexpected end of file within string constant");
 485     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 486     return (lex->tok.ttype = TOKEN_ERROR);
 487 }
 488
 489 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 490 {
 491     bool ishex = false;
 492
 493     int  ch = lastch;
 494
 495     /* parse a number... */
 496     lex->tok.ttype = TOKEN_INTCONST;
 497
 498     if (!lex_tokench(lex, ch))
 499         return (lex->tok.ttype = TOKEN_FATAL);
 500
 501     ch = lex_getch(lex);
 502     if (ch != '.' && !isdigit(ch))
 503     {
 504         if (lastch != '0' || ch != 'x')
 505         {
 506             /* end of the number or EOF */
 507             lex_ungetch(lex, ch);
 508             if (!lex_endtoken(lex))
 509                 return (lex->tok.ttype = TOKEN_FATAL);
 510
 511             lex->tok.constval.i = lastch - '0';
 512             return lex->tok.ttype;
 513         }
 514
 515         ishex = true;
 516     }
 517
 518     /* EOF would have been caught above */
 519
 520     if (ch != '.')
 521     {
 522         if (!lex_tokench(lex, ch))
 523             return (lex->tok.ttype = TOKEN_FATAL);
 524         ch = lex_getch(lex);
 525         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 526         {
 527             if (!lex_tokench(lex, ch))
 528                 return (lex->tok.ttype = TOKEN_FATAL);
 529             ch = lex_getch(lex);
 530         }
 531     }
 532     /* NOT else, '.' can come from above as well */
 533     if (ch == '.' && !ishex)
 534     {
 535         /* Allow floating comma in non-hex mode */
 536         lex->tok.ttype = TOKEN_FLOATCONST;
 537         if (!lex_tokench(lex, ch))
 538             return (lex->tok.ttype = TOKEN_FATAL);
 539
 540         /* continue digits-only */
 541         ch = lex_getch(lex);
 542         while (isdigit(ch))
 543         {
 544             if (!lex_tokench(lex, ch))
 545                 return (lex->tok.ttype = TOKEN_FATAL);
 546             ch = lex_getch(lex);
 547         }
 548     }
 549     /* put back the last character */
 550     /* but do not put back the trailing 'f' or a float */
 551     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 552         ch = lex_getch(lex);
 553
 554     /* generally we don't want words to follow numbers: */
 555     if (isident(ch)) {
 556         lexerror(lex, "unexpected trailing characters after number");
 557         return (lex->tok.ttype = TOKEN_ERROR);
 558     }
 559     lex_ungetch(lex, ch);
 560
 561     if (!lex_endtoken(lex))
 562         return (lex->tok.ttype = TOKEN_FATAL);
 563     if (lex->tok.ttype == TOKEN_FLOATCONST)
 564         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 565     else
 566         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 567     return lex->tok.ttype;
 568 }
 569
 570 int lex_do(lex_file *lex)
 571 {
 572     int ch, nextch;
 573
 574     lex_token_new(lex);
 575 #if 0
 576     if (!lex->tok)
 577         return TOKEN_FATAL;
 578 #endif
 579
 580     ch = lex_skipwhite(lex);
 581     lex->sline = lex->line;
 582     lex->tok.ctx.line = lex->sline;
 583     lex->tok.ctx.file = lex->name;
 584
 585     if (lex->eof)
 586         return (lex->tok.ttype = TOKEN_FATAL);
 587
 588     if (ch == EOF) {
 589         lex->eof = true;
 590         return (lex->tok.ttype = TOKEN_EOF);
 591     }
 592
 593     /* modelgen / spiritgen commands */
 594     if (ch == '$') {
 595         const char *v;
 596         size_t frame;
 597
 598         ch = lex_getch(lex);
 599         if (!isident_start(ch)) {
 600             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 601             return lex_do(lex);
 602         }
 603         if (!lex_tokench(lex, ch))
 604             return (lex->tok.ttype = TOKEN_FATAL);
 605         if (!lex_finish_ident(lex))
 606             return (lex->tok.ttype = TOKEN_ERROR);
 607         if (!lex_endtoken(lex))
 608             return (lex->tok.ttype = TOKEN_FATAL);
 609         /* skip the known commands */
 610         v = lex->tok.value;
 611
 612         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 613         {
 614             /* frame/framesave command works like an enum
 615              * similar to fteqcc we handle this in the lexer.
 616              * The reason for this is that it is sensitive to newlines,
 617              * which the parser is unaware of
 618              */
 619             if (!lex_finish_frames(lex))
 620                  return (lex->tok.ttype = TOKEN_ERROR);
 621             return lex_do(lex);
 622         }
 623
 624         if (!strcmp(v, "framevalue"))
 625         {
 626             ch = lex_getch(lex);
 627             while (ch != EOF && isspace(ch) && ch != '\n')
 628                 ch = lex_getch(lex);
 629
 630             if (!isdigit(ch)) {
 631                 lexerror(lex, "$framevalue requires an integer parameter");
 632                 return lex_do(lex);
 633             }
 634
 635             lex_token_new(lex);
 636             lex->tok.ttype = lex_finish_digit(lex, ch);
 637             if (!lex_endtoken(lex))
 638                 return (lex->tok.ttype = TOKEN_FATAL);
 639             if (lex->tok.ttype != TOKEN_INTCONST) {
 640                 lexerror(lex, "$framevalue requires an integer parameter");
 641                 return lex_do(lex);
 642             }
 643             lex->framevalue = lex->tok.constval.i;
 644             return lex_do(lex);
 645         }
 646
 647         if (!strcmp(v, "framerestore"))
 648         {
 649             int rc;
 650
 651             lex_token_new(lex);
 652
 653             rc = lex_parse_frame(lex);
 654
 655             if (rc > 0) {
 656                 lexerror(lex, "$framerestore requires a framename parameter");
 657                 return lex_do(lex);
 658             }
 659             if (rc < 0)
 660                 return (lex->tok.ttype = TOKEN_FATAL);
 661
 662             v = lex->tok.value;
 663             for (frame = 0; frame < lex->frames_count; ++frame) {
 664                 if (!strcmp(v, lex->frames[frame].name)) {
 665                     lex->framevalue = lex->frames[frame].value;
 666                     return lex_do(lex);
 667                 }
 668             }
 669             lexerror(lex, "unknown framename `%s`", v);
 670             return lex_do(lex);
 671         }
 672
 673         if (!strcmp(v, "modelname"))
 674         {
 675             int rc;
 676
 677             lex_token_new(lex);
 678
 679             rc = lex_parse_frame(lex);
 680
 681             if (rc > 0) {
 682                 lexerror(lex, "$framerestore requires a framename parameter");
 683                 return lex_do(lex);
 684             }
 685             if (rc < 0)
 686                 return (lex->tok.ttype = TOKEN_FATAL);
 687
 688             v = lex->tok.value;
 689             if (lex->modelname) {
 690                 frame_macro m;
 691                 m.value = lex->framevalue;
 692                 m.name = lex->modelname;
 693                 lex->modelname = NULL;
 694                 if (!lex_file_frames_add(lex, m)) {
 695                     lexerror(lex, "out of memory");
 696                     return (lex->tok.ttype = TOKEN_FATAL);
 697                 }
 698             }
 699             lex->modelname = lex->tok.value;
 700             lex->tok.value = NULL;
 701             for (frame = 0; frame < lex->frames_count; ++frame) {
 702                 if (!strcmp(v, lex->frames[frame].name)) {
 703                     lex->framevalue = lex->frames[frame].value;
 704                     break;
 705                 }
 706             }
 707             return lex_do(lex);
 708         }
 709
 710         if (!strcmp(v, "flush"))
 711         {
 712             size_t frame;
 713             for (frame = 0; frame < lex->frames_count; ++frame)
 714                 mem_d(lex->frames[frame].name);
 715             MEM_VECTOR_CLEAR(lex, frames);
 716             /* skip line (fteqcc does it too) */
 717             ch = lex_getch(lex);
 718             while (ch != EOF && ch != '\n')
 719                 ch = lex_getch(lex);
 720             return lex_do(lex);
 721         }
 722
 723         if (!strcmp(v, "cd") ||
 724             !strcmp(v, "origin") ||
 725             !strcmp(v, "base") ||
 726             !strcmp(v, "flags") ||
 727             !strcmp(v, "scale") ||
 728             !strcmp(v, "skin"))
 729         {
 730             /* skip line */
 731             ch = lex_getch(lex);
 732             while (ch != EOF && ch != '\n')
 733                 ch = lex_getch(lex);
 734             return lex_do(lex);
 735         }
 736
 737         for (frame = 0; frame < lex->frames_count; ++frame) {
 738             if (!strcmp(v, lex->frames[frame].name)) {
 739                 lex->tok.constval.i = lex->frames[frame].value;
 740                 return (lex->tok.ttype = TOKEN_INTCONST);
 741             }
 742         }
 743
 744         lexerror(lex, "invalid frame macro");
 745         return lex_do(lex);
 746     }
 747
 748     /* single-character tokens */
 749     switch (ch)
 750     {
 751         case '(':
 752             if (!lex_tokench(lex, ch) ||
 753                 !lex_endtoken(lex))
 754             {
 755                 return (lex->tok.ttype = TOKEN_FATAL);
 756             }
 757             if (lex->flags.noops)
 758                 return (lex->tok.ttype = ch);
 759             else
 760                 return (lex->tok.ttype = TOKEN_OPERATOR);
 761         case ')':
 762         case ';':
 763         case '{':
 764         case '}':
 765         case '[':
 766         case ']':
 767
 768         case '#':
 769             if (!lex_tokench(lex, ch) ||
 770                 !lex_endtoken(lex))
 771             {
 772                 return (lex->tok.ttype = TOKEN_FATAL);
 773             }
 774             return (lex->tok.ttype = ch);
 775         default:
 776             break;
 777     }
 778
 779     if (lex->flags.noops)
 780     {
 781         /* Detect characters early which are normally
 782          * operators OR PART of an operator.
 783          */
 784         switch (ch)
 785         {
 786             case '+':
 787             case '-':
 788             case '*':
 789             case '/':
 790             case '<':
 791             case '>':
 792             case '=':
 793             case '&':
 794             case '|':
 795             case '^':
 796             case '~':
 797             case ',':
 798             case '!':
 799                 if (!lex_tokench(lex, ch) ||
 800                     !lex_endtoken(lex))
 801                 {
 802                     return (lex->tok.ttype = TOKEN_FATAL);
 803                 }
 804                 return (lex->tok.ttype = ch);
 805             default:
 806                 break;
 807         }
 808
 809         if (ch == '.')
 810         {
 811             if (!lex_tokench(lex, ch))
 812                 return (lex->tok.ttype = TOKEN_FATAL);
 813             /* peak ahead once */
 814             nextch = lex_getch(lex);
 815             if (nextch != '.') {
 816                 lex_ungetch(lex, nextch);
 817                 if (!lex_endtoken(lex))
 818                     return (lex->tok.ttype = TOKEN_FATAL);
 819                 return (lex->tok.ttype = ch);
 820             }
 821             /* peak ahead again */
 822             nextch = lex_getch(lex);
 823             if (nextch != '.') {
 824                 lex_ungetch(lex, nextch);
 825                 lex_ungetch(lex, nextch);
 826                 if (!lex_endtoken(lex))
 827                     return (lex->tok.ttype = TOKEN_FATAL);
 828                 return (lex->tok.ttype = ch);
 829             }
 830             /* fill the token to be "..." */
 831             if (!lex_tokench(lex, ch) ||
 832                 !lex_tokench(lex, ch) ||
 833                 !lex_endtoken(lex))
 834             {
 835                 return (lex->tok.ttype = TOKEN_FATAL);
 836             }
 837             return (lex->tok.ttype = TOKEN_DOTS);
 838         }
 839     }
 840
 841     if (ch == ',' || ch == '.') {
 842         if (!lex_tokench(lex, ch) ||
 843             !lex_endtoken(lex))
 844         {
 845             return (lex->tok.ttype = TOKEN_FATAL);
 846         }
 847         return (lex->tok.ttype = TOKEN_OPERATOR);
 848     }
 849
 850     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 851         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 852         ch == '=' || ch == '!' || /* ==, != */
 853         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 854     {
 855         if (!lex_tokench(lex, ch))
 856             return (lex->tok.ttype = TOKEN_FATAL);
 857
 858         nextch = lex_getch(lex);
 859         if (nextch == ch || nextch == '=') {
 860             if (!lex_tokench(lex, nextch))
 861                 return (lex->tok.ttype = TOKEN_FATAL);
 862         } else if (ch == '-' && nextch == '>') {
 863             if (!lex_tokench(lex, nextch))
 864                 return (lex->tok.ttype = TOKEN_FATAL);
 865         } else
 866             lex_ungetch(lex, nextch);
 867
 868         if (!lex_endtoken(lex))
 869             return (lex->tok.ttype = TOKEN_FATAL);
 870         return (lex->tok.ttype = TOKEN_OPERATOR);
 871     }
 872
 873     /*
 874     if (ch == '^' || ch == '~' || ch == '!')
 875     {
 876         if (!lex_tokench(lex, ch) ||
 877             !lex_endtoken(lex))
 878         {
 879             return (lex->tok.ttype = TOKEN_FATAL);
 880         }
 881         return (lex->tok.ttype = TOKEN_OPERATOR);
 882     }
 883     */
 884
 885     if (ch == '*' || ch == '/') /* *=, /= */
 886     {
 887         if (!lex_tokench(lex, ch))
 888             return (lex->tok.ttype = TOKEN_FATAL);
 889
 890         nextch = lex_getch(lex);
 891         if (nextch == '=') {
 892             if (!lex_tokench(lex, nextch))
 893                 return (lex->tok.ttype = TOKEN_FATAL);
 894         } else
 895             lex_ungetch(lex, nextch);
 896
 897         if (!lex_endtoken(lex))
 898             return (lex->tok.ttype = TOKEN_FATAL);
 899         return (lex->tok.ttype = TOKEN_OPERATOR);
 900     }
 901
 902     if (isident_start(ch))
 903     {
 904         const char *v;
 905
 906         if (!lex_tokench(lex, ch))
 907             return (lex->tok.ttype = TOKEN_FATAL);
 908         if (!lex_finish_ident(lex)) {
 909             /* error? */
 910             return (lex->tok.ttype = TOKEN_ERROR);
 911         }
 912         if (!lex_endtoken(lex))
 913             return (lex->tok.ttype = TOKEN_FATAL);
 914         lex->tok.ttype = TOKEN_IDENT;
 915
 916         v = lex->tok.value;
 917         if (!strcmp(v, "void")) {
 918             lex->tok.ttype = TOKEN_TYPENAME;
 919             lex->tok.constval.t = TYPE_VOID;
 920         } else if (!strcmp(v, "int")) {
 921             lex->tok.ttype = TOKEN_TYPENAME;
 922             lex->tok.constval.t = TYPE_INTEGER;
 923         } else if (!strcmp(v, "float")) {
 924             lex->tok.ttype = TOKEN_TYPENAME;
 925             lex->tok.constval.t = TYPE_FLOAT;
 926         } else if (!strcmp(v, "string")) {
 927             lex->tok.ttype = TOKEN_TYPENAME;
 928             lex->tok.constval.t = TYPE_STRING;
 929         } else if (!strcmp(v, "entity")) {
 930             lex->tok.ttype = TOKEN_TYPENAME;
 931             lex->tok.constval.t = TYPE_ENTITY;
 932         } else if (!strcmp(v, "vector")) {
 933             lex->tok.ttype = TOKEN_TYPENAME;
 934             lex->tok.constval.t = TYPE_VECTOR;
 935         } else if (!strcmp(v, "for")  ||
 936                  !strcmp(v, "while")  ||
 937                  !strcmp(v, "do")     ||
 938                  !strcmp(v, "if")     ||
 939                  !strcmp(v, "else")   ||
 940                  !strcmp(v, "local")  ||
 941                  !strcmp(v, "return") ||
 942                  !strcmp(v, "const"))
 943             lex->tok.ttype = TOKEN_KEYWORD;
 944
 945         return lex->tok.ttype;
 946     }
 947
 948     if (ch == '"')
 949     {
 950         lex->tok.ttype = lex_finish_string(lex, '"');
 951         while (lex->tok.ttype == TOKEN_STRINGCONST)
 952         {
 953             /* Allow c style "string" "continuation" */
 954             ch = lex_skipwhite(lex);
 955             if (ch != '"') {
 956                 lex_ungetch(lex, ch);
 957                 break;
 958             }
 959
 960             lex->tok.ttype = lex_finish_string(lex, '"');
 961         }
 962         if (!lex_endtoken(lex))
 963             return (lex->tok.ttype = TOKEN_FATAL);
 964         return lex->tok.ttype;
 965     }
 966
 967     if (ch == '\'')
 968     {
 969         /* we parse character constants like string,
 970          * but return TOKEN_CHARCONST, or a vector type if it fits...
 971          * Likewise actual unescaping has to be done by the parser.
 972          * The difference is we don't allow 'char' 'continuation'.
 973          */
 974          lex->tok.ttype = lex_finish_string(lex, '\'');
 975          if (!lex_endtoken(lex))
 976               return (lex->tok.ttype = TOKEN_FATAL);
 977
 978          /* It's a vector if we can successfully scan 3 floats */
 979 #ifdef WIN32
 980          if (sscanf_s(lex->tok.value, " %f %f %f ",
 981                     &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
 982 #else
 983          if (sscanf(lex->tok.value, " %f %f %f ",
 984                     &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
 985 #endif
 986          {
 987               lex->tok.ttype = TOKEN_VECTORCONST;
 988          }
 989
 990          return lex->tok.ttype;
 991     }
 992
 993     if (isdigit(ch))
 994     {
 995         lex->tok.ttype = lex_finish_digit(lex, ch);
 996         if (!lex_endtoken(lex))
 997             return (lex->tok.ttype = TOKEN_FATAL);
 998         return lex->tok.ttype;
 999     }
1000
1001     lexerror(lex, "unknown token");
1002     return (lex->tok.ttype = TOKEN_ERROR);
1003 }