lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
   9 MEM_VEC_FUNCTIONS(token, char, value) /* presumably provides token_value_add(), used below to grow a token's text - confirm in gmqcc.h */
  10 MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames) /* presumably provides lex_file_frames_add(), used below for the $frame macro list - confirm in gmqcc.h */
  11
  12 VECTOR_MAKE(char*, lex_filenames); /* this file uses lex_filenames_add/_data/_elements; lex_open() stores names here and lex_cleanup() frees them */
  13
  14 void lexerror(lex_file *lex, const char *fmt, ...)
  15 {
  16     va_list ap;
  17
  18     if (lex)
  19         printf("error %s:%lu: ", lex->name, (unsigned long)lex->sline);
  20     else
  21         printf("error: ");
  22
  23     va_start(ap, fmt);
  24     vprintf(fmt, ap);
  25     va_end(ap);
  26
  27     printf("\n");
  28 }
  29
  30 void lexwarn(lex_file *lex, int warn, const char *fmt, ...)
  31 {
  32     va_list ap;
  33
  34     if (!OPTS_WARN(warn))
  35         return;
  36
  37     if (lex)
  38         printf("warning %s:%lu: ", lex->name, (unsigned long)lex->sline);
  39     else
  40         printf("warning: ");
  41
  42     va_start(ap, fmt);
  43     vprintf(fmt, ap);
  44     va_end(ap);
  45
  46     printf("\n");
  47 }
  48
  49 token* token_new()
  50 {
  51     token *tok = (token*)mem_a(sizeof(token));
  52     if (!tok)
  53         return NULL;
  54     memset(tok, 0, sizeof(*tok));
  55     return tok;
  56 }
  57
  58 void token_delete(token *self)
  59 {
  60     if (self->next && self->next->prev == self)
  61         self->next->prev = self->prev;
  62     if (self->prev && self->prev->next == self)
  63         self->prev->next = self->next;
  64     MEM_VECTOR_CLEAR(self, value);
  65     mem_d(self);
  66 }
  67
  68 token* token_copy(const token *cp)
  69 {
  70     token* self = token_new();
  71     if (!self)
  72         return NULL;
  73     /* copy the value */
  74     self->value_alloc = cp->value_count + 1;
  75     self->value_count = cp->value_count;
  76     self->value = (char*)mem_a(self->value_alloc);
  77     if (!self->value) {
  78         mem_d(self);
  79         return NULL;
  80     }
  81     memcpy(self->value, cp->value, cp->value_count);
  82     self->value[self->value_alloc-1] = 0;
  83
  84     /* rest */
  85     self->ctx = cp->ctx;
  86     self->ttype = cp->ttype;
  87     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  88     return self;
  89 }
  90
  91 void token_delete_all(token *t)
  92 {
  93     token *n;
  94
  95     do {
  96         n = t->next;
  97         token_delete(t);
  98         t = n;
  99     } while(t);
 100 }
 101
 102 token* token_copy_all(const token *cp)
 103 {
 104     token *cur;
 105     token *out;
 106
 107     out = cur = token_copy(cp);
 108     if (!out)
 109         return NULL;
 110
 111     while (cp->next) {
 112         cp = cp->next;
 113         cur->next = token_copy(cp);
 114         if (!cur->next) {
 115             token_delete_all(out);
 116             return NULL;
 117         }
 118         cur->next->prev = cur;
 119         cur = cur->next;
 120     }
 121
 122     return out;
 123 }
 124
 125 lex_file* lex_open(const char *file)
 126 {
 127     lex_file *lex;
 128     FILE *in = util_fopen(file, "rb");
 129
 130     if (!in) {
 131         lexerror(NULL, "open failed: '%s'\n", file);
 132         return NULL;
 133     }
 134
 135     lex = (lex_file*)mem_a(sizeof(*lex));
 136     if (!lex) {
 137         fclose(in);
 138         lexerror(NULL, "out of memory\n");
 139         return NULL;
 140     }
 141
 142     memset(lex, 0, sizeof(*lex));
 143
 144     lex->file = in;
 145     lex->name = util_strdup(file);
 146     lex->line = 1; /* we start counting at 1 */
 147
 148     lex->peekpos = 0;
 149     lex->eof = false;
 150
 151     lex_filenames_add(lex->name);
 152
 153     return lex;
 154 }
 155
 156 void lex_cleanup(void)
 157 {
 158     size_t i;
 159     for (i = 0; i < lex_filenames_elements; ++i)
 160         mem_d(lex_filenames_data[i]);
 161     mem_d(lex_filenames_data);
 162 }
 163
 164 void lex_close(lex_file *lex)
 165 {
 166     size_t i;
 167     for (i = 0; i < lex->frames_count; ++i)
 168         mem_d(lex->frames[i].name);
 169     MEM_VECTOR_CLEAR(lex, frames);
 170
 171     if (lex->modelname)
 172         mem_d(lex->modelname);
 173
 174     if (lex->file)
 175         fclose(lex->file);
 176     if (lex->tok)
 177         token_delete(lex->tok);
 178     /* mem_d(lex->name); collected in lex_filenames */
 179     mem_d(lex);
 180 }
 181
 182 /* Get or put-back data
 183  * The following to functions do NOT understand what kind of data they
 184  * are working on.
 185  * The are merely wrapping get/put in order to count line numbers.
 186  */
 187 static int lex_getch(lex_file *lex)
 188 {
 189     int ch;
 190
 191     if (lex->peekpos) {
 192         lex->peekpos--;
 193         if (lex->peek[lex->peekpos] == '\n')
 194             lex->line++;
 195         return lex->peek[lex->peekpos];
 196     }
 197
 198     ch = fgetc(lex->file);
 199     if (ch == '\n')
 200         lex->line++;
 201     return ch;
 202 }
 203
 204 static void lex_ungetch(lex_file *lex, int ch)
 205 {
 206     lex->peek[lex->peekpos++] = ch;
 207     if (ch == '\n')
 208         lex->line--;
 209 }
 210
 211 /* classify characters
 212  * some additions to the is*() functions of ctype.h
 213  */
 214
 215 /* Idents are alphanumberic, but they start with alpha or _ */
 216 static bool isident_start(int ch)
 217 {
 218     return isalpha(ch) || ch == '_';
 219 }
 220
 221 static bool isident(int ch)
 222 {
 223     return isident_start(ch) || isdigit(ch);
 224 }
 225
 226 /* isxdigit_only is used when we already know it's not a digit
 227  * and want to see if it's a hex digit anyway.
 228  */
 229 static bool isxdigit_only(int ch)
 230 {
 231     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 232 }
 233
 234 /* Skip whitespace and comments and return the first
 235  * non-white character.
 236  * As this makes use of the above getch() ungetch() functions,
 237  * we don't need to care at all about line numbering anymore.
 238  *
 239  * In theory, this function should only be used at the beginning
 240  * of lexing, or when we *know* the next character is part of the token.
 241  * Otherwise, if the parser throws an error, the linenumber may not be
 242  * the line of the error, but the line of the next token AFTER the error.
 243  *
 244  * This is currently only problematic when using c-like string-continuation,
 245  * since comments and whitespaces are allowed between 2 such strings.
 246  * Example:
 247 printf(   "line one\n"
 248 // A comment
 249           "A continuation of the previous string"
 250 // This line is skipped
 251       , foo);
 252
 253  * In this case, if the parse decides it didn't actually want a string,
 254  * and uses lex->line to print an error, it will show the ', foo);' line's
 255  * linenumber.
 256  *
 257  * On the other hand, the parser is supposed to remember the line of the next
 258  * token's beginning. In this case we would want skipwhite() to be called
 259  * AFTER reading a token, so that the parser, before reading the NEXT token,
 260  * doesn't store teh *comment's* linenumber, but the actual token's linenumber.
 261  *
 262  * THIS SOLUTION
 263  *    here is to store the line of the first character after skipping
 264  *    the initial whitespace in lex->sline, this happens in lex_do.
 265  */
 266 static int lex_skipwhite(lex_file *lex)
 267 {
 268     int ch = 0;
 269
 270     do
 271     {
 272         ch = lex_getch(lex);
 273         while (ch != EOF && isspace(ch)) ch = lex_getch(lex);
 274
 275         if (ch == '/') {
 276             ch = lex_getch(lex);
 277             if (ch == '/')
 278             {
 279                 /* one line comment */
 280                 ch = lex_getch(lex);
 281
 282                 /* check for special: '/', '/', '*', '/' */
 283                 if (ch == '*') {
 284                     ch = lex_getch(lex);
 285                     if (ch == '/') {
 286                         ch = ' ';
 287                         continue;
 288                     }
 289                 }
 290
 291                 while (ch != EOF && ch != '\n') {
 292                     ch = lex_getch(lex);
 293                 }
 294                 continue;
 295             }
 296             if (ch == '*')
 297             {
 298                 /* multiline comment */
 299                 while (ch != EOF)
 300                 {
 301                     ch = lex_getch(lex);
 302                     if (ch == '*') {
 303                         ch = lex_getch(lex);
 304                         if (ch == '/') {
 305                             ch = lex_getch(lex);
 306                             break;
 307                         }
 308                     }
 309                 }
 310                 if (ch == '/') /* allow *//* direct following comment */
 311                 {
 312                     lex_ungetch(lex, ch);
 313                     ch = ' '; /* cause TRUE in the isspace check */
 314                 }
 315                 continue;
 316             }
 317             /* Otherwise roll back to the slash and break out of the loop */
 318             lex_ungetch(lex, ch);
 319             ch = '/';
 320             break;
 321         }
 322     } while (ch != EOF && isspace(ch));
 323
 324     return ch;
 325 }
 326
 327 /* Append a character to the token buffer */
 328 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
 329 {
 330     if (!token_value_add(lex->tok, ch)) {
 331         lexerror(lex, "out of memory");
 332         return false;
 333     }
 334     return true;
 335 }
 336
 337 /* Append a trailing null-byte */
 338 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
 339 {
 340     if (!token_value_add(lex->tok, 0)) {
 341         lexerror(lex, "out of memory");
 342         return false;
 343     }
 344     lex->tok->value_count--;
 345     return true;
 346 }
 347
 348 /* Get a token */
 349 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 350 {
 351     int ch;
 352
 353     ch = lex_getch(lex);
 354     while (ch != EOF && isident(ch))
 355     {
 356         if (!lex_tokench(lex, ch))
 357             return (lex->tok->ttype = TOKEN_FATAL);
 358         ch = lex_getch(lex);
 359     }
 360
 361     /* last ch was not an ident ch: */
 362     lex_ungetch(lex, ch);
 363
 364     return true;
 365 }
 366
 367 /* read one ident for the frame list */
 368 static int lex_parse_frame(lex_file *lex)
 369 {
 370     int ch;
 371
 372     if (lex->tok)
 373         token_delete(lex->tok);
 374     lex->tok = token_new();
 375
 376     ch = lex_getch(lex);
 377     while (ch != EOF && ch != '\n' && isspace(ch))
 378         ch = lex_getch(lex);
 379
 380     if (ch == '\n')
 381         return 1;
 382
 383     if (!isident_start(ch)) {
 384         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 385         return -1;
 386     }
 387
 388     if (!lex_tokench(lex, ch))
 389         return -1;
 390     if (!lex_finish_ident(lex))
 391         return -1;
 392     if (!lex_endtoken(lex))
 393         return -1;
 394     return 0;
 395 }
 396
 397 /* read a list of $frames */
 398 static bool lex_finish_frames(lex_file *lex)
 399 {
 400     do {
 401         int rc;
 402         frame_macro m;
 403
 404         rc = lex_parse_frame(lex);
 405         if (rc > 0) /* end of line */
 406             return true;
 407         if (rc < 0) /* error */
 408             return false;
 409
 410         m.value = lex->framevalue++;
 411         m.name = lex->tok->value;
 412         lex->tok->value = NULL;
 413         if (!lex_file_frames_add(lex, m)) {
 414             lexerror(lex, "out of memory");
 415             return false;
 416         }
 417     } while (true);
 418 }
 419
 420 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 421 {
 422     int ch = 0;
 423
 424     while (ch != EOF)
 425     {
 426         ch = lex_getch(lex);
 427         if (ch == quote)
 428             return TOKEN_STRINGCONST;
 429
 430         if (ch == '\\') {
 431             ch = lex_getch(lex);
 432             if (ch == EOF) {
 433                 lexerror(lex, "unexpected end of file");
 434                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 435                 return (lex->tok->ttype = TOKEN_ERROR);
 436             }
 437
 438             switch (ch) {
 439             case '\\': break;
 440             case 'a':  ch = '\a'; break;
 441             case 'b':  ch = '\b'; break;
 442             case 'r':  ch = '\r'; break;
 443             case 'n':  ch = '\n'; break;
 444             case 't':  ch = '\t'; break;
 445             case 'f':  ch = '\f'; break;
 446             case 'v':  ch = '\v'; break;
 447             default:
 448                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 449                 /* so we just add the character plus backslash no matter what it actually is */
 450                 if (!lex_tokench(lex, '\\'))
 451                     return (lex->tok->ttype = TOKEN_FATAL);
 452             }
 453             /* add the character finally */
 454             if (!lex_tokench(lex, ch))
 455                 return (lex->tok->ttype = TOKEN_FATAL);
 456         }
 457         else if (!lex_tokench(lex, ch))
 458             return (lex->tok->ttype = TOKEN_FATAL);
 459     }
 460     lexerror(lex, "unexpected end of file within string constant");
 461     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 462     return (lex->tok->ttype = TOKEN_ERROR);
 463 }
 464
 465 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 466 {
 467     bool ishex = false;
 468
 469     int  ch = lastch;
 470
 471     /* parse a number... */
 472     lex->tok->ttype = TOKEN_INTCONST;
 473
 474     if (!lex_tokench(lex, ch))
 475         return (lex->tok->ttype = TOKEN_FATAL);
 476
 477     ch = lex_getch(lex);
 478     if (ch != '.' && !isdigit(ch))
 479     {
 480         if (lastch != '0' || ch != 'x')
 481         {
 482             /* end of the number or EOF */
 483             lex_ungetch(lex, ch);
 484             if (!lex_endtoken(lex))
 485                 return (lex->tok->ttype = TOKEN_FATAL);
 486
 487             lex->tok->constval.i = lastch - '0';
 488             return lex->tok->ttype;
 489         }
 490
 491         ishex = true;
 492     }
 493
 494     /* EOF would have been caught above */
 495
 496     if (ch != '.')
 497     {
 498         if (!lex_tokench(lex, ch))
 499             return (lex->tok->ttype = TOKEN_FATAL);
 500         ch = lex_getch(lex);
 501         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 502         {
 503             if (!lex_tokench(lex, ch))
 504                 return (lex->tok->ttype = TOKEN_FATAL);
 505             ch = lex_getch(lex);
 506         }
 507     }
 508     /* NOT else, '.' can come from above as well */
 509     if (ch == '.' && !ishex)
 510     {
 511         /* Allow floating comma in non-hex mode */
 512         lex->tok->ttype = TOKEN_FLOATCONST;
 513         if (!lex_tokench(lex, ch))
 514             return (lex->tok->ttype = TOKEN_FATAL);
 515
 516         /* continue digits-only */
 517         ch = lex_getch(lex);
 518         while (isdigit(ch))
 519         {
 520             if (!lex_tokench(lex, ch))
 521                 return (lex->tok->ttype = TOKEN_FATAL);
 522             ch = lex_getch(lex);
 523         }
 524     }
 525     /* put back the last character */
 526     /* but do not put back the trailing 'f' or a float */
 527     if (lex->tok->ttype == TOKEN_FLOATCONST && ch == 'f')
 528         ch = lex_getch(lex);
 529
 530     /* generally we don't want words to follow numbers: */
 531     if (isident(ch)) {
 532         lexerror(lex, "unexpected trailing characters after number");
 533         return (lex->tok->ttype = TOKEN_ERROR);
 534     }
 535     lex_ungetch(lex, ch);
 536
 537     if (!lex_endtoken(lex))
 538         return (lex->tok->ttype = TOKEN_FATAL);
 539     if (lex->tok->ttype == TOKEN_FLOATCONST)
 540         lex->tok->constval.f = strtod(lex->tok->value, NULL);
 541     else
 542         lex->tok->constval.i = strtol(lex->tok->value, NULL, 0);
 543     return lex->tok->ttype;
 544 }
 545
 546 int lex_do(lex_file *lex)
 547 {
 548     int ch, nextch;
 549
 550     if (lex->tok)
 551         token_delete(lex->tok);
 552     lex->tok = token_new();
 553     if (!lex->tok)
 554         return TOKEN_FATAL;
 555
 556     ch = lex_skipwhite(lex);
 557     lex->sline = lex->line;
 558     lex->tok->ctx.line = lex->sline;
 559     lex->tok->ctx.file = lex->name;
 560
 561     if (lex->eof)
 562         return (lex->tok->ttype = TOKEN_FATAL);
 563
 564     if (ch == EOF) {
 565         lex->eof = true;
 566         return (lex->tok->ttype = TOKEN_EOF);
 567     }
 568
 569     /* modelgen / spiritgen commands */
 570     if (ch == '$') {
 571         const char *v;
 572         size_t frame;
 573
 574         ch = lex_getch(lex);
 575         if (!isident_start(ch)) {
 576             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 577             return lex_do(lex);
 578         }
 579         if (!lex_tokench(lex, ch))
 580             return (lex->tok->ttype = TOKEN_FATAL);
 581         if (!lex_finish_ident(lex))
 582             return (lex->tok->ttype = TOKEN_ERROR);
 583         if (!lex_endtoken(lex))
 584             return (lex->tok->ttype = TOKEN_FATAL);
 585         /* skip the known commands */
 586         v = lex->tok->value;
 587
 588         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 589         {
 590             /* frame/framesave command works like an enum
 591              * similar to fteqcc we handle this in the lexer.
 592              * The reason for this is that it is sensitive to newlines,
 593              * which the parser is unaware of
 594              */
 595             if (!lex_finish_frames(lex))
 596                  return (lex->tok->ttype = TOKEN_ERROR);
 597             return lex_do(lex);
 598         }
 599
 600         if (!strcmp(v, "framevalue"))
 601         {
 602             ch = lex_getch(lex);
 603             while (ch != EOF && isspace(ch) && ch != '\n')
 604                 ch = lex_getch(lex);
 605
 606             if (!isdigit(ch)) {
 607                 lexerror(lex, "$framevalue requires an integer parameter");
 608                 return lex_do(lex);
 609             }
 610
 611             token_delete(lex->tok);
 612             lex->tok = token_new();
 613             lex->tok->ttype = lex_finish_digit(lex, ch);
 614             if (!lex_endtoken(lex))
 615                 return (lex->tok->ttype = TOKEN_FATAL);
 616             if (lex->tok->ttype != TOKEN_INTCONST) {
 617                 lexerror(lex, "$framevalue requires an integer parameter");
 618                 return lex_do(lex);
 619             }
 620             lex->framevalue = lex->tok->constval.i;
 621             return lex_do(lex);
 622         }
 623
 624         if (!strcmp(v, "framerestore"))
 625         {
 626             int rc;
 627
 628             token_delete(lex->tok);
 629             lex->tok = token_new();
 630
 631             rc = lex_parse_frame(lex);
 632
 633             if (rc > 0) {
 634                 lexerror(lex, "$framerestore requires a framename parameter");
 635                 return lex_do(lex);
 636             }
 637             if (rc < 0)
 638                 return (lex->tok->ttype = TOKEN_FATAL);
 639
 640             v = lex->tok->value;
 641             for (frame = 0; frame < lex->frames_count; ++frame) {
 642                 if (!strcmp(v, lex->frames[frame].name)) {
 643                     lex->framevalue = lex->frames[frame].value;
 644                     return lex_do(lex);
 645                 }
 646             }
 647             lexerror(lex, "unknown framename `%s`", v);
 648             return lex_do(lex);
 649         }
 650
 651         if (!strcmp(v, "modelname"))
 652         {
 653             int rc;
 654
 655             token_delete(lex->tok);
 656             lex->tok = token_new();
 657
 658             rc = lex_parse_frame(lex);
 659
 660             if (rc > 0) {
 661                 lexerror(lex, "$framerestore requires a framename parameter");
 662                 return lex_do(lex);
 663             }
 664             if (rc < 0)
 665                 return (lex->tok->ttype = TOKEN_FATAL);
 666
 667             v = lex->tok->value;
 668             if (lex->modelname) {
 669                 frame_macro m;
 670                 m.value = lex->framevalue;
 671                 m.name = lex->modelname;
 672                 lex->modelname = NULL;
 673                 if (!lex_file_frames_add(lex, m)) {
 674                     lexerror(lex, "out of memory");
 675                     return (lex->tok->ttype = TOKEN_FATAL);
 676                 }
 677             }
 678             lex->modelname = lex->tok->value;
 679             lex->tok->value = NULL;
 680             for (frame = 0; frame < lex->frames_count; ++frame) {
 681                 if (!strcmp(v, lex->frames[frame].name)) {
 682                     lex->framevalue = lex->frames[frame].value;
 683                     break;
 684                 }
 685             }
 686             return lex_do(lex);
 687         }
 688
 689         if (!strcmp(v, "flush"))
 690         {
 691             size_t frame;
 692             for (frame = 0; frame < lex->frames_count; ++frame)
 693                 mem_d(lex->frames[frame].name);
 694             MEM_VECTOR_CLEAR(lex, frames);
 695             /* skip line (fteqcc does it too) */
 696             ch = lex_getch(lex);
 697             while (ch != EOF && ch != '\n')
 698                 ch = lex_getch(lex);
 699             return lex_do(lex);
 700         }
 701
 702         if (!strcmp(v, "cd") ||
 703             !strcmp(v, "origin") ||
 704             !strcmp(v, "base") ||
 705             !strcmp(v, "flags") ||
 706             !strcmp(v, "scale") ||
 707             !strcmp(v, "skin"))
 708         {
 709             /* skip line */
 710             ch = lex_getch(lex);
 711             while (ch != EOF && ch != '\n')
 712                 ch = lex_getch(lex);
 713             return lex_do(lex);
 714         }
 715
 716         for (frame = 0; frame < lex->frames_count; ++frame) {
 717             if (!strcmp(v, lex->frames[frame].name)) {
 718                 lex->tok->constval.i = lex->frames[frame].value;
 719                 return (lex->tok->ttype = TOKEN_INTCONST);
 720             }
 721         }
 722
 723         lexerror(lex, "invalid frame macro");
 724         return lex_do(lex);
 725     }
 726
 727     /* single-character tokens */
 728     switch (ch)
 729     {
 730         case '(':
 731             if (!lex_tokench(lex, ch) ||
 732                 !lex_endtoken(lex))
 733             {
 734                 return (lex->tok->ttype = TOKEN_FATAL);
 735             }
 736             if (lex->flags.noops)
 737                 return (lex->tok->ttype = ch);
 738             else
 739                 return (lex->tok->ttype = TOKEN_OPERATOR);
 740         case ')':
 741         case ';':
 742         case '{':
 743         case '}':
 744         case '[':
 745         case ']':
 746
 747         case '#':
 748             if (!lex_tokench(lex, ch) ||
 749                 !lex_endtoken(lex))
 750             {
 751                 return (lex->tok->ttype = TOKEN_FATAL);
 752             }
 753             return (lex->tok->ttype = ch);
 754         default:
 755             break;
 756     }
 757
 758     if (lex->flags.noops)
 759     {
 760         /* Detect characters early which are normally
 761          * operators OR PART of an operator.
 762          */
 763         switch (ch)
 764         {
 765             case '+':
 766             case '-':
 767             case '*':
 768             case '/':
 769             case '<':
 770             case '>':
 771             case '=':
 772             case '&':
 773             case '|':
 774             case '^':
 775             case '~':
 776             case ',':
 777             case '.':
 778             case '!':
 779                 if (!lex_tokench(lex, ch) ||
 780                     !lex_endtoken(lex))
 781                 {
 782                     return (lex->tok->ttype = TOKEN_FATAL);
 783                 }
 784                 return (lex->tok->ttype = ch);
 785             default:
 786                 break;
 787         }
 788     }
 789
 790     if (ch == ',' || ch == '.') {
 791         if (!lex_tokench(lex, ch) ||
 792             !lex_endtoken(lex))
 793         {
 794             return (lex->tok->ttype = TOKEN_FATAL);
 795         }
 796         return (lex->tok->ttype = TOKEN_OPERATOR);
 797     }
 798
 799     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 800         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 801         ch == '=' || ch == '!' || /* ==, != */
 802         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 803     {
 804         if (!lex_tokench(lex, ch))
 805             return (lex->tok->ttype = TOKEN_FATAL);
 806
 807         nextch = lex_getch(lex);
 808         if (nextch == ch || nextch == '=') {
 809             if (!lex_tokench(lex, nextch))
 810                 return (lex->tok->ttype = TOKEN_FATAL);
 811         } else if (ch == '-' && nextch == '>') {
 812             if (!lex_tokench(lex, nextch))
 813                 return (lex->tok->ttype = TOKEN_FATAL);
 814         } else
 815             lex_ungetch(lex, nextch);
 816
 817         if (!lex_endtoken(lex))
 818             return (lex->tok->ttype = TOKEN_FATAL);
 819         return (lex->tok->ttype = TOKEN_OPERATOR);
 820     }
 821
 822     /*
 823     if (ch == '^' || ch == '~' || ch == '!')
 824     {
 825         if (!lex_tokench(lex, ch) ||
 826             !lex_endtoken(lex))
 827         {
 828             return (lex->tok->ttype = TOKEN_FATAL);
 829         }
 830         return (lex->tok->ttype = TOKEN_OPERATOR);
 831     }
 832     */
 833
 834     if (ch == '*' || ch == '/') /* *=, /= */
 835     {
 836         if (!lex_tokench(lex, ch))
 837             return (lex->tok->ttype = TOKEN_FATAL);
 838
 839         nextch = lex_getch(lex);
 840         if (nextch == '=') {
 841             if (!lex_tokench(lex, nextch))
 842                 return (lex->tok->ttype = TOKEN_FATAL);
 843         } else
 844             lex_ungetch(lex, nextch);
 845
 846         if (!lex_endtoken(lex))
 847             return (lex->tok->ttype = TOKEN_FATAL);
 848         return (lex->tok->ttype = TOKEN_OPERATOR);
 849     }
 850
 851     if (isident_start(ch))
 852     {
 853         const char *v;
 854
 855         if (!lex_tokench(lex, ch))
 856             return (lex->tok->ttype = TOKEN_FATAL);
 857         if (!lex_finish_ident(lex)) {
 858             /* error? */
 859             return (lex->tok->ttype = TOKEN_ERROR);
 860         }
 861         if (!lex_endtoken(lex))
 862             return (lex->tok->ttype = TOKEN_FATAL);
 863         lex->tok->ttype = TOKEN_IDENT;
 864
 865         v = lex->tok->value;
 866         if (!strcmp(v, "void")) {
 867             lex->tok->ttype = TOKEN_TYPENAME;
 868             lex->tok->constval.t = TYPE_VOID;
 869         } else if (!strcmp(v, "int")) {
 870             lex->tok->ttype = TOKEN_TYPENAME;
 871             lex->tok->constval.t = TYPE_INTEGER;
 872         } else if (!strcmp(v, "float")) {
 873             lex->tok->ttype = TOKEN_TYPENAME;
 874             lex->tok->constval.t = TYPE_FLOAT;
 875         } else if (!strcmp(v, "string")) {
 876             lex->tok->ttype = TOKEN_TYPENAME;
 877             lex->tok->constval.t = TYPE_STRING;
 878         } else if (!strcmp(v, "entity")) {
 879             lex->tok->ttype = TOKEN_TYPENAME;
 880             lex->tok->constval.t = TYPE_ENTITY;
 881         } else if (!strcmp(v, "vector")) {
 882             lex->tok->ttype = TOKEN_TYPENAME;
 883             lex->tok->constval.t = TYPE_VECTOR;
 884         } else if (!strcmp(v, "for")  ||
 885                  !strcmp(v, "while")  ||
 886                  !strcmp(v, "do")     ||
 887                  !strcmp(v, "if")     ||
 888                  !strcmp(v, "else")   ||
 889                  !strcmp(v, "local")  ||
 890                  !strcmp(v, "return") ||
 891                  !strcmp(v, "const"))
 892             lex->tok->ttype = TOKEN_KEYWORD;
 893
 894         return lex->tok->ttype;
 895     }
 896
 897     if (ch == '"')
 898     {
 899         lex->tok->ttype = lex_finish_string(lex, '"');
 900         while (lex->tok->ttype == TOKEN_STRINGCONST)
 901         {
 902             /* Allow c style "string" "continuation" */
 903             ch = lex_skipwhite(lex);
 904             if (ch != '"') {
 905                 lex_ungetch(lex, ch);
 906                 break;
 907             }
 908
 909             lex->tok->ttype = lex_finish_string(lex, '"');
 910         }
 911         if (!lex_endtoken(lex))
 912             return (lex->tok->ttype = TOKEN_FATAL);
 913         return lex->tok->ttype;
 914     }
 915
 916     if (ch == '\'')
 917     {
 918         /* we parse character constants like string,
 919          * but return TOKEN_CHARCONST, or a vector type if it fits...
 920          * Likewise actual unescaping has to be done by the parser.
 921          * The difference is we don't allow 'char' 'continuation'.
 922          */
 923          lex->tok->ttype = lex_finish_string(lex, '\'');
 924          if (!lex_endtoken(lex))
 925               return (lex->tok->ttype = TOKEN_FATAL);
 926
 927          /* It's a vector if we can successfully scan 3 floats */
 928 #ifdef WIN32
 929          if (sscanf_s(lex->tok->value, " %f %f %f ",
 930                     &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
 931 #else
 932          if (sscanf(lex->tok->value, " %f %f %f ",
 933                     &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
 934 #endif
 935          {
 936               lex->tok->ttype = TOKEN_VECTORCONST;
 937          }
 938
 939          return lex->tok->ttype;
 940     }
 941
 942     if (isdigit(ch))
 943     {
 944         lex->tok->ttype = lex_finish_digit(lex, ch);
 945         if (!lex_endtoken(lex))
 946             return (lex->tok->ttype = TOKEN_FATAL);
 947         return lex->tok->ttype;
 948     }
 949
 950     lexerror(lex, "unknown token");
 951     return (lex->tok->ttype = TOKEN_ERROR);
 952 }