lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
   /* Instantiate the growable-array helpers for token::value (char elements);
    * presumably this is what provides token_value_add(), which lex_tokench()
    * and lex_endtoken() call below - confirm against the macro definition.
    */
   9 MEM_VEC_FUNCTIONS(token, char, value)
   /* Same for lex_file::frames (frame_macro elements); presumably provides
    * lex_file_frames_add() used by the $frame handling - confirm.
    */
  10 MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames)
  11
   /* Global list of every file name handed out by lex_open(). The names are
    * referenced through lex_filenames_data / lex_filenames_elements and are
    * released in lex_cleanup(), not in lex_close().
    */
  12 VECTOR_MAKE(char*, lex_filenames);
  13
  14 void lexerror(lex_file *lex, const char *fmt, ...)
  15 {
  16     va_list ap;
  17
  18     if (lex)
  19         printf("error %s:%lu: ", lex->name, (unsigned long)lex->sline);
  20     else
  21         printf("error: ");
  22
  23     va_start(ap, fmt);
  24     vprintf(fmt, ap);
  25     va_end(ap);
  26
  27     printf("\n");
  28 }
  29
  30 void lexwarn(lex_file *lex, int warn, const char *fmt, ...)
  31 {
  32     va_list ap;
  33
  34     if (!OPTS_WARN(warn))
  35         return;
  36
  37     if (lex)
  38         printf("warning %s:%lu: ", lex->name, (unsigned long)lex->sline);
  39     else
  40         printf("warning: ");
  41
  42     va_start(ap, fmt);
  43     vprintf(fmt, ap);
  44     va_end(ap);
  45
  46     printf("\n");
  47 }
  48
  49 token* token_new()
  50 {
  51     token *tok = (token*)mem_a(sizeof(token));
  52     if (!tok)
  53         return NULL;
  54     memset(tok, 0, sizeof(*tok));
  55     return tok;
  56 }
  57
  58 void token_delete(token *self)
  59 {
  60     if (self->next && self->next->prev == self)
  61         self->next->prev = self->prev;
  62     if (self->prev && self->prev->next == self)
  63         self->prev->next = self->next;
  64     MEM_VECTOR_CLEAR(self, value);
  65     mem_d(self);
  66 }
  67
  68 token* token_copy(const token *cp)
  69 {
  70     token* self = token_new();
  71     if (!self)
  72         return NULL;
  73     /* copy the value */
  74     self->value_alloc = cp->value_count + 1;
  75     self->value_count = cp->value_count;
  76     self->value = (char*)mem_a(self->value_alloc);
  77     if (!self->value) {
  78         mem_d(self);
  79         return NULL;
  80     }
  81     memcpy(self->value, cp->value, cp->value_count);
  82     self->value[self->value_alloc-1] = 0;
  83
  84     /* rest */
  85     self->ctx = cp->ctx;
  86     self->ttype = cp->ttype;
  87     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  88     return self;
  89 }
  90
  91 void token_delete_all(token *t)
  92 {
  93     token *n;
  94
  95     do {
  96         n = t->next;
  97         token_delete(t);
  98         t = n;
  99     } while(t);
 100 }
 101
 102 token* token_copy_all(const token *cp)
 103 {
 104     token *cur;
 105     token *out;
 106
 107     out = cur = token_copy(cp);
 108     if (!out)
 109         return NULL;
 110
 111     while (cp->next) {
 112         cp = cp->next;
 113         cur->next = token_copy(cp);
 114         if (!cur->next) {
 115             token_delete_all(out);
 116             return NULL;
 117         }
 118         cur->next->prev = cur;
 119         cur = cur->next;
 120     }
 121
 122     return out;
 123 }
 124
 125 lex_file* lex_open(const char *file)
 126 {
 127     lex_file *lex;
 128     FILE *in = util_fopen(file, "rb");
 129
 130     if (!in) {
 131         lexerror(NULL, "open failed: '%s'\n", file);
 132         return NULL;
 133     }
 134
 135     lex = (lex_file*)mem_a(sizeof(*lex));
 136     if (!lex) {
 137         fclose(in);
 138         lexerror(NULL, "out of memory\n");
 139         return NULL;
 140     }
 141
 142     memset(lex, 0, sizeof(*lex));
 143
 144     lex->file = in;
 145     lex->name = util_strdup(file);
 146     lex->line = 1; /* we start counting at 1 */
 147
 148     lex->peekpos = 0;
 149
 150     lex_filenames_add(lex->name);
 151
 152     return lex;
 153 }
 154
 155 void lex_cleanup(void)
 156 {
 157     size_t i;
 158     for (i = 0; i < lex_filenames_elements; ++i)
 159         mem_d(lex_filenames_data[i]);
 160     mem_d(lex_filenames_data);
 161 }
 162
 163 void lex_close(lex_file *lex)
 164 {
 165     size_t i;
 166     for (i = 0; i < lex->frames_count; ++i)
 167         mem_d(lex->frames[i].name);
 168     MEM_VECTOR_CLEAR(lex, frames);
 169
 170     if (lex->modelname)
 171         mem_d(lex->modelname);
 172
 173     if (lex->file)
 174         fclose(lex->file);
 175     if (lex->tok)
 176         token_delete(lex->tok);
 177     /* mem_d(lex->name); collected in lex_filenames */
 178     mem_d(lex);
 179 }
 180
 181 /* Get or put-back data
 182  * The following to functions do NOT understand what kind of data they
 183  * are working on.
 184  * The are merely wrapping get/put in order to count line numbers.
 185  */
 186 static int lex_getch(lex_file *lex)
 187 {
 188     int ch;
 189
 190     if (lex->peekpos) {
 191         lex->peekpos--;
 192         if (lex->peek[lex->peekpos] == '\n')
 193             lex->line++;
 194         return lex->peek[lex->peekpos];
 195     }
 196
 197     ch = fgetc(lex->file);
 198     if (ch == '\n')
 199         lex->line++;
 200     return ch;
 201 }
 202
 203 static void lex_ungetch(lex_file *lex, int ch)
 204 {
 205     lex->peek[lex->peekpos++] = ch;
 206     if (ch == '\n')
 207         lex->line--;
 208 }
 209
 210 /* classify characters
 211  * some additions to the is*() functions of ctype.h
 212  */
 213
 /* Identifiers are alphanumeric, but they must start with a letter or '_'.
  * Returns true when ch may begin an identifier.
  */
 static bool isident_start(int ch)
 {
     if (ch == '_')
         return true;
     return isalpha(ch) != 0;
 }
 219
 /* Returns true when ch may appear inside an identifier: anything that can
  * start one, plus the decimal digits.
  */
 static bool isident(int ch)
 {
     if (isdigit(ch))
         return true;
     return isident_start(ch);
 }
 224
 /* isxdigit_only is used when we already know ch is not a decimal digit
  * and want to see whether it is a hex digit anyway: true only for the
  * letters a-f and A-F.
  */
 static bool isxdigit_only(int ch)
 {
     if (ch >= 'a' && ch <= 'f')
         return true;
     return ch >= 'A' && ch <= 'F';
 }
 232
 233 /* Skip whitespace and comments and return the first
 234  * non-white character.
 235  * As this makes use of the above getch() ungetch() functions,
 236  * we don't need to care at all about line numbering anymore.
 237  *
 238  * In theory, this function should only be used at the beginning
 239  * of lexing, or when we *know* the next character is part of the token.
 240  * Otherwise, if the parser throws an error, the linenumber may not be
 241  * the line of the error, but the line of the next token AFTER the error.
 242  *
 243  * This is currently only problematic when using c-like string-continuation,
 244  * since comments and whitespaces are allowed between 2 such strings.
 245  * Example:
 246 printf(   "line one\n"
 247 // A comment
 248           "A continuation of the previous string"
 249 // This line is skipped
 250       , foo);
 251
 252  * In this case, if the parser decides it didn't actually want a string,
 253  * and uses lex->line to print an error, it will show the ', foo);' line's
 254  * linenumber.
 255  *
 256  * On the other hand, the parser is supposed to remember the line of the next
 257  * token's beginning. In this case we would want skipwhite() to be called
 258  * AFTER reading a token, so that the parser, before reading the NEXT token,
 259  * doesn't store the *comment's* linenumber, but the actual token's linenumber.
 260  *
 261  * THIS SOLUTION
 262  *    here is to store the line of the first character after skipping
 263  *    the initial whitespace in lex->sline, this happens in lex_do.
 264  */
 265 static int lex_skipwhite(lex_file *lex)
 266 {
 267     int ch = 0;
 268
 269     do
 270     {
 271         ch = lex_getch(lex);
 272         while (ch != EOF && isspace(ch)) ch = lex_getch(lex);
 273
 274         if (ch == '/') {
 275             ch = lex_getch(lex);
 276             if (ch == '/')
 277             {
 278                 /* one line comment */
 279                 ch = lex_getch(lex);
 280
 281                 /* check for special: '/', '/', '*', '/' */
 282                 if (ch == '*') {
 283                     ch = lex_getch(lex);
 284                     if (ch == '/') {
 285                         ch = ' ';
 286                         continue;
 287                     }
 288                 }
 289
 290                 while (ch != EOF && ch != '\n') {
 291                     ch = lex_getch(lex);
 292                 }
 293                 continue;
 294             }
 295             if (ch == '*')
 296             {
 297                 /* multiline comment */
 298                 while (ch != EOF)
 299                 {
 300                     ch = lex_getch(lex);
 301                     if (ch == '*') {
 302                         ch = lex_getch(lex);
 303                         if (ch == '/') {
 304                             ch = lex_getch(lex);
 305                             break;
 306                         }
 307                     }
 308                 }
 309                 if (ch == '/') /* allow *//* direct following comment */
 310                 {
 311                     lex_ungetch(lex, ch);
 312                     ch = ' '; /* cause TRUE in the isspace check */
 313                 }
 314                 continue;
 315             }
 316             /* Otherwise roll back to the slash and break out of the loop */
 317             lex_ungetch(lex, ch);
 318             ch = '/';
 319             break;
 320         }
 321     } while (ch != EOF && isspace(ch));
 322
 323     return ch;
 324 }
 325
 326 /* Append a character to the token buffer */
 327 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
 328 {
 329     if (!token_value_add(lex->tok, ch)) {
 330         lexerror(lex, "out of memory");
 331         return false;
 332     }
 333     return true;
 334 }
 335
 336 /* Append a trailing null-byte */
 337 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
 338 {
 339     if (!token_value_add(lex->tok, 0)) {
 340         lexerror(lex, "out of memory");
 341         return false;
 342     }
 343     lex->tok->value_count--;
 344     return true;
 345 }
 346
 347 /* Get a token */
 348 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 349 {
 350     int ch;
 351
 352     ch = lex_getch(lex);
 353     while (ch != EOF && isident(ch))
 354     {
 355         if (!lex_tokench(lex, ch))
 356             return (lex->tok->ttype = TOKEN_FATAL);
 357         ch = lex_getch(lex);
 358     }
 359
 360     /* last ch was not an ident ch: */
 361     lex_ungetch(lex, ch);
 362
 363     return true;
 364 }
 365
 366 /* read one ident for the frame list */
 367 static int lex_parse_frame(lex_file *lex)
 368 {
 369     int ch;
 370
 371     if (lex->tok)
 372         token_delete(lex->tok);
 373     lex->tok = token_new();
 374
 375     ch = lex_getch(lex);
 376     while (ch != EOF && ch != '\n' && isspace(ch))
 377         ch = lex_getch(lex);
 378
 379     if (ch == '\n')
 380         return 1;
 381
 382     if (!isident_start(ch)) {
 383         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 384         return -1;
 385     }
 386
 387     if (!lex_tokench(lex, ch))
 388         return -1;
 389     if (!lex_finish_ident(lex))
 390         return -1;
 391     if (!lex_endtoken(lex))
 392         return -1;
 393     return 0;
 394 }
 395
 396 /* read a list of $frames */
 397 static bool lex_finish_frames(lex_file *lex)
 398 {
 399     do {
 400         int rc;
 401         frame_macro m;
 402
 403         rc = lex_parse_frame(lex);
 404         if (rc > 0) /* end of line */
 405             return true;
 406         if (rc < 0) /* error */
 407             return false;
 408
 409         m.value = lex->framevalue++;
 410         m.name = lex->tok->value;
 411         lex->tok->value = NULL;
 412         if (!lex_file_frames_add(lex, m)) {
 413             lexerror(lex, "out of memory");
 414             return false;
 415         }
 416     } while (true);
 417 }
 418
 419 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 420 {
 421     int ch = 0;
 422
 423     while (ch != EOF)
 424     {
 425         ch = lex_getch(lex);
 426         if (ch == quote)
 427             return TOKEN_STRINGCONST;
 428
 429         if (ch == '\\') {
 430             ch = lex_getch(lex);
 431             if (ch == EOF) {
 432                 lexerror(lex, "unexpected end of file");
 433                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 434                 return (lex->tok->ttype = TOKEN_ERROR);
 435             }
 436
 437             switch (ch) {
 438             case '\\': break;
 439             case 'a':  ch = '\a'; break;
 440             case 'b':  ch = '\b'; break;
 441             case 'r':  ch = '\r'; break;
 442             case 'n':  ch = '\n'; break;
 443             case 't':  ch = '\t'; break;
 444             case 'f':  ch = '\f'; break;
 445             case 'v':  ch = '\v'; break;
 446             default:
 447                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 448                 /* so we just add the character plus backslash no matter what it actually is */
 449                 if (!lex_tokench(lex, '\\'))
 450                     return (lex->tok->ttype = TOKEN_FATAL);
 451             }
 452             /* add the character finally */
 453             if (!lex_tokench(lex, ch))
 454                 return (lex->tok->ttype = TOKEN_FATAL);
 455         }
 456         else if (!lex_tokench(lex, ch))
 457             return (lex->tok->ttype = TOKEN_FATAL);
 458     }
 459     lexerror(lex, "unexpected end of file within string constant");
 460     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 461     return (lex->tok->ttype = TOKEN_ERROR);
 462 }
 463
 464 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 465 {
 466     bool ishex = false;
 467
 468     int  ch = lastch;
 469
 470     /* parse a number... */
 471     lex->tok->ttype = TOKEN_INTCONST;
 472
 473     if (!lex_tokench(lex, ch))
 474         return (lex->tok->ttype = TOKEN_FATAL);
 475
 476     ch = lex_getch(lex);
 477     if (ch != '.' && !isdigit(ch))
 478     {
 479         if (lastch != '0' || ch != 'x')
 480         {
 481             /* end of the number or EOF */
 482             lex_ungetch(lex, ch);
 483             if (!lex_endtoken(lex))
 484                 return (lex->tok->ttype = TOKEN_FATAL);
 485
 486             lex->tok->constval.i = lastch - '0';
 487             return lex->tok->ttype;
 488         }
 489
 490         ishex = true;
 491     }
 492
 493     /* EOF would have been caught above */
 494
 495     if (ch != '.')
 496     {
 497         if (!lex_tokench(lex, ch))
 498             return (lex->tok->ttype = TOKEN_FATAL);
 499         ch = lex_getch(lex);
 500         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 501         {
 502             if (!lex_tokench(lex, ch))
 503                 return (lex->tok->ttype = TOKEN_FATAL);
 504             ch = lex_getch(lex);
 505         }
 506     }
 507     /* NOT else, '.' can come from above as well */
 508     if (ch == '.' && !ishex)
 509     {
 510         /* Allow floating comma in non-hex mode */
 511         lex->tok->ttype = TOKEN_FLOATCONST;
 512         if (!lex_tokench(lex, ch))
 513             return (lex->tok->ttype = TOKEN_FATAL);
 514
 515         /* continue digits-only */
 516         ch = lex_getch(lex);
 517         while (isdigit(ch))
 518         {
 519             if (!lex_tokench(lex, ch))
 520                 return (lex->tok->ttype = TOKEN_FATAL);
 521             ch = lex_getch(lex);
 522         }
 523     }
 524     /* put back the last character */
 525     /* but do not put back the trailing 'f' or a float */
 526     if (lex->tok->ttype == TOKEN_FLOATCONST && ch == 'f')
 527         ch = lex_getch(lex);
 528
 529     /* generally we don't want words to follow numbers: */
 530     if (isident(ch)) {
 531         lexerror(lex, "unexpected trailing characters after number");
 532         return (lex->tok->ttype = TOKEN_ERROR);
 533     }
 534     lex_ungetch(lex, ch);
 535
 536     if (!lex_endtoken(lex))
 537         return (lex->tok->ttype = TOKEN_FATAL);
 538     if (lex->tok->ttype == TOKEN_FLOATCONST)
 539         lex->tok->constval.f = strtod(lex->tok->value, NULL);
 540     else
 541         lex->tok->constval.i = strtol(lex->tok->value, NULL, 0);
 542     return lex->tok->ttype;
 543 }
 544
 545 int lex_do(lex_file *lex)
 546 {
 547     int ch, nextch;
 548
 549     if (lex->tok)
 550         token_delete(lex->tok);
 551     lex->tok = token_new();
 552     if (!lex->tok)
 553         return TOKEN_FATAL;
 554
 555     ch = lex_skipwhite(lex);
 556     lex->sline = lex->line;
 557     lex->tok->ctx.line = lex->sline;
 558     lex->tok->ctx.file = lex->name;
 559
 560     if (ch == EOF)
 561         return (lex->tok->ttype = TOKEN_EOF);
 562
 563     /* modelgen / spiritgen commands */
 564     if (ch == '$') {
 565         const char *v;
 566         size_t frame;
 567
 568         ch = lex_getch(lex);
 569         if (!isident_start(ch)) {
 570             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 571             return lex_do(lex);
 572         }
 573         if (!lex_tokench(lex, ch))
 574             return (lex->tok->ttype = TOKEN_FATAL);
 575         if (!lex_finish_ident(lex))
 576             return (lex->tok->ttype = TOKEN_ERROR);
 577         if (!lex_endtoken(lex))
 578             return (lex->tok->ttype = TOKEN_FATAL);
 579         /* skip the known commands */
 580         v = lex->tok->value;
 581
 582         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 583         {
 584             /* frame/framesave command works like an enum
 585              * similar to fteqcc we handle this in the lexer.
 586              * The reason for this is that it is sensitive to newlines,
 587              * which the parser is unaware of
 588              */
 589             if (!lex_finish_frames(lex))
 590                  return (lex->tok->ttype = TOKEN_ERROR);
 591             return lex_do(lex);
 592         }
 593
 594         if (!strcmp(v, "framevalue"))
 595         {
 596             ch = lex_getch(lex);
 597             while (ch != EOF && isspace(ch) && ch != '\n')
 598                 ch = lex_getch(lex);
 599
 600             if (!isdigit(ch)) {
 601                 lexerror(lex, "$framevalue requires an integer parameter");
 602                 return lex_do(lex);
 603             }
 604
 605             token_delete(lex->tok);
 606             lex->tok = token_new();
 607             lex->tok->ttype = lex_finish_digit(lex, ch);
 608             if (!lex_endtoken(lex))
 609                 return (lex->tok->ttype = TOKEN_FATAL);
 610             if (lex->tok->ttype != TOKEN_INTCONST) {
 611                 lexerror(lex, "$framevalue requires an integer parameter");
 612                 return lex_do(lex);
 613             }
 614             lex->framevalue = lex->tok->constval.i;
 615             return lex_do(lex);
 616         }
 617
 618         if (!strcmp(v, "framerestore"))
 619         {
 620             int rc;
 621
 622             token_delete(lex->tok);
 623             lex->tok = token_new();
 624
 625             rc = lex_parse_frame(lex);
 626
 627             if (rc > 0) {
 628                 lexerror(lex, "$framerestore requires a framename parameter");
 629                 return lex_do(lex);
 630             }
 631             if (rc < 0)
 632                 return (lex->tok->ttype = TOKEN_FATAL);
 633
 634             v = lex->tok->value;
 635             for (frame = 0; frame < lex->frames_count; ++frame) {
 636                 if (!strcmp(v, lex->frames[frame].name)) {
 637                     lex->framevalue = lex->frames[frame].value;
 638                     return lex_do(lex);
 639                 }
 640             }
 641             lexerror(lex, "unknown framename `%s`", v);
 642             return lex_do(lex);
 643         }
 644
 645         if (!strcmp(v, "modelname"))
 646         {
 647             int rc;
 648
 649             token_delete(lex->tok);
 650             lex->tok = token_new();
 651
 652             rc = lex_parse_frame(lex);
 653
 654             if (rc > 0) {
 655                 lexerror(lex, "$framerestore requires a framename parameter");
 656                 return lex_do(lex);
 657             }
 658             if (rc < 0)
 659                 return (lex->tok->ttype = TOKEN_FATAL);
 660
 661             v = lex->tok->value;
 662             if (lex->modelname) {
 663                 frame_macro m;
 664                 m.value = lex->framevalue;
 665                 m.name = lex->modelname;
 666                 lex->modelname = NULL;
 667                 if (!lex_file_frames_add(lex, m)) {
 668                     lexerror(lex, "out of memory");
 669                     return (lex->tok->ttype = TOKEN_FATAL);
 670                 }
 671             }
 672             lex->modelname = lex->tok->value;
 673             lex->tok->value = NULL;
 674             for (frame = 0; frame < lex->frames_count; ++frame) {
 675                 if (!strcmp(v, lex->frames[frame].name)) {
 676                     lex->framevalue = lex->frames[frame].value;
 677                     break;
 678                 }
 679             }
 680             return lex_do(lex);
 681         }
 682
 683         if (!strcmp(v, "flush"))
 684         {
 685             size_t frame;
 686             for (frame = 0; frame < lex->frames_count; ++frame)
 687                 mem_d(lex->frames[frame].name);
 688             MEM_VECTOR_CLEAR(lex, frames);
 689             /* skip line (fteqcc does it too) */
 690             ch = lex_getch(lex);
 691             while (ch != EOF && ch != '\n')
 692                 ch = lex_getch(lex);
 693             return lex_do(lex);
 694         }
 695
 696         if (!strcmp(v, "cd") ||
 697             !strcmp(v, "origin") ||
 698             !strcmp(v, "base") ||
 699             !strcmp(v, "flags") ||
 700             !strcmp(v, "scale") ||
 701             !strcmp(v, "skin"))
 702         {
 703             /* skip line */
 704             ch = lex_getch(lex);
 705             while (ch != EOF && ch != '\n')
 706                 ch = lex_getch(lex);
 707             return lex_do(lex);
 708         }
 709
 710         for (frame = 0; frame < lex->frames_count; ++frame) {
 711             if (!strcmp(v, lex->frames[frame].name)) {
 712                 lex->tok->constval.i = lex->frames[frame].value;
 713                 return (lex->tok->ttype = TOKEN_INTCONST);
 714             }
 715         }
 716
 717         lexerror(lex, "invalid frame macro");
 718         return lex_do(lex);
 719     }
 720
 721     /* single-character tokens */
 722     switch (ch)
 723     {
 724         case '(':
 725             if (!lex_tokench(lex, ch) ||
 726                 !lex_endtoken(lex))
 727             {
 728                 return (lex->tok->ttype = TOKEN_FATAL);
 729             }
 730             if (lex->flags.noops)
 731                 return (lex->tok->ttype = ch);
 732             else
 733                 return (lex->tok->ttype = TOKEN_OPERATOR);
 734         case ')':
 735         case ';':
 736         case '{':
 737         case '}':
 738         case '[':
 739         case ']':
 740
 741         case '#':
 742             if (!lex_tokench(lex, ch) ||
 743                 !lex_endtoken(lex))
 744             {
 745                 return (lex->tok->ttype = TOKEN_FATAL);
 746             }
 747             return (lex->tok->ttype = ch);
 748         default:
 749             break;
 750     }
 751
 752     if (lex->flags.noops)
 753     {
 754         /* Detect characters early which are normally
 755          * operators OR PART of an operator.
 756          */
 757         switch (ch)
 758         {
 759             case '+':
 760             case '-':
 761             case '*':
 762             case '/':
 763             case '<':
 764             case '>':
 765             case '=':
 766             case '&':
 767             case '|':
 768             case '^':
 769             case '~':
 770             case ',':
 771             case '.':
 772             case '!':
 773                 if (!lex_tokench(lex, ch) ||
 774                     !lex_endtoken(lex))
 775                 {
 776                     return (lex->tok->ttype = TOKEN_FATAL);
 777                 }
 778                 return (lex->tok->ttype = ch);
 779             default:
 780                 break;
 781         }
 782     }
 783
 784     if (ch == ',' || ch == '.') {
 785         if (!lex_tokench(lex, ch) ||
 786             !lex_endtoken(lex))
 787         {
 788             return (lex->tok->ttype = TOKEN_FATAL);
 789         }
 790         return (lex->tok->ttype = TOKEN_OPERATOR);
 791     }
 792
 793     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 794         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 795         ch == '=' || ch == '!' || /* ==, != */
 796         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 797     {
 798         if (!lex_tokench(lex, ch))
 799             return (lex->tok->ttype = TOKEN_FATAL);
 800
 801         nextch = lex_getch(lex);
 802         if (nextch == ch || nextch == '=') {
 803             if (!lex_tokench(lex, nextch))
 804                 return (lex->tok->ttype = TOKEN_FATAL);
 805         } else if (ch == '-' && nextch == '>') {
 806             if (!lex_tokench(lex, nextch))
 807                 return (lex->tok->ttype = TOKEN_FATAL);
 808         } else
 809             lex_ungetch(lex, nextch);
 810
 811         if (!lex_endtoken(lex))
 812             return (lex->tok->ttype = TOKEN_FATAL);
 813         return (lex->tok->ttype = TOKEN_OPERATOR);
 814     }
 815
 816     /*
 817     if (ch == '^' || ch == '~' || ch == '!')
 818     {
 819         if (!lex_tokench(lex, ch) ||
 820             !lex_endtoken(lex))
 821         {
 822             return (lex->tok->ttype = TOKEN_FATAL);
 823         }
 824         return (lex->tok->ttype = TOKEN_OPERATOR);
 825     }
 826     */
 827
 828     if (ch == '*' || ch == '/') /* *=, /= */
 829     {
 830         if (!lex_tokench(lex, ch))
 831             return (lex->tok->ttype = TOKEN_FATAL);
 832
 833         nextch = lex_getch(lex);
 834         if (nextch == '=') {
 835             if (!lex_tokench(lex, nextch))
 836                 return (lex->tok->ttype = TOKEN_FATAL);
 837         } else
 838             lex_ungetch(lex, nextch);
 839
 840         if (!lex_endtoken(lex))
 841             return (lex->tok->ttype = TOKEN_FATAL);
 842         return (lex->tok->ttype = TOKEN_OPERATOR);
 843     }
 844
 845     if (isident_start(ch))
 846     {
 847         const char *v;
 848
 849         if (!lex_tokench(lex, ch))
 850             return (lex->tok->ttype = TOKEN_FATAL);
 851         if (!lex_finish_ident(lex)) {
 852             /* error? */
 853             return (lex->tok->ttype = TOKEN_ERROR);
 854         }
 855         if (!lex_endtoken(lex))
 856             return (lex->tok->ttype = TOKEN_FATAL);
 857         lex->tok->ttype = TOKEN_IDENT;
 858
 859         v = lex->tok->value;
 860         if (!strcmp(v, "void")) {
 861             lex->tok->ttype = TOKEN_TYPENAME;
 862             lex->tok->constval.t = TYPE_VOID;
 863         } else if (!strcmp(v, "int")) {
 864             lex->tok->ttype = TOKEN_TYPENAME;
 865             lex->tok->constval.t = TYPE_INTEGER;
 866         } else if (!strcmp(v, "float")) {
 867             lex->tok->ttype = TOKEN_TYPENAME;
 868             lex->tok->constval.t = TYPE_FLOAT;
 869         } else if (!strcmp(v, "string")) {
 870             lex->tok->ttype = TOKEN_TYPENAME;
 871             lex->tok->constval.t = TYPE_STRING;
 872         } else if (!strcmp(v, "entity")) {
 873             lex->tok->ttype = TOKEN_TYPENAME;
 874             lex->tok->constval.t = TYPE_ENTITY;
 875         } else if (!strcmp(v, "vector")) {
 876             lex->tok->ttype = TOKEN_TYPENAME;
 877             lex->tok->constval.t = TYPE_VECTOR;
 878         } else if (!strcmp(v, "for")  ||
 879                  !strcmp(v, "while")  ||
 880                  !strcmp(v, "do")     ||
 881                  !strcmp(v, "if")     ||
 882                  !strcmp(v, "else")   ||
 883                  !strcmp(v, "local")  ||
 884                  !strcmp(v, "return") ||
 885                  !strcmp(v, "const"))
 886             lex->tok->ttype = TOKEN_KEYWORD;
 887
 888         return lex->tok->ttype;
 889     }
 890
 891     if (ch == '"')
 892     {
 893         lex->tok->ttype = lex_finish_string(lex, '"');
 894         while (lex->tok->ttype == TOKEN_STRINGCONST)
 895         {
 896             /* Allow c style "string" "continuation" */
 897             ch = lex_skipwhite(lex);
 898             if (ch != '"') {
 899                 lex_ungetch(lex, ch);
 900                 break;
 901             }
 902
 903             lex->tok->ttype = lex_finish_string(lex, '"');
 904         }
 905         if (!lex_endtoken(lex))
 906             return (lex->tok->ttype = TOKEN_FATAL);
 907         return lex->tok->ttype;
 908     }
 909
 910     if (ch == '\'')
 911     {
 912         /* we parse character constants like string,
 913          * but return TOKEN_CHARCONST, or a vector type if it fits...
 914          * Likewise actual unescaping has to be done by the parser.
 915          * The difference is we don't allow 'char' 'continuation'.
 916          */
 917          lex->tok->ttype = lex_finish_string(lex, '\'');
 918          if (!lex_endtoken(lex))
 919               return (lex->tok->ttype = TOKEN_FATAL);
 920
 921          /* It's a vector if we can successfully scan 3 floats */
 922 #ifdef WIN32
 923          if (sscanf_s(lex->tok->value, " %f %f %f ",
 924                     &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
 925 #else
 926          if (sscanf(lex->tok->value, " %f %f %f ",
 927                     &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
 928 #endif
 929          {
 930               lex->tok->ttype = TOKEN_VECTORCONST;
 931          }
 932
 933          return lex->tok->ttype;
 934     }
 935
 936     if (isdigit(ch))
 937     {
 938         lex->tok->ttype = lex_finish_digit(lex, ch);
 939         if (!lex_endtoken(lex))
 940             return (lex->tok->ttype = TOKEN_FATAL);
 941         return lex->tok->ttype;
 942     }
 943
 944     lexerror(lex, "unknown token");
 945     return (lex->tok->ttype = TOKEN_ERROR);
 946 }