lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
/* Instantiate the MEM_VEC_FUNCTIONS helpers for the `value` member of
 * token (element type char) and the `frames` member of lex_file
 * (element type frame_macro).
 * NOTE(review): token_value_add() and lex_file_frames_add(), used further
 * down in this file, are presumably generated by these two lines - confirm
 * against the macro definition.
 */
MEM_VEC_FUNCTIONS(token, char, value)
MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames)
  11
  12 void lexerror(lex_file *lex, const char *fmt, ...)
  13 {
  14     va_list ap;
  15
  16     if (lex)
  17         printf("error %s:%lu: ", lex->name, (unsigned long)lex->sline);
  18     else
  19         printf("error: ");
  20
  21     va_start(ap, fmt);
  22     vprintf(fmt, ap);
  23     va_end(ap);
  24
  25     printf("\n");
  26 }
  27
  28 void lexwarn(lex_file *lex, int warn, const char *fmt, ...)
  29 {
  30     va_list ap;
  31
  32     if (!OPTS_WARN(warn))
  33         return;
  34
  35     if (lex)
  36         printf("warning %s:%lu: ", lex->name, (unsigned long)lex->sline);
  37     else
  38         printf("warning: ");
  39
  40     va_start(ap, fmt);
  41     vprintf(fmt, ap);
  42     va_end(ap);
  43
  44     printf("\n");
  45 }
  46
  47 token* token_new()
  48 {
  49     token *tok = (token*)mem_a(sizeof(token));
  50     if (!tok)
  51         return NULL;
  52     memset(tok, 0, sizeof(*tok));
  53     return tok;
  54 }
  55
  56 void token_delete(token *self)
  57 {
  58     if (self->next && self->next->prev == self)
  59         self->next->prev = self->prev;
  60     if (self->prev && self->prev->next == self)
  61         self->prev->next = self->next;
  62     MEM_VECTOR_CLEAR(self, value);
  63     mem_d(self);
  64 }
  65
  66 token* token_copy(const token *cp)
  67 {
  68     token* self = token_new();
  69     if (!self)
  70         return NULL;
  71     /* copy the value */
  72     self->value_alloc = cp->value_count + 1;
  73     self->value_count = cp->value_count;
  74     self->value = (char*)mem_a(self->value_alloc);
  75     if (!self->value) {
  76         mem_d(self);
  77         return NULL;
  78     }
  79     memcpy(self->value, cp->value, cp->value_count);
  80     self->value[self->value_alloc-1] = 0;
  81
  82     /* rest */
  83     self->ctx = cp->ctx;
  84     self->ttype = cp->ttype;
  85     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  86     return self;
  87 }
  88
  89 void token_delete_all(token *t)
  90 {
  91     token *n;
  92
  93     do {
  94         n = t->next;
  95         token_delete(t);
  96         t = n;
  97     } while(t);
  98 }
  99
 100 token* token_copy_all(const token *cp)
 101 {
 102     token *cur;
 103     token *out;
 104
 105     out = cur = token_copy(cp);
 106     if (!out)
 107         return NULL;
 108
 109     while (cp->next) {
 110         cp = cp->next;
 111         cur->next = token_copy(cp);
 112         if (!cur->next) {
 113             token_delete_all(out);
 114             return NULL;
 115         }
 116         cur->next->prev = cur;
 117         cur = cur->next;
 118     }
 119
 120     return out;
 121 }
 122
 123 lex_file* lex_open(const char *file)
 124 {
 125     lex_file *lex;
 126     FILE *in = util_fopen(file, "rb");
 127
 128     if (!in) {
 129         lexerror(NULL, "open failed: '%s'\n", file);
 130         return NULL;
 131     }
 132
 133     lex = (lex_file*)mem_a(sizeof(*lex));
 134     if (!lex) {
 135         fclose(in);
 136         lexerror(NULL, "out of memory\n");
 137         return NULL;
 138     }
 139
 140     memset(lex, 0, sizeof(*lex));
 141
 142     lex->file = in;
 143     lex->name = util_strdup(file);
 144     lex->line = 1; /* we start counting at 1 */
 145
 146     lex->peekpos = 0;
 147
 148     return lex;
 149 }
 150
 151 void lex_close(lex_file *lex)
 152 {
 153     size_t i;
 154     for (i = 0; i < lex->frames_count; ++i)
 155         mem_d(lex->frames[i].name);
 156     MEM_VECTOR_CLEAR(lex, frames);
 157
 158     if (lex->modelname)
 159         mem_d(lex->modelname);
 160
 161     if (lex->file)
 162         fclose(lex->file);
 163     if (lex->tok)
 164         token_delete(lex->tok);
 165     mem_d(lex->name);
 166     mem_d(lex);
 167 }
 168
 169 /* Get or put-back data
 170  * The following to functions do NOT understand what kind of data they
 171  * are working on.
 172  * The are merely wrapping get/put in order to count line numbers.
 173  */
 174 static int lex_getch(lex_file *lex)
 175 {
 176     int ch;
 177
 178     if (lex->peekpos) {
 179         lex->peekpos--;
 180         if (lex->peek[lex->peekpos] == '\n')
 181             lex->line++;
 182         return lex->peek[lex->peekpos];
 183     }
 184
 185     ch = fgetc(lex->file);
 186     if (ch == '\n')
 187         lex->line++;
 188     return ch;
 189 }
 190
 191 static void lex_ungetch(lex_file *lex, int ch)
 192 {
 193     lex->peek[lex->peekpos++] = ch;
 194     if (ch == '\n')
 195         lex->line--;
 196 }
 197
 198 /* classify characters
 199  * some additions to the is*() functions of ctype.h
 200  */
 201
 202 /* Idents are alphanumberic, but they start with alpha or _ */
 203 static bool isident_start(int ch)
 204 {
 205     return isalpha(ch) || ch == '_';
 206 }
 207
 208 static bool isident(int ch)
 209 {
 210     return isident_start(ch) || isdigit(ch);
 211 }
 212
 213 /* isxdigit_only is used when we already know it's not a digit
 214  * and want to see if it's a hex digit anyway.
 215  */
 216 static bool isxdigit_only(int ch)
 217 {
 218     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 219 }
 220
 221 /* Skip whitespace and comments and return the first
 222  * non-white character.
 223  * As this makes use of the above getch() ungetch() functions,
 224  * we don't need to care at all about line numbering anymore.
 225  *
 226  * In theory, this function should only be used at the beginning
 227  * of lexing, or when we *know* the next character is part of the token.
 228  * Otherwise, if the parser throws an error, the linenumber may not be
 229  * the line of the error, but the line of the next token AFTER the error.
 230  *
 231  * This is currently only problematic when using c-like string-continuation,
 232  * since comments and whitespaces are allowed between 2 such strings.
 233  * Example:
 234 printf(   "line one\n"
 235 // A comment
 236           "A continuation of the previous string"
 237 // This line is skipped
 238       , foo);
 239
 240  * In this case, if the parse decides it didn't actually want a string,
 241  * and uses lex->line to print an error, it will show the ', foo);' line's
 242  * linenumber.
 243  *
 244  * On the other hand, the parser is supposed to remember the line of the next
 245  * token's beginning. In this case we would want skipwhite() to be called
 246  * AFTER reading a token, so that the parser, before reading the NEXT token,
 247  * doesn't store teh *comment's* linenumber, but the actual token's linenumber.
 248  *
 249  * THIS SOLUTION
 250  *    here is to store the line of the first character after skipping
 251  *    the initial whitespace in lex->sline, this happens in lex_do.
 252  */
 253 static int lex_skipwhite(lex_file *lex)
 254 {
 255     int ch = 0;
 256
 257     do
 258     {
 259         ch = lex_getch(lex);
 260         while (ch != EOF && isspace(ch)) ch = lex_getch(lex);
 261
 262         if (ch == '/') {
 263             ch = lex_getch(lex);
 264             if (ch == '/')
 265             {
 266                 /* one line comment */
 267                 ch = lex_getch(lex);
 268
 269                 /* check for special: '/', '/', '*', '/' */
 270                 if (ch == '*') {
 271                     ch = lex_getch(lex);
 272                     if (ch == '/') {
 273                         ch = ' ';
 274                         continue;
 275                     }
 276                 }
 277
 278                 while (ch != EOF && ch != '\n') {
 279                     ch = lex_getch(lex);
 280                 }
 281                 continue;
 282             }
 283             if (ch == '*')
 284             {
 285                 /* multiline comment */
 286                 while (ch != EOF)
 287                 {
 288                     ch = lex_getch(lex);
 289                     if (ch == '*') {
 290                         ch = lex_getch(lex);
 291                         if (ch == '/') {
 292                             ch = lex_getch(lex);
 293                             break;
 294                         }
 295                     }
 296                 }
 297                 if (ch == '/') /* allow *//* direct following comment */
 298                 {
 299                     lex_ungetch(lex, ch);
 300                     ch = ' '; /* cause TRUE in the isspace check */
 301                 }
 302                 continue;
 303             }
 304             /* Otherwise roll back to the slash and break out of the loop */
 305             lex_ungetch(lex, ch);
 306             ch = '/';
 307             break;
 308         }
 309     } while (ch != EOF && isspace(ch));
 310
 311     return ch;
 312 }
 313
 314 /* Append a character to the token buffer */
 315 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
 316 {
 317     if (!token_value_add(lex->tok, ch)) {
 318         lexerror(lex, "out of memory");
 319         return false;
 320     }
 321     return true;
 322 }
 323
 324 /* Append a trailing null-byte */
 325 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
 326 {
 327     if (!token_value_add(lex->tok, 0)) {
 328         lexerror(lex, "out of memory");
 329         return false;
 330     }
 331     lex->tok->value_count--;
 332     return true;
 333 }
 334
 335 /* Get a token */
 336 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 337 {
 338     int ch;
 339
 340     ch = lex_getch(lex);
 341     while (ch != EOF && isident(ch))
 342     {
 343         if (!lex_tokench(lex, ch))
 344             return (lex->tok->ttype = TOKEN_FATAL);
 345         ch = lex_getch(lex);
 346     }
 347
 348     /* last ch was not an ident ch: */
 349     lex_ungetch(lex, ch);
 350
 351     return true;
 352 }
 353
 354 /* read one ident for the frame list */
 355 static int lex_parse_frame(lex_file *lex)
 356 {
 357     int ch;
 358
 359     if (lex->tok)
 360         token_delete(lex->tok);
 361     lex->tok = token_new();
 362
 363     ch = lex_getch(lex);
 364     while (ch != EOF && ch != '\n' && isspace(ch))
 365         ch = lex_getch(lex);
 366
 367     if (ch == '\n')
 368         return 1;
 369
 370     if (!isident_start(ch)) {
 371         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 372         return -1;
 373     }
 374
 375     if (!lex_tokench(lex, ch))
 376         return -1;
 377     if (!lex_finish_ident(lex))
 378         return -1;
 379     if (!lex_endtoken(lex))
 380         return -1;
 381     return 0;
 382 }
 383
 384 /* read a list of $frames */
 385 static bool lex_finish_frames(lex_file *lex)
 386 {
 387     do {
 388         int rc;
 389         frame_macro m;
 390
 391         rc = lex_parse_frame(lex);
 392         if (rc > 0) /* end of line */
 393             return true;
 394         if (rc < 0) /* error */
 395             return false;
 396
 397         m.value = lex->framevalue++;
 398         m.name = lex->tok->value;
 399         lex->tok->value = NULL;
 400         if (!lex_file_frames_add(lex, m)) {
 401             lexerror(lex, "out of memory");
 402             return false;
 403         }
 404     } while (true);
 405 }
 406
 407 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 408 {
 409     int ch = 0;
 410
 411     while (ch != EOF)
 412     {
 413         ch = lex_getch(lex);
 414         if (ch == quote)
 415             return TOKEN_STRINGCONST;
 416
 417         if (ch == '\\') {
 418             ch = lex_getch(lex);
 419             if (ch == EOF) {
 420                 lexerror(lex, "unexpected end of file");
 421                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 422                 return (lex->tok->ttype = TOKEN_ERROR);
 423             }
 424
 425             switch (ch) {
 426             case '\\': break;
 427             case 'a':  ch = '\a'; break;
 428             case 'b':  ch = '\b'; break;
 429             case 'r':  ch = '\r'; break;
 430             case 'n':  ch = '\n'; break;
 431             case 't':  ch = '\t'; break;
 432             case 'f':  ch = '\f'; break;
 433             case 'v':  ch = '\v'; break;
 434             default:
 435                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 436                 /* so we just add the character plus backslash no matter what it actually is */
 437                 if (!lex_tokench(lex, '\\'))
 438                     return (lex->tok->ttype = TOKEN_FATAL);
 439             }
 440             /* add the character finally */
 441             if (!lex_tokench(lex, ch))
 442                 return (lex->tok->ttype = TOKEN_FATAL);
 443         }
 444         else if (!lex_tokench(lex, ch))
 445             return (lex->tok->ttype = TOKEN_FATAL);
 446     }
 447     lexerror(lex, "unexpected end of file within string constant");
 448     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 449     return (lex->tok->ttype = TOKEN_ERROR);
 450 }
 451
 452 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 453 {
 454     bool ishex = false;
 455
 456     int  ch = lastch;
 457
 458     /* parse a number... */
 459     lex->tok->ttype = TOKEN_INTCONST;
 460
 461     if (!lex_tokench(lex, ch))
 462         return (lex->tok->ttype = TOKEN_FATAL);
 463
 464     ch = lex_getch(lex);
 465     if (ch != '.' && !isdigit(ch))
 466     {
 467         if (lastch != '0' || ch != 'x')
 468         {
 469             /* end of the number or EOF */
 470             lex_ungetch(lex, ch);
 471             if (!lex_endtoken(lex))
 472                 return (lex->tok->ttype = TOKEN_FATAL);
 473
 474             lex->tok->constval.i = lastch - '0';
 475             return lex->tok->ttype;
 476         }
 477
 478         ishex = true;
 479     }
 480
 481     /* EOF would have been caught above */
 482
 483     if (ch != '.')
 484     {
 485         if (!lex_tokench(lex, ch))
 486             return (lex->tok->ttype = TOKEN_FATAL);
 487         ch = lex_getch(lex);
 488         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 489         {
 490             if (!lex_tokench(lex, ch))
 491                 return (lex->tok->ttype = TOKEN_FATAL);
 492             ch = lex_getch(lex);
 493         }
 494     }
 495     /* NOT else, '.' can come from above as well */
 496     if (ch == '.' && !ishex)
 497     {
 498         /* Allow floating comma in non-hex mode */
 499         lex->tok->ttype = TOKEN_FLOATCONST;
 500         if (!lex_tokench(lex, ch))
 501             return (lex->tok->ttype = TOKEN_FATAL);
 502
 503         /* continue digits-only */
 504         ch = lex_getch(lex);
 505         while (isdigit(ch))
 506         {
 507             if (!lex_tokench(lex, ch))
 508                 return (lex->tok->ttype = TOKEN_FATAL);
 509             ch = lex_getch(lex);
 510         }
 511     }
 512     /* put back the last character */
 513     /* but do not put back the trailing 'f' or a float */
 514     if (lex->tok->ttype == TOKEN_FLOATCONST && ch == 'f')
 515         ch = lex_getch(lex);
 516
 517     /* generally we don't want words to follow numbers: */
 518     if (isident(ch)) {
 519         lexerror(lex, "unexpected trailing characters after number");
 520         return (lex->tok->ttype = TOKEN_ERROR);
 521     }
 522     lex_ungetch(lex, ch);
 523
 524     if (!lex_endtoken(lex))
 525         return (lex->tok->ttype = TOKEN_FATAL);
 526     if (lex->tok->ttype == TOKEN_FLOATCONST)
 527         lex->tok->constval.f = strtod(lex->tok->value, NULL);
 528     else
 529         lex->tok->constval.i = strtol(lex->tok->value, NULL, 0);
 530     return lex->tok->ttype;
 531 }
 532
 533 int lex_do(lex_file *lex)
 534 {
 535     int ch, nextch;
 536
 537     if (lex->tok)
 538         token_delete(lex->tok);
 539     lex->tok = token_new();
 540     if (!lex->tok)
 541         return TOKEN_FATAL;
 542
 543     ch = lex_skipwhite(lex);
 544     lex->sline = lex->line;
 545     lex->tok->ctx.line = lex->sline;
 546     lex->tok->ctx.file = lex->name;
 547
 548     if (ch == EOF)
 549         return (lex->tok->ttype = TOKEN_EOF);
 550
 551     /* modelgen / spiritgen commands */
 552     if (ch == '$') {
 553         const char *v;
 554         size_t frame;
 555
 556         ch = lex_getch(lex);
 557         if (!isident_start(ch)) {
 558             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 559             return lex_do(lex);
 560         }
 561         if (!lex_tokench(lex, ch))
 562             return (lex->tok->ttype = TOKEN_FATAL);
 563         if (!lex_finish_ident(lex))
 564             return (lex->tok->ttype = TOKEN_ERROR);
 565         if (!lex_endtoken(lex))
 566             return (lex->tok->ttype = TOKEN_FATAL);
 567         /* skip the known commands */
 568         v = lex->tok->value;
 569
 570         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 571         {
 572             /* frame/framesave command works like an enum
 573              * similar to fteqcc we handle this in the lexer.
 574              * The reason for this is that it is sensitive to newlines,
 575              * which the parser is unaware of
 576              */
 577             if (!lex_finish_frames(lex))
 578                  return (lex->tok->ttype = TOKEN_ERROR);
 579             return lex_do(lex);
 580         }
 581
 582         if (!strcmp(v, "framevalue"))
 583         {
 584             ch = lex_getch(lex);
 585             while (ch != EOF && isspace(ch) && ch != '\n')
 586                 ch = lex_getch(lex);
 587
 588             if (!isdigit(ch)) {
 589                 lexerror(lex, "$framevalue requires an integer parameter");
 590                 return lex_do(lex);
 591             }
 592
 593             token_delete(lex->tok);
 594             lex->tok = token_new();
 595             lex->tok->ttype = lex_finish_digit(lex, ch);
 596             if (!lex_endtoken(lex))
 597                 return (lex->tok->ttype = TOKEN_FATAL);
 598             if (lex->tok->ttype != TOKEN_INTCONST) {
 599                 lexerror(lex, "$framevalue requires an integer parameter");
 600                 return lex_do(lex);
 601             }
 602             lex->framevalue = lex->tok->constval.i;
 603             return lex_do(lex);
 604         }
 605
 606         if (!strcmp(v, "framerestore"))
 607         {
 608             int rc;
 609
 610             token_delete(lex->tok);
 611             lex->tok = token_new();
 612
 613             rc = lex_parse_frame(lex);
 614
 615             if (rc > 0) {
 616                 lexerror(lex, "$framerestore requires a framename parameter");
 617                 return lex_do(lex);
 618             }
 619             if (rc < 0)
 620                 return (lex->tok->ttype = TOKEN_FATAL);
 621
 622             v = lex->tok->value;
 623             for (frame = 0; frame < lex->frames_count; ++frame) {
 624                 if (!strcmp(v, lex->frames[frame].name)) {
 625                     lex->framevalue = lex->frames[frame].value;
 626                     return lex_do(lex);
 627                 }
 628             }
 629             lexerror(lex, "unknown framename `%s`", v);
 630             return lex_do(lex);
 631         }
 632
 633         if (!strcmp(v, "modelname"))
 634         {
 635             int rc;
 636
 637             token_delete(lex->tok);
 638             lex->tok = token_new();
 639
 640             rc = lex_parse_frame(lex);
 641
 642             if (rc > 0) {
 643                 lexerror(lex, "$framerestore requires a framename parameter");
 644                 return lex_do(lex);
 645             }
 646             if (rc < 0)
 647                 return (lex->tok->ttype = TOKEN_FATAL);
 648
 649             v = lex->tok->value;
 650             if (lex->modelname) {
 651                 frame_macro m;
 652                 m.value = lex->framevalue;
 653                 m.name = lex->modelname;
 654                 lex->modelname = NULL;
 655                 if (!lex_file_frames_add(lex, m)) {
 656                     lexerror(lex, "out of memory");
 657                     return (lex->tok->ttype = TOKEN_FATAL);
 658                 }
 659             }
 660             lex->modelname = lex->tok->value;
 661             lex->tok->value = NULL;
 662             for (frame = 0; frame < lex->frames_count; ++frame) {
 663                 if (!strcmp(v, lex->frames[frame].name)) {
 664                     lex->framevalue = lex->frames[frame].value;
 665                     break;
 666                 }
 667             }
 668             return lex_do(lex);
 669         }
 670
 671         if (!strcmp(v, "flush"))
 672         {
 673             size_t frame;
 674             for (frame = 0; frame < lex->frames_count; ++frame)
 675                 mem_d(lex->frames[frame].name);
 676             MEM_VECTOR_CLEAR(lex, frames);
 677             /* skip line (fteqcc does it too) */
 678             ch = lex_getch(lex);
 679             while (ch != EOF && ch != '\n')
 680                 ch = lex_getch(lex);
 681             return lex_do(lex);
 682         }
 683
 684         if (!strcmp(v, "cd") ||
 685             !strcmp(v, "origin") ||
 686             !strcmp(v, "base") ||
 687             !strcmp(v, "flags") ||
 688             !strcmp(v, "scale") ||
 689             !strcmp(v, "skin"))
 690         {
 691             /* skip line */
 692             ch = lex_getch(lex);
 693             while (ch != EOF && ch != '\n')
 694                 ch = lex_getch(lex);
 695             return lex_do(lex);
 696         }
 697
 698         for (frame = 0; frame < lex->frames_count; ++frame) {
 699             if (!strcmp(v, lex->frames[frame].name)) {
 700                 lex->tok->constval.i = lex->frames[frame].value;
 701                 return (lex->tok->ttype = TOKEN_INTCONST);
 702             }
 703         }
 704
 705         lexerror(lex, "invalid frame macro");
 706         return lex_do(lex);
 707     }
 708
 709     /* single-character tokens */
 710     switch (ch)
 711     {
 712         case '(':
 713             if (!lex_tokench(lex, ch) ||
 714                 !lex_endtoken(lex))
 715             {
 716                 return (lex->tok->ttype = TOKEN_FATAL);
 717             }
 718             if (lex->flags.noops)
 719                 return (lex->tok->ttype = ch);
 720             else
 721                 return (lex->tok->ttype = TOKEN_OPERATOR);
 722         case ')':
 723         case ';':
 724         case '{':
 725         case '}':
 726         case '[':
 727         case ']':
 728
 729         case '#':
 730             if (!lex_tokench(lex, ch) ||
 731                 !lex_endtoken(lex))
 732             {
 733                 return (lex->tok->ttype = TOKEN_FATAL);
 734             }
 735             return (lex->tok->ttype = ch);
 736         default:
 737             break;
 738     }
 739
 740     if (lex->flags.noops)
 741     {
 742         /* Detect characters early which are normally
 743          * operators OR PART of an operator.
 744          */
 745         switch (ch)
 746         {
 747             case '+':
 748             case '-':
 749             case '*':
 750             case '/':
 751             case '<':
 752             case '>':
 753             case '=':
 754             case '&':
 755             case '|':
 756             case '^':
 757             case '~':
 758             case ',':
 759             case '.':
 760             case '!':
 761                 if (!lex_tokench(lex, ch) ||
 762                     !lex_endtoken(lex))
 763                 {
 764                     return (lex->tok->ttype = TOKEN_FATAL);
 765                 }
 766                 return (lex->tok->ttype = ch);
 767             default:
 768                 break;
 769         }
 770     }
 771
 772     if (ch == ',' || ch == '.') {
 773         if (!lex_tokench(lex, ch) ||
 774             !lex_endtoken(lex))
 775         {
 776             return (lex->tok->ttype = TOKEN_FATAL);
 777         }
 778         return (lex->tok->ttype = TOKEN_OPERATOR);
 779     }
 780
 781     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 782         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 783         ch == '=' || ch == '!' || /* ==, != */
 784         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 785     {
 786         if (!lex_tokench(lex, ch))
 787             return (lex->tok->ttype = TOKEN_FATAL);
 788
 789         nextch = lex_getch(lex);
 790         if (nextch == ch || nextch == '=') {
 791             if (!lex_tokench(lex, nextch))
 792                 return (lex->tok->ttype = TOKEN_FATAL);
 793         } else if (ch == '-' && nextch == '>') {
 794             if (!lex_tokench(lex, nextch))
 795                 return (lex->tok->ttype = TOKEN_FATAL);
 796         } else
 797             lex_ungetch(lex, nextch);
 798
 799         if (!lex_endtoken(lex))
 800             return (lex->tok->ttype = TOKEN_FATAL);
 801         return (lex->tok->ttype = TOKEN_OPERATOR);
 802     }
 803
 804     /*
 805     if (ch == '^' || ch == '~' || ch == '!')
 806     {
 807         if (!lex_tokench(lex, ch) ||
 808             !lex_endtoken(lex))
 809         {
 810             return (lex->tok->ttype = TOKEN_FATAL);
 811         }
 812         return (lex->tok->ttype = TOKEN_OPERATOR);
 813     }
 814     */
 815
 816     if (ch == '*' || ch == '/') /* *=, /= */
 817     {
 818         if (!lex_tokench(lex, ch))
 819             return (lex->tok->ttype = TOKEN_FATAL);
 820
 821         nextch = lex_getch(lex);
 822         if (nextch == '=') {
 823             if (!lex_tokench(lex, nextch))
 824                 return (lex->tok->ttype = TOKEN_FATAL);
 825         } else
 826             lex_ungetch(lex, nextch);
 827
 828         if (!lex_endtoken(lex))
 829             return (lex->tok->ttype = TOKEN_FATAL);
 830         return (lex->tok->ttype = TOKEN_OPERATOR);
 831     }
 832
 833     if (isident_start(ch))
 834     {
 835         const char *v;
 836
 837         if (!lex_tokench(lex, ch))
 838             return (lex->tok->ttype = TOKEN_FATAL);
 839         if (!lex_finish_ident(lex)) {
 840             /* error? */
 841             return (lex->tok->ttype = TOKEN_ERROR);
 842         }
 843         if (!lex_endtoken(lex))
 844             return (lex->tok->ttype = TOKEN_FATAL);
 845         lex->tok->ttype = TOKEN_IDENT;
 846
 847         v = lex->tok->value;
 848         if (!strcmp(v, "void")) {
 849             lex->tok->ttype = TOKEN_TYPENAME;
 850             lex->tok->constval.t = TYPE_VOID;
 851         } else if (!strcmp(v, "int")) {
 852             lex->tok->ttype = TOKEN_TYPENAME;
 853             lex->tok->constval.t = TYPE_INTEGER;
 854         } else if (!strcmp(v, "float")) {
 855             lex->tok->ttype = TOKEN_TYPENAME;
 856             lex->tok->constval.t = TYPE_FLOAT;
 857         } else if (!strcmp(v, "string")) {
 858             lex->tok->ttype = TOKEN_TYPENAME;
 859             lex->tok->constval.t = TYPE_STRING;
 860         } else if (!strcmp(v, "entity")) {
 861             lex->tok->ttype = TOKEN_TYPENAME;
 862             lex->tok->constval.t = TYPE_ENTITY;
 863         } else if (!strcmp(v, "vector")) {
 864             lex->tok->ttype = TOKEN_TYPENAME;
 865             lex->tok->constval.t = TYPE_VECTOR;
 866         } else if (!strcmp(v, "for")  ||
 867                  !strcmp(v, "while")  ||
 868                  !strcmp(v, "do")     ||
 869                  !strcmp(v, "if")     ||
 870                  !strcmp(v, "else")   ||
 871                  !strcmp(v, "local")  ||
 872                  !strcmp(v, "return") ||
 873                  !strcmp(v, "const"))
 874             lex->tok->ttype = TOKEN_KEYWORD;
 875
 876         return lex->tok->ttype;
 877     }
 878
 879     if (ch == '"')
 880     {
 881         lex->tok->ttype = lex_finish_string(lex, '"');
 882         while (lex->tok->ttype == TOKEN_STRINGCONST)
 883         {
 884             /* Allow c style "string" "continuation" */
 885             ch = lex_skipwhite(lex);
 886             if (ch != '"') {
 887                 lex_ungetch(lex, ch);
 888                 break;
 889             }
 890
 891             lex->tok->ttype = lex_finish_string(lex, '"');
 892         }
 893         if (!lex_endtoken(lex))
 894             return (lex->tok->ttype = TOKEN_FATAL);
 895         return lex->tok->ttype;
 896     }
 897
 898     if (ch == '\'')
 899     {
 900         /* we parse character constants like string,
 901          * but return TOKEN_CHARCONST, or a vector type if it fits...
 902          * Likewise actual unescaping has to be done by the parser.
 903          * The difference is we don't allow 'char' 'continuation'.
 904          */
 905          lex->tok->ttype = lex_finish_string(lex, '\'');
 906          if (!lex_endtoken(lex))
 907               return (lex->tok->ttype = TOKEN_FATAL);
 908
 909          /* It's a vector if we can successfully scan 3 floats */
 910 #ifdef WIN32
 911          if (sscanf_s(lex->tok->value, " %f %f %f ",
 912                     &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
 913 #else
 914          if (sscanf(lex->tok->value, " %f %f %f ",
 915                     &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
 916 #endif
 917          {
 918               lex->tok->ttype = TOKEN_VECTORCONST;
 919          }
 920
 921          return lex->tok->ttype;
 922     }
 923
 924     if (isdigit(ch))
 925     {
 926         lex->tok->ttype = lex_finish_digit(lex, ch);
 927         if (!lex_endtoken(lex))
 928             return (lex->tok->ttype = TOKEN_FATAL);
 929         return lex->tok->ttype;
 930     }
 931
 932     lexerror(lex, "unknown token");
 933     return (lex->tok->ttype = TOKEN_ERROR);
 934 }