lexer.c

   1 #include <stdio.h>
   2 #include <stdlib.h>
   3 #include <string.h>
   4 #include <stdarg.h>
   5
   6 #include "gmqcc.h"
   7 #include "lexer.h"
   8
     /* Instantiate the vector helpers for token.value (char elements);
      * this file uses the resulting token_value_add().
      */
   9 MEM_VEC_FUNCTIONS(token, char, value)
     /* Instantiate the vector helpers for lex_file.frames (frame_macro
      * elements); this file uses the resulting lex_file_frames_add().
      */
  10 MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames)
  11
     /* Global list of file names handed out by lex_open(); used below via
      * lex_filenames_add(), lex_filenames_data and lex_filenames_elements.
      * The names are released in lex_cleanup(), not in lex_close().
      */
  12 VECTOR_MAKE(char*, lex_filenames);
  13
  14 void lexerror(lex_file *lex, const char *fmt, ...)
  15 {
  16     va_list ap;
  17
  18     if (lex)
  19         printf("error %s:%lu: ", lex->name, (unsigned long)lex->sline);
  20     else
  21         printf("error: ");
  22
  23     va_start(ap, fmt);
  24     vprintf(fmt, ap);
  25     va_end(ap);
  26
  27     printf("\n");
  28 }
  29
  30 bool lexwarn(lex_file *lex, int warn, const char *fmt, ...)
  31 {
  32     va_list ap;
  33
  34     if (!OPTS_WARN(warn))
  35         return false;
  36
  37     if (lex)
  38         printf("warning %s:%lu: ", lex->name, (unsigned long)lex->sline);
  39     else
  40         printf("warning: ");
  41
  42     va_start(ap, fmt);
  43     vprintf(fmt, ap);
  44     va_end(ap);
  45
  46     printf("\n");
  47
  48     return opts_werror;
  49 }
  50
  51 token* token_new()
  52 {
  53     token *tok = (token*)mem_a(sizeof(token));
  54     if (!tok)
  55         return NULL;
  56     memset(tok, 0, sizeof(*tok));
  57     return tok;
  58 }
  59
  60 void token_delete(token *self)
  61 {
  62     if (self->next && self->next->prev == self)
  63         self->next->prev = self->prev;
  64     if (self->prev && self->prev->next == self)
  65         self->prev->next = self->next;
  66     MEM_VECTOR_CLEAR(self, value);
  67     mem_d(self);
  68 }
  69
  70 token* token_copy(const token *cp)
  71 {
  72     token* self = token_new();
  73     if (!self)
  74         return NULL;
  75     /* copy the value */
  76     self->value_alloc = cp->value_count + 1;
  77     self->value_count = cp->value_count;
  78     self->value = (char*)mem_a(self->value_alloc);
  79     if (!self->value) {
  80         mem_d(self);
  81         return NULL;
  82     }
  83     memcpy(self->value, cp->value, cp->value_count);
  84     self->value[self->value_alloc-1] = 0;
  85
  86     /* rest */
  87     self->ctx = cp->ctx;
  88     self->ttype = cp->ttype;
  89     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
  90     return self;
  91 }
  92
  93 void token_delete_all(token *t)
  94 {
  95     token *n;
  96
  97     do {
  98         n = t->next;
  99         token_delete(t);
 100         t = n;
 101     } while(t);
 102 }
 103
 104 token* token_copy_all(const token *cp)
 105 {
 106     token *cur;
 107     token *out;
 108
 109     out = cur = token_copy(cp);
 110     if (!out)
 111         return NULL;
 112
 113     while (cp->next) {
 114         cp = cp->next;
 115         cur->next = token_copy(cp);
 116         if (!cur->next) {
 117             token_delete_all(out);
 118             return NULL;
 119         }
 120         cur->next->prev = cur;
 121         cur = cur->next;
 122     }
 123
 124     return out;
 125 }
 126
 127 lex_file* lex_open(const char *file)
 128 {
 129     lex_file *lex;
 130     FILE *in = util_fopen(file, "rb");
 131
 132     if (!in) {
 133         lexerror(NULL, "open failed: '%s'\n", file);
 134         return NULL;
 135     }
 136
 137     lex = (lex_file*)mem_a(sizeof(*lex));
 138     if (!lex) {
 139         fclose(in);
 140         lexerror(NULL, "out of memory\n");
 141         return NULL;
 142     }
 143
 144     memset(lex, 0, sizeof(*lex));
 145
 146     lex->file = in;
 147     lex->name = util_strdup(file);
 148     lex->line = 1; /* we start counting at 1 */
 149
 150     lex->peekpos = 0;
 151     lex->eof = false;
 152
 153     lex_filenames_add(lex->name);
 154
 155     return lex;
 156 }
 157
 158 void lex_cleanup(void)
 159 {
 160     size_t i;
 161     for (i = 0; i < lex_filenames_elements; ++i)
 162         mem_d(lex_filenames_data[i]);
 163     mem_d(lex_filenames_data);
 164 }
 165
 166 void lex_close(lex_file *lex)
 167 {
 168     size_t i;
 169     for (i = 0; i < lex->frames_count; ++i)
 170         mem_d(lex->frames[i].name);
 171     MEM_VECTOR_CLEAR(lex, frames);
 172
 173     if (lex->modelname)
 174         mem_d(lex->modelname);
 175
 176     if (lex->file)
 177         fclose(lex->file);
 178     if (lex->tok)
 179         token_delete(lex->tok);
 180     /* mem_d(lex->name); collected in lex_filenames */
 181     mem_d(lex);
 182 }
 183
 184 /* Get or put-back data
 185  * The following two functions do NOT understand what kind of data they
 186  * are working on.
 187  * They are merely wrapping get/put in order to count line numbers.
 188  */
 189 static int lex_getch(lex_file *lex)
 190 {
 191     int ch;
 192
 193     if (lex->peekpos) {
 194         lex->peekpos--;
 195         if (lex->peek[lex->peekpos] == '\n')
 196             lex->line++;
 197         return lex->peek[lex->peekpos];
 198     }
 199
 200     ch = fgetc(lex->file);
 201     if (ch == '\n')
 202         lex->line++;
 203     return ch;
 204 }
 205
 206 static void lex_ungetch(lex_file *lex, int ch)
 207 {
 208     lex->peek[lex->peekpos++] = ch;
 209     if (ch == '\n')
 210         lex->line--;
 211 }
 212
 213 /* classify characters
 214  * some additions to the is*() functions of ctype.h
 215  */
 216
 217 /* Idents are alphanumeric, but they start with alpha or _ */
 218 static bool isident_start(int ch)
 219 {
 220     return isalpha(ch) || ch == '_';
 221 }
 222
 223 static bool isident(int ch)
 224 {
 225     return isident_start(ch) || isdigit(ch);
 226 }
 227
 228 /* isxdigit_only is used when we already know it's not a digit
 229  * and want to see if it's a hex digit anyway.
 230  */
 231 static bool isxdigit_only(int ch)
 232 {
 233     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 234 }
 235
 236 /* Skip whitespace and comments and return the first
 237  * non-white character.
 238  * As this makes use of the above getch() ungetch() functions,
 239  * we don't need to care at all about line numbering anymore.
 240  *
 241  * In theory, this function should only be used at the beginning
 242  * of lexing, or when we *know* the next character is part of the token.
 243  * Otherwise, if the parser throws an error, the linenumber may not be
 244  * the line of the error, but the line of the next token AFTER the error.
 245  *
 246  * This is currently only problematic when using c-like string-continuation,
 247  * since comments and whitespaces are allowed between 2 such strings.
 248  * Example:
 249 printf(   "line one\n"
 250 // A comment
 251           "A continuation of the previous string"
 252 // This line is skipped
 253       , foo);
 254
 255  * In this case, if the parse decides it didn't actually want a string,
 256  * and uses lex->line to print an error, it will show the ', foo);' line's
 257  * linenumber.
 258  *
 259  * On the other hand, the parser is supposed to remember the line of the next
 260  * token's beginning. In this case we would want skipwhite() to be called
 261  * AFTER reading a token, so that the parser, before reading the NEXT token,
 262  * doesn't store the *comment's* linenumber, but the actual token's linenumber.
 263  *
 264  * THIS SOLUTION
 265  *    here is to store the line of the first character after skipping
 266  *    the initial whitespace in lex->sline, this happens in lex_do.
 267  */
 268 static int lex_skipwhite(lex_file *lex)
 269 {
 270     int ch = 0;
 271
 272     do
 273     {
 274         ch = lex_getch(lex);
 275         while (ch != EOF && isspace(ch)) ch = lex_getch(lex);
 276
 277         if (ch == '/') {
 278             ch = lex_getch(lex);
 279             if (ch == '/')
 280             {
 281                 /* one line comment */
 282                 ch = lex_getch(lex);
 283
 284                 /* check for special: '/', '/', '*', '/' */
 285                 if (ch == '*') {
 286                     ch = lex_getch(lex);
 287                     if (ch == '/') {
 288                         ch = ' ';
 289                         continue;
 290                     }
 291                 }
 292
 293                 while (ch != EOF && ch != '\n') {
 294                     ch = lex_getch(lex);
 295                 }
 296                 continue;
 297             }
 298             if (ch == '*')
 299             {
 300                 /* multiline comment */
 301                 while (ch != EOF)
 302                 {
 303                     ch = lex_getch(lex);
 304                     if (ch == '*') {
 305                         ch = lex_getch(lex);
 306                         if (ch == '/') {
 307                             ch = lex_getch(lex);
 308                             break;
 309                         }
 310                     }
 311                 }
 312                 if (ch == '/') /* allow *//* direct following comment */
 313                 {
 314                     lex_ungetch(lex, ch);
 315                     ch = ' '; /* cause TRUE in the isspace check */
 316                 }
 317                 continue;
 318             }
 319             /* Otherwise roll back to the slash and break out of the loop */
 320             lex_ungetch(lex, ch);
 321             ch = '/';
 322             break;
 323         }
 324     } while (ch != EOF && isspace(ch));
 325
 326     return ch;
 327 }
 328
 329 /* Append a character to the token buffer */
 330 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
 331 {
 332     if (!token_value_add(lex->tok, ch)) {
 333         lexerror(lex, "out of memory");
 334         return false;
 335     }
 336     return true;
 337 }
 338
 339 /* Append a trailing null-byte */
 340 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
 341 {
 342     if (!token_value_add(lex->tok, 0)) {
 343         lexerror(lex, "out of memory");
 344         return false;
 345     }
 346     lex->tok->value_count--;
 347     return true;
 348 }
 349
 350 /* Read the remaining characters of an identifier into the token */
 351 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 352 {
 353     int ch;
 354
 355     ch = lex_getch(lex);
 356     while (ch != EOF && isident(ch))
 357     {
 358         if (!lex_tokench(lex, ch))
 359             return (lex->tok->ttype = TOKEN_FATAL);
 360         ch = lex_getch(lex);
 361     }
 362
 363     /* last ch was not an ident ch: */
 364     lex_ungetch(lex, ch);
 365
 366     return true;
 367 }
 368
 369 /* read one ident for the frame list */
 370 static int lex_parse_frame(lex_file *lex)
 371 {
 372     int ch;
 373
 374     if (lex->tok)
 375         token_delete(lex->tok);
 376     lex->tok = token_new();
 377
 378     ch = lex_getch(lex);
 379     while (ch != EOF && ch != '\n' && isspace(ch))
 380         ch = lex_getch(lex);
 381
 382     if (ch == '\n')
 383         return 1;
 384
 385     if (!isident_start(ch)) {
 386         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 387         return -1;
 388     }
 389
 390     if (!lex_tokench(lex, ch))
 391         return -1;
 392     if (!lex_finish_ident(lex))
 393         return -1;
 394     if (!lex_endtoken(lex))
 395         return -1;
 396     return 0;
 397 }
 398
 399 /* read a list of $frames */
 400 static bool lex_finish_frames(lex_file *lex)
 401 {
 402     do {
 403         size_t i;
 404         int    rc;
 405         frame_macro m;
 406
 407         rc = lex_parse_frame(lex);
 408         if (rc > 0) /* end of line */
 409             return true;
 410         if (rc < 0) /* error */
 411             return false;
 412
 413         for (i = 0; i < lex->frames_count; ++i) {
 414             if (!strcmp(lex->tok->value, lex->frames[i].name)) {
 415                 lex->frames[i].value = lex->framevalue++;
 416                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok->value))
 417                     return false;
 418                 continue;
 419             }
 420         }
 421
 422         m.value = lex->framevalue++;
 423         m.name = lex->tok->value;
 424         lex->tok->value = NULL;
 425         if (!lex_file_frames_add(lex, m)) {
 426             lexerror(lex, "out of memory");
 427             return false;
 428         }
 429     } while (true);
 430 }
 431
 432 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
 433 {
 434     int ch = 0;
 435
 436     while (ch != EOF)
 437     {
 438         ch = lex_getch(lex);
 439         if (ch == quote)
 440             return TOKEN_STRINGCONST;
 441
 442         if (ch == '\\') {
 443             ch = lex_getch(lex);
 444             if (ch == EOF) {
 445                 lexerror(lex, "unexpected end of file");
 446                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 447                 return (lex->tok->ttype = TOKEN_ERROR);
 448             }
 449
 450             switch (ch) {
 451             case '\\': break;
 452             case 'a':  ch = '\a'; break;
 453             case 'b':  ch = '\b'; break;
 454             case 'r':  ch = '\r'; break;
 455             case 'n':  ch = '\n'; break;
 456             case 't':  ch = '\t'; break;
 457             case 'f':  ch = '\f'; break;
 458             case 'v':  ch = '\v'; break;
 459             default:
 460                 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
 461                 /* so we just add the character plus backslash no matter what it actually is */
 462                 if (!lex_tokench(lex, '\\'))
 463                     return (lex->tok->ttype = TOKEN_FATAL);
 464             }
 465             /* add the character finally */
 466             if (!lex_tokench(lex, ch))
 467                 return (lex->tok->ttype = TOKEN_FATAL);
 468         }
 469         else if (!lex_tokench(lex, ch))
 470             return (lex->tok->ttype = TOKEN_FATAL);
 471     }
 472     lexerror(lex, "unexpected end of file within string constant");
 473     lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
 474     return (lex->tok->ttype = TOKEN_ERROR);
 475 }
 476
 477 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 478 {
 479     bool ishex = false;
 480
 481     int  ch = lastch;
 482
 483     /* parse a number... */
 484     lex->tok->ttype = TOKEN_INTCONST;
 485
 486     if (!lex_tokench(lex, ch))
 487         return (lex->tok->ttype = TOKEN_FATAL);
 488
 489     ch = lex_getch(lex);
 490     if (ch != '.' && !isdigit(ch))
 491     {
 492         if (lastch != '0' || ch != 'x')
 493         {
 494             /* end of the number or EOF */
 495             lex_ungetch(lex, ch);
 496             if (!lex_endtoken(lex))
 497                 return (lex->tok->ttype = TOKEN_FATAL);
 498
 499             lex->tok->constval.i = lastch - '0';
 500             return lex->tok->ttype;
 501         }
 502
 503         ishex = true;
 504     }
 505
 506     /* EOF would have been caught above */
 507
 508     if (ch != '.')
 509     {
 510         if (!lex_tokench(lex, ch))
 511             return (lex->tok->ttype = TOKEN_FATAL);
 512         ch = lex_getch(lex);
 513         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 514         {
 515             if (!lex_tokench(lex, ch))
 516                 return (lex->tok->ttype = TOKEN_FATAL);
 517             ch = lex_getch(lex);
 518         }
 519     }
 520     /* NOT else, '.' can come from above as well */
 521     if (ch == '.' && !ishex)
 522     {
 523         /* Allow floating comma in non-hex mode */
 524         lex->tok->ttype = TOKEN_FLOATCONST;
 525         if (!lex_tokench(lex, ch))
 526             return (lex->tok->ttype = TOKEN_FATAL);
 527
 528         /* continue digits-only */
 529         ch = lex_getch(lex);
 530         while (isdigit(ch))
 531         {
 532             if (!lex_tokench(lex, ch))
 533                 return (lex->tok->ttype = TOKEN_FATAL);
 534             ch = lex_getch(lex);
 535         }
 536     }
 537     /* put back the last character */
 538     /* but do not put back the trailing 'f' or a float */
 539     if (lex->tok->ttype == TOKEN_FLOATCONST && ch == 'f')
 540         ch = lex_getch(lex);
 541
 542     /* generally we don't want words to follow numbers: */
 543     if (isident(ch)) {
 544         lexerror(lex, "unexpected trailing characters after number");
 545         return (lex->tok->ttype = TOKEN_ERROR);
 546     }
 547     lex_ungetch(lex, ch);
 548
 549     if (!lex_endtoken(lex))
 550         return (lex->tok->ttype = TOKEN_FATAL);
 551     if (lex->tok->ttype == TOKEN_FLOATCONST)
 552         lex->tok->constval.f = strtod(lex->tok->value, NULL);
 553     else
 554         lex->tok->constval.i = strtol(lex->tok->value, NULL, 0);
 555     return lex->tok->ttype;
 556 }
 557
 558 int lex_do(lex_file *lex)
 559 {
 560     int ch, nextch;
 561
 562     if (lex->tok)
 563         token_delete(lex->tok);
 564     lex->tok = token_new();
 565     if (!lex->tok)
 566         return TOKEN_FATAL;
 567
 568     ch = lex_skipwhite(lex);
 569     lex->sline = lex->line;
 570     lex->tok->ctx.line = lex->sline;
 571     lex->tok->ctx.file = lex->name;
 572
 573     if (lex->eof)
 574         return (lex->tok->ttype = TOKEN_FATAL);
 575
 576     if (ch == EOF) {
 577         lex->eof = true;
 578         return (lex->tok->ttype = TOKEN_EOF);
 579     }
 580
 581     /* modelgen / spiritgen commands */
 582     if (ch == '$') {
 583         const char *v;
 584         size_t frame;
 585
 586         ch = lex_getch(lex);
 587         if (!isident_start(ch)) {
 588             lexerror(lex, "hanging '$' modelgen/spritegen command line");
 589             return lex_do(lex);
 590         }
 591         if (!lex_tokench(lex, ch))
 592             return (lex->tok->ttype = TOKEN_FATAL);
 593         if (!lex_finish_ident(lex))
 594             return (lex->tok->ttype = TOKEN_ERROR);
 595         if (!lex_endtoken(lex))
 596             return (lex->tok->ttype = TOKEN_FATAL);
 597         /* skip the known commands */
 598         v = lex->tok->value;
 599
 600         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
 601         {
 602             /* frame/framesave command works like an enum
 603              * similar to fteqcc we handle this in the lexer.
 604              * The reason for this is that it is sensitive to newlines,
 605              * which the parser is unaware of
 606              */
 607             if (!lex_finish_frames(lex))
 608                  return (lex->tok->ttype = TOKEN_ERROR);
 609             return lex_do(lex);
 610         }
 611
 612         if (!strcmp(v, "framevalue"))
 613         {
 614             ch = lex_getch(lex);
 615             while (ch != EOF && isspace(ch) && ch != '\n')
 616                 ch = lex_getch(lex);
 617
 618             if (!isdigit(ch)) {
 619                 lexerror(lex, "$framevalue requires an integer parameter");
 620                 return lex_do(lex);
 621             }
 622
 623             token_delete(lex->tok);
 624             lex->tok = token_new();
 625             lex->tok->ttype = lex_finish_digit(lex, ch);
 626             if (!lex_endtoken(lex))
 627                 return (lex->tok->ttype = TOKEN_FATAL);
 628             if (lex->tok->ttype != TOKEN_INTCONST) {
 629                 lexerror(lex, "$framevalue requires an integer parameter");
 630                 return lex_do(lex);
 631             }
 632             lex->framevalue = lex->tok->constval.i;
 633             return lex_do(lex);
 634         }
 635
 636         if (!strcmp(v, "framerestore"))
 637         {
 638             int rc;
 639
 640             token_delete(lex->tok);
 641             lex->tok = token_new();
 642
 643             rc = lex_parse_frame(lex);
 644
 645             if (rc > 0) {
 646                 lexerror(lex, "$framerestore requires a framename parameter");
 647                 return lex_do(lex);
 648             }
 649             if (rc < 0)
 650                 return (lex->tok->ttype = TOKEN_FATAL);
 651
 652             v = lex->tok->value;
 653             for (frame = 0; frame < lex->frames_count; ++frame) {
 654                 if (!strcmp(v, lex->frames[frame].name)) {
 655                     lex->framevalue = lex->frames[frame].value;
 656                     return lex_do(lex);
 657                 }
 658             }
 659             lexerror(lex, "unknown framename `%s`", v);
 660             return lex_do(lex);
 661         }
 662
 663         if (!strcmp(v, "modelname"))
 664         {
 665             int rc;
 666
 667             token_delete(lex->tok);
 668             lex->tok = token_new();
 669
 670             rc = lex_parse_frame(lex);
 671
 672             if (rc > 0) {
 673                 lexerror(lex, "$framerestore requires a framename parameter");
 674                 return lex_do(lex);
 675             }
 676             if (rc < 0)
 677                 return (lex->tok->ttype = TOKEN_FATAL);
 678
 679             v = lex->tok->value;
 680             if (lex->modelname) {
 681                 frame_macro m;
 682                 m.value = lex->framevalue;
 683                 m.name = lex->modelname;
 684                 lex->modelname = NULL;
 685                 if (!lex_file_frames_add(lex, m)) {
 686                     lexerror(lex, "out of memory");
 687                     return (lex->tok->ttype = TOKEN_FATAL);
 688                 }
 689             }
 690             lex->modelname = lex->tok->value;
 691             lex->tok->value = NULL;
 692             for (frame = 0; frame < lex->frames_count; ++frame) {
 693                 if (!strcmp(v, lex->frames[frame].name)) {
 694                     lex->framevalue = lex->frames[frame].value;
 695                     break;
 696                 }
 697             }
 698             return lex_do(lex);
 699         }
 700
 701         if (!strcmp(v, "flush"))
 702         {
 703             size_t frame;
 704             for (frame = 0; frame < lex->frames_count; ++frame)
 705                 mem_d(lex->frames[frame].name);
 706             MEM_VECTOR_CLEAR(lex, frames);
 707             /* skip line (fteqcc does it too) */
 708             ch = lex_getch(lex);
 709             while (ch != EOF && ch != '\n')
 710                 ch = lex_getch(lex);
 711             return lex_do(lex);
 712         }
 713
 714         if (!strcmp(v, "cd") ||
 715             !strcmp(v, "origin") ||
 716             !strcmp(v, "base") ||
 717             !strcmp(v, "flags") ||
 718             !strcmp(v, "scale") ||
 719             !strcmp(v, "skin"))
 720         {
 721             /* skip line */
 722             ch = lex_getch(lex);
 723             while (ch != EOF && ch != '\n')
 724                 ch = lex_getch(lex);
 725             return lex_do(lex);
 726         }
 727
 728         for (frame = 0; frame < lex->frames_count; ++frame) {
 729             if (!strcmp(v, lex->frames[frame].name)) {
 730                 lex->tok->constval.i = lex->frames[frame].value;
 731                 return (lex->tok->ttype = TOKEN_INTCONST);
 732             }
 733         }
 734
 735         lexerror(lex, "invalid frame macro");
 736         return lex_do(lex);
 737     }
 738
 739     /* single-character tokens */
 740     switch (ch)
 741     {
 742         case '(':
 743             if (!lex_tokench(lex, ch) ||
 744                 !lex_endtoken(lex))
 745             {
 746                 return (lex->tok->ttype = TOKEN_FATAL);
 747             }
 748             if (lex->flags.noops)
 749                 return (lex->tok->ttype = ch);
 750             else
 751                 return (lex->tok->ttype = TOKEN_OPERATOR);
 752         case ')':
 753         case ';':
 754         case '{':
 755         case '}':
 756         case '[':
 757         case ']':
 758
 759         case '#':
 760             if (!lex_tokench(lex, ch) ||
 761                 !lex_endtoken(lex))
 762             {
 763                 return (lex->tok->ttype = TOKEN_FATAL);
 764             }
 765             return (lex->tok->ttype = ch);
 766         default:
 767             break;
 768     }
 769
 770     if (lex->flags.noops)
 771     {
 772         /* Detect characters early which are normally
 773          * operators OR PART of an operator.
 774          */
 775         switch (ch)
 776         {
 777             case '+':
 778             case '-':
 779             case '*':
 780             case '/':
 781             case '<':
 782             case '>':
 783             case '=':
 784             case '&':
 785             case '|':
 786             case '^':
 787             case '~':
 788             case ',':
 789             case '!':
 790                 if (!lex_tokench(lex, ch) ||
 791                     !lex_endtoken(lex))
 792                 {
 793                     return (lex->tok->ttype = TOKEN_FATAL);
 794                 }
 795                 return (lex->tok->ttype = ch);
 796             default:
 797                 break;
 798         }
 799
 800         if (ch == '.')
 801         {
 802             if (!lex_tokench(lex, ch))
 803                 return (lex->tok->ttype = TOKEN_FATAL);
 804             /* peak ahead once */
 805             nextch = lex_getch(lex);
 806             if (nextch != '.') {
 807                 lex_ungetch(lex, nextch);
 808                 if (!lex_endtoken(lex))
 809                     return (lex->tok->ttype = TOKEN_FATAL);
 810                 return (lex->tok->ttype = ch);
 811             }
 812             /* peak ahead again */
 813             nextch = lex_getch(lex);
 814             if (nextch != '.') {
 815                 lex_ungetch(lex, nextch);
 816                 lex_ungetch(lex, nextch);
 817                 if (!lex_endtoken(lex))
 818                     return (lex->tok->ttype = TOKEN_FATAL);
 819                 return (lex->tok->ttype = ch);
 820             }
 821             /* fill the token to be "..." */
 822             if (!lex_tokench(lex, ch) ||
 823                 !lex_tokench(lex, ch) ||
 824                 !lex_endtoken(lex))
 825             {
 826                 return (lex->tok->ttype = TOKEN_FATAL);
 827             }
 828             return (lex->tok->ttype = TOKEN_DOTS);
 829         }
 830     }
 831
 832     if (ch == ',' || ch == '.') {
 833         if (!lex_tokench(lex, ch) ||
 834             !lex_endtoken(lex))
 835         {
 836             return (lex->tok->ttype = TOKEN_FATAL);
 837         }
 838         return (lex->tok->ttype = TOKEN_OPERATOR);
 839     }
 840
 841     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
 842         ch == '>' || ch == '<' || /* <<, >>, <=, >= */
 843         ch == '=' || ch == '!' || /* ==, != */
 844         ch == '&' || ch == '|')   /* &&, ||, &=, |= */
 845     {
 846         if (!lex_tokench(lex, ch))
 847             return (lex->tok->ttype = TOKEN_FATAL);
 848
 849         nextch = lex_getch(lex);
 850         if (nextch == ch || nextch == '=') {
 851             if (!lex_tokench(lex, nextch))
 852                 return (lex->tok->ttype = TOKEN_FATAL);
 853         } else if (ch == '-' && nextch == '>') {
 854             if (!lex_tokench(lex, nextch))
 855                 return (lex->tok->ttype = TOKEN_FATAL);
 856         } else
 857             lex_ungetch(lex, nextch);
 858
 859         if (!lex_endtoken(lex))
 860             return (lex->tok->ttype = TOKEN_FATAL);
 861         return (lex->tok->ttype = TOKEN_OPERATOR);
 862     }
 863
 864     /*
 865     if (ch == '^' || ch == '~' || ch == '!')
 866     {
 867         if (!lex_tokench(lex, ch) ||
 868             !lex_endtoken(lex))
 869         {
 870             return (lex->tok->ttype = TOKEN_FATAL);
 871         }
 872         return (lex->tok->ttype = TOKEN_OPERATOR);
 873     }
 874     */
 875
 876     if (ch == '*' || ch == '/') /* *=, /= */
 877     {
 878         if (!lex_tokench(lex, ch))
 879             return (lex->tok->ttype = TOKEN_FATAL);
 880
 881         nextch = lex_getch(lex);
 882         if (nextch == '=') {
 883             if (!lex_tokench(lex, nextch))
 884                 return (lex->tok->ttype = TOKEN_FATAL);
 885         } else
 886             lex_ungetch(lex, nextch);
 887
 888         if (!lex_endtoken(lex))
 889             return (lex->tok->ttype = TOKEN_FATAL);
 890         return (lex->tok->ttype = TOKEN_OPERATOR);
 891     }
 892
 893     if (isident_start(ch))
 894     {
 895         const char *v;
 896
 897         if (!lex_tokench(lex, ch))
 898             return (lex->tok->ttype = TOKEN_FATAL);
 899         if (!lex_finish_ident(lex)) {
 900             /* error? */
 901             return (lex->tok->ttype = TOKEN_ERROR);
 902         }
 903         if (!lex_endtoken(lex))
 904             return (lex->tok->ttype = TOKEN_FATAL);
 905         lex->tok->ttype = TOKEN_IDENT;
 906
 907         v = lex->tok->value;
 908         if (!strcmp(v, "void")) {
 909             lex->tok->ttype = TOKEN_TYPENAME;
 910             lex->tok->constval.t = TYPE_VOID;
 911         } else if (!strcmp(v, "int")) {
 912             lex->tok->ttype = TOKEN_TYPENAME;
 913             lex->tok->constval.t = TYPE_INTEGER;
 914         } else if (!strcmp(v, "float")) {
 915             lex->tok->ttype = TOKEN_TYPENAME;
 916             lex->tok->constval.t = TYPE_FLOAT;
 917         } else if (!strcmp(v, "string")) {
 918             lex->tok->ttype = TOKEN_TYPENAME;
 919             lex->tok->constval.t = TYPE_STRING;
 920         } else if (!strcmp(v, "entity")) {
 921             lex->tok->ttype = TOKEN_TYPENAME;
 922             lex->tok->constval.t = TYPE_ENTITY;
 923         } else if (!strcmp(v, "vector")) {
 924             lex->tok->ttype = TOKEN_TYPENAME;
 925             lex->tok->constval.t = TYPE_VECTOR;
 926         } else if (!strcmp(v, "for")  ||
 927                  !strcmp(v, "while")  ||
 928                  !strcmp(v, "do")     ||
 929                  !strcmp(v, "if")     ||
 930                  !strcmp(v, "else")   ||
 931                  !strcmp(v, "local")  ||
 932                  !strcmp(v, "return") ||
 933                  !strcmp(v, "const"))
 934             lex->tok->ttype = TOKEN_KEYWORD;
 935
 936         return lex->tok->ttype;
 937     }
 938
 939     if (ch == '"')
 940     {
 941         lex->tok->ttype = lex_finish_string(lex, '"');
 942         while (lex->tok->ttype == TOKEN_STRINGCONST)
 943         {
 944             /* Allow c style "string" "continuation" */
 945             ch = lex_skipwhite(lex);
 946             if (ch != '"') {
 947                 lex_ungetch(lex, ch);
 948                 break;
 949             }
 950
 951             lex->tok->ttype = lex_finish_string(lex, '"');
 952         }
 953         if (!lex_endtoken(lex))
 954             return (lex->tok->ttype = TOKEN_FATAL);
 955         return lex->tok->ttype;
 956     }
 957
 958     if (ch == '\'')
 959     {
 960         /* we parse character constants like string,
 961          * but return TOKEN_CHARCONST, or a vector type if it fits...
 962          * Likewise actual unescaping has to be done by the parser.
 963          * The difference is we don't allow 'char' 'continuation'.
 964          */
 965          lex->tok->ttype = lex_finish_string(lex, '\'');
 966          if (!lex_endtoken(lex))
 967               return (lex->tok->ttype = TOKEN_FATAL);
 968
 969          /* It's a vector if we can successfully scan 3 floats */
 970 #ifdef WIN32
 971          if (sscanf_s(lex->tok->value, " %f %f %f ",
 972                     &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
 973 #else
 974          if (sscanf(lex->tok->value, " %f %f %f ",
 975                     &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
 976 #endif
 977          {
 978               lex->tok->ttype = TOKEN_VECTORCONST;
 979          }
 980
 981          return lex->tok->ttype;
 982     }
 983
 984     if (isdigit(ch))
 985     {
 986         lex->tok->ttype = lex_finish_digit(lex, ch);
 987         if (!lex_endtoken(lex))
 988             return (lex->tok->ttype = TOKEN_FATAL);
 989         return lex->tok->ttype;
 990     }
 991
 992     lexerror(lex, "unknown token");
 993     return (lex->tok->ttype = TOKEN_ERROR);
 994 }