lexer.c

   1 /*
   2  * Copyright (C) 2012, 2013
   3  *     Wolfgang Bumiller
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   6  * this software and associated documentation files (the "Software"), to deal in
   7  * the Software without restriction, including without limitation the rights to
   8  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9  * of the Software, and to permit persons to whom the Software is furnished to do
  10  * so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be included in all
  13  * copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include <stdarg.h>
  27
  28 #include "gmqcc.h"
  29 #include "lexer.h"
  30
  31 /*
  32  * List of Keywords
  33  */
  34
  35 /* original */
  36 static const char *keywords_qc[] = {
  37     "for", "do", "while",
  38     "if", "else",
  39     "local",
  40     "return",
  41     "const"
  42 };
  43 static size_t num_keywords_qc = sizeof(keywords_qc) / sizeof(keywords_qc[0]);
  44
  45 /* For fte/gmgqcc */
  46 static const char *keywords_fg[] = {
  47     "switch", "case", "default",
  48     "struct", "union",
  49     "break", "continue",
  50     "typedef",
  51     "goto",
  52
  53     "__builtin_debug_printtype"
  54 };
  55 static size_t num_keywords_fg = sizeof(keywords_fg) / sizeof(keywords_fg[0]);
  56
  57 /*
  58  * Lexer code
  59  */
  60
  61 char* *lex_filenames;
  62
  63 void lexerror(lex_file *lex, const char *fmt, ...)
  64 {
  65     va_list ap;
  66
  67     va_start(ap, fmt);
  68     if (lex)
  69         con_vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
  70     else
  71         con_vprintmsg(LVL_ERROR, "", 0, "parse error", fmt, ap);
  72     va_end(ap);
  73 }
  74
  75 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  76 {
  77     bool    r;
  78     lex_ctx ctx;
  79     va_list ap;
  80
  81     ctx.file = lex->name;
  82     ctx.line = lex->sline;
  83
  84     va_start(ap, fmt);
  85     r = vcompile_warning(ctx, warntype, fmt, ap);
  86     va_end(ap);
  87     return r;
  88 }
  89
  90
  91 #if 0
  92 token* token_new()
  93 {
  94     token *tok = (token*)mem_a(sizeof(token));
  95     if (!tok)
  96         return NULL;
  97     memset(tok, 0, sizeof(*tok));
  98     return tok;
  99 }
 100
 101 void token_delete(token *self)
 102 {
 103     if (self->next && self->next->prev == self)
 104         self->next->prev = self->prev;
 105     if (self->prev && self->prev->next == self)
 106         self->prev->next = self->next;
 107     MEM_VECTOR_CLEAR(self, value);
 108     mem_d(self);
 109 }
 110
 111 token* token_copy(const token *cp)
 112 {
 113     token* self = token_new();
 114     if (!self)
 115         return NULL;
 116     /* copy the value */
 117     self->value_alloc = cp->value_count + 1;
 118     self->value_count = cp->value_count;
 119     self->value = (char*)mem_a(self->value_alloc);
 120     if (!self->value) {
 121         mem_d(self);
 122         return NULL;
 123     }
 124     memcpy(self->value, cp->value, cp->value_count);
 125     self->value[self->value_alloc-1] = 0;
 126
 127     /* rest */
 128     self->ctx = cp->ctx;
 129     self->ttype = cp->ttype;
 130     memcpy(&self->constval, &cp->constval, sizeof(self->constval));
 131     return self;
 132 }
 133
 134 void token_delete_all(token *t)
 135 {
 136     token *n;
 137
 138     do {
 139         n = t->next;
 140         token_delete(t);
 141         t = n;
 142     } while(t);
 143 }
 144
 145 token* token_copy_all(const token *cp)
 146 {
 147     token *cur;
 148     token *out;
 149
 150     out = cur = token_copy(cp);
 151     if (!out)
 152         return NULL;
 153
 154     while (cp->next) {
 155         cp = cp->next;
 156         cur->next = token_copy(cp);
 157         if (!cur->next) {
 158             token_delete_all(out);
 159             return NULL;
 160         }
 161         cur->next->prev = cur;
 162         cur = cur->next;
 163     }
 164
 165     return out;
 166 }
 167 #else
 168 static void lex_token_new(lex_file *lex)
 169 {
 170 #if 0
 171     if (lex->tok)
 172         token_delete(lex->tok);
 173     lex->tok = token_new();
 174 #else
 175     if (lex->tok.value)
 176         vec_shrinkto(lex->tok.value, 0);
 177     lex->tok.constval.t  = 0;
 178     lex->tok.ctx.line = lex->sline;
 179     lex->tok.ctx.file = lex->name;
 180 #endif
 181 }
 182 #endif
 183
 184 lex_file* lex_open(const char *file)
 185 {
 186     lex_file *lex;
 187     FILE *in = fs_file_open(file, "rb");
 188
 189     if (!in) {
 190         lexerror(NULL, "open failed: '%s'\n", file);
 191         return NULL;
 192     }
 193
 194     lex = (lex_file*)mem_a(sizeof(*lex));
 195     if (!lex) {
 196         fs_file_close(in);
 197         lexerror(NULL, "out of memory\n");
 198         return NULL;
 199     }
 200
 201     memset(lex, 0, sizeof(*lex));
 202
 203     lex->file = in;
 204     lex->name = util_strdup(file);
 205     lex->line = 1; /* we start counting at 1 */
 206
 207     lex->peekpos = 0;
 208     lex->eof = false;
 209
 210     vec_push(lex_filenames, lex->name);
 211     return lex;
 212 }
 213
 214 lex_file* lex_open_string(const char *str, size_t len, const char *name)
 215 {
 216     lex_file *lex;
 217
 218     lex = (lex_file*)mem_a(sizeof(*lex));
 219     if (!lex) {
 220         lexerror(NULL, "out of memory\n");
 221         return NULL;
 222     }
 223
 224     memset(lex, 0, sizeof(*lex));
 225
 226     lex->file = NULL;
 227     lex->open_string        = str;
 228     lex->open_string_length = len;
 229     lex->open_string_pos    = 0;
 230
 231     lex->name = util_strdup(name ? name : "<string-source>");
 232     lex->line = 1; /* we start counting at 1 */
 233
 234     lex->peekpos = 0;
 235     lex->eof = false;
 236
 237     vec_push(lex_filenames, lex->name);
 238
 239     return lex;
 240 }
 241
 242 void lex_cleanup(void)
 243 {
 244     size_t i;
 245     for (i = 0; i < vec_size(lex_filenames); ++i)
 246         mem_d(lex_filenames[i]);
 247     vec_free(lex_filenames);
 248 }
 249
 250 void lex_close(lex_file *lex)
 251 {
 252     size_t i;
 253     for (i = 0; i < vec_size(lex->frames); ++i)
 254         mem_d(lex->frames[i].name);
 255     vec_free(lex->frames);
 256
 257     if (lex->modelname)
 258         vec_free(lex->modelname);
 259
 260     if (lex->file)
 261         fs_file_close(lex->file);
 262 #if 0
 263     if (lex->tok)
 264         token_delete(lex->tok);
 265 #else
 266     vec_free(lex->tok.value);
 267 #endif
 268     /* mem_d(lex->name); collected in lex_filenames */
 269     mem_d(lex);
 270 }
 271
 272 static int lex_fgetc(lex_file *lex)
 273 {
 274     if (lex->file) {
 275         return fs_file_getc(lex->file);
 276     }
 277     if (lex->open_string) {
 278         if (lex->open_string_pos >= lex->open_string_length)
 279             return EOF;
 280         return lex->open_string[lex->open_string_pos++];
 281     }
 282     return EOF;
 283 }
 284
 285 /* Get or put-back data
 286  * The following to functions do NOT understand what kind of data they
 287  * are working on.
 288  * The are merely wrapping get/put in order to count line numbers.
 289  */
 290 static void lex_ungetch(lex_file *lex, int ch);
 291 static int lex_try_trigraph(lex_file *lex, int old)
 292 {
 293     int c2, c3;
 294     c2 = lex_fgetc(lex);
 295     if (!lex->push_line && c2 == '\n')
 296         lex->line++;
 297     if (c2 != '?') {
 298         lex_ungetch(lex, c2);
 299         return old;
 300     }
 301
 302     c3 = lex_fgetc(lex);
 303     if (!lex->push_line && c3 == '\n')
 304         lex->line++;
 305     switch (c3) {
 306         case '=': return '#';
 307         case '/': return '\\';
 308         case '\'': return '^';
 309         case '(': return '[';
 310         case ')': return ']';
 311         case '!': return '|';
 312         case '<': return '{';
 313         case '>': return '}';
 314         case '-': return '~';
 315         default:
 316             lex_ungetch(lex, c3);
 317             lex_ungetch(lex, c2);
 318             return old;
 319     }
 320 }
 321
 322 static int lex_try_digraph(lex_file *lex, int ch)
 323 {
 324     int c2;
 325     c2 = lex_fgetc(lex);
 326     /* we just used fgetc() so count lines
 327      * need to offset a \n the ungetch would recognize
 328      */
 329     if (!lex->push_line && c2 == '\n')
 330         lex->line++;
 331     if      (ch == '<' && c2 == ':')
 332         return '[';
 333     else if (ch == ':' && c2 == '>')
 334         return ']';
 335     else if (ch == '<' && c2 == '%')
 336         return '{';
 337     else if (ch == '%' && c2 == '>')
 338         return '}';
 339     else if (ch == '%' && c2 == ':')
 340         return '#';
 341     lex_ungetch(lex, c2);
 342     return ch;
 343 }
 344
 345 static int lex_getch(lex_file *lex)
 346 {
 347     int ch;
 348
 349     if (lex->peekpos) {
 350         lex->peekpos--;
 351         if (!lex->push_line && lex->peek[lex->peekpos] == '\n')
 352             lex->line++;
 353         return lex->peek[lex->peekpos];
 354     }
 355
 356     ch = lex_fgetc(lex);
 357     if (!lex->push_line && ch == '\n')
 358         lex->line++;
 359     else if (ch == '?')
 360         return lex_try_trigraph(lex, ch);
 361     else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
 362         return lex_try_digraph(lex, ch);
 363     return ch;
 364 }
 365
 366 static void lex_ungetch(lex_file *lex, int ch)
 367 {
 368     lex->peek[lex->peekpos++] = ch;
 369     if (!lex->push_line && ch == '\n')
 370         lex->line--;
 371 }
 372
 373 /* classify characters
 374  * some additions to the is*() functions of ctype.h
 375  */
 376
 377 /* Idents are alphanumberic, but they start with alpha or _ */
 378 static bool isident_start(int ch)
 379 {
 380     return isalpha(ch) || ch == '_';
 381 }
 382
 383 static bool isident(int ch)
 384 {
 385     return isident_start(ch) || isdigit(ch);
 386 }
 387
 388 /* isxdigit_only is used when we already know it's not a digit
 389  * and want to see if it's a hex digit anyway.
 390  */
 391 static bool isxdigit_only(int ch)
 392 {
 393     return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
 394 }
 395
/* Append the character `ch` to the current token's value buffer.
 * The buffer is not NUL-terminated by this call; see lex_endtoken().
 */
static void lex_tokench(lex_file *lex, int ch)
{
    vec_push(lex->tok.value, ch);
}
 401
/* NUL-terminate the current token's value buffer.
 *
 * A 0 is pushed and the vector is immediately shrunk by one again, so -
 * assuming vec_shrinkby() only lowers the element count and leaves the
 * storage in place - the terminator sits right behind the token text
 * without being counted as part of it.
 */
static void lex_endtoken(lex_file *lex)
{
    vec_push(lex->tok.value, 0);
    vec_shrinkby(lex->tok.value, 1);
}
 408
 409 static bool lex_try_pragma(lex_file *lex)
 410 {
 411     int ch;
 412     char *pragma  = NULL;
 413     char *command = NULL;
 414     char *param   = NULL;
 415     size_t line;
 416
 417     if (lex->flags.preprocessing)
 418         return false;
 419
 420     line = lex->line;
 421
 422     ch = lex_getch(lex);
 423     if (ch != '#') {
 424         lex_ungetch(lex, ch);
 425         return false;
 426     }
 427
 428     for (ch = lex_getch(lex); vec_size(pragma) < 8 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
 429         vec_push(pragma, ch);
 430     vec_push(pragma, 0);
 431
 432     if (ch != ' ' || strcmp(pragma, "pragma")) {
 433         lex_ungetch(lex, ch);
 434         goto unroll;
 435     }
 436
 437     for (ch = lex_getch(lex); vec_size(command) < 32 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
 438         vec_push(command, ch);
 439     vec_push(command, 0);
 440
 441     if (ch != '(') {
 442         lex_ungetch(lex, ch);
 443         goto unroll;
 444     }
 445
 446     for (ch = lex_getch(lex); vec_size(param) < 1024 && ch != ')' && ch != '\n'; ch = lex_getch(lex))
 447         vec_push(param, ch);
 448     vec_push(param, 0);
 449
 450     if (ch != ')') {
 451         lex_ungetch(lex, ch);
 452         goto unroll;
 453     }
 454
 455     if (!strcmp(command, "push")) {
 456         if (!strcmp(param, "line")) {
 457             lex->push_line++;
 458             if (lex->push_line == 1)
 459                 --line;
 460         }
 461         else
 462             goto unroll;
 463     }
 464     else if (!strcmp(command, "pop")) {
 465         if (!strcmp(param, "line")) {
 466             if (lex->push_line)
 467                 lex->push_line--;
 468             if (lex->push_line == 0)
 469                 --line;
 470         }
 471         else
 472             goto unroll;
 473     }
 474     else if (!strcmp(command, "file")) {
 475         lex->name = util_strdup(param);
 476         vec_push(lex_filenames, lex->name);
 477     }
 478     else if (!strcmp(command, "line")) {
 479         line = strtol(param, NULL, 0)-1;
 480     }
 481     else
 482         goto unroll;
 483
 484     lex->line = line;
 485     while (ch != '\n' && ch != EOF)
 486         ch = lex_getch(lex);
 487     return true;
 488
 489 unroll:
 490     if (command) {
 491         vec_pop(command);
 492         while (vec_size(command)) {
 493             lex_ungetch(lex, (unsigned char)vec_last(command));
 494             vec_pop(command);
 495         }
 496         vec_free(command);
 497         lex_ungetch(lex, ' ');
 498     }
 499     if (param) {
 500         vec_pop(param);
 501         while (vec_size(param)) {
 502             lex_ungetch(lex, (unsigned char)vec_last(param));
 503             vec_pop(param);
 504         }
 505         vec_free(param);
 506         lex_ungetch(lex, ' ');
 507     }
 508     if (pragma) {
 509         vec_pop(pragma);
 510         while (vec_size(pragma)) {
 511             lex_ungetch(lex, (unsigned char)vec_last(pragma));
 512             vec_pop(pragma);
 513         }
 514         vec_free(pragma);
 515     }
 516     lex_ungetch(lex, '#');
 517
 518     lex->line = line;
 519     return false;
 520 }
 521
/* Skip whitespace and comments and return the first
 * non-white character.
 * As this makes use of the above getch() ungetch() functions,
 * we don't need to care at all about line numbering anymore.
 *
 * In theory, this function should only be used at the beginning
 * of lexing, or when we *know* the next character is part of the token.
 * Otherwise, if the parser throws an error, the linenumber may not be
 * the line of the error, but the line of the next token AFTER the error.
 *
 * This is currently only problematic when using c-like string-continuation,
 * since comments and whitespaces are allowed between 2 such strings.
 * Example:
printf(   "line one\n"
// A comment
          "A continuation of the previous string"
// This line is skipped
      , foo);

 * In this case, if the parser decides it didn't actually want a string,
 * and uses lex->line to print an error, it will show the ', foo);' line's
 * linenumber.
 *
 * On the other hand, the parser is supposed to remember the line of the next
 * token's beginning. In this case we would want skipwhite() to be called
 * AFTER reading a token, so that the parser, before reading the NEXT token,
 * doesn't store the *comment's* linenumber, but the actual token's linenumber.
 *
 * THIS SOLUTION
 *    here is to store the line of the first character after skipping
 *    the initial whitespace in lex->sline, this happens in lex_do.
 *
 * Parameters:
 *    hadwhite  initial value of the "whitespace seen" state; when it is
 *              (or becomes) true the function reports TOKEN_WHITE.
 *
 * Returns:
 *    - the first non-white character (or EOF) when no whitespace was
 *      recorded; that character has been consumed,
 *    - TOKEN_WHITE when whitespace was recorded; the token buffer is
 *      terminated and the following character has been pushed back,
 *    - TOKEN_EOL in preprocessing mode at a newline that was not preceded
 *      by recorded whitespace.
 *
 * In preprocessing mode the skipped text is recorded in the token buffer:
 * whitespace verbatim, comments as blanks (newlines inside block comments
 * are kept). Outside preprocessing mode each newline is also a chance for
 * lex_try_pragma() to consume a pragma line.
 */
static int lex_skipwhite(lex_file *lex, bool hadwhite)
{
    int ch = 0;
    bool haswhite = hadwhite;

    do
    {
        ch = lex_getch(lex);
        while (ch != EOF && isspace(ch)) {
            if (ch == '\n') {
                /* a consumed pragma line ends with its own newline, so
                 * ch is still '\n' and the next line is tried as well
                 */
                if (lex_try_pragma(lex))
                    continue;
            }
            if (lex->flags.preprocessing) {
                if (ch == '\n') {
                    /* end-of-line */
                    /* see if there was whitespace first */
                    if (haswhite) {
                        /* report the whitespace now; the newline is
                         * pushed back and handled by the next call
                         */
                        lex_ungetch(lex, ch);
                        lex_endtoken(lex);
                        return TOKEN_WHITE;
                    }
                    /* otherwise return EOL */
                    return TOKEN_EOL;
                }
                haswhite = true;
                lex_tokench(lex, ch);
            }
            ch = lex_getch(lex);
        }

        if (ch == '/') {
            ch = lex_getch(lex);
            if (ch == '/')
            {
                /* one line comment */
                ch = lex_getch(lex);

                if (lex->flags.preprocessing) {
                    haswhite = true;
                    /* the two slashes are recorded as blanks */
                    lex_tokench(lex, ' ');
                    lex_tokench(lex, ' ');
                }

                while (ch != EOF && ch != '\n') {
                    /* every comment character is recorded as a blank */
                    if (lex->flags.preprocessing)
                        lex_tokench(lex, ' ');
                    ch = lex_getch(lex);
                }
                if (lex->flags.preprocessing) {
                    /* a newline is pushed back even if the comment was
                     * ended by EOF
                     */
                    lex_ungetch(lex, '\n');
                    lex_endtoken(lex);
                    return TOKEN_WHITE;
                }
                /* re-evaluates the outer loop condition with the
                 * terminating '\n' (or EOF) still in ch
                 */
                continue;
            }
            if (ch == '*')
            {
                /* multiline comment */
                if (lex->flags.preprocessing) {
                    haswhite = true;
                    /* the opening delimiter is recorded as two blanks */
                    lex_tokench(lex, ' ');
                    lex_tokench(lex, ' ');
                }

                while (ch != EOF)
                {
                    ch = lex_getch(lex);
                    if (ch == '*') {
                        ch = lex_getch(lex);
                        if (ch == '/') {
                            if (lex->flags.preprocessing) {
                                /* the closing delimiter is recorded as
                                 * two blanks
                                 */
                                lex_tokench(lex, ' ');
                                lex_tokench(lex, ' ');
                            }
                            break;
                        }
                        /* lone '*': re-examine the following character */
                        lex_ungetch(lex, ch);
                    }
                    if (lex->flags.preprocessing) {
                        /* keep newlines, blank out everything else */
                        if (ch == '\n')
                            lex_tokench(lex, '\n');
                        else
                            lex_tokench(lex, ' ');
                    }
                }
                ch = ' '; /* cause TRUE in the isspace check */
                continue;
            }
            /* Otherwise roll back to the slash and break out of the loop */
            lex_ungetch(lex, ch);
            ch = '/';
            break;
        }
    } while (ch != EOF && isspace(ch));

    if (haswhite) {
        lex_endtoken(lex);
        lex_ungetch(lex, ch);
        return TOKEN_WHITE;
    }
    return ch;
}
 669
 670 /* Get a token */
 671 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
 672 {
 673     int ch;
 674
 675     ch = lex_getch(lex);
 676     while (ch != EOF && isident(ch))
 677     {
 678         lex_tokench(lex, ch);
 679         ch = lex_getch(lex);
 680     }
 681
 682     /* last ch was not an ident ch: */
 683     lex_ungetch(lex, ch);
 684
 685     return true;
 686 }
 687
 688 /* read one ident for the frame list */
 689 static int lex_parse_frame(lex_file *lex)
 690 {
 691     int ch;
 692
 693     lex_token_new(lex);
 694
 695     ch = lex_getch(lex);
 696     while (ch != EOF && ch != '\n' && isspace(ch))
 697         ch = lex_getch(lex);
 698
 699     if (ch == '\n')
 700         return 1;
 701
 702     if (!isident_start(ch)) {
 703         lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
 704         return -1;
 705     }
 706
 707     lex_tokench(lex, ch);
 708     if (!lex_finish_ident(lex))
 709         return -1;
 710     lex_endtoken(lex);
 711     return 0;
 712 }
 713
 714 /* read a list of $frames */
 715 static bool lex_finish_frames(lex_file *lex)
 716 {
 717     do {
 718         size_t i;
 719         int    rc;
 720         frame_macro m;
 721
 722         rc = lex_parse_frame(lex);
 723         if (rc > 0) /* end of line */
 724             return true;
 725         if (rc < 0) /* error */
 726             return false;
 727
 728         for (i = 0; i < vec_size(lex->frames); ++i) {
 729             if (!strcmp(lex->tok.value, lex->frames[i].name)) {
 730                 lex->frames[i].value = lex->framevalue++;
 731                 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
 732                     return false;
 733                 break;
 734             }
 735         }
 736         if (i < vec_size(lex->frames))
 737             continue;
 738
 739         m.value = lex->framevalue++;
 740         m.name = util_strdup(lex->tok.value);
 741         vec_shrinkto(lex->tok.value, 0);
 742         vec_push(lex->frames, m);
 743     } while (true);
 744
 745     return false;
 746 }
 747
/* Read the body of a string or character constant into the token buffer.
 *
 * The opening quote has already been consumed by the caller; `quote` is
 * the character that terminates the constant.
 *
 * In preprocessing mode escape sequences are copied verbatim (backslash
 * plus the following character). Otherwise they are decoded:
 *   \\ \' \"                 the character itself
 *   \a \b \r \n \t \f \v     the usual C control characters
 *   \xHH / \XHH              exactly two hex digits
 *   \0 .. \9                 character codes 18 .. 27
 *   \< \- \> \[ \]           character codes 29, 30, 31, 16, 17
 *   \{N} / \{xH}             decimal / hexadecimal character code; limited
 *                            to 255 unless the UTF8 option is set, in which
 *                            case codes >= 128 are UTF-8 encoded
 *   backslash-newline        a newline
 *   anything else            warning; backslash and character are kept
 *
 * Returns TOKEN_STRINGCONST when the closing quote is found (the buffer is
 * NOT NUL-terminated here), or TOKEN_ERROR - also stored in
 * lex->tok.ttype - after printing a message for EOF or a bad escape.
 */
static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
{
    uchar_t chr;
    int ch = 0;
    int nextch;
    bool hex;
    char u8buf[8]; /* way more than enough */
    int  u8len, uc;

    while (ch != EOF)
    {
        ch = lex_getch(lex);
        if (ch == quote)
            return TOKEN_STRINGCONST;

        if (lex->flags.preprocessing && ch == '\\') {
            /* preprocessing: keep the escape sequence exactly as written */
            lex_tokench(lex, ch);
            ch = lex_getch(lex);
            if (ch == EOF) {
                lexerror(lex, "unexpected end of file");
                lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
                return (lex->tok.ttype = TOKEN_ERROR);
            }
            lex_tokench(lex, ch);
        }
        else if (ch == '\\') {
            ch = lex_getch(lex);
            if (ch == EOF) {
                lexerror(lex, "unexpected end of file");
                lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
                return (lex->tok.ttype = TOKEN_ERROR);
            }

            /* decode the escape into ch; it is appended below the switch */
            switch (ch) {
            case '\\': break;
            case '\'': break;
            case '"':  break;
            case 'a':  ch = '\a'; break;
            case 'b':  ch = '\b'; break;
            case 'r':  ch = '\r'; break;
            case 'n':  ch = '\n'; break;
            case 't':  ch = '\t'; break;
            case 'f':  ch = '\f'; break;
            case 'v':  ch = '\v'; break;
            case 'x':
            case 'X':
                /* same procedure as in fteqcc */
                /* first hex digit: high nibble */
                ch = 0;
                nextch = lex_getch(lex);
                if      (nextch >= '0' && nextch <= '9')
                    ch += nextch - '0';
                else if (nextch >= 'a' && nextch <= 'f')
                    ch += nextch - 'a' + 10;
                else if (nextch >= 'A' && nextch <= 'F')
                    ch += nextch - 'A' + 10;
                else {
                    lexerror(lex, "bad character code");
                    lex_ungetch(lex, nextch);
                    return (lex->tok.ttype = TOKEN_ERROR);
                }

                /* second hex digit: low nibble */
                ch *= 0x10;
                nextch = lex_getch(lex);
                if      (nextch >= '0' && nextch <= '9')
                    ch += nextch - '0';
                else if (nextch >= 'a' && nextch <= 'f')
                    ch += nextch - 'a' + 10;
                else if (nextch >= 'A' && nextch <= 'F')
                    ch += nextch - 'A' + 10;
                else {
                    lexerror(lex, "bad character code");
                    lex_ungetch(lex, nextch);
                    return (lex->tok.ttype = TOKEN_ERROR);
                }
                break;

            /* fteqcc support */
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
            case '8': case '9':
                /* \0 .. \9 map to character codes 18 .. 27 */
                ch = 18 + ch - '0';
                break;
            case '<':  ch = 29; break;
            case '-':  ch = 30; break;
            case '>':  ch = 31; break;
            case '[':  ch = 16; break;
            case ']':  ch = 17; break;
            case '{':
                /* \{N} decimal or \{xH} hexadecimal character code */
                chr = 0;
                nextch = lex_getch(lex);
                hex = (nextch == 'x');
                if (!hex)
                    lex_ungetch(lex, nextch);
                /* EOF inside the braces is no digit and ends in the
                 * "bad character code" error below
                 */
                for (nextch = lex_getch(lex); nextch != '}'; nextch = lex_getch(lex)) {
                    if (!hex) {
                        if (nextch >= '0' && nextch <= '9')
                            chr = chr * 10 + nextch - '0';
                        else {
                            lexerror(lex, "bad character code");
                            return (lex->tok.ttype = TOKEN_ERROR);
                        }
                    } else {
                        if (nextch >= '0' && nextch <= '9')
                            chr = chr * 0x10 + nextch - '0';
                        else if (nextch >= 'a' && nextch <= 'f')
                            chr = chr * 0x10 + nextch - 'a' + 10;
                        else if (nextch >= 'A' && nextch <= 'F')
                            chr = chr * 0x10 + nextch - 'A' + 10;
                        else {
                            lexerror(lex, "bad character code");
                            return (lex->tok.ttype = TOKEN_ERROR);
                        }
                    }
                    /* range-check after every digit */
                    if (chr > 0x10FFFF || (!OPTS_FLAG(UTF8) && chr > 255))
                    {
                        lexerror(lex, "character code out of range");
                        return (lex->tok.ttype = TOKEN_ERROR);
                    }
                }
                if (OPTS_FLAG(UTF8) && chr >= 128) {
                    u8len = u8_fromchar(chr, u8buf, sizeof(u8buf));
                    if (!u8len)
                        ch = 0;
                    else {
                        --u8len;
                        for (uc = 0; uc < u8len; ++uc)
                            lex_tokench(lex, u8buf[uc]);
                        /* the last character will be inserted with the tokench() call
                         * below the switch
                         */
                        ch = u8buf[uc];
                    }
                }
                else
                    ch = chr;
                break;
            case '\n':  ch = '\n'; break;

            default:
                lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
                /* so we just add the character plus backslash no matter what it actually is */
                lex_tokench(lex, '\\');
            }
            /* add the character finally */
            lex_tokench(lex, ch);
        }
        else
            lex_tokench(lex, ch);
    }
    /* the loop only ends when EOF was read before the closing quote */
    lexerror(lex, "unexpected end of file within string constant");
    lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
    return (lex->tok.ttype = TOKEN_ERROR);
}
 901
 902 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
 903 {
 904     bool ishex = false;
 905
 906     int  ch = lastch;
 907
 908     /* parse a number... */
 909     if (ch == '.')
 910         lex->tok.ttype = TOKEN_FLOATCONST;
 911     else
 912         lex->tok.ttype = TOKEN_INTCONST;
 913
 914     lex_tokench(lex, ch);
 915
 916     ch = lex_getch(lex);
 917     if (ch != '.' && !isdigit(ch))
 918     {
 919         if (lastch != '0' || ch != 'x')
 920         {
 921             /* end of the number or EOF */
 922             lex_ungetch(lex, ch);
 923             lex_endtoken(lex);
 924
 925             lex->tok.constval.i = lastch - '0';
 926             return lex->tok.ttype;
 927         }
 928
 929         ishex = true;
 930     }
 931
 932     /* EOF would have been caught above */
 933
 934     if (ch != '.')
 935     {
 936         lex_tokench(lex, ch);
 937         ch = lex_getch(lex);
 938         while (isdigit(ch) || (ishex && isxdigit_only(ch)))
 939         {
 940             lex_tokench(lex, ch);
 941             ch = lex_getch(lex);
 942         }
 943     }
 944     /* NOT else, '.' can come from above as well */
 945     if (lex->tok.ttype != TOKEN_FLOATCONST && ch == '.' && !ishex)
 946     {
 947         /* Allow floating comma in non-hex mode */
 948         lex->tok.ttype = TOKEN_FLOATCONST;
 949         lex_tokench(lex, ch);
 950
 951         /* continue digits-only */
 952         ch = lex_getch(lex);
 953         while (isdigit(ch))
 954         {
 955             lex_tokench(lex, ch);
 956             ch = lex_getch(lex);
 957         }
 958     }
 959     /* put back the last character */
 960     /* but do not put back the trailing 'f' or a float */
 961     if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
 962         ch = lex_getch(lex);
 963
 964     /* generally we don't want words to follow numbers: */
 965     if (isident(ch)) {
 966         lexerror(lex, "unexpected trailing characters after number");
 967         return (lex->tok.ttype = TOKEN_ERROR);
 968     }
 969     lex_ungetch(lex, ch);
 970
 971     lex_endtoken(lex);
 972     if (lex->tok.ttype == TOKEN_FLOATCONST)
 973         lex->tok.constval.f = strtod(lex->tok.value, NULL);
 974     else
 975         lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
 976     return lex->tok.ttype;
 977 }
 978
 979 int lex_do(lex_file *lex)
 980 {
 981     int ch, nextch, thirdch;
 982     bool hadwhite = false;
 983
 984     lex_token_new(lex);
 985 #if 0
 986     if (!lex->tok)
 987         return TOKEN_FATAL;
 988 #endif
 989
 990     while (true) {
 991         ch = lex_skipwhite(lex, hadwhite);
 992         hadwhite = true;
 993         if (!lex->flags.mergelines || ch != '\\')
 994             break;
 995         ch = lex_getch(lex);
 996         if (ch == '\r')
 997             ch = lex_getch(lex);
 998         if (ch != '\n') {
 999             lex_ungetch(lex, ch);
1000             ch = '\\';
1001             break;
1002         }
1003         /* we reached a linemerge */
1004         lex_tokench(lex, '\n');
1005         continue;
1006     }
1007
1008     if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
1009         return (lex->tok.ttype = ch);
1010     }
1011
1012     lex->sline = lex->line;
1013     lex->tok.ctx.line = lex->sline;
1014     lex->tok.ctx.file = lex->name;
1015
1016     if (lex->eof)
1017         return (lex->tok.ttype = TOKEN_FATAL);
1018
1019     if (ch == EOF) {
1020         lex->eof = true;
1021         return (lex->tok.ttype = TOKEN_EOF);
1022     }
1023
1024     /* modelgen / spiritgen commands */
1025     if (ch == '$' && !lex->flags.preprocessing) {
1026         const char *v;
1027         size_t frame;
1028
1029         ch = lex_getch(lex);
1030         if (!isident_start(ch)) {
1031             lexerror(lex, "hanging '$' modelgen/spritegen command line");
1032             return lex_do(lex);
1033         }
1034         lex_tokench(lex, ch);
1035         if (!lex_finish_ident(lex))
1036             return (lex->tok.ttype = TOKEN_ERROR);
1037         lex_endtoken(lex);
1038         /* skip the known commands */
1039         v = lex->tok.value;
1040
1041         if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
1042         {
1043             /* frame/framesave command works like an enum
1044              * similar to fteqcc we handle this in the lexer.
1045              * The reason for this is that it is sensitive to newlines,
1046              * which the parser is unaware of
1047              */
1048             if (!lex_finish_frames(lex))
1049                  return (lex->tok.ttype = TOKEN_ERROR);
1050             return lex_do(lex);
1051         }
1052
1053         if (!strcmp(v, "framevalue"))
1054         {
1055             ch = lex_getch(lex);
1056             while (ch != EOF && isspace(ch) && ch != '\n')
1057                 ch = lex_getch(lex);
1058
1059             if (!isdigit(ch)) {
1060                 lexerror(lex, "$framevalue requires an integer parameter");
1061                 return lex_do(lex);
1062             }
1063
1064             lex_token_new(lex);
1065             lex->tok.ttype = lex_finish_digit(lex, ch);
1066             lex_endtoken(lex);
1067             if (lex->tok.ttype != TOKEN_INTCONST) {
1068                 lexerror(lex, "$framevalue requires an integer parameter");
1069                 return lex_do(lex);
1070             }
1071             lex->framevalue = lex->tok.constval.i;
1072             return lex_do(lex);
1073         }
1074
1075         if (!strcmp(v, "framerestore"))
1076         {
1077             int rc;
1078
1079             lex_token_new(lex);
1080
1081             rc = lex_parse_frame(lex);
1082
1083             if (rc > 0) {
1084                 lexerror(lex, "$framerestore requires a framename parameter");
1085                 return lex_do(lex);
1086             }
1087             if (rc < 0)
1088                 return (lex->tok.ttype = TOKEN_FATAL);
1089
1090             v = lex->tok.value;
1091             for (frame = 0; frame < vec_size(lex->frames); ++frame) {
1092                 if (!strcmp(v, lex->frames[frame].name)) {
1093                     lex->framevalue = lex->frames[frame].value;
1094                     return lex_do(lex);
1095                 }
1096             }
1097             lexerror(lex, "unknown framename `%s`", v);
1098             return lex_do(lex);
1099         }
1100
1101         if (!strcmp(v, "modelname"))
1102         {
1103             int rc;
1104
1105             lex_token_new(lex);
1106
1107             rc = lex_parse_frame(lex);
1108
1109             if (rc > 0) {
1110                 lexerror(lex, "$modelname requires a parameter");
1111                 return lex_do(lex);
1112             }
1113             if (rc < 0)
1114                 return (lex->tok.ttype = TOKEN_FATAL);
1115
1116             if (lex->modelname) {
1117                 frame_macro m;
1118                 m.value = lex->framevalue;
1119                 m.name = lex->modelname;
1120                 lex->modelname = NULL;
1121                 vec_push(lex->frames, m);
1122             }
1123             lex->modelname = lex->tok.value;
1124             lex->tok.value = NULL;
1125             return lex_do(lex);
1126         }
1127
1128         if (!strcmp(v, "flush"))
1129         {
1130             size_t fi;
1131             for (fi = 0; fi < vec_size(lex->frames); ++fi)
1132                 mem_d(lex->frames[fi].name);
1133             vec_free(lex->frames);
1134             /* skip line (fteqcc does it too) */
1135             ch = lex_getch(lex);
1136             while (ch != EOF && ch != '\n')
1137                 ch = lex_getch(lex);
1138             return lex_do(lex);
1139         }
1140
1141         if (!strcmp(v, "cd") ||
1142             !strcmp(v, "origin") ||
1143             !strcmp(v, "base") ||
1144             !strcmp(v, "flags") ||
1145             !strcmp(v, "scale") ||
1146             !strcmp(v, "skin"))
1147         {
1148             /* skip line */
1149             ch = lex_getch(lex);
1150             while (ch != EOF && ch != '\n')
1151                 ch = lex_getch(lex);
1152             return lex_do(lex);
1153         }
1154
1155         for (frame = 0; frame < vec_size(lex->frames); ++frame) {
1156             if (!strcmp(v, lex->frames[frame].name)) {
1157                 lex->tok.constval.i = lex->frames[frame].value;
1158                 return (lex->tok.ttype = TOKEN_INTCONST);
1159             }
1160         }
1161
1162         lexerror(lex, "invalid frame macro");
1163         return lex_do(lex);
1164     }
1165
1166     /* single-character tokens */
1167     switch (ch)
1168     {
1169         case '[':
1170             nextch = lex_getch(lex);
1171             if (nextch == '[') {
1172                 lex_tokench(lex, ch);
1173                 lex_tokench(lex, nextch);
1174                 lex_endtoken(lex);
1175                 return (lex->tok.ttype = TOKEN_ATTRIBUTE_OPEN);
1176             }
1177             lex_ungetch(lex, nextch);
1178             /* FALL THROUGH */
1179         case '(':
1180         case ':':
1181         case '?':
1182             lex_tokench(lex, ch);
1183             lex_endtoken(lex);
1184             if (lex->flags.noops)
1185                 return (lex->tok.ttype = ch);
1186             else
1187                 return (lex->tok.ttype = TOKEN_OPERATOR);
1188
1189         case ']':
1190             if (lex->flags.noops) {
1191                 nextch = lex_getch(lex);
1192                 if (nextch == ']') {
1193                     lex_tokench(lex, ch);
1194                     lex_tokench(lex, nextch);
1195                     lex_endtoken(lex);
1196                     return (lex->tok.ttype = TOKEN_ATTRIBUTE_CLOSE);
1197                 }
1198                 lex_ungetch(lex, nextch);
1199             }
1200             /* FALL THROUGH */
1201         case ')':
1202         case ';':
1203         case '{':
1204         case '}':
1205
1206         case '#':
1207             lex_tokench(lex, ch);
1208             lex_endtoken(lex);
1209             return (lex->tok.ttype = ch);
1210         default:
1211             break;
1212     }
1213
1214     if (ch == '.') {
1215         nextch = lex_getch(lex);
1216         /* digits starting with a dot */
1217         if (isdigit(nextch)) {
1218             lex_ungetch(lex, nextch);
1219             lex->tok.ttype = lex_finish_digit(lex, ch);
1220             lex_endtoken(lex);
1221             return lex->tok.ttype;
1222         }
1223         lex_ungetch(lex, nextch);
1224     }
1225
1226     if (lex->flags.noops)
1227     {
1228         /* Detect characters early which are normally
1229          * operators OR PART of an operator.
1230          */
1231         switch (ch)
1232         {
1233             /*
1234             case '+':
1235             case '-':
1236             */
1237             case '*':
1238             case '/':
1239             case '<':
1240             case '>':
1241             case '=':
1242             case '&':
1243             case '|':
1244             case '^':
1245             case '~':
1246             case ',':
1247             case '!':
1248                 lex_tokench(lex, ch);
1249                 lex_endtoken(lex);
1250                 return (lex->tok.ttype = ch);
1251             default:
1252                 break;
1253         }
1254     }
1255
1256     if (ch == '.')
1257     {
1258         lex_tokench(lex, ch);
1259         /* peak ahead once */
1260         nextch = lex_getch(lex);
1261         if (nextch != '.') {
1262             lex_ungetch(lex, nextch);
1263             lex_endtoken(lex);
1264             if (lex->flags.noops)
1265                 return (lex->tok.ttype = ch);
1266             else
1267                 return (lex->tok.ttype = TOKEN_OPERATOR);
1268         }
1269         /* peak ahead again */
1270         nextch = lex_getch(lex);
1271         if (nextch != '.') {
1272             lex_ungetch(lex, nextch);
1273             lex_ungetch(lex, '.');
1274             lex_endtoken(lex);
1275             if (lex->flags.noops)
1276                 return (lex->tok.ttype = ch);
1277             else
1278                 return (lex->tok.ttype = TOKEN_OPERATOR);
1279         }
1280         /* fill the token to be "..." */
1281         lex_tokench(lex, ch);
1282         lex_tokench(lex, ch);
1283         lex_endtoken(lex);
1284         return (lex->tok.ttype = TOKEN_DOTS);
1285     }
1286
1287     if (ch == ',' || ch == '.') {
1288         lex_tokench(lex, ch);
1289         lex_endtoken(lex);
1290         return (lex->tok.ttype = TOKEN_OPERATOR);
1291     }
1292
1293     if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
1294         ch == '>' || ch == '<' || /* <<, >>, <=, >=                  */
1295         ch == '=' || ch == '!' || /* <=>, ==, !=                     */
1296         ch == '&' || ch == '|' || /* &&, ||, &=, |=                  */
1297         ch == '~'                 /* ~=, ~                           */
1298     )  {
1299         lex_tokench(lex, ch);
1300
1301         nextch = lex_getch(lex);
1302         if ((nextch == '=' && ch != '<') || (nextch == ch && ch != '!')) {
1303             lex_tokench(lex, nextch);
1304         } else if (ch == '<' && nextch == '=') {
1305             lex_tokench(lex, nextch);
1306             if ((thirdch = lex_getch(lex)) == '>')
1307                 lex_tokench(lex, thirdch);
1308             else
1309                 lex_ungetch(lex, thirdch);
1310
1311         } else if (ch == '-' && nextch == '>') {
1312             lex_tokench(lex, nextch);
1313         } else if (ch == '&' && nextch == '~') {
1314             thirdch = lex_getch(lex);
1315             if (thirdch != '=') {
1316                 lex_ungetch(lex, thirdch);
1317                 lex_ungetch(lex, nextch);
1318             }
1319             else {
1320                 lex_tokench(lex, nextch);
1321                 lex_tokench(lex, thirdch);
1322             }
1323         }
1324         else if (lex->flags.preprocessing &&
1325                  ch == '-' && isdigit(nextch))
1326         {
1327             lex->tok.ttype = lex_finish_digit(lex, nextch);
1328             if (lex->tok.ttype == TOKEN_INTCONST)
1329                 lex->tok.constval.i = -lex->tok.constval.i;
1330             else
1331                 lex->tok.constval.f = -lex->tok.constval.f;
1332             lex_endtoken(lex);
1333             return lex->tok.ttype;
1334         } else {
1335             lex_ungetch(lex, nextch);
1336         }
1337
1338         lex_endtoken(lex);
1339         return (lex->tok.ttype = TOKEN_OPERATOR);
1340     }
1341
1342     /*
1343     if (ch == '^' || ch == '~' || ch == '!')
1344     {
1345         lex_tokench(lex, ch);
1346         lex_endtoken(lex);
1347         return (lex->tok.ttype = TOKEN_OPERATOR);
1348     }
1349     */
1350
1351     if (ch == '*' || ch == '/') /* *=, /= */
1352     {
1353         lex_tokench(lex, ch);
1354
1355         nextch = lex_getch(lex);
1356         if (nextch == '=' || nextch == '*') {
1357             lex_tokench(lex, nextch);
1358         } else
1359             lex_ungetch(lex, nextch);
1360
1361         lex_endtoken(lex);
1362         return (lex->tok.ttype = TOKEN_OPERATOR);
1363     }
1364
1365     if (ch == '%') {
1366         lex_tokench(lex, ch);
1367         lex_endtoken(lex);
1368         return (lex->tok.ttype = TOKEN_OPERATOR);
1369     }
1370
1371     if (isident_start(ch))
1372     {
1373         const char *v;
1374
1375         lex_tokench(lex, ch);
1376         if (!lex_finish_ident(lex)) {
1377             /* error? */
1378             return (lex->tok.ttype = TOKEN_ERROR);
1379         }
1380         lex_endtoken(lex);
1381         lex->tok.ttype = TOKEN_IDENT;
1382
1383         v = lex->tok.value;
1384         if (!strcmp(v, "void")) {
1385             lex->tok.ttype = TOKEN_TYPENAME;
1386             lex->tok.constval.t = TYPE_VOID;
1387         } else if (!strcmp(v, "int")) {
1388             lex->tok.ttype = TOKEN_TYPENAME;
1389             lex->tok.constval.t = TYPE_INTEGER;
1390         } else if (!strcmp(v, "float")) {
1391             lex->tok.ttype = TOKEN_TYPENAME;
1392             lex->tok.constval.t = TYPE_FLOAT;
1393         } else if (!strcmp(v, "string")) {
1394             lex->tok.ttype = TOKEN_TYPENAME;
1395             lex->tok.constval.t = TYPE_STRING;
1396         } else if (!strcmp(v, "entity")) {
1397             lex->tok.ttype = TOKEN_TYPENAME;
1398             lex->tok.constval.t = TYPE_ENTITY;
1399         } else if (!strcmp(v, "vector")) {
1400             lex->tok.ttype = TOKEN_TYPENAME;
1401             lex->tok.constval.t = TYPE_VECTOR;
1402         } else {
1403             size_t kw;
1404             for (kw = 0; kw < num_keywords_qc; ++kw) {
1405                 if (!strcmp(v, keywords_qc[kw]))
1406                     return (lex->tok.ttype = TOKEN_KEYWORD);
1407             }
1408             if (OPTS_OPTION_U32(OPTION_STANDARD) != COMPILER_QCC) {
1409                 for (kw = 0; kw < num_keywords_fg; ++kw) {
1410                     if (!strcmp(v, keywords_fg[kw]))
1411                         return (lex->tok.ttype = TOKEN_KEYWORD);
1412                 }
1413             }
1414         }
1415
1416         return lex->tok.ttype;
1417     }
1418
1419     if (ch == '"')
1420     {
1421         lex->flags.nodigraphs = true;
1422         if (lex->flags.preprocessing)
1423             lex_tokench(lex, ch);
1424         lex->tok.ttype = lex_finish_string(lex, '"');
1425         if (lex->flags.preprocessing)
1426             lex_tokench(lex, ch);
1427         while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
1428         {
1429             /* Allow c style "string" "continuation" */
1430             ch = lex_skipwhite(lex, false);
1431             if (ch != '"') {
1432                 lex_ungetch(lex, ch);
1433                 break;
1434             }
1435
1436             lex->tok.ttype = lex_finish_string(lex, '"');
1437         }
1438         lex->flags.nodigraphs = false;
1439         lex_endtoken(lex);
1440         return lex->tok.ttype;
1441     }
1442
1443     if (ch == '\'')
1444     {
1445         /* we parse character constants like string,
1446          * but return TOKEN_CHARCONST, or a vector type if it fits...
1447          * Likewise actual unescaping has to be done by the parser.
1448          * The difference is we don't allow 'char' 'continuation'.
1449          */
1450         if (lex->flags.preprocessing)
1451             lex_tokench(lex, ch);
1452         lex->tok.ttype = lex_finish_string(lex, '\'');
1453         if (lex->flags.preprocessing)
1454             lex_tokench(lex, ch);
1455         lex_endtoken(lex);
1456
1457         lex->tok.ttype = TOKEN_CHARCONST;
1458          /* It's a vector if we can successfully scan 3 floats */
1459 #ifdef _MSC_VER
1460         if (sscanf_s(lex->tok.value, " %f %f %f ",
1461                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1462 #else
1463         if (sscanf(lex->tok.value, " %f %f %f ",
1464                    &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1465 #endif
1466
1467         {
1468              lex->tok.ttype = TOKEN_VECTORCONST;
1469         }
1470         else
1471         {
1472             if (!lex->flags.preprocessing && strlen(lex->tok.value) > 1) {
1473                 uchar_t u8char;
1474                 /* check for a valid utf8 character */
1475                 if (!OPTS_FLAG(UTF8) || !u8_analyze(lex->tok.value, NULL, NULL, &u8char, 8)) {
1476                     if (lexwarn(lex, WARN_MULTIBYTE_CHARACTER,
1477                                 ( OPTS_FLAG(UTF8) ? "invalid multibyte character sequence `%s`"
1478                                                   : "multibyte character: `%s`" ),
1479                                 lex->tok.value))
1480                         return (lex->tok.ttype = TOKEN_ERROR);
1481                 }
1482                 else
1483                     lex->tok.constval.i = u8char;
1484             }
1485             else
1486                 lex->tok.constval.i = lex->tok.value[0];
1487         }
1488
1489         return lex->tok.ttype;
1490     }
1491
1492     if (isdigit(ch))
1493     {
1494         lex->tok.ttype = lex_finish_digit(lex, ch);
1495         lex_endtoken(lex);
1496         return lex->tok.ttype;
1497     }
1498
1499     if (lex->flags.preprocessing) {
1500         lex_tokench(lex, ch);
1501         lex_endtoken(lex);
1502         return (lex->tok.ttype = ch);
1503     }
1504
1505     lexerror(lex, "unknown token: `%s`", lex->tok.value);
1506     return (lex->tok.ttype = TOKEN_ERROR);
1507 }