9 MEM_VEC_FUNCTIONS(token, char, value)
10 MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames)
12 VECTOR_MAKE(char*, lex_filenames);
14 void lexerror(lex_file *lex, const char *fmt, ...)
19 printf("error %s:%lu: ", lex->name, (unsigned long)lex->sline);
30 void lexwarn(lex_file *lex, int warn, const char *fmt, ...)
38 printf("warning %s:%lu: ", lex->name, (unsigned long)lex->sline);
51 token *tok = (token*)mem_a(sizeof(token));
54 memset(tok, 0, sizeof(*tok));
58 void token_delete(token *self)
60 if (self->next && self->next->prev == self)
61 self->next->prev = self->prev;
62 if (self->prev && self->prev->next == self)
63 self->prev->next = self->next;
64 MEM_VECTOR_CLEAR(self, value);
68 token* token_copy(const token *cp)
70 token* self = token_new();
74 self->value_alloc = cp->value_count + 1;
75 self->value_count = cp->value_count;
76 self->value = (char*)mem_a(self->value_alloc);
81 memcpy(self->value, cp->value, cp->value_count);
82 self->value[self->value_alloc-1] = 0;
86 self->ttype = cp->ttype;
87 memcpy(&self->constval, &cp->constval, sizeof(self->constval));
91 void token_delete_all(token *t)
102 token* token_copy_all(const token *cp)
107 out = cur = token_copy(cp);
113 cur->next = token_copy(cp);
115 token_delete_all(out);
118 cur->next->prev = cur;
125 lex_file* lex_open(const char *file)
128 FILE *in = util_fopen(file, "rb");
131 lexerror(NULL, "open failed: '%s'\n", file);
135 lex = (lex_file*)mem_a(sizeof(*lex));
138 lexerror(NULL, "out of memory\n");
142 memset(lex, 0, sizeof(*lex));
145 lex->name = util_strdup(file);
146 lex->line = 1; /* we start counting at 1 */
150 lex_filenames_add(lex->name);
155 void lex_cleanup(void)
158 for (i = 0; i < lex_filenames_elements; ++i)
159 mem_d(lex_filenames_data[i]);
160 mem_d(lex_filenames_data);
163 void lex_close(lex_file *lex)
166 for (i = 0; i < lex->frames_count; ++i)
167 mem_d(lex->frames[i].name);
168 MEM_VECTOR_CLEAR(lex, frames);
171 mem_d(lex->modelname);
176 token_delete(lex->tok);
177 /* mem_d(lex->name); collected in lex_filenames */
181 /* Get or put-back data
182 * The following two functions do NOT understand what kind of data they
184 * They are merely wrapping get/put in order to count line numbers.
186 static int lex_getch(lex_file *lex)
192 if (lex->peek[lex->peekpos] == '\n')
194 return lex->peek[lex->peekpos];
197 ch = fgetc(lex->file);
203 static void lex_ungetch(lex_file *lex, int ch)
205 lex->peek[lex->peekpos++] = ch;
210 /* classify characters
211 * some additions to the is*() functions of ctype.h
214 /* Idents are alphanumeric, but they start with alpha or _ */
215 static bool isident_start(int ch)
217 return isalpha(ch) || ch == '_';
220 static bool isident(int ch)
222 return isident_start(ch) || isdigit(ch);
225 /* isxdigit_only is used when we already know it's not a digit
226 * and want to see if it's a hex digit anyway.
228 static bool isxdigit_only(int ch)
230 return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
233 /* Skip whitespace and comments and return the first
234 * non-white character.
235 * As this makes use of the above getch() ungetch() functions,
236 * we don't need to care at all about line numbering anymore.
238 * In theory, this function should only be used at the beginning
239 * of lexing, or when we *know* the next character is part of the token.
240 * Otherwise, if the parser throws an error, the linenumber may not be
241 * the line of the error, but the line of the next token AFTER the error.
243 * This is currently only problematic when using c-like string-continuation,
244 * since comments and whitespaces are allowed between 2 such strings.
248 "A continuation of the previous string"
249 // This line is skipped
252 * In this case, if the parser decides it didn't actually want a string,
253 * and uses lex->line to print an error, it will show the ', foo);' line's
256 * On the other hand, the parser is supposed to remember the line of the next
257 * token's beginning. In this case we would want skipwhite() to be called
258 * AFTER reading a token, so that the parser, before reading the NEXT token,
259 * doesn't store the *comment's* linenumber, but the actual token's linenumber.
262 * here is to store the line of the first character after skipping
263 * the initial whitespace in lex->sline, this happens in lex_do.
265 static int lex_skipwhite(lex_file *lex)
272 while (ch != EOF && isspace(ch)) ch = lex_getch(lex);
278 /* one line comment */
281 /* check for special: '/', '/', '*', '/' */
290 while (ch != EOF && ch != '\n') {
297 /* multiline comment */
309 if (ch == '/') /* allow *//* direct following comment */
311 lex_ungetch(lex, ch);
312 ch = ' '; /* cause TRUE in the isspace check */
316 /* Otherwise roll back to the slash and break out of the loop */
317 lex_ungetch(lex, ch);
321 } while (ch != EOF && isspace(ch));
326 /* Append a character to the token buffer */
327 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
329 if (!token_value_add(lex->tok, ch)) {
330 lexerror(lex, "out of memory");
336 /* Append a trailing null-byte */
337 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
339 if (!token_value_add(lex->tok, 0)) {
340 lexerror(lex, "out of memory");
343 lex->tok->value_count--;
348 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
353 while (ch != EOF && isident(ch))
355 if (!lex_tokench(lex, ch))
356 return (lex->tok->ttype = TOKEN_FATAL);
360 /* last ch was not an ident ch: */
361 lex_ungetch(lex, ch);
366 /* read one ident for the frame list */
367 static int lex_parse_frame(lex_file *lex)
372 token_delete(lex->tok);
373 lex->tok = token_new();
376 while (ch != EOF && ch != '\n' && isspace(ch))
382 if (!isident_start(ch)) {
383 lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
387 if (!lex_tokench(lex, ch))
389 if (!lex_finish_ident(lex))
391 if (!lex_endtoken(lex))
396 /* read a list of $frames */
397 static bool lex_finish_frames(lex_file *lex)
403 rc = lex_parse_frame(lex);
404 if (rc > 0) /* end of line */
406 if (rc < 0) /* error */
409 m.value = lex->framevalue++;
410 m.name = lex->tok->value;
411 lex->tok->value = NULL;
412 if (!lex_file_frames_add(lex, m)) {
413 lexerror(lex, "out of memory");
419 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
427 return TOKEN_STRINGCONST;
432 lexerror(lex, "unexpected end of file");
433 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
434 return (lex->tok->ttype = TOKEN_ERROR);
439 case 'a': ch = '\a'; break;
440 case 'b': ch = '\b'; break;
441 case 'r': ch = '\r'; break;
442 case 'n': ch = '\n'; break;
443 case 't': ch = '\t'; break;
444 case 'f': ch = '\f'; break;
445 case 'v': ch = '\v'; break;
447 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
448 /* so we just add the character plus backslash no matter what it actually is */
449 if (!lex_tokench(lex, '\\'))
450 return (lex->tok->ttype = TOKEN_FATAL);
452 /* add the character finally */
453 if (!lex_tokench(lex, ch))
454 return (lex->tok->ttype = TOKEN_FATAL);
456 else if (!lex_tokench(lex, ch))
457 return (lex->tok->ttype = TOKEN_FATAL);
459 lexerror(lex, "unexpected end of file within string constant");
460 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
461 return (lex->tok->ttype = TOKEN_ERROR);
464 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
470 /* parse a number... */
471 lex->tok->ttype = TOKEN_INTCONST;
473 if (!lex_tokench(lex, ch))
474 return (lex->tok->ttype = TOKEN_FATAL);
477 if (ch != '.' && !isdigit(ch))
479 if (lastch != '0' || ch != 'x')
481 /* end of the number or EOF */
482 lex_ungetch(lex, ch);
483 if (!lex_endtoken(lex))
484 return (lex->tok->ttype = TOKEN_FATAL);
486 lex->tok->constval.i = lastch - '0';
487 return lex->tok->ttype;
493 /* EOF would have been caught above */
497 if (!lex_tokench(lex, ch))
498 return (lex->tok->ttype = TOKEN_FATAL);
500 while (isdigit(ch) || (ishex && isxdigit_only(ch)))
502 if (!lex_tokench(lex, ch))
503 return (lex->tok->ttype = TOKEN_FATAL);
507 /* NOT else, '.' can come from above as well */
508 if (ch == '.' && !ishex)
510 /* Allow a decimal point in non-hex mode */
511 lex->tok->ttype = TOKEN_FLOATCONST;
512 if (!lex_tokench(lex, ch))
513 return (lex->tok->ttype = TOKEN_FATAL);
515 /* continue digits-only */
519 if (!lex_tokench(lex, ch))
520 return (lex->tok->ttype = TOKEN_FATAL);
524 /* put back the last character */
525 /* but do not put back the trailing 'f' of a float */
526 if (lex->tok->ttype == TOKEN_FLOATCONST && ch == 'f')
529 /* generally we don't want words to follow numbers: */
531 lexerror(lex, "unexpected trailing characters after number");
532 return (lex->tok->ttype = TOKEN_ERROR);
534 lex_ungetch(lex, ch);
536 if (!lex_endtoken(lex))
537 return (lex->tok->ttype = TOKEN_FATAL);
538 if (lex->tok->ttype == TOKEN_FLOATCONST)
539 lex->tok->constval.f = strtod(lex->tok->value, NULL);
541 lex->tok->constval.i = strtol(lex->tok->value, NULL, 0);
542 return lex->tok->ttype;
545 int lex_do(lex_file *lex)
550 token_delete(lex->tok);
551 lex->tok = token_new();
555 ch = lex_skipwhite(lex);
556 lex->sline = lex->line;
557 lex->tok->ctx.line = lex->sline;
558 lex->tok->ctx.file = lex->name;
561 return (lex->tok->ttype = TOKEN_EOF);
563 /* modelgen / spritegen commands */
569 if (!isident_start(ch)) {
570 lexerror(lex, "hanging '$' modelgen/spritegen command line");
573 if (!lex_tokench(lex, ch))
574 return (lex->tok->ttype = TOKEN_FATAL);
575 if (!lex_finish_ident(lex))
576 return (lex->tok->ttype = TOKEN_ERROR);
577 if (!lex_endtoken(lex))
578 return (lex->tok->ttype = TOKEN_FATAL);
579 /* skip the known commands */
582 if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
584 /* frame/framesave command works like an enum
585 * similar to fteqcc we handle this in the lexer.
586 * The reason for this is that it is sensitive to newlines,
587 * which the parser is unaware of
589 if (!lex_finish_frames(lex))
590 return (lex->tok->ttype = TOKEN_ERROR);
594 if (!strcmp(v, "framevalue"))
597 while (ch != EOF && isspace(ch) && ch != '\n')
601 lexerror(lex, "$framevalue requires an integer parameter");
605 token_delete(lex->tok);
606 lex->tok = token_new();
607 lex->tok->ttype = lex_finish_digit(lex, ch);
608 if (!lex_endtoken(lex))
609 return (lex->tok->ttype = TOKEN_FATAL);
610 if (lex->tok->ttype != TOKEN_INTCONST) {
611 lexerror(lex, "$framevalue requires an integer parameter");
614 lex->framevalue = lex->tok->constval.i;
618 if (!strcmp(v, "framerestore"))
622 token_delete(lex->tok);
623 lex->tok = token_new();
625 rc = lex_parse_frame(lex);
628 lexerror(lex, "$framerestore requires a framename parameter");
632 return (lex->tok->ttype = TOKEN_FATAL);
635 for (frame = 0; frame < lex->frames_count; ++frame) {
636 if (!strcmp(v, lex->frames[frame].name)) {
637 lex->framevalue = lex->frames[frame].value;
641 lexerror(lex, "unknown framename `%s`", v);
645 if (!strcmp(v, "modelname"))
649 token_delete(lex->tok);
650 lex->tok = token_new();
652 rc = lex_parse_frame(lex);
655 lexerror(lex, "$framerestore requires a framename parameter");
659 return (lex->tok->ttype = TOKEN_FATAL);
662 if (lex->modelname) {
664 m.value = lex->framevalue;
665 m.name = lex->modelname;
666 lex->modelname = NULL;
667 if (!lex_file_frames_add(lex, m)) {
668 lexerror(lex, "out of memory");
669 return (lex->tok->ttype = TOKEN_FATAL);
672 lex->modelname = lex->tok->value;
673 lex->tok->value = NULL;
674 for (frame = 0; frame < lex->frames_count; ++frame) {
675 if (!strcmp(v, lex->frames[frame].name)) {
676 lex->framevalue = lex->frames[frame].value;
683 if (!strcmp(v, "flush"))
686 for (frame = 0; frame < lex->frames_count; ++frame)
687 mem_d(lex->frames[frame].name);
688 MEM_VECTOR_CLEAR(lex, frames);
689 /* skip line (fteqcc does it too) */
691 while (ch != EOF && ch != '\n')
696 if (!strcmp(v, "cd") ||
697 !strcmp(v, "origin") ||
698 !strcmp(v, "base") ||
699 !strcmp(v, "flags") ||
700 !strcmp(v, "scale") ||
705 while (ch != EOF && ch != '\n')
710 for (frame = 0; frame < lex->frames_count; ++frame) {
711 if (!strcmp(v, lex->frames[frame].name)) {
712 lex->tok->constval.i = lex->frames[frame].value;
713 return (lex->tok->ttype = TOKEN_INTCONST);
717 lexerror(lex, "invalid frame macro");
721 /* single-character tokens */
725 if (!lex_tokench(lex, ch) ||
728 return (lex->tok->ttype = TOKEN_FATAL);
730 if (lex->flags.noops)
731 return (lex->tok->ttype = ch);
733 return (lex->tok->ttype = TOKEN_OPERATOR);
742 if (!lex_tokench(lex, ch) ||
745 return (lex->tok->ttype = TOKEN_FATAL);
747 return (lex->tok->ttype = ch);
752 if (lex->flags.noops)
754 /* Detect characters early which are normally
755 * operators OR PART of an operator.
773 if (!lex_tokench(lex, ch) ||
776 return (lex->tok->ttype = TOKEN_FATAL);
778 return (lex->tok->ttype = ch);
784 if (ch == ',' || ch == '.') {
785 if (!lex_tokench(lex, ch) ||
788 return (lex->tok->ttype = TOKEN_FATAL);
790 return (lex->tok->ttype = TOKEN_OPERATOR);
793 if (ch == '+' || ch == '-' || /* ++, --, +=, -= and -> as well! */
794 ch == '>' || ch == '<' || /* <<, >>, <=, >= */
795 ch == '=' || ch == '!' || /* ==, != */
796 ch == '&' || ch == '|') /* &&, ||, &=, |= */
798 if (!lex_tokench(lex, ch))
799 return (lex->tok->ttype = TOKEN_FATAL);
801 nextch = lex_getch(lex);
802 if (nextch == ch || nextch == '=') {
803 if (!lex_tokench(lex, nextch))
804 return (lex->tok->ttype = TOKEN_FATAL);
805 } else if (ch == '-' && nextch == '>') {
806 if (!lex_tokench(lex, nextch))
807 return (lex->tok->ttype = TOKEN_FATAL);
809 lex_ungetch(lex, nextch);
811 if (!lex_endtoken(lex))
812 return (lex->tok->ttype = TOKEN_FATAL);
813 return (lex->tok->ttype = TOKEN_OPERATOR);
817 if (ch == '^' || ch == '~' || ch == '!')
819 if (!lex_tokench(lex, ch) ||
822 return (lex->tok->ttype = TOKEN_FATAL);
824 return (lex->tok->ttype = TOKEN_OPERATOR);
828 if (ch == '*' || ch == '/') /* *=, /= */
830 if (!lex_tokench(lex, ch))
831 return (lex->tok->ttype = TOKEN_FATAL);
833 nextch = lex_getch(lex);
835 if (!lex_tokench(lex, nextch))
836 return (lex->tok->ttype = TOKEN_FATAL);
838 lex_ungetch(lex, nextch);
840 if (!lex_endtoken(lex))
841 return (lex->tok->ttype = TOKEN_FATAL);
842 return (lex->tok->ttype = TOKEN_OPERATOR);
845 if (isident_start(ch))
849 if (!lex_tokench(lex, ch))
850 return (lex->tok->ttype = TOKEN_FATAL);
851 if (!lex_finish_ident(lex)) {
853 return (lex->tok->ttype = TOKEN_ERROR);
855 if (!lex_endtoken(lex))
856 return (lex->tok->ttype = TOKEN_FATAL);
857 lex->tok->ttype = TOKEN_IDENT;
860 if (!strcmp(v, "void")) {
861 lex->tok->ttype = TOKEN_TYPENAME;
862 lex->tok->constval.t = TYPE_VOID;
863 } else if (!strcmp(v, "int")) {
864 lex->tok->ttype = TOKEN_TYPENAME;
865 lex->tok->constval.t = TYPE_INTEGER;
866 } else if (!strcmp(v, "float")) {
867 lex->tok->ttype = TOKEN_TYPENAME;
868 lex->tok->constval.t = TYPE_FLOAT;
869 } else if (!strcmp(v, "string")) {
870 lex->tok->ttype = TOKEN_TYPENAME;
871 lex->tok->constval.t = TYPE_STRING;
872 } else if (!strcmp(v, "entity")) {
873 lex->tok->ttype = TOKEN_TYPENAME;
874 lex->tok->constval.t = TYPE_ENTITY;
875 } else if (!strcmp(v, "vector")) {
876 lex->tok->ttype = TOKEN_TYPENAME;
877 lex->tok->constval.t = TYPE_VECTOR;
878 } else if (!strcmp(v, "for") ||
879 !strcmp(v, "while") ||
882 !strcmp(v, "else") ||
883 !strcmp(v, "local") ||
884 !strcmp(v, "return") ||
886 lex->tok->ttype = TOKEN_KEYWORD;
888 return lex->tok->ttype;
893 lex->tok->ttype = lex_finish_string(lex, '"');
894 while (lex->tok->ttype == TOKEN_STRINGCONST)
896 /* Allow c style "string" "continuation" */
897 ch = lex_skipwhite(lex);
899 lex_ungetch(lex, ch);
903 lex->tok->ttype = lex_finish_string(lex, '"');
905 if (!lex_endtoken(lex))
906 return (lex->tok->ttype = TOKEN_FATAL);
907 return lex->tok->ttype;
912 /* we parse character constants like string,
913 * but return TOKEN_CHARCONST, or a vector type if it fits...
914 * Likewise actual unescaping has to be done by the parser.
915 * The difference is we don't allow 'char' 'continuation'.
917 lex->tok->ttype = lex_finish_string(lex, '\'');
918 if (!lex_endtoken(lex))
919 return (lex->tok->ttype = TOKEN_FATAL);
921 /* It's a vector if we can successfully scan 3 floats */
923 if (sscanf_s(lex->tok->value, " %f %f %f ",
924 &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
926 if (sscanf(lex->tok->value, " %f %f %f ",
927 &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
930 lex->tok->ttype = TOKEN_VECTORCONST;
933 return lex->tok->ttype;
938 lex->tok->ttype = lex_finish_digit(lex, ch);
939 if (!lex_endtoken(lex))
940 return (lex->tok->ttype = TOKEN_FATAL);
941 return lex->tok->ttype;
944 lexerror(lex, "unknown token");
945 return (lex->tok->ttype = TOKEN_ERROR);