9 MEM_VEC_FUNCTIONS(token, char, value)
10 MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames)
12 VECTOR_MAKE(char*, lex_filenames);
14 void lexerror(lex_file *lex, const char *fmt, ...)
19 vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
23 bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
26 int lvl = LVL_WARNING;
28 if (!OPTS_WARN(warntype))
35 vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
45 token *tok = (token*)mem_a(sizeof(token));
48 memset(tok, 0, sizeof(*tok));
52 void token_delete(token *self)
54 if (self->next && self->next->prev == self)
55 self->next->prev = self->prev;
56 if (self->prev && self->prev->next == self)
57 self->prev->next = self->next;
58 MEM_VECTOR_CLEAR(self, value);
62 token* token_copy(const token *cp)
64 token* self = token_new();
68 self->value_alloc = cp->value_count + 1;
69 self->value_count = cp->value_count;
70 self->value = (char*)mem_a(self->value_alloc);
75 memcpy(self->value, cp->value, cp->value_count);
76 self->value[self->value_alloc-1] = 0;
80 self->ttype = cp->ttype;
81 memcpy(&self->constval, &cp->constval, sizeof(self->constval));
85 void token_delete_all(token *t)
96 token* token_copy_all(const token *cp)
101 out = cur = token_copy(cp);
107 cur->next = token_copy(cp);
109 token_delete_all(out);
112 cur->next->prev = cur;
119 static void lex_token_new(lex_file *lex)
123 token_delete(lex->tok);
124 lex->tok = token_new();
126 lex->tok.value_count = 0;
127 lex->tok.constval.t = 0;
128 lex->tok.ctx.line = lex->sline;
129 lex->tok.ctx.file = lex->name;
134 lex_file* lex_open(const char *file)
137 FILE *in = util_fopen(file, "rb");
140 lexerror(NULL, "open failed: '%s'\n", file);
144 lex = (lex_file*)mem_a(sizeof(*lex));
147 lexerror(NULL, "out of memory\n");
151 memset(lex, 0, sizeof(*lex));
154 lex->name = util_strdup(file);
155 lex->line = 1; /* we start counting at 1 */
160 lex_filenames_add(lex->name);
165 void lex_cleanup(void)
168 for (i = 0; i < lex_filenames_elements; ++i)
169 mem_d(lex_filenames_data[i]);
170 mem_d(lex_filenames_data);
173 void lex_close(lex_file *lex)
176 for (i = 0; i < lex->frames_count; ++i)
177 mem_d(lex->frames[i].name);
178 MEM_VECTOR_CLEAR(lex, frames);
181 mem_d(lex->modelname);
187 token_delete(lex->tok);
189 MEM_VECTOR_CLEAR(&(lex->tok), value);
191 /* mem_d(lex->name); collected in lex_filenames */
195 /* Get or put-back data
196 * The following two functions do NOT understand what kind of data they
198 * They are merely wrapping get/put in order to count line numbers.
200 static void lex_ungetch(lex_file *lex, int ch);
201 static int lex_try_trigraph(lex_file *lex, int old)
204 c2 = fgetc(lex->file);
206 lex_ungetch(lex, c2);
210 c3 = fgetc(lex->file);
212 case '=': return '#';
213 case '/': return '\\';
214 case '\'': return '^';
215 case '(': return '[';
216 case ')': return ']';
217 case '!': return '|';
218 case '<': return '{';
219 case '>': return '}';
220 case '-': return '~';
222 lex_ungetch(lex, c3);
223 lex_ungetch(lex, c2);
228 static int lex_try_digraph(lex_file *lex, int ch)
231 c2 = fgetc(lex->file);
232 if (ch == '<' && c2 == ':')
234 else if (ch == ':' && c2 == '>')
236 else if (ch == '<' && c2 == '%')
238 else if (ch == '%' && c2 == '>')
240 else if (ch == '%' && c2 == ':')
242 lex_ungetch(lex, c2);
246 static int lex_getch(lex_file *lex)
252 if (lex->peek[lex->peekpos] == '\n')
254 return lex->peek[lex->peekpos];
257 ch = fgetc(lex->file);
261 return lex_try_trigraph(lex, ch);
262 else if (!lex->flags.nodigraphs && (ch == '<' || ch == ':' || ch == '%'))
263 return lex_try_digraph(lex, ch);
267 static void lex_ungetch(lex_file *lex, int ch)
269 lex->peek[lex->peekpos++] = ch;
274 /* classify characters
275 * some additions to the is*() functions of ctype.h
278 /* Idents are alphanumeric, but they start with alpha or _ */
279 static bool isident_start(int ch)
281 return isalpha(ch) || ch == '_';
284 static bool isident(int ch)
286 return isident_start(ch) || isdigit(ch);
289 /* isxdigit_only is used when we already know it's not a digit
290 * and want to see if it's a hex digit anyway.
292 static bool isxdigit_only(int ch)
294 return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
297 /* Skip whitespace and comments and return the first
298 * non-white character.
299 * As this makes use of the above getch() ungetch() functions,
300 * we don't need to care at all about line numbering anymore.
302 * In theory, this function should only be used at the beginning
303 * of lexing, or when we *know* the next character is part of the token.
304 * Otherwise, if the parser throws an error, the linenumber may not be
305 * the line of the error, but the line of the next token AFTER the error.
307 * This is currently only problematic when using c-like string-continuation,
308 * since comments and whitespaces are allowed between 2 such strings.
312 "A continuation of the previous string"
313 // This line is skipped
316 * In this case, if the parser decides it didn't actually want a string,
317 * and uses lex->line to print an error, it will show the ', foo);' line's
320 * On the other hand, the parser is supposed to remember the line of the next
321 * token's beginning. In this case we would want skipwhite() to be called
322 * AFTER reading a token, so that the parser, before reading the NEXT token,
323 * doesn't store the *comment's* linenumber, but the actual token's linenumber.
326 * here is to store the line of the first character after skipping
327 * the initial whitespace in lex->sline, this happens in lex_do.
329 static int lex_skipwhite(lex_file *lex)
336 while (ch != EOF && isspace(ch)) ch = lex_getch(lex);
342 /* one line comment */
345 /* check for special: '/', '/', '*', '/' */
354 while (ch != EOF && ch != '\n') {
361 /* multiline comment */
371 ch = ' '; /* cause TRUE in the isspace check */
374 /* Otherwise roll back to the slash and break out of the loop */
375 lex_ungetch(lex, ch);
379 } while (ch != EOF && isspace(ch));
384 /* Append a character to the token buffer */
385 static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
387 if (!token_value_add(&lex->tok, ch)) {
388 lexerror(lex, "out of memory");
394 /* Append a trailing null-byte */
395 static bool GMQCC_WARN lex_endtoken(lex_file *lex)
397 if (!token_value_add(&lex->tok, 0)) {
398 lexerror(lex, "out of memory");
401 lex->tok.value_count--;
406 static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
411 while (ch != EOF && isident(ch))
413 if (!lex_tokench(lex, ch))
414 return (lex->tok.ttype = TOKEN_FATAL);
418 /* last ch was not an ident ch: */
419 lex_ungetch(lex, ch);
424 /* read one ident for the frame list */
425 static int lex_parse_frame(lex_file *lex)
432 while (ch != EOF && ch != '\n' && isspace(ch))
438 if (!isident_start(ch)) {
439 lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
443 if (!lex_tokench(lex, ch))
445 if (!lex_finish_ident(lex))
447 if (!lex_endtoken(lex))
452 /* read a list of $frames */
453 static bool lex_finish_frames(lex_file *lex)
460 rc = lex_parse_frame(lex);
461 if (rc > 0) /* end of line */
463 if (rc < 0) /* error */
466 for (i = 0; i < lex->frames_count; ++i) {
467 if (!strcmp(lex->tok.value, lex->frames[i].name)) {
468 lex->frames[i].value = lex->framevalue++;
469 if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value))
474 if (i < lex->frames_count)
477 m.value = lex->framevalue++;
478 m.name = lex->tok.value;
479 lex->tok.value = NULL;
480 lex->tok.value_alloc = lex->tok.value_count = 0;
481 if (!lex_file_frames_add(lex, m)) {
482 lexerror(lex, "out of memory");
488 static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
496 return TOKEN_STRINGCONST;
501 lexerror(lex, "unexpected end of file");
502 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
503 return (lex->tok.ttype = TOKEN_ERROR);
508 case 'a': ch = '\a'; break;
509 case 'b': ch = '\b'; break;
510 case 'r': ch = '\r'; break;
511 case 'n': ch = '\n'; break;
512 case 't': ch = '\t'; break;
513 case 'f': ch = '\f'; break;
514 case 'v': ch = '\v'; break;
516 lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
517 /* so we just add the character plus backslash no matter what it actually is */
518 if (!lex_tokench(lex, '\\'))
519 return (lex->tok.ttype = TOKEN_FATAL);
521 /* add the character finally */
522 if (!lex_tokench(lex, ch))
523 return (lex->tok.ttype = TOKEN_FATAL);
525 else if (!lex_tokench(lex, ch))
526 return (lex->tok.ttype = TOKEN_FATAL);
528 lexerror(lex, "unexpected end of file within string constant");
529 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
530 return (lex->tok.ttype = TOKEN_ERROR);
533 static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
539 /* parse a number... */
540 lex->tok.ttype = TOKEN_INTCONST;
542 if (!lex_tokench(lex, ch))
543 return (lex->tok.ttype = TOKEN_FATAL);
546 if (ch != '.' && !isdigit(ch))
548 if (lastch != '0' || ch != 'x')
550 /* end of the number or EOF */
551 lex_ungetch(lex, ch);
552 if (!lex_endtoken(lex))
553 return (lex->tok.ttype = TOKEN_FATAL);
555 lex->tok.constval.i = lastch - '0';
556 return lex->tok.ttype;
562 /* EOF would have been caught above */
566 if (!lex_tokench(lex, ch))
567 return (lex->tok.ttype = TOKEN_FATAL);
569 while (isdigit(ch) || (ishex && isxdigit_only(ch)))
571 if (!lex_tokench(lex, ch))
572 return (lex->tok.ttype = TOKEN_FATAL);
576 /* NOT else, '.' can come from above as well */
577 if (ch == '.' && !ishex)
579 /* Allow a decimal point in non-hex mode */
580 lex->tok.ttype = TOKEN_FLOATCONST;
581 if (!lex_tokench(lex, ch))
582 return (lex->tok.ttype = TOKEN_FATAL);
584 /* continue digits-only */
588 if (!lex_tokench(lex, ch))
589 return (lex->tok.ttype = TOKEN_FATAL);
593 /* put back the last character */
594 /* but do not put back the trailing 'f' of a float */
595 if (lex->tok.ttype == TOKEN_FLOATCONST && ch == 'f')
598 /* generally we don't want words to follow numbers: */
600 lexerror(lex, "unexpected trailing characters after number");
601 return (lex->tok.ttype = TOKEN_ERROR);
603 lex_ungetch(lex, ch);
605 if (!lex_endtoken(lex))
606 return (lex->tok.ttype = TOKEN_FATAL);
607 if (lex->tok.ttype == TOKEN_FLOATCONST)
608 lex->tok.constval.f = strtod(lex->tok.value, NULL);
610 lex->tok.constval.i = strtol(lex->tok.value, NULL, 0);
611 return lex->tok.ttype;
614 int lex_do(lex_file *lex)
624 ch = lex_skipwhite(lex);
625 lex->sline = lex->line;
626 lex->tok.ctx.line = lex->sline;
627 lex->tok.ctx.file = lex->name;
630 return (lex->tok.ttype = TOKEN_FATAL);
634 return (lex->tok.ttype = TOKEN_EOF);
637 /* modelgen / spritegen commands */
643 if (!isident_start(ch)) {
644 lexerror(lex, "hanging '$' modelgen/spritegen command line");
647 if (!lex_tokench(lex, ch))
648 return (lex->tok.ttype = TOKEN_FATAL);
649 if (!lex_finish_ident(lex))
650 return (lex->tok.ttype = TOKEN_ERROR);
651 if (!lex_endtoken(lex))
652 return (lex->tok.ttype = TOKEN_FATAL);
653 /* skip the known commands */
656 if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
658 /* frame/framesave command works like an enum
659 * similar to fteqcc we handle this in the lexer.
660 * The reason for this is that it is sensitive to newlines,
661 * which the parser is unaware of
663 if (!lex_finish_frames(lex))
664 return (lex->tok.ttype = TOKEN_ERROR);
668 if (!strcmp(v, "framevalue"))
671 while (ch != EOF && isspace(ch) && ch != '\n')
675 lexerror(lex, "$framevalue requires an integer parameter");
680 lex->tok.ttype = lex_finish_digit(lex, ch);
681 if (!lex_endtoken(lex))
682 return (lex->tok.ttype = TOKEN_FATAL);
683 if (lex->tok.ttype != TOKEN_INTCONST) {
684 lexerror(lex, "$framevalue requires an integer parameter");
687 lex->framevalue = lex->tok.constval.i;
691 if (!strcmp(v, "framerestore"))
697 rc = lex_parse_frame(lex);
700 lexerror(lex, "$framerestore requires a framename parameter");
704 return (lex->tok.ttype = TOKEN_FATAL);
707 for (frame = 0; frame < lex->frames_count; ++frame) {
708 if (!strcmp(v, lex->frames[frame].name)) {
709 lex->framevalue = lex->frames[frame].value;
713 lexerror(lex, "unknown framename `%s`", v);
717 if (!strcmp(v, "modelname"))
723 rc = lex_parse_frame(lex);
726 lexerror(lex, "$framerestore requires a framename parameter");
730 return (lex->tok.ttype = TOKEN_FATAL);
733 if (lex->modelname) {
735 m.value = lex->framevalue;
736 m.name = lex->modelname;
737 lex->modelname = NULL;
738 if (!lex_file_frames_add(lex, m)) {
739 lexerror(lex, "out of memory");
740 return (lex->tok.ttype = TOKEN_FATAL);
743 lex->modelname = lex->tok.value;
744 lex->tok.value = NULL;
745 lex->tok.value_alloc = lex->tok.value_count = 0;
746 for (frame = 0; frame < lex->frames_count; ++frame) {
747 if (!strcmp(v, lex->frames[frame].name)) {
748 lex->framevalue = lex->frames[frame].value;
755 if (!strcmp(v, "flush"))
758 for (frame = 0; frame < lex->frames_count; ++frame)
759 mem_d(lex->frames[frame].name);
760 MEM_VECTOR_CLEAR(lex, frames);
761 /* skip line (fteqcc does it too) */
763 while (ch != EOF && ch != '\n')
768 if (!strcmp(v, "cd") ||
769 !strcmp(v, "origin") ||
770 !strcmp(v, "base") ||
771 !strcmp(v, "flags") ||
772 !strcmp(v, "scale") ||
777 while (ch != EOF && ch != '\n')
782 for (frame = 0; frame < lex->frames_count; ++frame) {
783 if (!strcmp(v, lex->frames[frame].name)) {
784 lex->tok.constval.i = lex->frames[frame].value;
785 return (lex->tok.ttype = TOKEN_INTCONST);
789 lexerror(lex, "invalid frame macro");
793 /* single-character tokens */
797 if (!lex_tokench(lex, ch) ||
800 return (lex->tok.ttype = TOKEN_FATAL);
802 if (lex->flags.noops)
803 return (lex->tok.ttype = ch);
805 return (lex->tok.ttype = TOKEN_OPERATOR);
814 if (!lex_tokench(lex, ch) ||
817 return (lex->tok.ttype = TOKEN_FATAL);
819 return (lex->tok.ttype = ch);
824 if (lex->flags.noops)
826 /* Detect characters early which are normally
827 * operators OR PART of an operator.
844 if (!lex_tokench(lex, ch) ||
847 return (lex->tok.ttype = TOKEN_FATAL);
849 return (lex->tok.ttype = ch);
856 if (!lex_tokench(lex, ch))
857 return (lex->tok.ttype = TOKEN_FATAL);
858 /* peek ahead once */
859 nextch = lex_getch(lex);
861 lex_ungetch(lex, nextch);
862 if (!lex_endtoken(lex))
863 return (lex->tok.ttype = TOKEN_FATAL);
864 return (lex->tok.ttype = ch);
866 /* peek ahead again */
867 nextch = lex_getch(lex);
869 lex_ungetch(lex, nextch);
870 lex_ungetch(lex, nextch);
871 if (!lex_endtoken(lex))
872 return (lex->tok.ttype = TOKEN_FATAL);
873 return (lex->tok.ttype = ch);
875 /* fill the token to be "..." */
876 if (!lex_tokench(lex, ch) ||
877 !lex_tokench(lex, ch) ||
880 return (lex->tok.ttype = TOKEN_FATAL);
882 return (lex->tok.ttype = TOKEN_DOTS);
886 if (ch == ',' || ch == '.') {
887 if (!lex_tokench(lex, ch) ||
890 return (lex->tok.ttype = TOKEN_FATAL);
892 return (lex->tok.ttype = TOKEN_OPERATOR);
895 if (ch == '+' || ch == '-' || /* ++, --, +=, -= and -> as well! */
896 ch == '>' || ch == '<' || /* <<, >>, <=, >= */
897 ch == '=' || ch == '!' || /* ==, != */
898 ch == '&' || ch == '|') /* &&, ||, &=, |= */
900 if (!lex_tokench(lex, ch))
901 return (lex->tok.ttype = TOKEN_FATAL);
903 nextch = lex_getch(lex);
904 if (nextch == ch || nextch == '=') {
905 if (!lex_tokench(lex, nextch))
906 return (lex->tok.ttype = TOKEN_FATAL);
907 } else if (ch == '-' && nextch == '>') {
908 if (!lex_tokench(lex, nextch))
909 return (lex->tok.ttype = TOKEN_FATAL);
911 lex_ungetch(lex, nextch);
913 if (!lex_endtoken(lex))
914 return (lex->tok.ttype = TOKEN_FATAL);
915 return (lex->tok.ttype = TOKEN_OPERATOR);
919 if (ch == '^' || ch == '~' || ch == '!')
921 if (!lex_tokench(lex, ch) ||
924 return (lex->tok.ttype = TOKEN_FATAL);
926 return (lex->tok.ttype = TOKEN_OPERATOR);
930 if (ch == '*' || ch == '/') /* *=, /= */
932 if (!lex_tokench(lex, ch))
933 return (lex->tok.ttype = TOKEN_FATAL);
935 nextch = lex_getch(lex);
937 if (!lex_tokench(lex, nextch))
938 return (lex->tok.ttype = TOKEN_FATAL);
940 lex_ungetch(lex, nextch);
942 if (!lex_endtoken(lex))
943 return (lex->tok.ttype = TOKEN_FATAL);
944 return (lex->tok.ttype = TOKEN_OPERATOR);
947 if (isident_start(ch))
951 if (!lex_tokench(lex, ch))
952 return (lex->tok.ttype = TOKEN_FATAL);
953 if (!lex_finish_ident(lex)) {
955 return (lex->tok.ttype = TOKEN_ERROR);
957 if (!lex_endtoken(lex))
958 return (lex->tok.ttype = TOKEN_FATAL);
959 lex->tok.ttype = TOKEN_IDENT;
962 if (!strcmp(v, "void")) {
963 lex->tok.ttype = TOKEN_TYPENAME;
964 lex->tok.constval.t = TYPE_VOID;
965 } else if (!strcmp(v, "int")) {
966 lex->tok.ttype = TOKEN_TYPENAME;
967 lex->tok.constval.t = TYPE_INTEGER;
968 } else if (!strcmp(v, "float")) {
969 lex->tok.ttype = TOKEN_TYPENAME;
970 lex->tok.constval.t = TYPE_FLOAT;
971 } else if (!strcmp(v, "string")) {
972 lex->tok.ttype = TOKEN_TYPENAME;
973 lex->tok.constval.t = TYPE_STRING;
974 } else if (!strcmp(v, "entity")) {
975 lex->tok.ttype = TOKEN_TYPENAME;
976 lex->tok.constval.t = TYPE_ENTITY;
977 } else if (!strcmp(v, "vector")) {
978 lex->tok.ttype = TOKEN_TYPENAME;
979 lex->tok.constval.t = TYPE_VECTOR;
980 } else if (!strcmp(v, "for") ||
981 !strcmp(v, "while") ||
984 !strcmp(v, "else") ||
985 !strcmp(v, "local") ||
986 !strcmp(v, "return") ||
989 lex->tok.ttype = TOKEN_KEYWORD;
991 else if (opts_standard != COMPILER_QCC)
993 /* other standards reserve these keywords */
994 if (!strcmp(v, "switch") ||
995 !strcmp(v, "struct") ||
996 !strcmp(v, "union") ||
997 !strcmp(v, "break") ||
998 !strcmp(v, "continue"))
1000 lex->tok.ttype = TOKEN_KEYWORD;
1004 return lex->tok.ttype;
1009 lex->flags.nodigraphs = true;
1010 lex->tok.ttype = lex_finish_string(lex, '"');
1011 while (lex->tok.ttype == TOKEN_STRINGCONST)
1013 /* Allow c style "string" "continuation" */
1014 ch = lex_skipwhite(lex);
1016 lex_ungetch(lex, ch);
1020 lex->tok.ttype = lex_finish_string(lex, '"');
1022 lex->flags.nodigraphs = false;
1023 if (!lex_endtoken(lex))
1024 return (lex->tok.ttype = TOKEN_FATAL);
1025 return lex->tok.ttype;
1030 /* we parse character constants like string,
1031 * but return TOKEN_CHARCONST, or a vector type if it fits...
1032 * Likewise actual unescaping has to be done by the parser.
1033 * The difference is we don't allow 'char' 'continuation'.
1035 lex->tok.ttype = lex_finish_string(lex, '\'');
1036 if (!lex_endtoken(lex))
1037 return (lex->tok.ttype = TOKEN_FATAL);
1039 /* It's a vector if we can successfully scan 3 floats */
1041 if (sscanf_s(lex->tok.value, " %f %f %f ",
1042 &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1044 if (sscanf(lex->tok.value, " %f %f %f ",
1045 &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
1048 lex->tok.ttype = TOKEN_VECTORCONST;
1051 return lex->tok.ttype;
1056 lex->tok.ttype = lex_finish_digit(lex, ch);
1057 if (!lex_endtoken(lex))
1058 return (lex->tok.ttype = TOKEN_FATAL);
1059 return lex->tok.ttype;
1062 lexerror(lex, "unknown token");
1063 return (lex->tok.ttype = TOKEN_ERROR);