- int ch, nextch;
-
- if (lex->tok)
- token_delete(lex->tok);
- lex->tok = token_new();
- if (!lex->tok)
- return TOKEN_FATAL;
-
- ch = lex_skipwhite(lex);
- lex->sline = lex->line;
- lex->tok->ctx.line = lex->sline;
- lex->tok->ctx.file = lex->name;
-
- if (ch == EOF)
- return (lex->tok->ttype = TOKEN_EOF);
-
- /* single-character tokens */
- switch (ch)
- {
- case ';':
- case '(':
- case ')':
- case '{':
- case '}':
- case '[':
- case ']':
-
- case ',':
-
- return (lex->tok->ttype = ch);
- default:
- break;
- }
-
- if (lex->flags.noops)
- {
- /* Detect characters early which are normally
- * operators OR PART of an operator.
- */
- switch (ch)
- {
- case '+':
- case '-':
- case '*':
- case '/':
- case '<':
- case '>':
- case '=':
- case '&':
- case '|':
- case '^':
- case '~':
- return ch;
- default:
- break;
- }
- }
-
- if (ch == '+' || ch == '-' || /* ++, --, +=, -= and -> as well! */
- ch == '>' || ch == '<' || /* <<, >>, <=, >= */
- ch == '=' || /* == */
- ch == '&' || ch == '|') /* &&, ||, &=, |= */
- {
- if (!lex_tokench(lex, ch))
- return (lex->tok->ttype = TOKEN_FATAL);
-
- nextch = lex_getch(lex);
- if (nextch == ch || nextch == '=') {
- if (!lex_tokench(lex, nextch))
- return (lex->tok->ttype = TOKEN_FATAL);
- } else if (ch == '-' && nextch == '>') {
- if (!lex_tokench(lex, nextch))
- return (lex->tok->ttype = TOKEN_FATAL);
- } else
- lex_ungetch(lex, nextch);
-
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
- return (lex->tok->ttype = TOKEN_OPERATOR);
- }
-
- if (ch == '^' || ch == '~' || ch == '!')
- {
- if (!lex_tokench(lex, ch) ||
- !lex_endtoken(lex))
- {
- return (lex->tok->ttype = TOKEN_FATAL);
- }
- return (lex->tok->ttype = TOKEN_OPERATOR);
- }
-
- if (ch == '*' || ch == '/') /* *=, /= */
- {
- if (!lex_tokench(lex, ch))
- return (lex->tok->ttype = TOKEN_FATAL);
-
- nextch = lex_getch(lex);
- if (nextch == '=') {
- if (!lex_tokench(lex, nextch))
- return (lex->tok->ttype = TOKEN_FATAL);
- } else
- lex_ungetch(lex, nextch);
-
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
- return (lex->tok->ttype = TOKEN_OPERATOR);
- }
-
- if (isident_start(ch))
- {
- const char *v;
- if (!lex_tokench(lex, ch))
- return (lex->tok->ttype = TOKEN_FATAL);
- if (!lex_finish_ident(lex)) {
- /* error? */
- return (lex->tok->ttype = TOKEN_ERROR);
- }
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
- lex->tok->ttype = TOKEN_IDENT;
-
- v = lex->tok->value;
- if (!strcmp(v, "void")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_VOID;
- } else if (!strcmp(v, "int")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_INTEGER;
- } else if (!strcmp(v, "float")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_FLOAT;
- } else if (!strcmp(v, "string")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_STRING;
- } else if (!strcmp(v, "entity")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_ENTITY;
- } else if (!strcmp(v, "vector")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_VECTOR;
- } else if (!strcmp(v, "for") ||
- !strcmp(v, "while") ||
- !strcmp(v, "do"))
- lex->tok->ttype = TOKEN_KEYWORD;
-
- return lex->tok->ttype;
- }
-
- if (ch == '"')
- {
- lex->tok->ttype = lex_finish_string(lex, '"');
- while (lex->tok->ttype == TOKEN_STRINGCONST)
- {
- /* Allow c style "string" "continuation" */
- ch = lex_skipwhite(lex);
- if (ch != '"') {
- lex_ungetch(lex, ch);
- break;
- }
-
- lex->tok->ttype = lex_finish_string(lex, '"');
- }
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
- return lex->tok->ttype;
- }
-
- if (ch == '\'')
- {
- /* we parse character constants like string,
- * but return TOKEN_CHARCONST, or a vector type if it fits...
- * Likewise actual unescaping has to be done by the parser.
- * The difference is we don't allow 'char' 'continuation'.
- */
- lex->tok->ttype = lex_finish_string(lex, '\'');
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
-
- /* It's a vector if we can successfully scan 3 floats */
- if (sscanf(lex->tok->value, " %f %f %f ", &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
- {
- lex->tok->ttype = TOKEN_VECTORCONST;
- }
-
- return lex->tok->ttype;
- }
-
- if (isdigit(ch))
- {
- lex->tok->ttype = lex_finish_digit(lex, ch);
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
- return lex->tok->ttype;
- }
-
- lexerror(lex, "unknown token");
- return (lex->tok->ttype = TOKEN_ERROR);
+ int ch, nextch, thirdch;
+ bool hadwhite = false;
+
+ lex_token_new(lex);
+#if 0
+ if (!lex->tok)
+ return TOKEN_FATAL;
+#endif
+
+ while (true) {
+ ch = lex_skipwhite(lex, hadwhite);
+ hadwhite = true;
+ if (!lex->flags.mergelines || ch != '\\')
+ break;
+ ch = lex_getch(lex);
+ if (ch == '\r')
+ ch = lex_getch(lex);
+ if (ch != '\n') {
+ lex_ungetch(lex, ch);
+ ch = '\\';
+ break;
+ }
+ /* we reached a linemerge */
+ lex_tokench(lex, '\n');
+ continue;
+ }
+
+ if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
+ return (lex->tok.ttype = ch);
+ }
+
+ lex->sline = lex->line;
+ lex->tok.ctx.line = lex->sline;
+ lex->tok.ctx.file = lex->name;
+
+ if (lex->eof)
+ return (lex->tok.ttype = TOKEN_FATAL);
+
+ if (ch == EOF) {
+ lex->eof = true;
+ return (lex->tok.ttype = TOKEN_EOF);
+ }
+
+ /* modelgen / spritegen commands */
+ if (ch == '$' && !lex->flags.preprocessing) {
+ const char *v;
+ size_t frame;
+
+ ch = lex_getch(lex);
+ if (!isident_start(ch)) {
+ lexerror(lex, "hanging '$' modelgen/spritegen command line");
+ return lex_do(lex);
+ }
+ lex_tokench(lex, ch);
+ if (!lex_finish_ident(lex))
+ return (lex->tok.ttype = TOKEN_ERROR);
+ lex_endtoken(lex);
+ /* skip the known commands */
+ v = lex->tok.value;
+
+ if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
+ {
+ /* frame/framesave command works like an enum
+ * similar to fteqcc we handle this in the lexer.
+ * The reason for this is that it is sensitive to newlines,
+ * which the parser is unaware of
+ */
+ if (!lex_finish_frames(lex))
+ return (lex->tok.ttype = TOKEN_ERROR);
+ return lex_do(lex);
+ }
+
+ if (!strcmp(v, "framevalue"))
+ {
+ ch = lex_getch(lex);
+ while (ch != EOF && isspace(ch) && ch != '\n')
+ ch = lex_getch(lex);
+
+ if (!isdigit(ch)) {
+ lexerror(lex, "$framevalue requires an integer parameter");
+ return lex_do(lex);
+ }
+
+ lex_token_new(lex);
+ lex->tok.ttype = lex_finish_digit(lex, ch);
+ lex_endtoken(lex);
+ if (lex->tok.ttype != TOKEN_INTCONST) {
+ lexerror(lex, "$framevalue requires an integer parameter");
+ return lex_do(lex);
+ }
+ lex->framevalue = lex->tok.constval.i;
+ return lex_do(lex);
+ }
+
+ if (!strcmp(v, "framerestore"))
+ {
+ int rc;
+
+ lex_token_new(lex);
+
+ rc = lex_parse_frame(lex);
+
+ if (rc > 0) {
+ lexerror(lex, "$framerestore requires a framename parameter");
+ return lex_do(lex);
+ }
+ if (rc < 0)
+ return (lex->tok.ttype = TOKEN_FATAL);
+
+ v = lex->tok.value;
+ for (frame = 0; frame < vec_size(lex->frames); ++frame) {
+ if (!strcmp(v, lex->frames[frame].name)) {
+ lex->framevalue = lex->frames[frame].value;
+ return lex_do(lex);
+ }
+ }
+ lexerror(lex, "unknown framename `%s`", v);
+ return lex_do(lex);
+ }
+
+ if (!strcmp(v, "modelname"))
+ {
+ int rc;
+
+ lex_token_new(lex);
+
+ rc = lex_parse_frame(lex);
+
+ if (rc > 0) {
+ lexerror(lex, "$modelname requires a parameter");
+ return lex_do(lex);
+ }
+ if (rc < 0)
+ return (lex->tok.ttype = TOKEN_FATAL);
+
+ if (lex->modelname) {
+ frame_macro m;
+ m.value = lex->framevalue;
+ m.name = lex->modelname;
+ lex->modelname = NULL;
+ vec_push(lex->frames, m);
+ }
+ lex->modelname = lex->tok.value;
+ lex->tok.value = NULL;
+ return lex_do(lex);
+ }
+
+ if (!strcmp(v, "flush"))
+ {
+ size_t fi;
+ for (fi = 0; fi < vec_size(lex->frames); ++fi)
+ mem_d(lex->frames[fi].name);
+ vec_free(lex->frames);
+ /* skip line (fteqcc does it too) */
+ ch = lex_getch(lex);
+ while (ch != EOF && ch != '\n')
+ ch = lex_getch(lex);
+ return lex_do(lex);
+ }
+
+ if (!strcmp(v, "cd") ||
+ !strcmp(v, "origin") ||
+ !strcmp(v, "base") ||
+ !strcmp(v, "flags") ||
+ !strcmp(v, "scale") ||
+ !strcmp(v, "skin"))
+ {
+ /* skip line */
+ ch = lex_getch(lex);
+ while (ch != EOF && ch != '\n')
+ ch = lex_getch(lex);
+ return lex_do(lex);
+ }
+
+ for (frame = 0; frame < vec_size(lex->frames); ++frame) {
+ if (!strcmp(v, lex->frames[frame].name)) {
+ lex->tok.constval.i = lex->frames[frame].value;
+ return (lex->tok.ttype = TOKEN_INTCONST);
+ }
+ }
+
+ lexerror(lex, "invalid frame macro");
+ return lex_do(lex);
+ }
+
+ /* single-character tokens */
+ switch (ch)
+ {
+ case '[':
+ nextch = lex_getch(lex);
+ if (nextch == '[') {
+ lex_tokench(lex, ch);
+ lex_tokench(lex, nextch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_ATTRIBUTE_OPEN);
+ }
+ lex_ungetch(lex, nextch);
+ /* FALL THROUGH */
+ case '(':
+ case ':':
+ case '?':
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ if (lex->flags.noops)
+ return (lex->tok.ttype = ch);
+ else
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+
+ case ']':
+ if (lex->flags.noops) {
+ nextch = lex_getch(lex);
+ if (nextch == ']') {
+ lex_tokench(lex, ch);
+ lex_tokench(lex, nextch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_ATTRIBUTE_CLOSE);
+ }
+ lex_ungetch(lex, nextch);
+ }
+ /* FALL THROUGH */
+ case ')':
+ case ';':
+ case '{':
+ case '}':
+
+ case '#':
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = ch);
+ default:
+ break;
+ }
+
+ if (ch == '.') {
+ nextch = lex_getch(lex);
+ /* digits starting with a dot */
+ if (isdigit(nextch)) {
+ lex_ungetch(lex, nextch);
+ lex->tok.ttype = lex_finish_digit(lex, ch);
+ lex_endtoken(lex);
+ return lex->tok.ttype;
+ }
+ lex_ungetch(lex, nextch);
+ }
+
+ if (lex->flags.noops)
+ {
+ /* Detect characters early which are normally
+ * operators OR PART of an operator.
+ */
+ switch (ch)
+ {
+ /*
+ case '+':
+ case '-':
+ */
+ case '*':
+ case '/':
+ case '<':
+ case '>':
+ case '=':
+ case '&':
+ case '|':
+ case '^':
+ case '~':
+ case ',':
+ case '!':
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = ch);
+ default:
+ break;
+ }
+ }
+
+ if (ch == '.')
+ {
+ lex_tokench(lex, ch);
+ /* peek ahead once */
+ nextch = lex_getch(lex);
+ if (nextch != '.') {
+ lex_ungetch(lex, nextch);
+ lex_endtoken(lex);
+ if (lex->flags.noops)
+ return (lex->tok.ttype = ch);
+ else
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+ /* peek ahead again */
+ nextch = lex_getch(lex);
+ if (nextch != '.') {
+ lex_ungetch(lex, nextch);
+ lex_ungetch(lex, '.');
+ lex_endtoken(lex);
+ if (lex->flags.noops)
+ return (lex->tok.ttype = ch);
+ else
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+ /* fill the token to be "..." */
+ lex_tokench(lex, ch);
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_DOTS);
+ }
+
+ if (ch == ',' || ch == '.') {
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+
+ if (ch == '+' || ch == '-' || /* ++, --, +=, -= and -> as well! */
+ ch == '>' || ch == '<' || /* <<, >>, <=, >= */
+ ch == '=' || ch == '!' || /* <=>, ==, != */
+ ch == '&' || ch == '|' || /* &&, ||, &=, |= */
+ ch == '~' /* ~=, ~ */
+ ) {
+ lex_tokench(lex, ch);
+
+ nextch = lex_getch(lex);
+ if ((nextch == '=' && ch != '<') || (nextch == ch && ch != '!')) {
+ lex_tokench(lex, nextch);
+ } else if (ch == '<' && nextch == '=') {
+ lex_tokench(lex, nextch);
+ if ((thirdch = lex_getch(lex)) == '>')
+ lex_tokench(lex, thirdch);
+ else
+ lex_ungetch(lex, thirdch);
+
+ } else if (ch == '-' && nextch == '>') {
+ lex_tokench(lex, nextch);
+ } else if (ch == '&' && nextch == '~') {
+ thirdch = lex_getch(lex);
+ if (thirdch != '=') {
+ lex_ungetch(lex, thirdch);
+ lex_ungetch(lex, nextch);
+ }
+ else {
+ lex_tokench(lex, nextch);
+ lex_tokench(lex, thirdch);
+ }
+ }
+ else if (lex->flags.preprocessing &&
+ ch == '-' && isdigit(nextch))
+ {
+ lex->tok.ttype = lex_finish_digit(lex, nextch);
+ if (lex->tok.ttype == TOKEN_INTCONST)
+ lex->tok.constval.i = -lex->tok.constval.i;
+ else
+ lex->tok.constval.f = -lex->tok.constval.f;
+ lex_endtoken(lex);
+ return lex->tok.ttype;
+ } else {
+ lex_ungetch(lex, nextch);
+ }
+
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+
+ /*
+ if (ch == '^' || ch == '~' || ch == '!')
+ {
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+ */
+
+ if (ch == '*' || ch == '/') /* *=, /= */
+ {
+ lex_tokench(lex, ch);
+
+ nextch = lex_getch(lex);
+ if (nextch == '=' || nextch == '*') {
+ lex_tokench(lex, nextch);
+ } else
+ lex_ungetch(lex, nextch);
+
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+
+ if (ch == '%') {
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+
+ if (isident_start(ch))
+ {
+ const char *v;
+
+ lex_tokench(lex, ch);
+ if (!lex_finish_ident(lex)) {
+ /* error? */
+ return (lex->tok.ttype = TOKEN_ERROR);
+ }
+ lex_endtoken(lex);
+ lex->tok.ttype = TOKEN_IDENT;
+
+ v = lex->tok.value;
+ if (!strcmp(v, "void")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_VOID;
+ } else if (!strcmp(v, "int")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_INTEGER;
+ } else if (!strcmp(v, "float")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_FLOAT;
+ } else if (!strcmp(v, "string")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_STRING;
+ } else if (!strcmp(v, "entity")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_ENTITY;
+ } else if (!strcmp(v, "vector")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_VECTOR;
+ } else {
+ size_t kw;
+ for (kw = 0; kw < num_keywords_qc; ++kw) {
+ if (!strcmp(v, keywords_qc[kw]))
+ return (lex->tok.ttype = TOKEN_KEYWORD);
+ }
+ if (OPTS_OPTION_U32(OPTION_STANDARD) != COMPILER_QCC) {
+ for (kw = 0; kw < num_keywords_fg; ++kw) {
+ if (!strcmp(v, keywords_fg[kw]))
+ return (lex->tok.ttype = TOKEN_KEYWORD);
+ }
+ }
+ }
+
+ return lex->tok.ttype;
+ }
+
+ if (ch == '"')
+ {
+ lex->flags.nodigraphs = true;
+ if (lex->flags.preprocessing)
+ lex_tokench(lex, ch);
+ lex->tok.ttype = lex_finish_string(lex, '"');
+ if (lex->flags.preprocessing)
+ lex_tokench(lex, ch);
+ while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
+ {
+ /* Allow c style "string" "continuation" */
+ ch = lex_skipwhite(lex, false);
+ if (ch != '"') {
+ lex_ungetch(lex, ch);
+ break;
+ }
+
+ lex->tok.ttype = lex_finish_string(lex, '"');
+ }
+ lex->flags.nodigraphs = false;
+ lex_endtoken(lex);
+ return lex->tok.ttype;
+ }
+
+ if (ch == '\'')
+ {
+ /* we parse character constants like string,
+ * but return TOKEN_CHARCONST, or a vector type if it fits...
+ * Likewise actual unescaping has to be done by the parser.
+ * The difference is we don't allow 'char' 'continuation'.
+ */
+ if (lex->flags.preprocessing)
+ lex_tokench(lex, ch);
+ lex->tok.ttype = lex_finish_string(lex, '\'');
+ if (lex->flags.preprocessing)
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+
+ lex->tok.ttype = TOKEN_CHARCONST;
+ /* It's a vector if we can successfully scan 3 floats */
+#ifdef _MSC_VER
+ if (sscanf_s(lex->tok.value, " %f %f %f ",
+ &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
+#else
+ if (sscanf(lex->tok.value, " %f %f %f ",
+ &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
+#endif
+
+ {
+ lex->tok.ttype = TOKEN_VECTORCONST;
+ }
+ else
+ {
+ if (!lex->flags.preprocessing && strlen(lex->tok.value) > 1) {
+ uchar_t u8char;
+ /* check for a valid utf8 character */
+ if (!OPTS_FLAG(UTF8) || !u8_analyze(lex->tok.value, NULL, NULL, &u8char, 8)) {
+ if (lexwarn(lex, WARN_MULTIBYTE_CHARACTER,
+ ( OPTS_FLAG(UTF8) ? "invalid multibyte character sequence `%s`"
+ : "multibyte character: `%s`" ),
+ lex->tok.value))
+ return (lex->tok.ttype = TOKEN_ERROR);
+ }
+ else
+ lex->tok.constval.i = u8char;
+ }
+ else
+ lex->tok.constval.i = lex->tok.value[0];
+ }
+
+ return lex->tok.ttype;
+ }
+
+ if (isdigit(ch))
+ {
+ lex->tok.ttype = lex_finish_digit(lex, ch);
+ lex_endtoken(lex);
+ return lex->tok.ttype;
+ }
+
+ if (lex->flags.preprocessing) {
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = ch);
+ }
+
+ lexerror(lex, "unknown token: `%s`", lex->tok.value);
+ return (lex->tok.ttype = TOKEN_ERROR);