- }
-
- /* single-character tokens */
- switch (ch)
- {
- case ';':
- case '(':
- case ')':
- case '{':
- case '}':
- case '[':
- case ']':
-
- case '#':
- if (!lex_tokench(lex, ch) ||
- !lex_endtoken(lex))
- {
- return (lex->tok->ttype = TOKEN_FATAL);
- }
- return (lex->tok->ttype = ch);
- default:
- break;
- }
-
- if (lex->flags.noops)
- {
- /* Detect characters early which are normally
- * operators OR PART of an operator.
- */
- switch (ch)
- {
- case '+':
- case '-':
- case '*':
- case '/':
- case '<':
- case '>':
- case '=':
- case '&':
- case '|':
- case '^':
- case '~':
- case ',':
- case '.':
- case '!':
- if (!lex_tokench(lex, ch) ||
- !lex_endtoken(lex))
- {
- return (lex->tok->ttype = TOKEN_FATAL);
- }
- return (lex->tok->ttype = ch);
- default:
- break;
- }
- }
-
- if (ch == ',' || ch == '.') {
- if (!lex_tokench(lex, ch) ||
- !lex_endtoken(lex))
- {
- return (lex->tok->ttype = TOKEN_FATAL);
- }
- return (lex->tok->ttype = TOKEN_OPERATOR);
- }
-
- if (ch == '+' || ch == '-' || /* ++, --, +=, -= and -> as well! */
- ch == '>' || ch == '<' || /* <<, >>, <=, >= */
- ch == '=' || ch == '!' || /* ==, != */
- ch == '&' || ch == '|') /* &&, ||, &=, |= */
- {
- if (!lex_tokench(lex, ch))
- return (lex->tok->ttype = TOKEN_FATAL);
-
- nextch = lex_getch(lex);
- if (nextch == ch || nextch == '=') {
- if (!lex_tokench(lex, nextch))
- return (lex->tok->ttype = TOKEN_FATAL);
- } else if (ch == '-' && nextch == '>') {
- if (!lex_tokench(lex, nextch))
- return (lex->tok->ttype = TOKEN_FATAL);
- } else
- lex_ungetch(lex, nextch);
-
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
- return (lex->tok->ttype = TOKEN_OPERATOR);
- }
-
- /*
- if (ch == '^' || ch == '~' || ch == '!')
- {
- if (!lex_tokench(lex, ch) ||
- !lex_endtoken(lex))
- {
- return (lex->tok->ttype = TOKEN_FATAL);
- }
- return (lex->tok->ttype = TOKEN_OPERATOR);
- }
- */
-
- if (ch == '*' || ch == '/') /* *=, /= */
- {
- if (!lex_tokench(lex, ch))
- return (lex->tok->ttype = TOKEN_FATAL);
-
- nextch = lex_getch(lex);
- if (nextch == '=') {
- if (!lex_tokench(lex, nextch))
- return (lex->tok->ttype = TOKEN_FATAL);
- } else
- lex_ungetch(lex, nextch);
-
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
- return (lex->tok->ttype = TOKEN_OPERATOR);
- }
-
- if (isident_start(ch))
- {
- const char *v;
-
- if (!lex_tokench(lex, ch))
- return (lex->tok->ttype = TOKEN_FATAL);
- if (!lex_finish_ident(lex)) {
- /* error? */
- return (lex->tok->ttype = TOKEN_ERROR);
- }
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
- lex->tok->ttype = TOKEN_IDENT;
-
- v = lex->tok->value;
- if (!strcmp(v, "void")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_VOID;
- } else if (!strcmp(v, "int")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_INTEGER;
- } else if (!strcmp(v, "float")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_FLOAT;
- } else if (!strcmp(v, "string")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_STRING;
- } else if (!strcmp(v, "entity")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_ENTITY;
- } else if (!strcmp(v, "vector")) {
- lex->tok->ttype = TOKEN_TYPENAME;
- lex->tok->constval.t = TYPE_VECTOR;
- } else if (!strcmp(v, "for") ||
- !strcmp(v, "while") ||
- !strcmp(v, "do") ||
- !strcmp(v, "if") ||
- !strcmp(v, "else") ||
- !strcmp(v, "local") ||
- !strcmp(v, "return") ||
- !strcmp(v, "const"))
- lex->tok->ttype = TOKEN_KEYWORD;
-
- return lex->tok->ttype;
- }
-
- if (ch == '"')
- {
- lex->tok->ttype = lex_finish_string(lex, '"');
- while (lex->tok->ttype == TOKEN_STRINGCONST)
- {
- /* Allow c style "string" "continuation" */
- ch = lex_skipwhite(lex);
- if (ch != '"') {
- lex_ungetch(lex, ch);
- break;
- }
-
- lex->tok->ttype = lex_finish_string(lex, '"');
- }
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
- return lex->tok->ttype;
- }
-
- if (ch == '\'')
- {
- /* we parse character constants like string,
- * but return TOKEN_CHARCONST, or a vector type if it fits...
- * Likewise actual unescaping has to be done by the parser.
- * The difference is we don't allow 'char' 'continuation'.
- */
- lex->tok->ttype = lex_finish_string(lex, '\'');
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
-
- /* It's a vector if we can successfully scan 3 floats */
-#ifdef WIN32
- if (sscanf_s(lex->tok->value, " %f %f %f ",
- &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
-#else
- if (sscanf(lex->tok->value, " %f %f %f ",
- &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
-#endif
- {
- lex->tok->ttype = TOKEN_VECTORCONST;
- }
-
- return lex->tok->ttype;
- }
-
- if (isdigit(ch))
- {
- lex->tok->ttype = lex_finish_digit(lex, ch);
- if (!lex_endtoken(lex))
- return (lex->tok->ttype = TOKEN_FATAL);
- return lex->tok->ttype;
- }
-
- lexerror(lex, "unknown token");
- return (lex->tok->ttype = TOKEN_ERROR);
+ }
+
+ /* single-character tokens */
+ switch (ch)
+ {
+ case '[':
+ nextch = lex_getch(lex);
+ if (nextch == '[') {
+ lex_tokench(lex, ch);
+ lex_tokench(lex, nextch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_ATTRIBUTE_OPEN);
+ }
+ lex_ungetch(lex, nextch);
+ /* FALL THROUGH */
+ case '(':
+ case ':':
+ case '?':
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ if (lex->flags.noops)
+ return (lex->tok.ttype = ch);
+ else
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+
+ case ']':
+ if (lex->flags.noops) {
+ nextch = lex_getch(lex);
+ if (nextch == ']') {
+ lex_tokench(lex, ch);
+ lex_tokench(lex, nextch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_ATTRIBUTE_CLOSE);
+ }
+ lex_ungetch(lex, nextch);
+ }
+ /* FALL THROUGH */
+ case ')':
+ case ';':
+ case '{':
+ case '}':
+
+ case '#':
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = ch);
+ default:
+ break;
+ }
+
+ if (ch == '.') {
+ nextch = lex_getch(lex);
+ /* digits starting with a dot */
+ if (util_isdigit(nextch)) {
+ lex_ungetch(lex, nextch);
+ lex->tok.ttype = lex_finish_digit(lex, ch);
+ lex_endtoken(lex);
+ return lex->tok.ttype;
+ }
+ lex_ungetch(lex, nextch);
+ }
+
+ if (lex->flags.noops)
+ {
+ /* Detect characters early which are normally
+ * operators OR PART of an operator.
+ */
+ switch (ch)
+ {
+ case '*':
+ case '/':
+ case '<':
+ case '>':
+ case '=':
+ case '&':
+ case '|':
+ case '^':
+ case '~':
+ case ',':
+ case '!':
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = ch);
+ default:
+ break;
+ }
+ }
+
+ if (ch == '.')
+ {
+ lex_tokench(lex, ch);
+ /* peek ahead once */
+ nextch = lex_getch(lex);
+ if (nextch != '.') {
+ lex_ungetch(lex, nextch);
+ lex_endtoken(lex);
+ if (lex->flags.noops)
+ return (lex->tok.ttype = ch);
+ else
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+ /* peek ahead again */
+ nextch = lex_getch(lex);
+ if (nextch != '.') {
+ lex_ungetch(lex, nextch);
+ lex_ungetch(lex, '.');
+ lex_endtoken(lex);
+ if (lex->flags.noops)
+ return (lex->tok.ttype = ch);
+ else
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+ /* fill the token to be "..." */
+ lex_tokench(lex, ch);
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_DOTS);
+ }
+
+ if (ch == ',' || ch == '.') {
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+
+ if (ch == '+' || ch == '-' || /* ++, --, +=, -= and -> as well! */
+ ch == '>' || ch == '<' || /* <<, >>, <=, >= and >< as well! */
+ ch == '=' || ch == '!' || /* <=>, ==, != */
+ ch == '&' || ch == '|' || /* &&, ||, &=, |= */
+ ch == '~' || ch == '^' /* ~=, ~, ^ */
+ ) {
+ lex_tokench(lex, ch);
+ nextch = lex_getch(lex);
+
+ if ((nextch == '=' && ch != '<') || (nextch == '<' && ch == '>'))
+ lex_tokench(lex, nextch);
+ else if (nextch == ch && ch != '!') {
+ lex_tokench(lex, nextch);
+ if ((thirdch = lex_getch(lex)) == '=')
+ lex_tokench(lex, thirdch);
+ else
+ lex_ungetch(lex, thirdch);
+ } else if (ch == '<' && nextch == '=') {
+ lex_tokench(lex, nextch);
+ if ((thirdch = lex_getch(lex)) == '>')
+ lex_tokench(lex, thirdch);
+ else
+ lex_ungetch(lex, thirdch);
+
+ } else if (ch == '-' && nextch == '>') {
+ lex_tokench(lex, nextch);
+ } else if (ch == '&' && nextch == '~') {
+ thirdch = lex_getch(lex);
+ if (thirdch != '=') {
+ lex_ungetch(lex, thirdch);
+ lex_ungetch(lex, nextch);
+ }
+ else {
+ lex_tokench(lex, nextch);
+ lex_tokench(lex, thirdch);
+ }
+ }
+ else if (lex->flags.preprocessing &&
+ ch == '-' && util_isdigit(nextch))
+ {
+ lex->tok.ttype = lex_finish_digit(lex, nextch);
+ if (lex->tok.ttype == TOKEN_INTCONST)
+ lex->tok.constval.i = -lex->tok.constval.i;
+ else
+ lex->tok.constval.f = -lex->tok.constval.f;
+ lex_endtoken(lex);
+ return lex->tok.ttype;
+ } else {
+ lex_ungetch(lex, nextch);
+ }
+
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+
+ if (ch == '*' || ch == '/') /* *=, /= */
+ {
+ lex_tokench(lex, ch);
+
+ nextch = lex_getch(lex);
+ if (nextch == '=' || nextch == '*') {
+ lex_tokench(lex, nextch);
+ } else
+ lex_ungetch(lex, nextch);
+
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+
+ if (ch == '%') {
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = TOKEN_OPERATOR);
+ }
+
+ if (isident_start(ch))
+ {
+ const char *v;
+
+ lex_tokench(lex, ch);
+ if (!lex_finish_ident(lex)) {
+ /* error? */
+ return (lex->tok.ttype = TOKEN_ERROR);
+ }
+ lex_endtoken(lex);
+ lex->tok.ttype = TOKEN_IDENT;
+
+ v = lex->tok.value;
+ if (!strcmp(v, "void")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_VOID;
+ } else if (!strcmp(v, "int")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_INTEGER;
+ } else if (!strcmp(v, "float")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_FLOAT;
+ } else if (!strcmp(v, "string")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_STRING;
+ } else if (!strcmp(v, "entity")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_ENTITY;
+ } else if (!strcmp(v, "vector")) {
+ lex->tok.ttype = TOKEN_TYPENAME;
+ lex->tok.constval.t = TYPE_VECTOR;
+ } else {
+ size_t kw;
+ for (kw = 0; kw < GMQCC_ARRAY_COUNT(keywords_qc); ++kw) {
+ if (!strcmp(v, keywords_qc[kw]))
+ return (lex->tok.ttype = TOKEN_KEYWORD);
+ }
+ if (OPTS_OPTION_U32(OPTION_STANDARD) != COMPILER_QCC) {
+ for (kw = 0; kw < GMQCC_ARRAY_COUNT(keywords_fg); ++kw) {
+ if (!strcmp(v, keywords_fg[kw]))
+ return (lex->tok.ttype = TOKEN_KEYWORD);
+ }
+ }
+ }
+
+ return lex->tok.ttype;
+ }
+
+ if (ch == '"')
+ {
+ lex->flags.nodigraphs = true;
+ if (lex->flags.preprocessing)
+ lex_tokench(lex, ch);
+ lex->tok.ttype = lex_finish_string(lex, '"');
+ if (lex->flags.preprocessing)
+ lex_tokench(lex, ch);
+ while (!lex->flags.preprocessing && lex->tok.ttype == TOKEN_STRINGCONST)
+ {
+ /* Allow c style "string" "continuation" */
+ ch = lex_skipwhite(lex, false);
+ if (ch != '"') {
+ lex_ungetch(lex, ch);
+ break;
+ }
+
+ lex->tok.ttype = lex_finish_string(lex, '"');
+ }
+ lex->flags.nodigraphs = false;
+ lex_endtoken(lex);
+ return lex->tok.ttype;
+ }
+
+ if (ch == '\'')
+ {
+ /* we parse character constants like string,
+ * but return TOKEN_CHARCONST, or a vector type if it fits...
+ * Likewise actual unescaping has to be done by the parser.
+ * The difference is we don't allow 'char' 'continuation'.
+ */
+ if (lex->flags.preprocessing)
+ lex_tokench(lex, ch);
+ lex->tok.ttype = lex_finish_string(lex, '\'');
+ if (lex->flags.preprocessing)
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+
+ lex->tok.ttype = TOKEN_CHARCONST;
+
+ /* It's a vector if we can successfully scan 3 floats */
+ if (util_sscanf(lex->tok.value, " %f %f %f ",
+ &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3)
+
+ {
+ lex->tok.ttype = TOKEN_VECTORCONST;
+ }
+ else
+ {
+ if (!lex->flags.preprocessing && strlen(lex->tok.value) > 1) {
+ utf8ch_t u8char;
+ /* check for a valid utf8 character */
+ if (!OPTS_FLAG(UTF8) || !utf8_to(&u8char, (const unsigned char *)lex->tok.value, 8)) {
+ if (lexwarn(lex, WARN_MULTIBYTE_CHARACTER,
+ ( OPTS_FLAG(UTF8) ? "invalid multibyte character sequence `%s`"
+ : "multibyte character: `%s`" ),
+ lex->tok.value))
+ return (lex->tok.ttype = TOKEN_ERROR);
+ }
+ else
+ lex->tok.constval.i = u8char;
+ }
+ else
+ lex->tok.constval.i = lex->tok.value[0];
+ }
+
+ return lex->tok.ttype;
+ }
+
+ if (util_isdigit(ch))
+ {
+ lex->tok.ttype = lex_finish_digit(lex, ch);
+ lex_endtoken(lex);
+ return lex->tok.ttype;
+ }
+
+ if (lex->flags.preprocessing) {
+ lex_tokench(lex, ch);
+ lex_endtoken(lex);
+ return (lex->tok.ttype = ch);
+ }
+
+ lexerror(lex, "unknown token: `%c`", ch);
+ return (lex->tok.ttype = TOKEN_ERROR);