lexer now turns '(' into an operator if noops=false

[xonotic/gmqcc.git] / lexer.c
diff --git a/lexer.c b/lexer.c

index ebbe1880c01f330ecb24fdf0ec81827f00000c55..1adedc2ee5fcb14563857eba926cae87c05dcb8d 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -7,6 +7,7 @@
  #include "lexer.h"
  
  MEM_VEC_FUNCTIONS(token, char, value)
+MEM_VEC_FUNCTIONS(lex_file, frame_macro, frames)
  
  void lexerror(lex_file *lex, const char *fmt, ...)
  {
@@ -24,6 +25,25 @@ void lexerror(lex_file *lex, const char *fmt, ...)
         printf("\n");
  }
  
+void lexwarn(lex_file *lex, int warn, const char *fmt, ...) /* print a printf-style warning to stdout, but only when OPTS_WARN(warn) is true */
+{
+       va_list ap;
+
+       if (!OPTS_WARN(warn)) /* OPTS_WARN(warn) is false: print nothing at all */
+           return;
+
+       if (lex) /* with a lexer, prefix the message with lex->name and lex->sline */
+               printf("warning %s:%lu: ", lex->name, (unsigned long)lex->sline);
+       else /* lex may be NULL: use a prefix without a location */
+               printf("warning: ");
+
+       va_start(ap, fmt);
+       vprintf(fmt, ap); /* the caller's format string and variadic arguments */
+       va_end(ap);
+
+       printf("\n"); /* always terminate the message with a newline */
+}
+
  token* token_new()
  {
         token *tok = (token*)mem_a(sizeof(token));
@@ -103,7 +123,7 @@ token* token_copy_all(const token *cp)
  lex_file* lex_open(const char *file)
  {
         lex_file *lex;
-       FILE *in = fopen(file, "rb");
+       FILE *in = util_fopen(file, "rb");
  
         if (!in) {
                 lexerror(NULL, "open failed: '%s'\n", file);
@@ -323,6 +343,59 @@ static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
         return true;
  }
  
+/* read one ident for the frame list; returns 0 when an ident was read into lex->tok, 1 when a newline came first, -1 on error */
+static int lex_parse_frame(lex_file *lex)
+{
+    int ch;
+
+    if (lex->tok) /* drop any previous token and start from a fresh one */
+        token_delete(lex->tok);
+    lex->tok = token_new();
+
+    ch = lex_getch(lex);
+    while (ch != EOF && ch != '\n' && isspace(ch)) /* skip whitespace, but stop at the newline */
+        ch = lex_getch(lex);
+
+    if (ch == '\n') /* the line ended before any ident was found */
+        return 1;
+
+    if (!isident_start(ch)) { /* NOTE(review): EOF also reaches this check and has no case of its own; presumably it is reported as an invalid framename - confirm */
+        lexerror(lex, "invalid framename, must start with one of a-z or _, got %c", ch);
+        return -1;
+    }
+
+    if (!lex_tokench(lex, ch)) /* presumably appends ch to the token; any failure below is reported as -1 */
+        return -1;
+    if (!lex_finish_ident(lex))
+        return -1;
+    if (!lex_endtoken(lex))
+        return -1;
+    return 0;
+}
+
+/* read a list of $frames: adds one frame_macro per ident until the line ends (returns true); returns false on error */
+static bool lex_finish_frames(lex_file *lex)
+{
+    do {
+        int rc;
+        frame_macro m;
+
+        rc = lex_parse_frame(lex);
+        if (rc > 0) /* end of line */
+            return true;
+        if (rc < 0) /* error */
+            return false;
+
+        m.value = lex->framevalue++; /* the frame gets the current counter value, then the counter advances */
+        m.name = lex->tok->value; /* take over the token's string instead of copying it */
+        lex->tok->value = NULL; /* detach it from the token so only the macro refers to it */
+        if (!lex_file_frames_add(lex, m)) { /* NOTE(review): m.name does not appear to be freed on this failure path - confirm */
+            lexerror(lex, "out of memory");
+            return false;
+        }
+    } while (true);
+}
+
  static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
  {
         int ch = 0;
@@ -333,10 +406,6 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
                 if (ch == quote)
                         return TOKEN_STRINGCONST;
  
-               if (!lex_tokench(lex, ch))
-                       return (lex->tok->ttype = TOKEN_FATAL);
-
-               /* as lexer we only care about \" to not terminate the string prematurely */
                 if (ch == '\\') {
                         ch = lex_getch(lex);
                         if (ch == EOF) {
@@ -344,10 +413,28 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
                                 lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
                                 return (lex->tok->ttype = TOKEN_ERROR);
                         }
-                       /* so we just add the next character no matter what it actually is */
+
+            switch (ch) {
+            case '\\': break;
+            case 'a':  ch = '\a'; break;
+            case 'b':  ch = '\b'; break;
+            case 'r':  ch = '\r'; break;
+            case 'n':  ch = '\n'; break;
+            case 't':  ch = '\t'; break;
+            case 'f':  ch = '\f'; break;
+            case 'v':  ch = '\v'; break;
+            default:
+                lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
+                           /* so we just add the character plus backslash no matter what it actually is */
+                           if (!lex_tokench(lex, '\\'))
+                                   return (lex->tok->ttype = TOKEN_FATAL);
+            }
+            /* add the character finally */
                         if (!lex_tokench(lex, ch))
                                 return (lex->tok->ttype = TOKEN_FATAL);
                 }
+               else if (!lex_tokench(lex, ch))
+                       return (lex->tok->ttype = TOKEN_FATAL);
         }
         lexerror(lex, "unexpected end of file within string constant");
         lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
@@ -453,21 +540,190 @@ int lex_do(lex_file *lex)
         if (ch == EOF)
                 return (lex->tok->ttype = TOKEN_EOF);
  
+       /* modelgen / spritegen commands */
+       if (ch == '$') {
+           const char *v;
+           size_t frame;
+
+           ch = lex_getch(lex);
+           if (!isident_start(ch)) {
+               lexerror(lex, "hanging '$' modelgen/spritegen command line");
+               return lex_do(lex);
+           }
+           if (!lex_tokench(lex, ch))
+               return (lex->tok->ttype = TOKEN_FATAL);
+           if (!lex_finish_ident(lex))
+               return (lex->tok->ttype = TOKEN_ERROR);
+           if (!lex_endtoken(lex))
+               return (lex->tok->ttype = TOKEN_FATAL);
+           /* skip the known commands */
+           v = lex->tok->value;
+
+        if (!strcmp(v, "frame") || !strcmp(v, "framesave"))
+        {
+            /* frame/framesave command works like an enum
+             * similar to fteqcc we handle this in the lexer.
+             * The reason for this is that it is sensitive to newlines,
+             * which the parser is unaware of
+             */
+            if (!lex_finish_frames(lex))
+                 return (lex->tok->ttype = TOKEN_ERROR);
+            return lex_do(lex);
+        }
+
+        if (!strcmp(v, "framevalue"))
+        {
+            ch = lex_getch(lex);
+            while (ch != EOF && isspace(ch) && ch != '\n')
+                ch = lex_getch(lex);
+
+            if (!isdigit(ch)) {
+                lexerror(lex, "$framevalue requires an integer parameter");
+                return lex_do(lex);
+            }
+
+                   token_delete(lex->tok);
+               lex->tok = token_new();
+            lex->tok->ttype = lex_finish_digit(lex, ch);
+            if (!lex_endtoken(lex))
+                return (lex->tok->ttype = TOKEN_FATAL);
+            if (lex->tok->ttype != TOKEN_INTCONST) {
+                lexerror(lex, "$framevalue requires an integer parameter");
+                return lex_do(lex);
+            }
+            lex->framevalue = lex->tok->constval.i;
+            return lex_do(lex);
+        }
+
+        if (!strcmp(v, "framerestore"))
+        {
+            int rc;
+
+                   token_delete(lex->tok);
+               lex->tok = token_new();
+
+            rc = lex_parse_frame(lex);
+
+            if (rc > 0) {
+                lexerror(lex, "$framerestore requires a framename parameter");
+                return lex_do(lex);
+            }
+            if (rc < 0)
+                return (lex->tok->ttype = TOKEN_FATAL);
+
+            v = lex->tok->value;
+            for (frame = 0; frame < lex->frames_count; ++frame) {
+                if (!strcmp(v, lex->frames[frame].name)) {
+                    lex->framevalue = lex->frames[frame].value;
+                    return lex_do(lex);
+                }
+            }
+            lexerror(lex, "unknown framename `%s`", v);
+            return lex_do(lex);
+        }
+
+        if (!strcmp(v, "modelname"))
+        {
+            int rc;
+
+                   token_delete(lex->tok);
+               lex->tok = token_new();
+
+            rc = lex_parse_frame(lex);
+
+            if (rc > 0) {
+                lexerror(lex, "$framerestore requires a framename parameter");
+                return lex_do(lex);
+            }
+            if (rc < 0)
+                return (lex->tok->ttype = TOKEN_FATAL);
+
+            v = lex->tok->value;
+            if (lex->modelname) {
+                frame_macro m;
+                m.value = lex->framevalue;
+                m.name = lex->modelname;
+                lex->modelname = NULL;
+                if (!lex_file_frames_add(lex, m)) {
+                    lexerror(lex, "out of memory");
+                    return (lex->tok->ttype = TOKEN_FATAL);
+                }
+            }
+            lex->modelname = lex->tok->value;
+            lex->tok->value = NULL;
+            for (frame = 0; frame < lex->frames_count; ++frame) {
+                if (!strcmp(v, lex->frames[frame].name)) {
+                    lex->framevalue = lex->frames[frame].value;
+                    break;
+                }
+            }
+            return lex_do(lex);
+        }
+
+        if (!strcmp(v, "flush"))
+        {
+            size_t frame;
+            for (frame = 0; frame < lex->frames_count; ++frame)
+                mem_d(lex->frames[frame].name);
+            MEM_VECTOR_CLEAR(lex, frames);
+               /* skip line (fteqcc does it too) */
+               ch = lex_getch(lex);
+               while (ch != EOF && ch != '\n')
+                   ch = lex_getch(lex);
+            return lex_do(lex);
+        }
+
+           if (!strcmp(v, "cd") ||
+               !strcmp(v, "origin") ||
+               !strcmp(v, "base") ||
+               !strcmp(v, "flags") ||
+               !strcmp(v, "scale") ||
+               !strcmp(v, "skin"))
+           {
+               /* skip line */
+               ch = lex_getch(lex);
+               while (ch != EOF && ch != '\n')
+                   ch = lex_getch(lex);
+               return lex_do(lex);
+           }
+
+        for (frame = 0; frame < lex->frames_count; ++frame) {
+            if (!strcmp(v, lex->frames[frame].name)) {
+                lex->tok->constval.i = lex->frames[frame].value;
+                return (lex->tok->ttype = TOKEN_INTCONST);
+            }
+        }
+
+        lexerror(lex, "invalid frame macro");
+        return lex_do(lex);
+       }
+
         /* single-character tokens */
         switch (ch)
         {
-               case ';':
                 case '(':
+               if (!lex_tokench(lex, ch) ||
+                   !lex_endtoken(lex))
+               {
+                   return (lex->tok->ttype = TOKEN_FATAL);
+               }
+               if (lex->flags.noops)
+                   return (lex->tok->ttype = ch);
+               else
+                   return (lex->tok->ttype = TOKEN_OPERATOR);
                 case ')':
+               case ';':
                 case '{':
                 case '}':
                 case '[':
                 case ']':
  
-               case ',':
-
                 case '#':
-
+               if (!lex_tokench(lex, ch) ||
+                   !lex_endtoken(lex))
+               {
+                   return (lex->tok->ttype = TOKEN_FATAL);
+               }
                         return (lex->tok->ttype = ch);
                 default:
                         break;
@@ -491,15 +747,32 @@ int lex_do(lex_file *lex)
                         case '|':
                         case '^':
                         case '~':
-                               return ch;
+                       case ',':
+                   case '.':
+                   case '!':
+                   if (!lex_tokench(lex, ch) ||
+                       !lex_endtoken(lex))
+                   {
+                       return (lex->tok->ttype = TOKEN_FATAL);
+                   }
+                               return (lex->tok->ttype = ch);
                         default:
                                 break;
                 }
         }
  
+       if (ch == ',' || ch == '.') {
+           if (!lex_tokench(lex, ch) ||
+               !lex_endtoken(lex))
+           {
+               return (lex->tok->ttype = TOKEN_FATAL);
+           }
+           return (lex->tok->ttype = TOKEN_OPERATOR);
+       }
+
         if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
             ch == '>' || ch == '<' || /* <<, >>, <=, >= */
-           ch == '=' ||              /* == */
+           ch == '=' || ch == '!' || /* ==, != */
             ch == '&' || ch == '|')   /* &&, ||, &=, |= */
         {
                 if (!lex_tokench(lex, ch))
@@ -520,6 +793,7 @@ int lex_do(lex_file *lex)
                 return (lex->tok->ttype = TOKEN_OPERATOR);
         }
  
+    /*
         if (ch == '^' || ch == '~' || ch == '!')
         {
                 if (!lex_tokench(lex, ch) ||
@@ -529,6 +803,7 @@ int lex_do(lex_file *lex)
                 }
                 return (lex->tok->ttype = TOKEN_OPERATOR);
         }
+       */
  
         if (ch == '*' || ch == '/') /* *=, /= */
         {
@@ -550,6 +825,7 @@ int lex_do(lex_file *lex)
         if (isident_start(ch))
         {
                 const char *v;
+
                 if (!lex_tokench(lex, ch))
                         return (lex->tok->ttype = TOKEN_FATAL);
                 if (!lex_finish_ident(lex)) {
@@ -579,10 +855,13 @@ int lex_do(lex_file *lex)
                 } else if (!strcmp(v, "vector")) {
                         lex->tok->ttype = TOKEN_TYPENAME;
                     lex->tok->constval.t = TYPE_VECTOR;
-               } else if (!strcmp(v, "for") ||
-                        !strcmp(v, "while") ||
-                        !strcmp(v, "do")    ||
-                        !strcmp(v, "var")   ||
+               } else if (!strcmp(v, "for")  ||
+                        !strcmp(v, "while")  ||
+                        !strcmp(v, "do")     ||
+                        !strcmp(v, "if")     ||
+                        !strcmp(v, "else")   ||
+                        !strcmp(v, "local")  ||
+                        !strcmp(v, "return") ||
                          !strcmp(v, "const"))
                         lex->tok->ttype = TOKEN_KEYWORD;
  
@@ -620,7 +899,13 @@ int lex_do(lex_file *lex)
                          return (lex->tok->ttype = TOKEN_FATAL);
  
                  /* It's a vector if we can successfully scan 3 floats */
-                if (sscanf(lex->tok->value, " %f %f %f ", &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
+#ifdef WIN32
+                if (sscanf_s(lex->tok->value, " %f %f %f ",
+                           &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
+#else
+                if (sscanf(lex->tok->value, " %f %f %f ",
+                           &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
+#endif
                  {
                          lex->tok->ttype = TOKEN_VECTORCONST;
                  }