linecounting fix for push/pop(line) pragmas

[xonotic/gmqcc.git] / lexer.c
diff --git a/lexer.c b/lexer.c

index de09bfa2f4b85848dfba2bb0b28b444b809fe8c9..17af7b6f4b92ad7ca30c2da8bd4f5a2161afbb7b 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -1,3 +1,25 @@
+/*
+ * Copyright (C) 2012
+ *     Wolfgang Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
@@ -6,33 +28,63 @@
  #include "gmqcc.h"
  #include "lexer.h"
  
+/*
+ * List of Keywords
+ */
+
+/* original: lex_do() reports these as TOKEN_KEYWORD under every standard */
+static const char *keywords_qc[] = {
+    "for", "do", "while",
+    "if", "else",
+    "local",
+    "return",
+    "const"
+};
+static size_t num_keywords_qc = sizeof(keywords_qc) / sizeof(keywords_qc[0]);
+
+/* For fte/gmqcc: only keywords when opts_standard != COMPILER_QCC */
+static const char *keywords_fg[] = {
+    "switch", "case", "default",
+    "struct", "union",
+    "break", "continue",
+    "typedef"
+};
+static size_t num_keywords_fg = sizeof(keywords_fg) / sizeof(keywords_fg[0]);
+
+/*
+ * Lexer code
+ */
+
  char* *lex_filenames;
  
  void lexerror(lex_file *lex, const char *fmt, ...)
  {
-       va_list ap;
+    va_list ap;
  
-       va_start(ap, fmt);
-    con_vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
-       va_end(ap);
+    va_start(ap, fmt);
+    if (lex)
+        con_vprintmsg(LVL_ERROR, lex->name, lex->sline, "parse error", fmt, ap);
+    else
+        con_vprintmsg(LVL_ERROR, "", 0, "parse error", fmt, ap);
+    va_end(ap);
  }
  
  bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...)
  {
-       va_list ap;
-       int lvl = LVL_WARNING;
+    va_list ap;
+    int lvl = LVL_WARNING;
  
      if (!OPTS_WARN(warntype))
          return false;
  
      if (opts_werror)
-           lvl = LVL_ERROR;
+        lvl = LVL_ERROR;
  
-       va_start(ap, fmt);
+    va_start(ap, fmt);
      con_vprintmsg(lvl, lex->name, lex->sline, "warning", fmt, ap);
-       va_end(ap);
+    va_end(ap);
  
-       return opts_werror;
+    return opts_werror;
  }
  
  
@@ -286,13 +338,13 @@ static int lex_getch(lex_file *lex)
  
      if (lex->peekpos) {
          lex->peekpos--;
-        if (lex->peek[lex->peekpos] == '\n')
+        if (!lex->push_line && lex->peek[lex->peekpos] == '\n')
              lex->line++;
          return lex->peek[lex->peekpos];
      }
  
      ch = lex_fgetc(lex);
-    if (ch == '\n')
+    if (!lex->push_line && ch == '\n')
          lex->line++;
      else if (ch == '?')
          return lex_try_trigraph(lex, ch);
@@ -304,7 +356,7 @@ static int lex_getch(lex_file *lex)
  static void lex_ungetch(lex_file *lex, int ch)
  {
      lex->peek[lex->peekpos++] = ch;
-    if (ch == '\n')
+    if (!lex->push_line && ch == '\n')
          lex->line--;
  }
  
@@ -344,6 +396,117 @@ static void lex_endtoken(lex_file *lex)
      vec_shrinkby(lex->tok.value, 1);
  }
  
+/* Consume one "#pragma <command>(<param>)" line: push(line), pop(line),
+ * file(<name>) or line(<number>). Returns true once the rest of the line has
+ * been read; otherwise every character read so far is pushed back and false
+ * is returned. Never matches while lex->flags.preprocessing is set. */
+static bool lex_try_pragma(lex_file *lex)
+{
+    int ch;
+    char *pragma  = NULL;
+    char *command = NULL;
+    char *param   = NULL;
+    size_t line;
+
+    if (lex->flags.preprocessing)
+        return false;
+
+    line = lex->line;
+    ch = lex_getch(lex);
+    if (ch != '#') {
+        lex_ungetch(lex, ch);
+        return false;
+    }
+
+    /* directive word: up to 8 lowercase letters, then a single space */
+    for (ch = lex_getch(lex); vec_size(pragma) < 8 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
+        vec_push(pragma, ch);
+    vec_push(pragma, 0);
+    if (ch != ' ' || strcmp(pragma, "pragma"))
+        goto unroll;
+
+    /* command word: up to 32 lowercase letters, then '(' */
+    for (ch = lex_getch(lex); vec_size(command) < 32 && ch >= 'a' && ch <= 'z'; ch = lex_getch(lex))
+        vec_push(command, ch);
+    vec_push(command, 0);
+    if (ch != '(')
+        goto unroll;
+
+    /* parameter: up to 32 characters, closed by ')' on the same line */
+    for (ch = lex_getch(lex); vec_size(param) < 32 && ch != ')' && ch != '\n'; ch = lex_getch(lex))
+        vec_push(param, ch);
+    vec_push(param, 0);
+    if (ch != ')')
+        goto unroll;
+
+    if (!strcmp(command, "push")) {
+        if (strcmp(param, "line"))
+            goto unroll;
+        lex->push_line++;
+        if (lex->push_line == 1)
+            --line;
+    }
+    else if (!strcmp(command, "pop")) {
+        if (strcmp(param, "line"))
+            goto unroll;
+        if (lex->push_line)
+            lex->push_line--;
+        if (lex->push_line == 0)
+            --line;
+    }
+    else if (!strcmp(command, "file")) {
+        lex->name = util_strdup(param);
+        vec_push(lex_filenames, lex->name);
+    }
+    else if (!strcmp(command, "line")) {
+        line = strtol(param, NULL, 0)-1;
+    }
+    else
+        goto unroll;
+
+    lex->line = line;
+    while (ch != '\n' && ch != EOF)
+        ch = lex_getch(lex);
+    vec_free(pragma);
+    vec_free(command);
+    vec_free(param);
+    return true;
+
+unroll:
+    /* ch is the last character read and is not pushed back yet; the peek
+     * buffer is a stack, so everything goes back in reverse reading order */
+    lex_ungetch(lex, ch);
+    if (param) {
+        vec_pop(param);
+        while (vec_size(param)) {
+            lex_ungetch(lex, (unsigned char)vec_last(param));
+            vec_pop(param);
+        }
+        vec_free(param);
+        lex_ungetch(lex, '(');
+    }
+    if (command) {
+        vec_pop(command);
+        while (vec_size(command)) {
+            lex_ungetch(lex, vec_last(command));
+            vec_pop(command);
+        }
+        vec_free(command);
+        lex_ungetch(lex, ' ');
+    }
+    if (pragma) {
+        vec_pop(pragma);
+        while (vec_size(pragma)) {
+            lex_ungetch(lex, vec_last(pragma));
+            vec_pop(pragma);
+        }
+        vec_free(pragma);
+    }
+    lex_ungetch(lex, '#');
+    lex->line = line;
+    return false;
+}
+
  /* Skip whitespace and comments and return the first
   * non-white character.
   * As this makes use of the above getch() ungetch() functions,
@@ -385,6 +548,10 @@ static int lex_skipwhite(lex_file *lex)
      {
          ch = lex_getch(lex);
          while (ch != EOF && isspace(ch)) {
+            if (ch == '\n') {
+                if (lex_try_pragma(lex))
+                    continue;
+            }
              if (lex->flags.preprocessing) {
                  if (ch == '\n') {
                      /* end-of-line */
@@ -461,9 +628,13 @@ static int lex_skipwhite(lex_file *lex)
                              }
                              break;
                          }
+                        lex_ungetch(lex, ch);
                      }
                      if (lex->flags.preprocessing) {
-                        lex_tokench(lex, ' '); /* ch); */
+                        if (ch == '\n')
+                            lex_tokench(lex, '\n');
+                        else
+                            lex_tokench(lex, ' '); /* ch); */
                      }
                  }
                  ch = ' '; /* cause TRUE in the isspace check */
@@ -570,7 +741,17 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
          if (ch == quote)
              return TOKEN_STRINGCONST;
  
-        if (!lex->flags.preprocessing && ch == '\\') {
+        if (lex->flags.preprocessing && ch == '\\') {
+            lex_tokench(lex, ch);
+            ch = lex_getch(lex);
+            if (ch == EOF) {
+                lexerror(lex, "unexpected end of file");
+                lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
+                return (lex->tok.ttype = TOKEN_ERROR);
+            }
+            lex_tokench(lex, ch);
+        }
+        else if (ch == '\\') {
              ch = lex_getch(lex);
              if (ch == EOF) {
                  lexerror(lex, "unexpected end of file");
@@ -580,6 +761,8 @@ static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
  
              switch (ch) {
              case '\\': break;
+            case '\'': break;
+            case '"':  break;
              case 'a':  ch = '\a'; break;
              case 'b':  ch = '\b'; break;
              case 'r':  ch = '\r'; break;
@@ -679,7 +862,7 @@ static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
  
  int lex_do(lex_file *lex)
  {
-    int ch, nextch;
+    int ch, nextch, thirdch;
  
      lex_token_new(lex);
  #if 0
@@ -702,14 +885,14 @@ int lex_do(lex_file *lex)
          continue;
      }
  
-    lex->sline = lex->line;
-    lex->tok.ctx.line = lex->sline;
-    lex->tok.ctx.file = lex->name;
-
      if (lex->flags.preprocessing && (ch == TOKEN_WHITE || ch == TOKEN_EOL || ch == TOKEN_FATAL)) {
          return (lex->tok.ttype = ch);
      }
  
+    lex->sline = lex->line;
+    lex->tok.ctx.line = lex->sline;
+    lex->tok.ctx.file = lex->name;
+
      if (lex->eof)
          return (lex->tok.ttype = TOKEN_FATAL);
  
@@ -825,9 +1008,9 @@ int lex_do(lex_file *lex)
  
          if (!strcmp(v, "flush"))
          {
-            size_t frame;
-            for (frame = 0; frame < vec_size(lex->frames); ++frame)
-                mem_d(lex->frames[frame].name);
+            size_t fi;
+            for (fi = 0; fi < vec_size(lex->frames); ++fi)
+                mem_d(lex->frames[fi].name);
              vec_free(lex->frames);
              /* skip line (fteqcc does it too) */
              ch = lex_getch(lex);
@@ -866,6 +1049,8 @@ int lex_do(lex_file *lex)
      {
          case '[':
          case '(':
+        case ':':
+        case '?':
              lex_tokench(lex, ch);
              lex_endtoken(lex);
              if (lex->flags.noops)
@@ -874,7 +1059,6 @@ int lex_do(lex_file *lex)
                  return (lex->tok.ttype = TOKEN_OPERATOR);
          case ')':
          case ';':
-        case ':':
          case '{':
          case '}':
          case ']':
@@ -894,8 +1078,10 @@ int lex_do(lex_file *lex)
           */
          switch (ch)
          {
+            /*
              case '+':
              case '-':
+            */
              case '*':
              case '/':
              case '<':
@@ -928,7 +1114,7 @@ int lex_do(lex_file *lex)
              nextch = lex_getch(lex);
              if (nextch != '.') {
                  lex_ungetch(lex, nextch);
-                lex_ungetch(lex, nextch);
+                lex_ungetch(lex, '.');
                  lex_endtoken(lex);
                  return (lex->tok.ttype = ch);
              }
@@ -958,6 +1144,16 @@ int lex_do(lex_file *lex)
              lex_tokench(lex, nextch);
          } else if (ch == '-' && nextch == '>') {
              lex_tokench(lex, nextch);
+        } else if (ch == '&' && nextch == '~') {
+            thirdch = lex_getch(lex);
+            if (thirdch != '=') {
+                lex_ungetch(lex, thirdch);
+                lex_ungetch(lex, nextch);
+            }
+            else {
+                lex_tokench(lex, nextch);
+                lex_tokench(lex, thirdch);
+            }
          } else
              lex_ungetch(lex, nextch);
  
@@ -1019,29 +1215,17 @@ int lex_do(lex_file *lex)
          } else if (!strcmp(v, "vector")) {
              lex->tok.ttype = TOKEN_TYPENAME;
              lex->tok.constval.t = TYPE_VECTOR;
-        } else if (!strcmp(v, "for")  ||
-                 !strcmp(v, "while")  ||
-                 !strcmp(v, "do")     ||
-                 !strcmp(v, "if")     ||
-                 !strcmp(v, "else")   ||
-                 !strcmp(v, "local")  ||
-                 !strcmp(v, "return") ||
-                 !strcmp(v, "not")    ||
-                 !strcmp(v, "const"))
-        {
-            lex->tok.ttype = TOKEN_KEYWORD;
-        }
-        else if (opts_standard != COMPILER_QCC)
-        {
-            /* other standards reserve these keywords */
-            if (!strcmp(v, "switch") ||
-                !strcmp(v, "struct") ||
-                !strcmp(v, "union")  ||
-                !strcmp(v, "break")  ||
-                !strcmp(v, "continue") ||
-                !strcmp(v, "var"))
-            {
-                lex->tok.ttype = TOKEN_KEYWORD;
+        } else {
+            size_t kw;
+            for (kw = 0; kw < num_keywords_qc; ++kw) {
+                if (!strcmp(v, keywords_qc[kw]))
+                    return (lex->tok.ttype = TOKEN_KEYWORD);
+            }
+            if (opts_standard != COMPILER_QCC) {
+                for (kw = 0; kw < num_keywords_fg; ++kw) {
+                    if (!strcmp(v, keywords_fg[kw]))
+                        return (lex->tok.ttype = TOKEN_KEYWORD);
+                }
              }
          }