change the value of TOKEN_EOF

[xonotic/gmqcc.git] / lexer.h
diff --git a/lexer.h b/lexer.h

index f49b8ff2fe349a6407c5955affba02e86922303f..24ac39a63f7f574d1e9a8a7dfc1f8236cc51612a 100644 (file)
--- a/lexer.h
+++ b/lexer.h
@@ -1,33 +1,57 @@
-#ifndef GMQCC_LEXER_HDR_
-#define GMQCC_LEXER_HDR_
+/*
+ * Copyright (C) 2012, 2013
+ *     Wolfgang Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef GMQCC_LEXER_HDR
+#define GMQCC_LEXER_HDR
  
  typedef struct token_s token;
  
-#include "ast.h"
-
  struct token_s {
-       int ttype;
+    int ttype;
  
-       MEM_VECTOR_MAKE(char, value);
+    char *value;
  
-       union {
-               vector v;
-               int    i;
-               double f;
-               int    t; /* type */
-       } constval;
+    union {
+        vector v;
+        int    i;
+        double f;
+        int    t; /* type */
+    } constval;
  
-       struct token_s *next;
-       struct token_s *prev;
+#if 0
+    struct token_s *next;
+    struct token_s *prev;
+#endif
  
-       lex_ctx ctx;
+    lex_ctx ctx;
  };
  
+#if 0 /* the token_* prototypes below are compiled out */
  token* token_new();
  void   token_delete(token*);
  token* token_copy(const token *cp);
  void   token_delete_all(token *t);
  token* token_copy_all(const token *cp);
+#endif
  
  /* Lexer
   *
@@ -45,13 +69,29 @@ enum {
  
      TOKEN_KEYWORD, /* loop */
  
+    TOKEN_DOTS, /* 3 dots, ... */
+
+    TOKEN_ATTRIBUTE_OPEN,  /* [[ */
+    TOKEN_ATTRIBUTE_CLOSE, /* ]] */
+
+    TOKEN_VA_ARGS, /* for the ftepp only */
+    TOKEN_VA_ARGS_ARRAY, /* for the ftepp only */
+    TOKEN_VA_COUNT,     /* to get the count of vaargs */
+
      TOKEN_STRINGCONST, /* not the typename but an actual "string" */
      TOKEN_CHARCONST,
      TOKEN_VECTORCONST,
      TOKEN_INTCONST,
      TOKEN_FLOATCONST,
  
-    TOKEN_EOF,
+    TOKEN_WHITE,
+    TOKEN_EOL,
+
+    /* New tokens can be added above this point without breaking the
+     * exposed API, because TOKEN_EOF has a fixed value; EOF, ERROR
+     * and the other special tokens must still stay at the bottom.
+     */
+    TOKEN_EOF = 1024,
  
      /* We use '< TOKEN_ERROR', so TOKEN_FATAL must come after it and any
       * other error related tokens as well
@@ -60,48 +100,47 @@ enum {
      TOKEN_FATAL /* internal error, eg out of memory */
  };
  
-static const char *_tokennames[] = {
-    "TOKEN_START",
-    "TOKEN_IDENT",
-    "TOKEN_TYPENAME",
-    "TOKEN_OPERATOR",
-    "TOKEN_KEYWORD",
-    "TOKEN_STRINGCONST",
-    "TOKEN_CHARCONST",
-    "TOKEN_VECTORCONST",
-    "TOKEN_INTCONST",
-    "TOKEN_FLOATCONST",
-    "TOKEN_EOF",
-    "TOKEN_ERROR",
-    "TOKEN_FATAL",
-};
-typedef int
-_all_tokennames_added_[
-       ((TOKEN_FATAL - TOKEN_START + 1) ==
-        (sizeof(_tokennames)/sizeof(_tokennames[0])))
-       ? 1 : -1];
-
  typedef struct {
-       FILE   *file;
-       char   *name;
-       size_t  line;
-       size_t  sline; /* line at the start of a token */
+    char *name;
+    int   value;
+} frame_macro;
  
-       char    peek[256];
-       size_t  peekpos;
+typedef struct lex_file_s {
+    FILE   *file;
+    const char *open_string;
+    size_t      open_string_length;
+    size_t      open_string_pos;
  
-       token  *tok;
+    char   *name;
+    size_t  line;
+    size_t  sline; /* line at the start of a token */
  
-       struct {
-           bool noops;
-       } flags;
-} lex_file;
+    int     peek[256];
+    size_t  peekpos;
+
+    bool    eof;
+
+    token   tok; /* not a pointer anymore */
+
+    struct {
+        bool noops;
+        bool nodigraphs; /* used when lexing string constants */
+        bool preprocessing; /* whitespace and EOLs become actual tokens */
+        bool mergelines; /* backslash at the end of a line escapes the newline */
+    } flags;
  
-MEM_VECTOR_PROTO(lex_file, char, token);
+    int framevalue;
+    frame_macro *frames;
+    char *modelname;
+
+    size_t push_line;
+} lex_file;
  
  lex_file* lex_open (const char *file);
+lex_file* lex_open_string(const char *str, size_t len, const char *name);
  void      lex_close(lex_file   *lex);
  int       lex_do   (lex_file   *lex);
+void      lex_cleanup(void);
  
  /* Parser
   *
@@ -117,91 +156,185 @@ enum {
  
  typedef struct {
      const char   *op;
+    unsigned int operands; /* operand count; the operator tables in this header use 0 to 3 */
+    unsigned int id;       /* filled with opid1/opid2/opid3 values in the operator tables */
      unsigned int assoc;
-    unsigned int prec;
+    signed int   prec;
      unsigned int flags;
  } oper_info;
  
-static const oper_info operators[] = {
-    { "++",  ASSOC_LEFT,  16, OP_SUFFIX},
-    { "--",  ASSOC_LEFT,  16, OP_SUFFIX},
+#define opid1(a) (a)                           /* id is the single value itself */
+#define opid2(a,b) (((a)<<8)|(b))              /* a shifted left 8 bits, OR-ed with b; arguments parenthesized so expressions expand safely */
+#define opid3(a,b,c) (((a)<<16)|((b)<<8)|(c))  /* a shifted 16, b shifted 8, OR-ed with c; arguments parenthesized likewise */
  
-    { ".",   ASSOC_LEFT,  15, 0 },
+static const oper_info c_operators[] = {
+    { "(",   0, opid1('('),         ASSOC_LEFT,  99, OP_PREFIX}, /* paren expression - non function call */
  
-    { "!",   ASSOC_RIGHT, 14, 0 },
-    { "~",   ASSOC_RIGHT, 14, 0 },
-    { "+",   ASSOC_RIGHT, 14, OP_PREFIX },
-    { "-",   ASSOC_RIGHT, 14, OP_PREFIX },
-    { "++",  ASSOC_RIGHT, 14, OP_PREFIX },
-    { "--",  ASSOC_RIGHT, 14, OP_PREFIX },
-/*  { "&",   ASSOC_RIGHT, 14, OP_PREFIX }, */
+    { "++",  1, opid3('S','+','+'), ASSOC_LEFT,  17, OP_SUFFIX},
+    { "--",  1, opid3('S','-','-'), ASSOC_LEFT,  17, OP_SUFFIX},
+    { ".",   2, opid1('.'),         ASSOC_LEFT,  17, 0 },
+    { "(",   0, opid1('('),         ASSOC_LEFT,  17, 0 }, /* function call */
+    { "[",   2, opid1('['),         ASSOC_LEFT,  17, 0 }, /* array subscript */
  
-    { "*",   ASSOC_LEFT,  13, 0 },
-    { "/",   ASSOC_LEFT,  13, 0 },
-    { "%",   ASSOC_LEFT,  13, 0 },
+    { "++",  1, opid3('+','+','P'), ASSOC_RIGHT, 16, OP_PREFIX },
+    { "--",  1, opid3('-','-','P'), ASSOC_RIGHT, 16, OP_PREFIX },
  
-    { "+",   ASSOC_LEFT,  12, 0 },
-    { "-",   ASSOC_LEFT,  12, 0 },
+    { "**",  2, opid2('*', '*'),    ASSOC_RIGHT, 15, 0 },
  
-    { "<<",  ASSOC_LEFT,  11, 0 },
-    { ">>",  ASSOC_LEFT,  11, 0 },
+    { "!",   1, opid2('!', 'P'),    ASSOC_RIGHT, 14, OP_PREFIX },
+    { "~",   1, opid2('~', 'P'),    ASSOC_RIGHT, 14, OP_PREFIX },
+    { "+",   1, opid2('+','P'),     ASSOC_RIGHT, 14, OP_PREFIX },
+    { "-",   1, opid2('-','P'),     ASSOC_RIGHT, 14, OP_PREFIX },
+/*  { "&",   1, opid2('&','P'),     ASSOC_RIGHT, 14, OP_PREFIX }, */
  
-    { "<",   ASSOC_LEFT,  10, 0 },
-    { ">",   ASSOC_LEFT,  10, 0 },
-    { "<=",  ASSOC_LEFT,  10, 0 },
-    { ">=",  ASSOC_LEFT,  10, 0 },
+    { "*",   2, opid1('*'),         ASSOC_LEFT,  13, 0 },
+    { "/",   2, opid1('/'),         ASSOC_LEFT,  13, 0 },
+    { "%",   2, opid1('%'),         ASSOC_LEFT,  13, 0 },
  
-    { "==",  ASSOC_LEFT,  9,  0 },
-    { "!=",  ASSOC_LEFT,  9,  0 },
+    { "+",   2, opid1('+'),         ASSOC_LEFT,  12, 0 },
+    { "-",   2, opid1('-'),         ASSOC_LEFT,  12, 0 },
  
-    { "&",   ASSOC_LEFT,  8,  0 },
+    { "<<",  2, opid2('<','<'),     ASSOC_LEFT,  11, 0 },
+    { ">>",  2, opid2('>','>'),     ASSOC_LEFT,  11, 0 },
  
-    { "^",   ASSOC_LEFT,  7,  0 },
+    { "<",   2, opid1('<'),         ASSOC_LEFT,  10, 0 },
+    { ">",   2, opid1('>'),         ASSOC_LEFT,  10, 0 },
+    { "<=>", 2, opid3('<','=','>'), ASSOC_LEFT,  10, 0 },
+    { "<=",  2, opid2('<','='),     ASSOC_LEFT,  10, 0 },
+    { ">=",  2, opid2('>','='),     ASSOC_LEFT,  10, 0 },
  
-    { "|",   ASSOC_LEFT,  6,  0 },
+    { "==",  2, opid2('=','='),     ASSOC_LEFT,  9,  0 },
+    { "!=",  2, opid2('!','='),     ASSOC_LEFT,  9,  0 },
  
-    { "&&",  ASSOC_LEFT,  5,  0 },
+    { "&",   2, opid1('&'),         ASSOC_LEFT,  8,  0 },
  
-    { "||",  ASSOC_LEFT,  4,  0 },
+    { "^",   2, opid1('^'),         ASSOC_LEFT,  7,  0 },
  
-    { "?",   ASSOC_RIGHT, 3,  0 },
+    { "|",   2, opid1('|'),         ASSOC_LEFT,  6,  0 },
  
-    { "=",   ASSOC_RIGHT, 2,  0 },
-    { "+=",  ASSOC_RIGHT, 2,  0 },
-    { "-=",  ASSOC_RIGHT, 2,  0 },
-    { "*=",  ASSOC_RIGHT, 2,  0 },
-    { "/=",  ASSOC_RIGHT, 2,  0 },
-    { "%=",  ASSOC_RIGHT, 2,  0 },
-    { ">>=", ASSOC_RIGHT, 2,  0 },
-    { "<<=", ASSOC_RIGHT, 2,  0 },
-    { "&=",  ASSOC_RIGHT, 2,  0 },
-    { "^=",  ASSOC_RIGHT, 2,  0 },
-    { "|=",  ASSOC_RIGHT, 2,  0 },
-};
+    { "&&",  2, opid2('&','&'),     ASSOC_LEFT,  5,  0 },
  
-typedef struct
-{
-       lex_file *lex;
-       int      error;
-       lex_ctx  ctx;
+    { "||",  2, opid2('|','|'),     ASSOC_LEFT,  4,  0 },
  
-       token    *tokens;
-       token    *lastok;
+    { "?",   3, opid2('?',':'),     ASSOC_RIGHT, 3,  0 },
  
-       token    *tok; /* current token */
+    { "=",   2, opid1('='),         ASSOC_RIGHT, 2,  0 },
+    { "+=",  2, opid2('+','='),     ASSOC_RIGHT, 2,  0 },
+    { "-=",  2, opid2('-','='),     ASSOC_RIGHT, 2,  0 },
+    { "*=",  2, opid2('*','='),     ASSOC_RIGHT, 2,  0 },
+    { "/=",  2, opid2('/','='),     ASSOC_RIGHT, 2,  0 },
+    { "%=",  2, opid2('%','='),     ASSOC_RIGHT, 2,  0 },
+    { ">>=", 2, opid3('>','>','='), ASSOC_RIGHT, 2,  0 },
+    { "<<=", 2, opid3('<','<','='), ASSOC_RIGHT, 2,  0 },
+    { "&=",  2, opid2('&','='),     ASSOC_RIGHT, 2,  0 },
+    { "^=",  2, opid2('^','='),     ASSOC_RIGHT, 2,  0 },
+    { "|=",  2, opid2('|','='),     ASSOC_RIGHT, 2,  0 },
+    { "&~=", 2, opid3('&','~','='), ASSOC_RIGHT, 2,  0 },
  
-       MEM_VECTOR_MAKE(ast_value*, globals);
-} parse_file;
+    { ":",   0, opid2(':','?'),     ASSOC_RIGHT, 1,  0 },
  
-MEM_VECTOR_PROTO(parse_file, ast_value*, globals);
-
-parse_file* parse_open(const char *file);
-void        parse_file_close(parse_file*);
-
-bool        parse(parse_file*);
-
-bool        parse_iskey(parse_file *self, const char *ident);
+    { ",",   2, opid1(','),         ASSOC_LEFT,  0,  0 }
+};
+static const size_t c_operator_count = (sizeof(c_operators) / sizeof(c_operators[0]));
+
+static const oper_info fte_operators[] = {
+    { "(",   0, opid1('('),         ASSOC_LEFT,  99, OP_PREFIX}, /* paren expression - non function call */
+
+    { "++",  1, opid3('S','+','+'), ASSOC_LEFT,  15, OP_SUFFIX},
+    { "--",  1, opid3('S','-','-'), ASSOC_LEFT,  15, OP_SUFFIX},
+    { ".",   2, opid1('.'),         ASSOC_LEFT,  15, 0 },
+    { "(",   0, opid1('('),         ASSOC_LEFT,  15, 0 }, /* function call */
+    { "[",   2, opid1('['),         ASSOC_LEFT,  15, 0 }, /* array subscript */
+
+    { "!",   1, opid2('!', 'P'),    ASSOC_RIGHT, 14, OP_PREFIX },
+    { "+",   1, opid2('+','P'),     ASSOC_RIGHT, 14, OP_PREFIX },
+    { "-",   1, opid2('-','P'),     ASSOC_RIGHT, 14, OP_PREFIX },
+    { "++",  1, opid3('+','+','P'), ASSOC_RIGHT, 14, OP_PREFIX },
+    { "--",  1, opid3('-','-','P'), ASSOC_RIGHT, 14, OP_PREFIX },
+
+    { "*",   2, opid1('*'),         ASSOC_LEFT,  13, 0 },
+    { "/",   2, opid1('/'),         ASSOC_LEFT,  13, 0 },
+    { "&",   2, opid1('&'),         ASSOC_LEFT,  13, 0 },
+    { "|",   2, opid1('|'),         ASSOC_LEFT,  13, 0 },
+
+    { "+",   2, opid1('+'),         ASSOC_LEFT,  12, 0 },
+    { "-",   2, opid1('-'),         ASSOC_LEFT,  12, 0 },
+
+    { "<<",  2, opid2('<','<'),     ASSOC_LEFT,  11, 0 },
+    { ">>",  2, opid2('>','>'),     ASSOC_LEFT,  11, 0 },
+
+    { "<",   2, opid1('<'),         ASSOC_LEFT,  10, 0 },
+    { ">",   2, opid1('>'),         ASSOC_LEFT,  10, 0 },
+    { "<=",  2, opid2('<','='),     ASSOC_LEFT,  10, 0 },
+    { ">=",  2, opid2('>','='),     ASSOC_LEFT,  10, 0 },
+    { "==",  2, opid2('=','='),     ASSOC_LEFT,  10,  0 },
+    { "!=",  2, opid2('!','='),     ASSOC_LEFT,  10,  0 },
+
+    { "?",   3, opid2('?',':'),     ASSOC_RIGHT, 9,  0 },
+
+    { "=",   2, opid1('='),         ASSOC_RIGHT, 8,  0 },
+    { "+=",  2, opid2('+','='),     ASSOC_RIGHT, 8,  0 },
+    { "-=",  2, opid2('-','='),     ASSOC_RIGHT, 8,  0 },
+    { "*=",  2, opid2('*','='),     ASSOC_RIGHT, 8,  0 },
+    { "/=",  2, opid2('/','='),     ASSOC_RIGHT, 8,  0 },
+    { "%=",  2, opid2('%','='),     ASSOC_RIGHT, 8,  0 },
+    { "&=",  2, opid2('&','='),     ASSOC_RIGHT, 8,  0 },
+    { "|=",  2, opid2('|','='),     ASSOC_RIGHT, 8,  0 },
+    { "&~=", 2, opid3('&','~','='), ASSOC_RIGHT, 8,  0 },
+
+    { "&&",  2, opid2('&','&'),     ASSOC_LEFT,  5,  0 },
+    { "||",  2, opid2('|','|'),     ASSOC_LEFT,  5,  0 },
+
+    /* Leave precedence 3 for : with -fcorrect-ternary */
+    { ",",   2, opid1(','),         ASSOC_LEFT,  2,  0 },
+    { ":",   0, opid2(':','?'),     ASSOC_RIGHT, 1,  0 }
+};
+static const size_t fte_operator_count = (sizeof(fte_operators) / sizeof(fte_operators[0]));
+
+static const oper_info qcc_operators[] = {
+    { "(",   0, opid1('('),         ASSOC_LEFT,  99, OP_PREFIX}, /* paren expression - non function call */
+
+    { ".",   2, opid1('.'),         ASSOC_LEFT,  15, 0 },
+    { "(",   0, opid1('('),         ASSOC_LEFT,  15, 0 }, /* function call */
+    { "[",   2, opid1('['),         ASSOC_LEFT,  15, 0 }, /* array subscript */
+
+    { "!",   1, opid2('!', 'P'),    ASSOC_RIGHT, 14, OP_PREFIX },
+    { "+",   1, opid2('+','P'),     ASSOC_RIGHT, 14, OP_PREFIX },
+    { "-",   1, opid2('-','P'),     ASSOC_RIGHT, 14, OP_PREFIX },
+
+    { "*",   2, opid1('*'),         ASSOC_LEFT,  13, 0 },
+    { "/",   2, opid1('/'),         ASSOC_LEFT,  13, 0 },
+    { "&",   2, opid1('&'),         ASSOC_LEFT,  13, 0 },
+    { "|",   2, opid1('|'),         ASSOC_LEFT,  13, 0 },
+
+    { "+",   2, opid1('+'),         ASSOC_LEFT,  12, 0 },
+    { "-",   2, opid1('-'),         ASSOC_LEFT,  12, 0 },
+
+    { "<",   2, opid1('<'),         ASSOC_LEFT,  10, 0 },
+    { ">",   2, opid1('>'),         ASSOC_LEFT,  10, 0 },
+    { "<=",  2, opid2('<','='),     ASSOC_LEFT,  10, 0 },
+    { ">=",  2, opid2('>','='),     ASSOC_LEFT,  10, 0 },
+    { "==",  2, opid2('=','='),     ASSOC_LEFT,  10,  0 },
+    { "!=",  2, opid2('!','='),     ASSOC_LEFT,  10,  0 },
+
+    { "=",   2, opid1('='),         ASSOC_RIGHT, 8,  0 },
+    { "+=",  2, opid2('+','='),     ASSOC_RIGHT, 8,  0 },
+    { "-=",  2, opid2('-','='),     ASSOC_RIGHT, 8,  0 },
+    { "*=",  2, opid2('*','='),     ASSOC_RIGHT, 8,  0 },
+    { "/=",  2, opid2('/','='),     ASSOC_RIGHT, 8,  0 },
+    { "%=",  2, opid2('%','='),     ASSOC_RIGHT, 8,  0 },
+    { "&=",  2, opid2('&','='),     ASSOC_RIGHT, 8,  0 },
+    { "|=",  2, opid2('|','='),     ASSOC_RIGHT, 8,  0 },
+
+    { "&&",  2, opid2('&','&'),     ASSOC_LEFT,  5,  0 },
+    { "||",  2, opid2('|','|'),     ASSOC_LEFT,  5,  0 },
+
+    { ",",   2, opid1(','),         ASSOC_LEFT,  2,  0 },
+};
+static const size_t qcc_operator_count = (sizeof(qcc_operators) / sizeof(qcc_operators[0]));
  
+extern const oper_info *operators;
+extern size_t           operator_count;
  void lexerror(lex_file*, const char *fmt, ...);
  
  #endif