Merge branch 'master' into blub/bc3

author Wolfgang (Blub) Bumiller <blub@speed.at>

Fri, 10 Aug 2012 18:48:42 +0000 (20:48 +0200)

committer Wolfgang (Blub) Bumiller <blub@speed.at>

Fri, 10 Aug 2012 18:48:42 +0000 (20:48 +0200)
author Wolfgang (Blub) Bumiller <blub@speed.at>
Fri, 10 Aug 2012 18:48:42 +0000 (20:48 +0200)
committer Wolfgang (Blub) Bumiller <blub@speed.at>
Fri, 10 Aug 2012 18:48:42 +0000 (20:48 +0200)
diff --git a/Makefile b/Makefile

index 75d4921ea0c2067ed9260b06d50ff7de7c69c417..d326576da571977ee15a368ff4ab61b2f637f2b5 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -16,18 +16,14 @@ ifeq ($(CC), clang)
                 -Wno-format-nonliteral
  
  endif
-OBJ     = lex.o       \
-          error.o     \
-          parse.o     \
-          typedef.o   \
+OBJ     = \
            util.o      \
            code.o      \
-          asm.o       \
            ast.o       \
-          ir.o 
+          ir.o
  OBJ_A = test/ast-test.o
  OBJ_I = test/ir-test.o
-OBJ_C = main.o
+OBJ_C = main.o lexer.o parser.o
  OBJ_X = exec-standalone.o util.o
  
  #default is compiler only
diff --git a/ast.c b/ast.c

index c19fad2656ba70d43a2d3d3b242c512548bc9ce4..5f947b0e02903709cde800090c03f877a1b31048 100644 (file)
--- a/ast.c
+++ b/ast.c
@@ -202,6 +202,18 @@ ast_binary* ast_binary_new(lex_ctx ctx, int op,
      self->left = left;
      self->right = right;
  
+    if (op >= INSTR_EQ_F && op <= INSTR_GT)
+        self->expression.vtype = TYPE_FLOAT;
+    else if (op == INSTR_AND || op == INSTR_OR ||
+             op == INSTR_BITAND || op == INSTR_BITOR)
+        self->expression.vtype = TYPE_FLOAT;
+    else if (op == INSTR_MUL_VF || op == INSTR_MUL_FV)
+        self->expression.vtype = TYPE_VECTOR;
+    else if (op == INSTR_MUL_V)
+        self->expression.vtype = TYPE_FLOAT;
+    else
+        self->expression.vtype = left->expression.vtype;
+
      return self;
  }
  
@@ -617,8 +629,10 @@ bool ast_global_codegen(ast_value *self, ir_builder *ir)
      }
  
      v = ir_builder_create_global(ir, self->name, self->expression.vtype);
-    if (!v)
+    if (!v) {
+        printf("ir_builder_create_global failed\n");
          return false;
+    }
  
      if (self->isconst) {
          switch (self->expression.vtype)
diff --git a/ast.h b/ast.h

index 5a09b434764598f282d7ecf10f8981a8b65d2eb7..3b7ebdca0c22aceea61ead4ec6baceeb5ab3c0e7 100644 (file)
--- a/ast.h
+++ b/ast.h
@@ -133,6 +133,8 @@ struct ast_value_s
          const char   *vstring;
          int           ventity;
          ast_function *vfunc;
+        quaternion    vquat;
+        matrix        vmat;
      } constval;
  
      ir_value *ir_v;
diff --git a/gmqcc.h b/gmqcc.h

index 7f7efce32c70468539996fdc225d016fdcf508ae..4e1c641c2579f8e66842e38dc71e44ad4878de87 100644 (file)
--- a/gmqcc.h
+++ b/gmqcc.h
@@ -179,95 +179,6 @@ typedef char int64_size_is_correct  [sizeof(int64_t)  == 8?1:-1];
  typedef char uintptr_size_is_correct[sizeof(intptr_t) == sizeof(int*)?1:-1];
  typedef char intptr_size_is_correct [sizeof(uintptr_t)== sizeof(int*)?1:-1];
  
-/*===================================================================*/
-/*============================ lex.c ================================*/
-/*===================================================================*/
-typedef struct lex_file_t {
-    FILE *file;        /* file handler */
-    char *name;        /* name of file */
-    char  peek  [5];
-    char  lastok[8192];
-
-    int   last;    /* last token                   */
-    int   current; /* current token                */
-    int   length;  /* bytes left to parse          */
-    int   size;    /* never changes (size of file) */
-    int   line;    /* what line are we on?         */
-} lex_file;
-
-/*
- * It's important that this table never exceed 32 keywords, the ascii
- * table starts at 33 (and we don't want conflicts)
- */
-enum {
-    TOKEN_DO       ,
-    TOKEN_ELSE     ,
-    TOKEN_IF       ,
-    TOKEN_WHILE    ,
-    TOKEN_BREAK    ,
-    TOKEN_CONTINUE ,
-    TOKEN_RETURN   ,
-    TOKEN_GOTO     ,
-    TOKEN_FOR      ,   /* extension */
-    TOKEN_TYPEDEF  ,   /* extension */
-
-    /* ensure the token types are out of the  */
-    /* bounds of anyothers that may conflict. */
-    TOKEN_FLOAT    = 110,
-    TOKEN_VECTOR        ,
-    TOKEN_STRING        ,
-    TOKEN_ENTITY        ,
-    TOKEN_VOID
-};
-
-/*
- * Lexer state constants, these are numbers for where exactly in
- * the lexing the lexer is at. Or where it decided to stop if a lexer
- * error occurs.  These numbers must be > where the ascii-table ends
- * and > the last type token which is TOKEN_VOID
- */
-enum {
-    LEX_COMMENT = 1128,
-    LEX_CHRLIT        ,
-    LEX_STRLIT        ,
-    LEX_IDENT
-};
-
-int       lex_token  (lex_file *);
-void      lex_reset  (lex_file *);
-void      lex_close  (lex_file *);
-void      lex_parse  (lex_file *);
-lex_file *lex_include(lex_file *, const char *);
-void      lex_init   (const char *, lex_file **);
-
-/*===================================================================*/
-/*========================== error.c ================================*/
-/*===================================================================*/
-#define ERROR_LEX      (SHRT_MAX+0)
-#define ERROR_PARSE    (SHRT_MAX+1)
-#define ERROR_INTERNAL (SHRT_MAX+2)
-#define ERROR_COMPILER (SHRT_MAX+3)
-#define ERROR_PREPRO   (SHRT_MAX+4)
-int error(lex_file *, int, const char *, ...);
-
-/*===================================================================*/
-/*========================== parse.c ================================*/
-/*===================================================================*/
-int parse_gen(lex_file *);
-
-/*===================================================================*/
-/*========================== typedef.c ==============================*/
-/*===================================================================*/
-typedef struct typedef_node_t {
-    char      *name;
-} typedef_node;
-
-void          typedef_init();
-void          typedef_clear();
-typedef_node *typedef_find(const char *);
-int           typedef_add (lex_file *file, const char *, const char *);
-
-
  /*===================================================================*/
  /*=========================== util.c ================================*/
  /*===================================================================*/
@@ -367,12 +278,16 @@ enum {
      TYPE_FIELD    ,
      TYPE_FUNCTION ,
      TYPE_POINTER  ,
-    /* TYPE_INTEGER  , */
+    TYPE_INTEGER  ,
+    TYPE_QUATERNION  ,
+    TYPE_MATRIX  ,
      TYPE_VARIANT  ,
  
      TYPE_COUNT
  };
  
+extern const char *type_name[TYPE_COUNT];
+
  extern size_t type_sizeof[TYPE_COUNT];
  extern uint16_t type_store_instr[TYPE_COUNT];
  /* could use type_store_instr + INSTR_STOREP_F - INSTR_STORE_F
@@ -485,8 +400,8 @@ enum {
      INSTR_DONE,
      INSTR_MUL_F,
      INSTR_MUL_V,
-    INSTR_MUL_FV,
      INSTR_MUL_VF,
+    INSTR_MUL_FV,
      INSTR_DIV_F,
      INSTR_ADD_F,
      INSTR_ADD_V,
@@ -549,6 +464,23 @@ enum {
      INSTR_BITAND,
      INSTR_BITOR,
  
+/* warning: will be reordered */
+    INSTR_MUL_Q,
+    INSTR_MUL_QF,
+    INSTR_MUL_M,
+    INSTR_MUL_MF,
+    INSTR_EQ_Q,
+    INSTR_EQ_M,
+    INSTR_NE_Q,
+    INSTR_NE_M,
+    INSTR_LOAD_Q,
+    INSTR_LOAD_M,
+    INSTR_STORE_Q,
+    INSTR_STORE_M,
+    INSTR_STOREP_Q,
+    INSTR_STOREP_M,
+    INSTR_INV_Q,
+    INSTR_INV_M,
      /*
       * Virtual instructions used by the assembler
       * keep at the end but before virtual instructions
@@ -608,8 +540,8 @@ static const struct {
      { "DONE"      , 1, 4 },
      { "MUL_F"     , 3, 5 },
      { "MUL_V"     , 3, 5 },
-    { "MUL_FV"    , 3, 6 },
      { "MUL_VF"    , 3, 6 },
+    { "MUL_FV"    , 3, 6 },
      { "DIV"       , 0, 3 },
      { "ADD_F"     , 3, 5 },
      { "ADD_V"     , 3, 5 },
@@ -671,6 +603,24 @@ static const struct {
      { "OR"        , 0, 2 },
      { "BITAND"    , 0, 6 },
      { "BITOR"     , 0, 5 },
+
+    { "MUL_Q"     , 3, 5 },
+    { "MUL_QF"    , 3, 6 },
+    { "MUL_M"     , 3, 5 },
+    { "MUL_MF"    , 3, 6 },
+    { "EQ_Q"      , 0, 4 },
+    { "EQ_M"      , 0, 4 },
+    { "NE_Q"      , 0, 4 },
+    { "NE_M"      , 0, 4 },
+    { "FIELD_Q"   , 0, 7 },
+    { "FIELD_M"   , 0, 7 },
+    { "STORE_Q"   , 0, 7 },
+    { "STORE_M"   , 0, 7 },
+    { "STOREP_Q"  , 0, 8 },
+    { "STOREP_M"  , 0, 8 },
+    { "INV_Q"     , 0, 5 },
+    { "INV_M"     , 0, 5 },
+
      { "END"       , 0, 3 } /* virtual assembler instruction */
  };
  
@@ -831,6 +781,16 @@ void Tself##_##mem##_clear(Tself *self) \
      (owner)->mem##_alloc = 0;       \
  }
  
+#define MEM_VECTOR_MOVE(from, mem, to, tm)   \
+{   /* hand the vector (from)->mem over to (to)->tm */ \
+    (to)->tm = (from)->mem; /* the previous (to)->tm is overwritten, not released */ \
+    (to)->tm##_count = (from)->mem##_count;  \
+    (to)->tm##_alloc = (from)->mem##_alloc;  \
+    (from)->mem = NULL; /* source is emptied so only (to) refers to the data */ \
+    (from)->mem##_count = 0;                 \
+    (from)->mem##_alloc = 0;                 \
+}
+
  #define MEM_VEC_FUNCTIONS(Tself, Twhat, mem) \
  MEM_VEC_FUN_REMOVE(Tself, Twhat, mem)        \
  MEM_VEC_FUN_ADD(Tself, Twhat, mem)
@@ -852,6 +812,14 @@ typedef struct {
      float x, y, z;
  } vector;
  
+typedef float matrix[4][4]; /* OpenGL layout */
+typedef float quaternion[4]; /* order: x, y, z, w */
+#define MATRIX(axis, elem) ((4*(axis)) + (elem)) /* flat offset 4*axis + elem; NOTE(review): matrix is float[4][4], confirm how callers apply a flat offset */
+#define QUAT_X 0 /* QUAT_* are the indices into a quaternion, in the x, y, z, w order above */
+#define QUAT_Y 1
+#define QUAT_Z 2
+#define QUAT_W 3
+
  /*
   * A shallow copy of a lex_file to remember where which ast node
   * came from.
diff --git a/ir.c b/ir.c

index 42999c53564770bb53e3910993b630d57568f7ae..f31c224343ccf09d9c721c0b6b1b5d89d3245e1b 100644 (file)
--- a/ir.c
+++ b/ir.c
@@ -29,6 +29,23 @@
   * Type sizes used at multiple points in the IR codegen
   */
  
+const char *type_name[TYPE_COUNT] = { /* printable name of each TYPE_* value, indexed by the enum */
+    "void",
+    "string",
+    "float",
+    "vector",
+    "entity",
+    "field",
+    "function",
+    "pointer",
+    /* TYPE_INTEGER is a live enumerator, so it needs a slot of its own; */
+    /* leaving it out shifts every later name onto the wrong type.       */
+    "integer",
+    "quaternion",
+    "matrix",
+    "variant"
+};
+
  size_t type_sizeof[TYPE_COUNT] = {
      1, /* TYPE_VOID     */
      1, /* TYPE_STRING   */
@@ -41,7 +58,9 @@ size_t type_sizeof[TYPE_COUNT] = {
  #if 0
      1, /* TYPE_INTEGER  */
  #endif
-    3, /* TYPE_VARIANT  */
+    4, /* TYPE_QUATERNION */
+    16, /* TYPE_MATRIX */
+    16, /* TYPE_VARIANT  */
  };
  
  uint16_t type_store_instr[TYPE_COUNT] = {
@@ -54,9 +73,12 @@ uint16_t type_store_instr[TYPE_COUNT] = {
      INSTR_STORE_FNC,
      INSTR_STORE_ENT, /* should use I */
  #if 0
-    INSTR_STORE_ENT, /* integer type */
+    INSTR_STORE_I, /* integer type */
  #endif
-    INSTR_STORE_V, /* variant, should never be accessed */
+    INSTR_STORE_Q,
+    INSTR_STORE_M,
+
+    INSTR_STORE_M, /* variant, should never be accessed */
  };
  
  uint16_t type_storep_instr[TYPE_COUNT] = {
@@ -71,7 +93,10 @@ uint16_t type_storep_instr[TYPE_COUNT] = {
  #if 0
      INSTR_STOREP_ENT, /* integer type */
  #endif
-    INSTR_STOREP_V, /* variant, should never be accessed */
+    INSTR_STOREP_Q,
+    INSTR_STOREP_M,
+
+    INSTR_STOREP_M, /* variant, should never be accessed */
  };
  
  MEM_VEC_FUNCTIONS(ir_value_vector, ir_value*, v)
@@ -187,9 +212,14 @@ ir_value* ir_builder_get_global(ir_builder *self, const char *name)
  
  ir_value* ir_builder_create_global(ir_builder *self, const char *name, int vtype)
  {
-    ir_value *ve = ir_builder_get_global(self, name);
-    if (ve) {
-        return NULL;
+    ir_value *ve;
+
+    if (name && name[0] != '#')
+    {
+        ve = ir_builder_get_global(self, name);
+        if (ve) {
+            return NULL;
+        }
      }
  
      ve = ir_value_var(name, store_global, vtype);
@@ -638,6 +668,24 @@ bool ir_value_set_vector(ir_value *self, vector v)
      return true;
  }
  
+bool ir_value_set_quaternion(ir_value *self, quaternion v) /* make self a constant holding the quaternion v */
+{
+    if (self->vtype != TYPE_QUATERNION)
+        return false; /* refused: self is not quaternion-typed, nothing is modified */
+    memcpy(&self->constval.vquat, v, sizeof(self->constval.vquat)); /* v is an array parameter (a pointer here), so the size is taken from the destination */
+    self->isconst = true;
+    return true;
+}
+
+bool ir_value_set_matrix(ir_value *self, matrix v) /* make self a constant holding the matrix v */
+{
+    if (self->vtype != TYPE_MATRIX)
+        return false; /* refused: self is not matrix-typed, nothing is modified */
+    memcpy(&self->constval.vmat, v, sizeof(self->constval.vmat)); /* v is an array parameter (a pointer here), so the size is taken from the destination */
+    self->isconst = true;
+    return true;
+}
+
  bool ir_value_set_string(ir_value *self, const char *str)
  {
      if (self->vtype != TYPE_STRING)
@@ -941,7 +989,6 @@ bool ir_block_create_storep(ir_block *self, ir_value *target, ir_value *what)
      vtype = what->vtype;
  
      op = type_storep_instr[vtype];
-
      return ir_block_create_store_op(self, op, target, what);
  }
  
@@ -1336,6 +1383,8 @@ ir_value* ir_block_create_load_from_ent(ir_block *self, const char *label, ir_va
          case TYPE_POINTER: op = INSTR_LOAD_I;   break;
          case TYPE_INTEGER: op = INSTR_LOAD_I;   break;
  #endif
+        case TYPE_QUATERNION: op = INSTR_LOAD_Q; break;
+        case TYPE_MATRIX:     op = INSTR_LOAD_M; break;
          default:
              return NULL;
      }
@@ -1439,12 +1488,22 @@ ir_value* ir_block_create_mul(ir_block *self,
              case TYPE_VECTOR:
                  op = INSTR_MUL_V;
                  break;
+            case TYPE_QUATERNION:
+                op = INSTR_MUL_Q;
+                break;
+            case TYPE_MATRIX:
+                op = INSTR_MUL_M;
+                break;
          }
      } else {
          if ( (l == TYPE_VECTOR && r == TYPE_FLOAT) )
              op = INSTR_MUL_VF;
          else if ( (l == TYPE_FLOAT && r == TYPE_VECTOR) )
              op = INSTR_MUL_FV;
+        else if ( (l == TYPE_QUATERNION && r == TYPE_FLOAT) )
+            op = INSTR_MUL_QF;
+        else if ( (l == TYPE_MATRIX && r == TYPE_FLOAT) )
+            op = INSTR_MUL_MF;
  #if 0
          else if ( (l == TYPE_VECTOR && r == TYPE_INTEGER) )
              op = INSTR_MUL_VI;
@@ -2500,6 +2559,8 @@ static bool ir_builder_gen_global(ir_builder *self, ir_value *global)
          return global->code.globaladdr >= 0;
      }
      case TYPE_VECTOR:
+    case TYPE_QUATERNION:
+    case TYPE_MATRIX:
      {
          size_t d;
          if (code_defs_add(def) < 0)
@@ -2752,6 +2813,7 @@ void ir_value_dump(ir_value* v, int (*oprintf)(const char*, ...))
  {
         if (v->isconst) {
                 switch (v->vtype) {
+                   default:
                         case TYPE_VOID:
                                 oprintf("(void)");
                                 break;
diff --git a/ir.h b/ir.h

index e36aa086eb04f3d4a916d713dda275d37c6ad85f..a4dddcb937ea5a151f993e599f98b9a0f5b5881b 100644 (file)
--- a/ir.h
+++ b/ir.h
@@ -55,6 +55,8 @@ typedef struct ir_value_s {
          char    *vstring;
          struct ir_value_s *vpointer;
          struct ir_function_s *vfunc;
+        quaternion vquat;
+        matrix     vmat;
      } constval;
  
      struct {
@@ -97,6 +99,8 @@ bool GMQCC_WARN ir_value_set_string(ir_value*, const char *s);
  bool GMQCC_WARN ir_value_set_vector(ir_value*, vector v);
  /*bool   ir_value_set_pointer_v(ir_value*, ir_value* p); */
  /*bool   ir_value_set_pointer_i(ir_value*, int i);       */
+bool GMQCC_WARN ir_value_set_quaternion(ir_value*, quaternion v);
+bool GMQCC_WARN ir_value_set_matrix(ir_value*, matrix v);
  
  MEM_VECTOR_PROTO(ir_value, ir_life_entry_t, life);
  /* merge an instruction into the life-range */
diff --git a/lexer.c b/lexer.c

new file mode 100644 (file)

index 0000000..bfa7443
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,683 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include "gmqcc.h"
+#include "lexer.h"
+
+MEM_VEC_FUNCTIONS(token, char, value)
+/* Print "error <file>:<line>: <message>" plus a newline on stdout; the location is left out when lex is NULL. */
+void lexerror(lex_file *lex, const char *fmt, ...)
+{
+       va_list ap;
+
+       if (lex)
+               printf("error %s:%lu: ", lex->name, (unsigned long)lex->sline); /* sline: the line lex_do stored after skipping leading whitespace */
+       else
+               printf("error: ");
+
+       va_start(ap, fmt);
+       vprintf(fmt, ap);
+       va_end(ap);
+
+       printf("\n"); /* the newline is added here, so fmt does not need to end in one */
+}
+/* Like lexerror, but prefixed "warning" and printed only when OPTS_WARN(warn) holds. */
+void lexwarn(lex_file *lex, int warn, const char *fmt, ...)
+{
+       va_list ap;
+
+       if (!OPTS_WARN(warn)) /* presumably: this warning category is switched off - OPTS_WARN is defined elsewhere */
+           return;
+
+       if (lex)
+               printf("warning %s:%lu: ", lex->name, (unsigned long)lex->sline); /* sline: the line lex_do stored after skipping leading whitespace */
+       else
+               printf("warning: ");
+
+       va_start(ap, fmt);
+       vprintf(fmt, ap);
+       va_end(ap);
+
+       printf("\n"); /* the newline is added here, so fmt does not need to end in one */
+}
+/* Allocate a token with every byte zeroed; returns NULL when mem_a fails. */
+token* token_new()
+{
+       token *tok = (token*)mem_a(sizeof(token));
+       if (!tok)
+               return NULL;
+       memset(tok, 0, sizeof(*tok)); /* zeroing leaves the links NULL and the value vector empty */
+       return tok;
+}
+/* Unlink self from neighbours that point back at it, then release the token. */
+void token_delete(token *self)
+{
+       if (self->next && self->next->prev == self)
+               self->next->prev = self->prev; /* successor now skips over self */
+       if (self->prev && self->prev->next == self)
+               self->prev->next = self->next; /* predecessor now skips over self */
+       MEM_VECTOR_CLEAR(self, value); /* presumably frees the value buffer - the macro is only partly visible here */
+       mem_d(self);
+}
+/* Duplicate one token (value, ctx, ttype, constval); the copy is not linked to any list. NULL on allocation failure. */
+token* token_copy(const token *cp)
+{
+       token* self = token_new();
+       if (!self)
+               return NULL;
+       /* copy the value */
+       self->value_alloc = cp->value_count + 1; /* one extra byte for the terminator written below */
+       self->value_count = cp->value_count;
+       self->value = (char*)mem_a(self->value_alloc);
+       if (!self->value) {
+               mem_d(self);
+               return NULL;
+       }
+       memcpy(self->value, cp->value, cp->value_count); /* NOTE(review): cp->value may be NULL when value_count is 0 - confirm memcpy is acceptable then */
+       self->value[self->value_alloc-1] = 0;
+
+       /* rest */
+       self->ctx = cp->ctx;
+       self->ttype = cp->ttype;
+       memcpy(&self->constval, &cp->constval, sizeof(self->constval));
+       return self; /* next/prev stay NULL from token_new */
+}
+/* Delete t and every token reachable from it through next; a NULL list is accepted. */
+void token_delete_all(token *t)
+{
+       token *n;
+       /* test before the first access so that an empty (NULL) list is a no-op */
+       while (t) {
+               n = t->next; /* fetch the successor first: token_delete frees t */
+               token_delete(t);
+               t = n;
+       }
+}
+/* Copy cp and all tokens after it into a new doubly linked list; NULL (nothing kept) if any copy fails. */
+token* token_copy_all(const token *cp)
+{
+       token *cur; /* last token of the copy built so far */
+       token *out; /* head of the copy, returned to the caller */
+
+       out = cur = token_copy(cp);
+       if (!out)
+               return NULL;
+
+       while (cp->next) {
+               cp = cp->next;
+               cur->next = token_copy(cp);
+               if (!cur->next) {
+                       token_delete_all(out); /* drop the partial copy built so far */
+                       return NULL;
+               }
+               cur->next->prev = cur; /* token_copy leaves links empty, so set the back link here */
+               cur = cur->next;
+       }
+
+       return out;
+}
+/* Open file for lexing: returns a zeroed lex_file owning the stream and a copy of the name, or NULL after reporting the error. */
+lex_file* lex_open(const char *file)
+{
+       lex_file *lex;
+       FILE *in = fopen(file, "rb");
+
+       if (!in) {
+               lexerror(NULL, "open failed: '%s'", file); /* lexerror appends the newline itself */
+               return NULL;
+       }
+
+       lex = (lex_file*)mem_a(sizeof(*lex));
+       if (lex) {
+               memset(lex, 0, sizeof(*lex)); /* also leaves peekpos at 0 and tok at NULL */
+               lex->name = util_strdup(file); /* a NULL result is treated as out of memory below */
+       }
+       if (!lex || !lex->name) {
+               fclose(in);
+               if (lex)
+                       mem_d(lex);
+               lexerror(NULL, "out of memory");
+               return NULL;
+       }
+
+       lex->file = in;
+       lex->line = 1; /* we start counting at 1 */
+       return lex;
+}
+/* Close the stream if one is open, delete the current token if any, and free the name and the lex_file. */
+void lex_close(lex_file *lex)
+{
+       if (lex->file)
+               fclose(lex->file);
+       if (lex->tok)
+               token_delete(lex->tok); /* only the current token; lex must not be used afterwards */
+       mem_d(lex->name);
+       mem_d(lex);
+}
+
+/* Get or put-back data
+ * The following two functions do NOT understand what kind of data they
+ * are working on.
+ * They are merely wrapping get/put in order to count line numbers.
+ */
+static int lex_getch(lex_file *lex)
+{
+       int ch;
+
+       if (lex->peekpos) { /* put-back characters are pending: hand them out first, newest first */
+               lex->peekpos--;
+               if (lex->peek[lex->peekpos] == '\n')
+                       lex->line++; /* re-reading a newline re-applies the count lex_ungetch removed */
+               return lex->peek[lex->peekpos];
+       }
+
+       ch = fgetc(lex->file);
+       if (ch == '\n')
+               lex->line++;
+       return ch; /* EOF from fgetc is passed through unchanged */
+}
+/* Push ch back so the next lex_getch returns it again. NOTE(review): no bound check on peekpos - confirm the size of lex->peek. */
+static void lex_ungetch(lex_file *lex, int ch)
+{
+       lex->peek[lex->peekpos++] = ch;
+       if (ch == '\n')
+               lex->line--; /* undo the increment done when this newline was read */
+}
+
+/* classify characters
+ * some additions to the is*() functions of ctype.h
+ */
+
+/* Idents are alphanumberic, but they start with alpha or _ */
+static bool isident_start(int ch)
+{
+       return isalpha(ch) || ch == '_';
+}
+/* True for any character allowed after the first one of an identifier: letters, '_' and digits. */
+static bool isident(int ch)
+{
+       return isident_start(ch) || isdigit(ch);
+}
+
+/* isxdigit_only is used when we already know it's not a digit
+ * and want to see if it's a hex digit anyway: it accepts only
+ * the letters a-f and A-F, never 0-9. */
+static bool isxdigit_only(int ch)
+{
+       return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
+}
+
+/* Skip whitespace and comments and return the first
+ * non-white character.
+ * As this makes use of the above getch() ungetch() functions,
+ * we don't need to care at all about line numbering anymore.
+ *
+ * In theory, this function should only be used at the beginning
+ * of lexing, or when we *know* the next character is part of the token.
+ * Otherwise, if the parser throws an error, the linenumber may not be
+ * the line of the error, but the line of the next token AFTER the error.
+ *
+ * This is currently only problematic when using c-like string-continuation,
+ * since comments and whitespaces are allowed between 2 such strings.
+ * Example:
+printf(   "line one\n"
+// A comment
+          "A continuation of the previous string"
+// This line is skipped
+      , foo);
+
+ * In this case, if the parser decides it didn't actually want a string,
+ * and uses lex->line to print an error, it will show the ', foo);' line's
+ * linenumber.
+ *
+ * On the other hand, the parser is supposed to remember the line of the next
+ * token's beginning. In this case we would want skipwhite() to be called
+ * AFTER reading a token, so that the parser, before reading the NEXT token,
+ * doesn't store the *comment's* linenumber, but the actual token's linenumber.
+ *
+ * THIS SOLUTION
+ *    here is to store the line of the first character after skipping
+ *    the initial whitespace in lex->sline, this happens in lex_do.
+ */
+static int lex_skipwhite(lex_file *lex)
+{
+       int ch = 0;
+
+       do
+       {
+               ch = lex_getch(lex);
+               while (ch != EOF && isspace(ch)) ch = lex_getch(lex);
+
+               if (ch == '/') {
+                       ch = lex_getch(lex);
+                       if (ch == '/')
+                       {
+                               /* one line comment */
+                               ch = lex_getch(lex);
+
+                               /* check for special: '/', '/', '*', '/' */
+                               if (ch == '*') {
+                                       ch = lex_getch(lex);
+                                       if (ch == '/') {
+                                               ch = ' ';
+                                               continue;
+                                       }
+                               }
+
+                               while (ch != EOF && ch != '\n') {
+                                       ch = lex_getch(lex);
+                               }
+                               continue;
+                       }
+                       if (ch == '*')
+                       {
+                               /* multiline comment: ends at the first '*' directly followed by '/' */
+                               int prev = 0; /* 0 so that the '*' of the opening is not counted */
+                               /* Each character is compared with the one before it, so a run */
+                               /* of stars right before the closing slash still ends the comment. */
+                               while ((ch = lex_getch(lex)) != EOF)
+                               {
+                                       if (prev == '*' && ch == '/') {
+                                               ch = lex_getch(lex);
+                                               break;
+                                       }
+                                       prev = ch;
+                               }
+                               if (ch == '/') /* allow *//* direct following comment */
+                               {
+                                       lex_ungetch(lex, ch);
+                                       ch = ' '; /* cause TRUE in the isspace check */
+                               }
+                               continue;
+                       }
+                       /* Otherwise roll back to the slash and break out of the loop */
+                       lex_ungetch(lex, ch);
+                       ch = '/';
+                       break;
+               }
+       } while (ch != EOF && isspace(ch));
+
+       return ch; /* first character that is neither whitespace nor part of a comment, or EOF */
+}
+
+/* Append a character to the token buffer; reports "out of memory" and returns false when the buffer cannot grow */
+static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch)
+{
+       if (!token_value_add(lex->tok, ch)) { /* token_value_add: presumably generated by MEM_VEC_FUNCTIONS(token, char, value) */
+               lexerror(lex, "out of memory");
+               return false;
+       }
+       return true;
+}
+
+/* Append a trailing null-byte without counting it as part of the value; false when the buffer cannot grow */
+static bool GMQCC_WARN lex_endtoken(lex_file *lex)
+{
+       if (!token_value_add(lex->tok, 0)) {
+               lexerror(lex, "out of memory");
+               return false;
+       }
+       lex->tok->value_count--; /* the terminator stays in the buffer but value_count excludes it */
+       return true;
+}
+
+/* Append the remaining identifier characters to the current token; false (token marked TOKEN_FATAL) if the buffer cannot grow. */
+static bool GMQCC_WARN lex_finish_ident(lex_file *lex)
+{
+       int ch = lex_getch(lex);
+
+       while (ch != EOF && isident(ch))
+       {
+               if (!lex_tokench(lex, ch)) {
+                       lex->tok->ttype = TOKEN_FATAL;
+                       return false; /* a token code must not be returned from a bool function: it would read as success */
+               }
+               ch = lex_getch(lex);
+       }
+       /* last ch was not an ident ch: */
+       lex_ungetch(lex, ch);
+
+       return true;
+}
+/* Read a string body up to the closing quote, translating escapes; returns TOKEN_STRINGCONST, or TOKEN_ERROR / TOKEN_FATAL (also stored in the token). */
+static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote)
+{
+       int ch = 0;
+
+       while (ch != EOF)
+       {
+               ch = lex_getch(lex);
+               if (ch == quote)
+                       return TOKEN_STRINGCONST;
+               if (ch == EOF)
+                       break; /* unterminated string: reported below, EOF is not stored in the value */
+               if (ch == '\\') {
+                       ch = lex_getch(lex);
+                       if (ch == EOF) {
+                               lexerror(lex, "unexpected end of file");
+                               lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
+                               return (lex->tok->ttype = TOKEN_ERROR);
+                       }
+                       switch (ch) {
+                       case '\\': case '\'': case '"': break; /* escaped backslash or quote: keep the character itself */
+                       case 'a':  ch = '\a'; break;
+                       case 'b':  ch = '\b'; break;
+                       case 'r':  ch = '\r'; break;
+                       case 'n':  ch = '\n'; break;
+                       case 't':  ch = '\t'; break;
+                       case 'f':  ch = '\f'; break;
+                       case 'v':  ch = '\v'; break;
+                       default:
+                               lexwarn(lex, WARN_UNKNOWN_CONTROL_SEQUENCE, "unrecognized control sequence: \\%c", ch);
+                               /* so we just add the character plus backslash no matter what it actually is */
+                               if (!lex_tokench(lex, '\\'))
+                                       return (lex->tok->ttype = TOKEN_FATAL);
+                       }
+                       /* add the character finally */
+                       if (!lex_tokench(lex, ch))
+                               return (lex->tok->ttype = TOKEN_FATAL);
+               }
+               else if (!lex_tokench(lex, ch))
+                       return (lex->tok->ttype = TOKEN_FATAL);
+       }
+       lexerror(lex, "unexpected end of file within string constant");
+       lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */
+       return (lex->tok->ttype = TOKEN_ERROR);
+}
+/* Lex a number whose first digit lastch was already read; returns (and stores) TOKEN_INTCONST, TOKEN_FLOATCONST, TOKEN_ERROR or TOKEN_FATAL. */
+static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch)
+{
+       bool ishex = false;
+
+       int  ch = lastch;
+
+       /* parse a number... */
+       lex->tok->ttype = TOKEN_INTCONST; /* upgraded to TOKEN_FLOATCONST once a '.' is seen */
+
+       if (!lex_tokench(lex, ch))
+               return (lex->tok->ttype = TOKEN_FATAL);
+
+       ch = lex_getch(lex);
+       if (ch != '.' && !isdigit(ch))
+       {
+               if (lastch != '0' || ch != 'x')
+               {
+                       /* end of the number or EOF */
+                       lex_ungetch(lex, ch);
+                       if (!lex_endtoken(lex))
+                               return (lex->tok->ttype = TOKEN_FATAL);
+
+                       lex->tok->constval.i = lastch - '0'; /* single-digit number: its value is the digit itself */
+                       return lex->tok->ttype;
+               }
+
+               ishex = true; /* the text so far is "0x" */
+       }
+
+       /* EOF would have been caught above */
+
+       if (ch != '.')
+       {
+               if (!lex_tokench(lex, ch)) /* ch is the second digit, or the 'x' of a hex prefix */
+                       return (lex->tok->ttype = TOKEN_FATAL);
+               ch = lex_getch(lex);
+               while (isdigit(ch) || (ishex && isxdigit_only(ch)))
+               {
+                       if (!lex_tokench(lex, ch))
+                               return (lex->tok->ttype = TOKEN_FATAL);
+                       ch = lex_getch(lex);
+               }
+       }
+       /* NOT else, '.' can come from above as well */
+       if (ch == '.' && !ishex)
+       {
+               /* Allow a decimal point in non-hex mode */
+               lex->tok->ttype = TOKEN_FLOATCONST;
+               if (!lex_tokench(lex, ch))
+                       return (lex->tok->ttype = TOKEN_FATAL);
+
+               /* continue digits-only */
+               ch = lex_getch(lex);
+               while (isdigit(ch))
+               {
+                       if (!lex_tokench(lex, ch))
+                               return (lex->tok->ttype = TOKEN_FATAL);
+                       ch = lex_getch(lex);
+               }
+       }
+       /* put back the last character */
+       /* but do not put back the trailing 'f' of a float */
+       if (lex->tok->ttype == TOKEN_FLOATCONST && ch == 'f')
+               ch = lex_getch(lex); /* the suffix is dropped and never stored in the token value */
+
+       /* generally we don't want words to follow numbers: */
+       if (isident(ch)) {
+               lexerror(lex, "unexpected trailing characters after number");
+               return (lex->tok->ttype = TOKEN_ERROR); /* the offending character stays consumed */
+       }
+       lex_ungetch(lex, ch);
+
+       if (!lex_endtoken(lex))
+               return (lex->tok->ttype = TOKEN_FATAL);
+       if (lex->tok->ttype == TOKEN_FLOATCONST)
+               lex->tok->constval.f = strtod(lex->tok->value, NULL);
+       else
+               lex->tok->constval.i = strtol(lex->tok->value, NULL, 0); /* base 0: strtol detects the "0x" prefix (and a leading 0 as octal) itself */
+       return lex->tok->ttype;
+}
+
+int lex_do(lex_file *lex)
+{
+       int ch, nextch;
+
+       if (lex->tok)
+               token_delete(lex->tok);
+       lex->tok = token_new();
+       if (!lex->tok)
+               return TOKEN_FATAL;
+
+       ch = lex_skipwhite(lex);
+       lex->sline = lex->line;
+       lex->tok->ctx.line = lex->sline;
+       lex->tok->ctx.file = lex->name;
+
+       if (ch == EOF)
+               return (lex->tok->ttype = TOKEN_EOF);
+
+       /* single-character tokens */
+       switch (ch)
+       {
+               case ';':
+               case '(':
+               case ')':
+               case '{':
+               case '}':
+               case '[':
+               case ']':
+
+               case '#':
+
+                       return (lex->tok->ttype = ch);
+               default:
+                       break;
+       }
+
+       if (lex->flags.noops)
+       {
+               /* Detect characters early which are normally
+                * operators OR PART of an operator.
+                */
+               switch (ch)
+               {
+                       case '+':
+                       case '-':
+                       case '*':
+                       case '/':
+                       case '<':
+                       case '>':
+                       case '=':
+                       case '&':
+                       case '|':
+                       case '^':
+                       case '~':
+                       case ',':
+                               return ch;
+                       default:
+                               break;
+               }
+       }
+
+       if (ch == ',') {
+           if (!lex_tokench(lex, ch) ||
+               !lex_endtoken(lex))
+           {
+               return (lex->tok->ttype = TOKEN_FATAL);
+           }
+           return (lex->tok->ttype = TOKEN_OPERATOR);
+       }
+
+       if (ch == '+' || ch == '-' || /* ++, --, +=, -=  and -> as well! */
+           ch == '>' || ch == '<' || /* <<, >>, <=, >= */
+           ch == '=' ||              /* == */
+           ch == '&' || ch == '|')   /* &&, ||, &=, |= */
+       {
+               if (!lex_tokench(lex, ch))
+                       return (lex->tok->ttype = TOKEN_FATAL);
+
+               nextch = lex_getch(lex);
+               if (nextch == ch || nextch == '=') {
+                       if (!lex_tokench(lex, nextch))
+                               return (lex->tok->ttype = TOKEN_FATAL);
+               } else if (ch == '-' && nextch == '>') {
+                       if (!lex_tokench(lex, nextch))
+                               return (lex->tok->ttype = TOKEN_FATAL);
+               } else
+                       lex_ungetch(lex, nextch);
+
+               if (!lex_endtoken(lex))
+                       return (lex->tok->ttype = TOKEN_FATAL);
+               return (lex->tok->ttype = TOKEN_OPERATOR);
+       }
+
+       if (ch == '^' || ch == '~' || ch == '!')
+       {
+               if (!lex_tokench(lex, ch) ||
+                       !lex_endtoken(lex))
+               {
+                       return (lex->tok->ttype = TOKEN_FATAL);
+               }
+               return (lex->tok->ttype = TOKEN_OPERATOR);
+       }
+
+       if (ch == '*' || ch == '/') /* *=, /= */
+       {
+               if (!lex_tokench(lex, ch))
+                       return (lex->tok->ttype = TOKEN_FATAL);
+
+               nextch = lex_getch(lex);
+               if (nextch == '=') {
+                       if (!lex_tokench(lex, nextch))
+                               return (lex->tok->ttype = TOKEN_FATAL);
+               } else
+                       lex_ungetch(lex, nextch);
+
+               if (!lex_endtoken(lex))
+                       return (lex->tok->ttype = TOKEN_FATAL);
+               return (lex->tok->ttype = TOKEN_OPERATOR);
+       }
+
+       if (isident_start(ch))
+       {
+               const char *v;
+               if (!lex_tokench(lex, ch))
+                       return (lex->tok->ttype = TOKEN_FATAL);
+               if (!lex_finish_ident(lex)) {
+                       /* error? */
+                       return (lex->tok->ttype = TOKEN_ERROR);
+               }
+               if (!lex_endtoken(lex))
+                       return (lex->tok->ttype = TOKEN_FATAL);
+               lex->tok->ttype = TOKEN_IDENT;
+
+               v = lex->tok->value;
+               if (!strcmp(v, "void")) {
+                       lex->tok->ttype = TOKEN_TYPENAME;
+                   lex->tok->constval.t = TYPE_VOID;
+               } else if (!strcmp(v, "int")) {
+                       lex->tok->ttype = TOKEN_TYPENAME;
+                   lex->tok->constval.t = TYPE_INTEGER;
+               } else if (!strcmp(v, "float")) {
+                       lex->tok->ttype = TOKEN_TYPENAME;
+                   lex->tok->constval.t = TYPE_FLOAT;
+               } else if (!strcmp(v, "string")) {
+                       lex->tok->ttype = TOKEN_TYPENAME;
+                   lex->tok->constval.t = TYPE_STRING;
+               } else if (!strcmp(v, "entity")) {
+                       lex->tok->ttype = TOKEN_TYPENAME;
+                   lex->tok->constval.t = TYPE_ENTITY;
+               } else if (!strcmp(v, "vector")) {
+                       lex->tok->ttype = TOKEN_TYPENAME;
+                   lex->tok->constval.t = TYPE_VECTOR;
+               } else if (!strcmp(v, "for")  ||
+                        !strcmp(v, "while")  ||
+                        !strcmp(v, "do")     ||
+                        !strcmp(v, "var")    ||
+                        !strcmp(v, "return") ||
+                        !strcmp(v, "const"))
+                       lex->tok->ttype = TOKEN_KEYWORD;
+
+               return lex->tok->ttype;
+       }
+
+       if (ch == '"')
+       {
+               lex->tok->ttype = lex_finish_string(lex, '"');
+               while (lex->tok->ttype == TOKEN_STRINGCONST)
+               {
+                       /* Allow c style "string" "continuation" */
+                       ch = lex_skipwhite(lex);
+                       if (ch != '"') {
+                               lex_ungetch(lex, ch);
+                               break;
+                       }
+
+                       lex->tok->ttype = lex_finish_string(lex, '"');
+               }
+               if (!lex_endtoken(lex))
+                       return (lex->tok->ttype = TOKEN_FATAL);
+               return lex->tok->ttype;
+       }
+
+       if (ch == '\'')
+       {
+               /* we parse character constants like string,
+                * but return TOKEN_CHARCONST, or a vector type if it fits...
+                * Likewise actual unescaping has to be done by the parser.
+                * The difference is we don't allow 'char' 'continuation'.
+                */
+                lex->tok->ttype = lex_finish_string(lex, '\'');
+                if (!lex_endtoken(lex))
+                        return (lex->tok->ttype = TOKEN_FATAL);
+
+                /* It's a vector if we can successfully scan 3 floats */
+                if (sscanf(lex->tok->value, " %f %f %f ", &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3)
+                {
+                        lex->tok->ttype = TOKEN_VECTORCONST;
+                }
+
+                return lex->tok->ttype;
+       }
+
+       if (isdigit(ch))
+       {
+               lex->tok->ttype = lex_finish_digit(lex, ch);
+               if (!lex_endtoken(lex))
+                       return (lex->tok->ttype = TOKEN_FATAL);
+               return lex->tok->ttype;
+       }
+
+       lexerror(lex, "unknown token");
+       return (lex->tok->ttype = TOKEN_ERROR);
+}
diff --git a/lexer.h b/lexer.h

new file mode 100644 (file)

index 0000000..ae8812b
--- /dev/null
+++ b/lexer.h
@@ -0,0 +1,216 @@
+#ifndef GMQCC_LEXER_HDR_
+#define GMQCC_LEXER_HDR_
+
+typedef struct token_s token;
+
+#include "ast.h"
+
+struct token_s { /* one lexed token; lex_do fills the lexer's current token */
+       int ttype; /* a TOKEN_* value, or the character code for single-character tokens */
+
+       MEM_VECTOR_MAKE(char, value); /* token text; compared with strcmp by the lexer and parser */
+
+       union {
+               vector v; /* filled via sscanf when a quoted constant scans as three floats */
+               int    i; /* read by the parser for TOKEN_INTCONST */
+               double f; /* read by the parser for TOKEN_FLOATCONST */
+               int    t; /* TYPE_* id, set for TOKEN_TYPENAME */
+       } constval;
+
+       struct token_s *next; /* presumably list links for the *_all helpers below - confirm */
+       struct token_s *prev;
+
+       lex_ctx ctx; /* source file name and line where the token started */
+};
+
+token* token_new(); /* NOTE(review): empty parentheses do not declare a prototype in C; (void) would be stricter */
+void   token_delete(token*);
+token* token_copy(const token *cp);
+void   token_delete_all(token *t);
+token* token_copy_all(const token *cp);
+
+/* Lexer
+ *
+ */
+enum {
+    /* Other tokens which we can return: */
+    TOKEN_NONE = 0,
+    TOKEN_START = 128, /* named tokens start here; lex_do returns single-character tokens as their character code */
+
+    TOKEN_IDENT,
+
+    TOKEN_TYPENAME, /* void, int, float, string, entity, vector */
+
+    TOKEN_OPERATOR,
+
+    TOKEN_KEYWORD, /* for, while, do, var, return, const */
+
+    TOKEN_STRINGCONST, /* not the typename but an actual "string" */
+    TOKEN_CHARCONST,
+    TOKEN_VECTORCONST,
+    TOKEN_INTCONST,
+    TOKEN_FLOATCONST,
+
+    TOKEN_EOF,
+
+    /* We use '< TOKEN_ERROR', so TOKEN_FATAL must come after it and any
+     * other error related tokens as well
+     */
+    TOKEN_ERROR,
+    TOKEN_FATAL /* internal error, eg out of memory */
+};
+
+static const char *_tokennames[] = { /* printable names, in enum order starting at TOKEN_START */
+    "TOKEN_START",
+    "TOKEN_IDENT",
+    "TOKEN_TYPENAME",
+    "TOKEN_OPERATOR",
+    "TOKEN_KEYWORD",
+    "TOKEN_STRINGCONST",
+    "TOKEN_CHARCONST",
+    "TOKEN_VECTORCONST",
+    "TOKEN_INTCONST",
+    "TOKEN_FLOATCONST",
+    "TOKEN_EOF",
+    "TOKEN_ERROR",
+    "TOKEN_FATAL",
+};
+typedef int /* compile-time check: the array size below becomes -1 (an error) if the name count mismatches */
+_all_tokennames_added_[
+       ((TOKEN_FATAL - TOKEN_START + 1) ==
+        (sizeof(_tokennames)/sizeof(_tokennames[0])))
+       ? 1 : -1];
+
+typedef struct { /* state of the lexer for one input file */
+       FILE   *file;
+       char   *name; /* stored as ctx.file of every token by lex_do */
+       size_t  line; /* current line number */
+       size_t  sline; /* line at the start of a token */
+
+       char    peek[256]; /* presumably the pushback buffer behind lex_ungetch - confirm */
+       size_t  peekpos;
+
+       token  *tok; /* current token; lex_do returns TOKEN_FATAL when this is NULL */
+
+       struct {
+           bool noops; /* when set, lex_do returns operator characters one at a time instead of TOKEN_OPERATOR */
+       } flags;
+} lex_file;
+
+MEM_VECTOR_PROTO(lex_file, char, token);
+
+lex_file* lex_open (const char *file);
+void      lex_close(lex_file   *lex);
+int       lex_do   (lex_file   *lex); /* lexes the next token and returns its type */
+
+/* Parser
+ *
+ */
+
+enum { /* values for oper_info.assoc */
+    ASSOC_LEFT,
+    ASSOC_RIGHT
+};
+
+#define OP_SUFFIX 1 /* values for oper_info.flags */
+#define OP_PREFIX 2
+
+typedef struct { /* one row of the operators table */
+    const char   *op; /* operator spelling as it appears in source */
+    unsigned int operands; /* number of operands popped by parser_sy_pop */
+    unsigned int id; /* packed character id built with the opid macros */
+    unsigned int assoc; /* ASSOC_LEFT or ASSOC_RIGHT */
+    unsigned int prec; /* precedence; presumably a larger value binds tighter - confirm */
+    unsigned int flags; /* 0, OP_SUFFIX or OP_PREFIX */
+} oper_info;
+
+/* Pack one to three operator characters into a single integer id, one byte
+ * per character. Arguments are parenthesized so that expressions passed as
+ * arguments keep their own grouping.
+ */
+#define opid1(a) (a)
+#define opid2(a,b) (((a)<<8)|(b))
+#define opid3(a,b,c) (((a)<<16)|((b)<<8)|(c))
+
+static const oper_info operators[] = { /* columns: op, operands, id, assoc, prec, flags */
+    { "++",  1, opid3('S','+','+'), ASSOC_LEFT,  16, OP_SUFFIX},
+    { "--",  1, opid3('S','-','-'), ASSOC_LEFT,  16, OP_SUFFIX},
+
+    { ".",   2, opid1('.'),         ASSOC_LEFT,  15, 0 },
+
+    { "!",   1, opid2('!', 'P'),    ASSOC_RIGHT, 14, 0 }, /* NOTE(review): no OP_PREFIX here, unlike unary + and - below - confirm intended */
+    { "~",   1, opid2('~', 'P'),    ASSOC_RIGHT, 14, 0 },
+    { "+",   1, opid2('+','P'),     ASSOC_RIGHT, 14, OP_PREFIX },
+    { "-",   1, opid2('-','P'),     ASSOC_RIGHT, 14, OP_PREFIX },
+    { "++",  1, opid3('+','+','P'), ASSOC_RIGHT, 14, OP_PREFIX },
+    { "--",  1, opid3('-','-','P'), ASSOC_RIGHT, 14, OP_PREFIX },
+/*  { "&",   1, opid2('&','P'),     ASSOC_RIGHT, 14, OP_PREFIX }, */
+
+    { "*",   2, opid1('*'),         ASSOC_LEFT,  13, 0 },
+    { "/",   2, opid1('/'),         ASSOC_LEFT,  13, 0 },
+    { "%",   2, opid1('%'),         ASSOC_LEFT,  13, 0 },
+
+    { "+",   2, opid1('+'),         ASSOC_LEFT,  12, 0 },
+    { "-",   2, opid1('-'),         ASSOC_LEFT,  12, 0 },
+
+    { "<<",  2, opid2('<','<'),     ASSOC_LEFT,  11, 0 },
+    { ">>",  2, opid2('>','>'),     ASSOC_LEFT,  11, 0 },
+
+    { "<",   2, opid1('<'),         ASSOC_LEFT,  10, 0 },
+    { ">",   2, opid1('>'),         ASSOC_LEFT,  10, 0 },
+    { "<=",  2, opid2('<','='),     ASSOC_LEFT,  10, 0 },
+    { ">=",  2, opid2('>','='),     ASSOC_LEFT,  10, 0 },
+
+    { "==",  2, opid2('=','='),     ASSOC_LEFT,  9,  0 },
+    { "!=",  2, opid2('!','='),     ASSOC_LEFT,  9,  0 },
+
+    { "&",   2, opid1('&'),         ASSOC_LEFT,  8,  0 },
+
+    { "^",   2, opid1('^'),         ASSOC_LEFT,  7,  0 },
+
+    { "|",   2, opid1('|'),         ASSOC_LEFT,  6,  0 },
+
+    { "&&",  2, opid2('&','&'),     ASSOC_LEFT,  5,  0 },
+
+    { "||",  2, opid2('|','|'),     ASSOC_LEFT,  4,  0 },
+
+    { "?",   3, opid2('?',':'),     ASSOC_RIGHT, 3,  0 }, /* the only three-operand entry */
+
+    { "=",   2, opid1('='),         ASSOC_RIGHT, 2,  0 },
+    { "+=",  2, opid2('+','='),     ASSOC_RIGHT, 2,  0 },
+    { "-=",  2, opid2('-','='),     ASSOC_RIGHT, 2,  0 },
+    { "*=",  2, opid2('*','='),     ASSOC_RIGHT, 2,  0 },
+    { "/=",  2, opid2('/','='),     ASSOC_RIGHT, 2,  0 },
+    { "%=",  2, opid2('%','='),     ASSOC_RIGHT, 2,  0 },
+    { ">>=", 2, opid3('>','>','='), ASSOC_RIGHT, 2,  0 },
+    { "<<=", 2, opid3('<','<','='), ASSOC_RIGHT, 2,  0 },
+    { "&=",  2, opid2('&','='),     ASSOC_RIGHT, 2,  0 },
+    { "^=",  2, opid2('^','='),     ASSOC_RIGHT, 2,  0 },
+    { "|=",  2, opid2('|','='),     ASSOC_RIGHT, 2,  0 },
+
+    { ",",   2, opid1(','),         ASSOC_LEFT,  1,  0 }
+};
+static const size_t operator_count = (sizeof(operators) / sizeof(operators[0])); /* number of rows in operators */
+
+typedef struct /* parser state declared by this header; parser.c in this change defines its own parser_t instead */
+{
+       lex_file *lex;
+       int      error;
+       lex_ctx  ctx;
+
+       token    *tokens;
+       token    *lastok;
+
+       token    *tok; /* current token */
+
+       MEM_VECTOR_MAKE(ast_value*, globals);
+} parse_file;
+
+MEM_VECTOR_PROTO(parse_file, ast_value*, globals);
+
+parse_file* parse_open(const char *file);
+void        parse_file_close(parse_file*);
+
+bool        parse(parse_file*);
+
+bool        parse_iskey(parse_file *self, const char *ident);
+
+void lexerror(lex_file*, const char *fmt, ...); /* printf-style; lex_do calls it as lexerror(lex, "unknown token") */
+
+#endif
diff --git a/main.c b/main.c

index 6b751e3e03e7048788167dbd2bf4764c65e8505d..bef80723632b9176167b9cf3d4ee939aecca124e 100644 (file)
--- a/main.c
+++ b/main.c
@@ -308,10 +308,34 @@ static bool options_parse(int argc, char **argv) {
      return true;
  }
  
+/* Set or clear the flag bit number `idx` in the bitfield `flags`.
+ * Only flags[0] is touched for now; the disabled branch shows the intended
+ * multi-word form. The mask is built from an unsigned 32 bit one, because
+ * shifting a signed int into its sign bit is undefined behaviour.
+ */
+static void options_set(uint32_t *flags, size_t idx, bool on)
+{
+    longbit lb = LONGBIT(idx);
+#if 0
+    if (on)
+        flags[lb.idx] |= ((uint32_t)1<<(lb.bit));
+    else
+        flags[lb.idx] &= ~((uint32_t)1<<(lb.bit));
+#else
+    if (on)
+        flags[0] |= ((uint32_t)1<<(lb));
+    else
+        flags[0] &= ~((uint32_t)1<<(lb));
+#endif
+}
+
+bool parser_init(); /* called once by main before any file is compiled */
+bool parser_compile(const char *filename); /* called by main for each input item */
+bool parser_finish(const char *output); /* called by main with opts_output after all items compiled */
+void parser_cleanup(); /* called by main on the cleanup path */
+
  int main(int argc, char **argv) {
      size_t itr;
      app_name = argv[0];
  
+    /* default options / warn flags */
+    options_set(opts_warn, WARN_UNKNOWN_CONTROL_SEQUENCE, true);
+
      if (!options_parse(argc, argv)) {
          return usage();
      }
@@ -326,6 +350,13 @@ int main(int argc, char **argv) {
      printf("optimization level = %i\n", (int)opts_O);
      printf("standard = %i\n", opts_standard);
  
+    if (!parser_init()) {
+        printf("failed to initialize parser\n");
+        goto cleanup;
+    }
+
+    util_debug("COM", "starting ...\n");
+
      if (items_elements) {
          printf("Mode: manual\n");
          printf("There are %lu items to compile:\n", (unsigned long)items_elements);
@@ -336,17 +367,23 @@ int main(int argc, char **argv) {
                       (items_data[itr].type == TYPE_ASM ? "asm" :
                       (items_data[itr].type == TYPE_SRC ? "progs.src" :
                       ("unknown"))))));
+
+            if (!parser_compile(items_data[itr].filename))
+                goto cleanup;
          }
+
+        parser_finish(opts_output);
      } else {
-        printf("Mode: progs.src\n");
+        printf("Mode: progs.src - not implemented\n");
      }
  
-    util_debug("COM", "starting ...\n");
-
      /* stuff */
  
+cleanup:
      util_debug("COM", "cleaning ...\n");
  
+    parser_cleanup();
+
      util_meminfo();
      return 0;
  }
diff --git a/parser.c b/parser.c

new file mode 100644 (file)

index 0000000..bb85edf
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,1192 @@
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "gmqcc.h"
+#include "lexer.h"
+
+typedef struct { /* state of the parser */
+    lex_file *lex; /* lexer supplying the tokens */
+    int      tok; /* type of the current token, as returned by lex_do */
+
+    MEM_VECTOR_MAKE(ast_value*, globals); /* searched by name in parser_find_global */
+    MEM_VECTOR_MAKE(ast_function*, functions);
+    MEM_VECTOR_MAKE(ast_value*, imm_float); /* immediates created by parser_const_float, reused on equal value */
+    MEM_VECTOR_MAKE(ast_value*, imm_string); /* same for parser_const_string */
+    MEM_VECTOR_MAKE(ast_value*, imm_vector); /* same for parser_const_vector */
+
+    ast_function *function; /* function whose parameters parser_find_local searches */
+    MEM_VECTOR_MAKE(ast_value*, locals); /* searched newest-first by parser_find_local */
+    size_t blocklocal; /* presumably the index in locals where the current block starts - confirm */
+
+    size_t errors; /* incremented by parseerror */
+} parser_t;
+
+MEM_VEC_FUNCTIONS(parser_t, ast_value*, globals)
+MEM_VEC_FUNCTIONS(parser_t, ast_value*, imm_float)
+MEM_VEC_FUNCTIONS(parser_t, ast_value*, imm_string)
+MEM_VEC_FUNCTIONS(parser_t, ast_value*, imm_vector)
+MEM_VEC_FUNCTIONS(parser_t, ast_value*, locals)
+MEM_VEC_FUNCTIONS(parser_t, ast_function*, functions)
+
+void parseerror(parser_t *parser, const char *fmt, ...)
+{
+       va_list ap;
+
+       parser->errors++;
+
+    if (parser)
+           printf("error %s:%lu: ", parser->lex->tok->ctx.file, (unsigned long)parser->lex->tok->ctx.line);
+       else
+           printf("error: ");
+
+       va_start(ap, fmt);
+       vprintf(fmt, ap);
+       va_end(ap);
+
+       printf("\n");
+}
+
+/* Advance to the next token and remember its type in parser->tok.
+ * Returns false on end of file and on any error token, true otherwise.
+ */
+bool parser_next(parser_t *parser)
+{
+    /* lex_do kills the previous token */
+    int kind = lex_do(parser->lex);
+
+    parser->tok = kind;
+    return (kind != TOKEN_EOF && kind < TOKEN_ERROR);
+}
+
+/* Detach the current token from the lexer and hand it to the caller, so
+ * that the following parser_next does not destroy it. The lexer's current
+ * token is NULL afterwards.
+ */
+token *parser_lift(parser_t *parser)
+{
+    lex_file *lex    = parser->lex;
+    token    *lifted = lex->tok;
+
+    lex->tok = NULL;
+    return lifted;
+}
+
+/* Accessors for the lexer's current token; `p` is a parser_t pointer
+ * expression. The argument is parenthesized so any pointer expression works.
+ */
+#define parser_tokval(p) ((p)->lex->tok->value)
+#define parser_token(p)  ((p)->lex->tok)
+#define parser_ctx(p)    ((p)->lex->tok->ctx)
+
+/* Return the immediate float constant with value `d`, creating and
+ * registering it in parser->imm_float if no equal one exists yet.
+ * Returns NULL when the value cannot be allocated or registered.
+ */
+ast_value* parser_const_float(parser_t *parser, double d)
+{
+    size_t i;
+    ast_value *out;
+    for (i = 0; i < parser->imm_float_count; ++i) {
+        if (parser->imm_float[i]->constval.vfloat == d)
+            return parser->imm_float[i];
+    }
+    out = ast_value_new(parser_ctx(parser), "#IMMEDIATE", TYPE_FLOAT);
+    if (!out) /* allocation can fail, see the check in parser_parse_type */
+        return NULL;
+    out->isconst = true;
+    out->constval.vfloat = d;
+    if (!parser_t_imm_float_add(parser, out)) {
+        ast_value_delete(out);
+        return NULL;
+    }
+    return out;
+}
+
+/* Return the immediate string constant equal to `str`, creating and
+ * registering it in parser->imm_string if no equal one exists yet. The new
+ * value stores its own copy of `str`.
+ * Returns NULL when the value cannot be allocated or registered.
+ */
+ast_value* parser_const_string(parser_t *parser, const char *str)
+{
+    size_t i;
+    ast_value *out;
+    for (i = 0; i < parser->imm_string_count; ++i) {
+        if (!strcmp(parser->imm_string[i]->constval.vstring, str))
+            return parser->imm_string[i];
+    }
+    out = ast_value_new(parser_ctx(parser), "#IMMEDIATE", TYPE_STRING);
+    if (!out) /* allocation can fail, see the check in parser_parse_type */
+        return NULL;
+    out->isconst = true;
+    out->constval.vstring = util_strdup(str);
+    if (!parser_t_imm_string_add(parser, out)) {
+        ast_value_delete(out);
+        return NULL;
+    }
+    return out;
+}
+
+/* Return the immediate vector constant equal to `v` (bytewise comparison),
+ * creating and registering it in parser->imm_vector if none exists yet.
+ * Returns NULL when the value cannot be allocated or registered.
+ */
+ast_value* parser_const_vector(parser_t *parser, vector v)
+{
+    size_t i;
+    ast_value *out;
+    for (i = 0; i < parser->imm_vector_count; ++i) {
+        if (!memcmp(&parser->imm_vector[i]->constval.vvec, &v, sizeof(v)))
+            return parser->imm_vector[i];
+    }
+    out = ast_value_new(parser_ctx(parser), "#IMMEDIATE", TYPE_VECTOR);
+    if (!out) /* allocation can fail, see the check in parser_parse_type */
+        return NULL;
+    out->isconst = true;
+    out->constval.vvec = v;
+    if (!parser_t_imm_vector_add(parser, out)) {
+        ast_value_delete(out);
+        return NULL;
+    }
+    return out;
+}
+
+/* Look up a global by exact name; returns NULL when there is none. */
+ast_value* parser_find_global(parser_t *parser, const char *name)
+{
+    size_t idx = 0;
+
+    while (idx < parser->globals_count) {
+        ast_value *candidate = parser->globals[idx];
+        if (strcmp(candidate->name, name) == 0)
+            return candidate;
+        ++idx;
+    }
+    return NULL;
+}
+
+/* Look up a local variable or function parameter by exact name.
+ *
+ * Locals with index >= `upto` are searched from the newest to the oldest,
+ * so later declarations win. If none matches, the parameters of the
+ * current function are searched. Returns NULL when nothing matches or when
+ * there is no current function to take parameters from.
+ */
+ast_value* parser_find_local(parser_t *parser, const char *name, size_t upto)
+{
+    size_t i;
+    ast_value *fun;
+    for (i = parser->locals_count; i > upto;) {
+        --i;
+        if (!strcmp(parser->locals[i]->name, name))
+            return parser->locals[i];
+    }
+    /* no current function means there are no parameters to search */
+    if (!parser->function)
+        return NULL;
+    fun = parser->function->vtype;
+    for (i = 0; i < fun->expression.params_count; ++i) {
+        if (!strcmp(fun->expression.params[i]->name, name))
+            return fun->expression.params[i];
+    }
+    return NULL;
+}
+
+/* Resolve a name: locals and parameters first, then globals.
+ * Returns NULL when the name is unknown.
+ */
+ast_value* parser_find_var(parser_t *parser, const char *name)
+{
+    ast_value *local = parser_find_local(parser, name, 0);
+
+    if (local)
+        return local;
+    return parser_find_global(parser, name);
+}
+
+typedef struct { /* temporary parameter list filled by parser_parse_type */
+    MEM_VECTOR_MAKE(ast_value*, p); /* moved into the resulting value's expression.params on success */
+} paramlist_t;
+MEM_VEC_FUNCTIONS(paramlist_t, ast_value*, p)
+
+/* Parse the remainder of a type after its base typename.
+ *
+ * `basetype` is the TYPE_* id of the typename already consumed. If the
+ * current token is '(', a parameter list is parsed: each parameter is a
+ * typename, recursively an optional parameter list of its own, and an
+ * optional name. `*isfunc` is set to true in that case, false otherwise.
+ *
+ * Returns a new value named "<unnamed>" of type `basetype` that owns the
+ * parsed parameters, or NULL on error; on error every parameter parsed so
+ * far is deleted, including one not yet added to the list.
+ */
+static ast_value *parser_parse_type(parser_t *parser, int basetype, bool *isfunc)
+{
+    paramlist_t params;
+    ast_value *var;
+    ast_value *param = NULL; /* parameter being built, not yet owned by params */
+    lex_ctx   ctx = parser_ctx(parser);
+    int vtype = basetype;
+    int temptype;
+    size_t i;
+
+    MEM_VECTOR_INIT(&params, p);
+
+    *isfunc = false;
+
+    if (parser->tok == '(') {
+        *isfunc = true;
+        while (true) {
+            bool dummy;
+
+            if (!parser_next(parser))
+                goto on_error;
+
+            if (parser->tok == ')')
+                break;
+
+            /* constval.t is only meaningful for typename tokens */
+            if (parser->tok != TOKEN_TYPENAME) {
+                parseerror(parser, "Unexpected token, expected a typename");
+                goto on_error;
+            }
+
+            temptype = parser_token(parser)->constval.t;
+            if (!parser_next(parser))
+                goto on_error;
+
+            param = parser_parse_type(parser, temptype, &dummy);
+            (void)dummy;
+
+            if (!param)
+                goto on_error;
+
+            if (parser->tok == TOKEN_IDENT) {
+                /* named parameter */
+                if (!ast_value_set_name(param, parser_tokval(parser)))
+                    goto on_error;
+                if (!parser_next(parser))
+                    goto on_error;
+            }
+
+            if (!paramlist_t_p_add(&params, param)) {
+                parseerror(parser, "Out of memory while parsing typename");
+                goto on_error;
+            }
+            param = NULL; /* ownership moved to params */
+
+            if (parser->tok == ',')
+                continue;
+            if (parser->tok == ')')
+                break;
+            parseerror(parser, "Unexpected token");
+            goto on_error;
+        }
+        if (!parser_next(parser))
+            goto on_error;
+    }
+
+    var = ast_value_new(ctx, "<unnamed>", vtype);
+    if (!var)
+        goto on_error;
+    MEM_VECTOR_MOVE(&params, p, &var->expression, params);
+    return var;
+on_error:
+    if (param)
+        ast_value_delete(param);
+    for (i = 0; i < params.p_count; ++i)
+        ast_value_delete(params.p[i]);
+    MEM_VECTOR_CLEAR(&params, p);
+    return NULL;
+}
+
+typedef struct /* one entry of the shunting-yard output or operator stack */
+{
+    size_t etype; /* 0 = expression, others are operators */
+    int             paren; /* 0 = no paren, 1 = plain '(' marker, 'f' = function call marker */
+    size_t          off; /* for 'f' markers: index in the out stack of the called function */
+    ast_expression *out; /* the operand expression, NULL for operators and parens */
+    ast_block      *block; /* for commas and function calls */
+    lex_ctx ctx; /* source position of the element */
+} sy_elem;
+typedef struct /* the two stacks of the expression parser */
+{
+    MEM_VECTOR_MAKE(sy_elem, out); /* operands and intermediate results */
+    MEM_VECTOR_MAKE(sy_elem, ops); /* pending operators and paren markers */
+} shunt;
+MEM_VEC_FUNCTIONS(shunt, sy_elem, out)
+MEM_VEC_FUNCTIONS(shunt, sy_elem, ops)
+
+/* Build an operand element wrapping the expression `v`.
+ * Every field is set, so the returned copy holds no indeterminate value.
+ */
+static sy_elem syexp(lex_ctx ctx, ast_expression *v) {
+    sy_elem e;
+    e.etype = 0;
+    e.off   = 0;
+    e.out   = v;
+    e.block = NULL;
+    e.ctx   = ctx;
+    e.paren = 0;
+    return e;
+}
+
+/* Build an operand element wrapping the block `v`; the block is stored both
+ * as the expression and as the block of the element.
+ * Every field is set, so the returned copy holds no indeterminate value.
+ */
+static sy_elem syblock(lex_ctx ctx, ast_block *v) {
+    sy_elem e;
+    e.etype = 0;
+    e.off   = 0;
+    e.out   = (ast_expression*)v;
+    e.block = v;
+    e.ctx   = ctx;
+    e.paren = 0;
+    return e;
+}
+
+/* Build an operator element for `op`, which must point into the operators
+ * table: etype is its index plus one, as 0 is reserved for expressions.
+ * Every field is set, so the returned copy holds no indeterminate value.
+ */
+static sy_elem syop(lex_ctx ctx, const oper_info *op) {
+    sy_elem e;
+    e.etype = 1 + (op - operators);
+    e.off   = 0;
+    e.out   = NULL;
+    e.block = NULL;
+    e.ctx   = ctx;
+    e.paren = 0;
+    return e;
+}
+
+/* Build a parenthesis marker of kind `p` that remembers the offset `off`. */
+static sy_elem syparen(lex_ctx ctx, int p, size_t off) {
+    sy_elem marker;
+
+    marker.ctx   = ctx;
+    marker.paren = p;
+    marker.off   = off;
+    marker.etype = 0;
+    marker.out   = NULL;
+    marker.block = NULL;
+    return marker;
+}
+
+/* Apply the topmost operator of the shunting-yard operator stack.
+ *
+ * Pops the operator from sy->ops and its operands from sy->out, builds the
+ * AST node for it and pushes the result back onto sy->out.
+ *
+ * Returns false after reporting an error when the stack is empty, the top
+ * is a parenthesis marker, there are too few operands, the operand types do
+ * not fit the operator, or the node cannot be created.
+ */
+static bool parser_sy_pop(parser_t *parser, shunt *sy)
+{
+    const oper_info *op;
+    lex_ctx ctx;
+    ast_expression *out = NULL;
+    ast_expression *exprs[3];
+    ast_block      *blocks[3];
+    size_t i;
+
+    if (!sy->ops_count) {
+        parseerror(parser, "internal error: missing operator");
+        return false;
+    }
+
+    if (sy->ops[sy->ops_count-1].paren) {
+        parseerror(parser, "unmatched parenthesis");
+        return false;
+    }
+
+    /* etype is the operator's table index plus one */
+    op = &operators[sy->ops[sy->ops_count-1].etype - 1];
+    ctx = sy->ops[sy->ops_count-1].ctx;
+
+    if (sy->out_count < op->operands) {
+        /* cast: the count is not an int, but %i requires one */
+        parseerror(parser, "internal error: not enough operands: %i", (int)sy->out_count);
+        return false;
+    }
+
+    sy->ops_count--;
+
+    sy->out_count -= op->operands;
+    for (i = 0; i < op->operands; ++i) {
+        exprs[i]  = sy->out[sy->out_count+i].out;
+        blocks[i] = sy->out[sy->out_count+i].block;
+    }
+
+    if (blocks[0] && !blocks[0]->exprs_count && op->id != opid1(',')) {
+        parseerror(parser, "internal error: operator cannot be applied on empty blocks");
+        return false;
+    }
+
+    switch (op->id)
+    {
+        default:
+            parseerror(parser, "internal error: unhandled operand");
+            return false;
+
+        case opid1(','):
+            /* a comma either extends an existing block or starts a new one */
+            if (blocks[0]) {
+                if (!ast_block_exprs_add(blocks[0], exprs[1]))
+                    return false;
+            } else {
+                blocks[0] = ast_block_new(ctx);
+                if (!blocks[0]) {
+                    parseerror(parser, "out of memory");
+                    return false;
+                }
+                if (!ast_block_exprs_add(blocks[0], exprs[0]) ||
+                    !ast_block_exprs_add(blocks[0], exprs[1]))
+                {
+                    return false;
+                }
+            }
+            if (!ast_block_set_type(blocks[0], exprs[1]))
+                return false;
+
+            sy->out[sy->out_count++] = syblock(ctx, blocks[0]);
+            return true;
+
+        case opid1('+'):
+            if (exprs[0]->expression.vtype != exprs[1]->expression.vtype) {
+                parseerror(parser, "Cannot add type %s and %s",
+                           type_name[exprs[0]->expression.vtype],
+                           type_name[exprs[1]->expression.vtype]);
+                return false;
+            }
+            switch (exprs[0]->expression.vtype) {
+                case TYPE_FLOAT:
+                    out = (ast_expression*)ast_binary_new(ctx, INSTR_ADD_F, exprs[0], exprs[1]);
+                    break;
+                case TYPE_VECTOR:
+                    out = (ast_expression*)ast_binary_new(ctx, INSTR_ADD_V, exprs[0], exprs[1]);
+                    break;
+                default:
+                    parseerror(parser, "Cannot add type %s and %s",
+                               type_name[exprs[0]->expression.vtype],
+                               type_name[exprs[1]->expression.vtype]);
+                    return false;
+            }
+            break;
+        case opid1('-'):
+            if (exprs[0]->expression.vtype != exprs[1]->expression.vtype) {
+                parseerror(parser, "Cannot subtract type %s from %s",
+                           type_name[exprs[1]->expression.vtype],
+                           type_name[exprs[0]->expression.vtype]);
+                return false;
+            }
+            switch (exprs[0]->expression.vtype) {
+                case TYPE_FLOAT:
+                    out = (ast_expression*)ast_binary_new(ctx, INSTR_SUB_F, exprs[0], exprs[1]);
+                    break;
+                case TYPE_VECTOR:
+                    out = (ast_expression*)ast_binary_new(ctx, INSTR_SUB_V, exprs[0], exprs[1]);
+                    break;
+                default:
+                    parseerror(parser, "Cannot subtract type %s from %s",
+                               type_name[exprs[1]->expression.vtype],
+                               type_name[exprs[0]->expression.vtype]);
+                    return false;
+            }
+            break;
+        case opid1('*'):
+            /* each operand must be a float or a vector; the types may differ */
+            if ((exprs[0]->expression.vtype != TYPE_VECTOR &&
+                 exprs[0]->expression.vtype != TYPE_FLOAT) ||
+                (exprs[1]->expression.vtype != TYPE_VECTOR &&
+                 exprs[1]->expression.vtype != TYPE_FLOAT))
+            {
+                parseerror(parser, "Cannot multiply types %s and %s",
+                           type_name[exprs[0]->expression.vtype],
+                           type_name[exprs[1]->expression.vtype]);
+                return false;
+            }
+            switch (exprs[0]->expression.vtype) {
+                case TYPE_FLOAT:
+                    if (exprs[1]->expression.vtype == TYPE_VECTOR)
+                        out = (ast_expression*)ast_binary_new(ctx, INSTR_MUL_FV, exprs[0], exprs[1]);
+                    else
+                        out = (ast_expression*)ast_binary_new(ctx, INSTR_MUL_F, exprs[0], exprs[1]);
+                    break;
+                case TYPE_VECTOR:
+                    if (exprs[1]->expression.vtype == TYPE_FLOAT)
+                        out = (ast_expression*)ast_binary_new(ctx, INSTR_MUL_VF, exprs[0], exprs[1]);
+                    else
+                        out = (ast_expression*)ast_binary_new(ctx, INSTR_MUL_V, exprs[0], exprs[1]);
+                    break;
+                default:
+                    /* not reachable after the check above, kept as a safety net */
+                    parseerror(parser, "Cannot multiply types %s and %s",
+                               type_name[exprs[0]->expression.vtype],
+                               type_name[exprs[1]->expression.vtype]);
+                    return false;
+            }
+            break;
+        case opid1('/'):
+            if (exprs[0]->expression.vtype != exprs[1]->expression.vtype ||
+                exprs[0]->expression.vtype != TYPE_FLOAT)
+            {
+                parseerror(parser, "Cannot divide types %s and %s",
+                           type_name[exprs[0]->expression.vtype],
+                           type_name[exprs[1]->expression.vtype]);
+                return false;
+            }
+            out = (ast_expression*)ast_binary_new(ctx, INSTR_DIV_F, exprs[0], exprs[1]);
+            break;
+
+
+        case opid1('='):
+            /* NOTE(review): operand types are not compared here - confirm intended */
+            out = (ast_expression*)ast_store_new(ctx,
+                                                 type_store_instr[exprs[0]->expression.vtype],
+                                                 exprs[0], exprs[1]);
+            break;
+    }
+
+    if (!out) {
+        parseerror(parser, "failed to apply operand %s", op->op);
+        return false;
+    }
+
+    sy->out[sy->out_count++] = syexp(ctx, out);
+    return true;
+}
+
+/* Finish a function call when its closing parenthesis is reached.
+ *
+ * Pops the call marker from sy->ops; its `off` is the index in sy->out of
+ * the called expression. Whatever lies above that index is the argument
+ * list: nothing, one expression, or one block holding several expressions.
+ * The function entry in sy->out is replaced by the new call node.
+ *
+ * Returns false after reporting an error when the stack layout is wrong,
+ * memory runs out, the callee is not a function, its return type is
+ * unknown, or the argument count does not match.
+ */
+static bool parser_close_call(parser_t *parser, shunt *sy)
+{
+    /* was a function call */
+    ast_expression *fun;
+    ast_call       *call;
+
+    size_t          fid;
+    size_t          paramcount;
+
+    sy->ops_count--;
+    fid = sy->ops[sy->ops_count].off;
+
+    /* out[fid] is the function
+     * everything above is parameters...
+     * 0 params = nothing
+     * 1 params = ast_expression
+     * more = ast_block
+     */
+
+    if (sy->out_count < 1 || sy->out_count <= fid) {
+        parseerror(parser, "internal error: function call needs function and parameter list...");
+        return false;
+    }
+
+    fun = sy->out[fid].out;
+
+    call = ast_call_new(sy->ops[sy->ops_count].ctx, fun);
+    if (!call) {
+        parseerror(parser, "out of memory");
+        return false;
+    }
+
+    if (fid+1 == sy->out_count) {
+        /* no arguments */
+        paramcount = 0;
+    } else if (fid+2 == sy->out_count) {
+        ast_block *params;
+        sy->out_count--;
+        params = sy->out[sy->out_count].block;
+        if (!params) {
+            /* 1 param */
+            paramcount = 1;
+            if (!ast_call_params_add(call, sy->out[sy->out_count].out)) {
+                ast_delete(sy->out[sy->out_count].out);
+                parseerror(parser, "out of memory");
+                return false;
+            }
+        } else {
+            /* several arguments: take over the block's expressions */
+            paramcount = params->exprs_count;
+            MEM_VECTOR_MOVE(params, exprs, call, params);
+            ast_delete(params);
+        }
+    } else {
+        parseerror(parser, "invalid function call");
+        return false;
+    }
+
+    /* overwrite fid, the function, with a call */
+    sy->out[fid] = syexp(call->expression.node.context, (ast_expression*)call);
+
+    if (fun->expression.vtype != TYPE_FUNCTION) {
+        parseerror(parser, "not a function");
+        return false;
+    }
+
+    if (!fun->expression.next) {
+        parseerror(parser, "could not determine function return type");
+        return false;
+    } else {
+        if (fun->expression.params_count != paramcount) {
+            /* both counts are cast: %i requires an int argument */
+            parseerror(parser, "expected %i parameters, got %i", (int)fun->expression.params_count, (int)paramcount);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Handle a closing parenthesis: apply pending operators until the matching
+ * marker is found.
+ *
+ * A call marker ('f') is closed via parser_close_call and its result is
+ * returned. A plain marker (1) is popped; that counts as success only when
+ * `functions_only` is false. An empty operator stack, or a plain marker
+ * directly on top of it, is reported as an error.
+ */
+static bool parser_close_paren(parser_t *parser, shunt *sy, bool functions_only)
+{
+    if (sy->ops_count == 0) {
+        parseerror(parser, "unmatched closing paren");
+        return false;
+    }
+    if (sy->ops[sy->ops_count-1].paren == 1) {
+        parseerror(parser, "empty parenthesis expression");
+        return false;
+    }
+    while (sy->ops_count > 0) {
+        int marker = sy->ops[sy->ops_count-1].paren;
+
+        if (marker == 'f')
+            return parser_close_call(parser, sy);
+
+        if (marker == 1) {
+            sy->ops_count--;
+            return !functions_only;
+        }
+
+        if (!parser_sy_pop(parser, sy))
+            return false;
+    }
+    return true;
+}
+
+/* Parse one expression, starting at the current token.
+ *
+ * Operands are collected in sy.out, pending operators and parenthesis
+ * markers in sy.ops (shunting-yard style). `wantop` records whether the
+ * next token is expected to be an operator; its negation is mirrored into
+ * the lexer's `noops` flag.
+ *
+ * The loop stops at a ';' token or at an operator token that is not a
+ * known infix operator; one more token is then consumed before the
+ * remaining operators are applied.
+ *
+ * Returns the resulting expression (sy.out[0].out), or NULL on error. Both
+ * work vectors are released and `noops` is set on every exit path.
+ */
+static ast_expression* parser_expression(parser_t *parser)
+{
+    ast_expression *expr = NULL;
+    shunt sy;
+    bool wantop = false;
+
+    MEM_VECTOR_INIT(&sy, out);
+    MEM_VECTOR_INIT(&sy, ops);
+
+    while (true)
+    {
+        if (!wantop)
+        {
+            bool nextwant = true;
+            if (parser->tok == TOKEN_IDENT)
+            {
+                /* variable */
+                ast_value *var = parser_find_var(parser, parser_tokval(parser));
+                if (!var) {
+                    parseerror(parser, "unexpected ident: %s", parser_tokval(parser));
+                    goto onerr;
+                }
+                if (!shunt_out_add(&sy, syexp(parser_ctx(parser), (ast_expression*)var))) {
+                    parseerror(parser, "out of memory");
+                    goto onerr;
+                }
+            }
+            else if (parser->tok == TOKEN_FLOATCONST) {
+                ast_value *val = parser_const_float(parser, (parser_token(parser)->constval.f));
+                /* leave through onerr so the shunt vectors are released */
+                if (!val)
+                    goto onerr;
+                if (!shunt_out_add(&sy, syexp(parser_ctx(parser), (ast_expression*)val))) {
+                    parseerror(parser, "out of memory");
+                    goto onerr;
+                }
+            }
+            else if (parser->tok == TOKEN_INTCONST) {
+                /* integer constants are stored as float constants */
+                ast_value *val = parser_const_float(parser, (double)(parser_token(parser)->constval.i));
+                if (!val)
+                    goto onerr;
+                if (!shunt_out_add(&sy, syexp(parser_ctx(parser), (ast_expression*)val))) {
+                    parseerror(parser, "out of memory");
+                    goto onerr;
+                }
+            }
+            else if (parser->tok == TOKEN_STRINGCONST) {
+                ast_value *val = parser_const_string(parser, parser_tokval(parser));
+                if (!val)
+                    goto onerr;
+                if (!shunt_out_add(&sy, syexp(parser_ctx(parser), (ast_expression*)val))) {
+                    parseerror(parser, "out of memory");
+                    goto onerr;
+                }
+            }
+            else if (parser->tok == TOKEN_VECTORCONST) {
+                ast_value *val = parser_const_vector(parser, parser_token(parser)->constval.v);
+                if (!val)
+                    goto onerr;
+                if (!shunt_out_add(&sy, syexp(parser_ctx(parser), (ast_expression*)val))) {
+                    parseerror(parser, "out of memory");
+                    goto onerr;
+                }
+            }
+            else if (parser->tok == '(') {
+                nextwant = false; /* not expecting an operator next */
+                if (!shunt_ops_add(&sy, syparen(parser_ctx(parser), 1, 0))) {
+                    parseerror(parser, "out of memory");
+                    goto onerr;
+                }
+            }
+            else if (parser->tok == ')') {
+                /* allowed for function calls */
+                if (!parser_close_paren(parser, &sy, true))
+                    goto onerr;
+            }
+            else {
+                /* TODO: prefix operators */
+                parseerror(parser, "expected statement");
+                goto onerr;
+            }
+            wantop = nextwant;
+            parser->lex->flags.noops = !wantop;
+        } else {
+            if (parser->tok == '(') {
+                /* we expected an operator, this is the function-call operator */
+                if (!shunt_ops_add(&sy, syparen(parser_ctx(parser), 'f', sy.out_count-1))) {
+                    parseerror(parser, "out of memory");
+                    goto onerr;
+                }
+            }
+            else if (parser->tok == ')') {
+                /* we do expect an operator next */
+                /* closing an opening paren */
+                if (!parser_close_paren(parser, &sy, false))
+                    goto onerr;
+            }
+            else if (parser->tok != TOKEN_OPERATOR) {
+                parseerror(parser, "expected operator or end of statement");
+                goto onerr;
+            }
+            else {
+                /* classify the operator */
+                /* TODO: suffix operators */
+                const oper_info *op;
+                const oper_info *olast = NULL;
+                size_t o;
+                for (o = 0; o < operator_count; ++o) {
+                    if (!(operators[o].flags & OP_PREFIX) &&
+                        !(operators[o].flags & OP_SUFFIX) && /* remove this */
+                        !strcmp(parser_tokval(parser), operators[o].op))
+                    {
+                        break;
+                    }
+                }
+                if (o == operator_count) {
+                    /* no operator found... must be the end of the statement */
+                    break;
+                }
+                /* found an operator */
+                op = &operators[o];
+
+                if (sy.ops_count && !sy.ops[sy.ops_count-1].paren)
+                    olast = &operators[sy.ops[sy.ops_count-1].etype-1];
+
+                /* apply every stacked operator that binds at least as
+                 * tightly as the new one before pushing it
+                 */
+                while (olast && (
+                        (op->prec < olast->prec) ||
+                        (op->assoc == ASSOC_LEFT && op->prec <= olast->prec) ) )
+                {
+                    if (!parser_sy_pop(parser, &sy))
+                        goto onerr;
+                    /* re-read the stack top; stop once the stack is empty
+                     * or a parenthesis marker is on top, instead of
+                     * comparing against the operator that was just popped
+                     */
+                    if (sy.ops_count && !sy.ops[sy.ops_count-1].paren)
+                        olast = &operators[sy.ops[sy.ops_count-1].etype-1];
+                    else
+                        olast = NULL;
+                }
+
+                if (!shunt_ops_add(&sy, syop(parser_ctx(parser), op)))
+                    goto onerr;
+            }
+            wantop = false;
+            parser->lex->flags.noops = true;
+        }
+        if (!parser_next(parser)) {
+            goto onerr;
+        }
+        if (parser->tok == ';') {
+            break;
+        }
+    }
+    if (!parser_next(parser)) {
+        parseerror(parser, "Unexpected end of file");
+        goto onerr;
+    }
+
+    /* apply whatever operators are still pending */
+    while (sy.ops_count) {
+        if (!parser_sy_pop(parser, &sy))
+            goto onerr;
+    }
+
+    parser->lex->flags.noops = true;
+    if (!sy.out_count) {
+        parseerror(parser, "empty expression");
+        expr = NULL;
+    } else
+        expr = sy.out[0].out;
+    MEM_VECTOR_CLEAR(&sy, out);
+    MEM_VECTOR_CLEAR(&sy, ops);
+    return expr;
+
+onerr:
+    parser->lex->flags.noops = true;
+    MEM_VECTOR_CLEAR(&sy, out);
+    MEM_VECTOR_CLEAR(&sy, ops);
+    return NULL;
+}
+
+static bool parser_variable(parser_t *parser, ast_block *localblock);
+
+/* Parse one statement inside a function body and append it to `block`.
+ *
+ * Handled forms:
+ *   - a type name: local variable declaration via parser_variable();
+ *   - the keyword "return", with or without a value;
+ *   - anything else except '{': an expression statement.
+ * Inner blocks ('{') and other keywords are reported as errors.
+ *
+ * Returns true if parsing may continue, false on a fatal error.
+ */
+static bool parser_body_do(parser_t *parser, ast_block *block)
+{
+    if (parser->tok == TOKEN_TYPENAME)
+    {
+        /* local variable */
+        if (!parser_variable(parser, block))
+            return false;
+        return true;
+    }
+    else if (parser->tok == TOKEN_KEYWORD)
+    {
+        if (!strcmp(parser_tokval(parser), "return"))
+        {
+            ast_expression *exp = NULL;
+            ast_return     *ret = NULL;
+            ast_value      *expected = parser->function->vtype;
+
+            if (!parser_next(parser)) {
+                parseerror(parser, "expected return expression");
+                return false;
+            }
+
+            if (parser->tok != ';') {
+                exp = parser_expression(parser);
+                if (!exp)
+                    return false;
+
+                /* the value's type has to match the function's return type */
+                if (exp->expression.vtype != expected->expression.next->expression.vtype) {
+                    parseerror(parser, "return with invalid expression");
+                }
+
+                ret = ast_return_new(exp->expression.node.context, exp);
+                if (!ret) {
+                    ast_delete(exp);
+                    return false;
+                }
+
+                if (!ast_block_exprs_add(block, (ast_expression*)ret)) {
+                    ast_delete(ret);
+                    return false;
+                }
+            } else {
+                /* bare `return;` is only valid when the function returns
+                 * void; check this on every bare return, not only when
+                 * advancing past the ';' fails
+                 */
+                if (expected->expression.next->expression.vtype != TYPE_VOID) {
+                    parseerror(parser, "return without value");
+                }
+                if (!parser_next(parser)) {
+                    parseerror(parser, "expected semicolon");
+                }
+                /* NOTE(review): no ast_return node is added for a bare
+                 * return - confirm whether code generation needs one.
+                 */
+            }
+            return true;
+        }
+        parseerror(parser, "Unexpected keyword");
+        return false;
+    }
+    else if (parser->tok == '{')
+    {
+        /* a block */
+        parseerror(parser, "TODO: inner blocks: %s", parser_tokval(parser));
+        return false;
+    }
+    else
+    {
+        /* expression statement */
+        ast_expression *exp = parser_expression(parser);
+        if (!exp)
+            return false;
+        if (!ast_block_exprs_add(block, exp)) {
+            ast_delete(exp);
+            return false;
+        }
+        return true;
+    }
+}
+
+/* Parse a '{ ... }' block; the current token must be the opening '{'.
+ *
+ * While the block is parsed, parser->blocklocal is set to the current
+ * number of locals; the previous value is restored before returning.
+ *
+ * Returns the new block with the token after '}' already fetched, or NULL
+ * if a statement fails to parse, the closing '}' is missing, or the block
+ * cannot be allocated.
+ */
+static ast_block* parser_parse_block(parser_t *parser)
+{
+    size_t oldblocklocal;
+    ast_block *block = NULL;
+
+    oldblocklocal = parser->blocklocal;
+    parser->blocklocal = parser->locals_count;
+
+    if (!parser_next(parser)) { /* skip the '{' */
+        parseerror(parser, "expected function body");
+        goto cleanup;
+    }
+
+    block = ast_block_new(parser_ctx(parser));
+    if (!block) {
+        /* do not hand a NULL block to parser_body_do() */
+        parseerror(parser, "out of memory");
+        goto cleanup;
+    }
+
+    while (parser->tok != TOKEN_EOF && parser->tok < TOKEN_ERROR)
+    {
+        if (parser->tok == '}')
+            break;
+
+        if (!parser_body_do(parser, block)) {
+            ast_block_delete(block);
+            block = NULL;
+            goto cleanup;
+        }
+    }
+
+    if (parser->tok != '}') {
+        /* input ended, or the lexer failed, before the block was closed */
+        ast_block_delete(block);
+        block = NULL;
+    } else {
+        (void)parser_next(parser);
+    }
+
+cleanup:
+    parser->blocklocal = oldblocklocal;
+    return block;
+}
+
+/* Parse a declaration that starts with a type name: one or more
+ * comma-separated variables or functions, optionally followed by
+ * '= #<number>' (builtin) or a '{ ... }' function body.
+ *
+ * localblock: NULL for a global declaration; otherwise the block the
+ *             declared locals are added to.
+ *
+ * Returns true on success, false on error.
+ */
+static bool parser_variable(parser_t *parser, ast_block *localblock)
+{
+    bool          isfunc = false;
+    ast_function *func = NULL;
+    lex_ctx       ctx;
+    ast_value    *var;
+
+    int basetype = parser_token(parser)->constval.t;
+
+    while (true)
+    {
+        if (!parser_next(parser)) { /* skip basetype or comma */
+            parseerror(parser, "expected variable declaration");
+            return false;
+        }
+
+        isfunc = false;
+        func = NULL;
+        ctx = parser_ctx(parser);
+        var = parser_parse_type(parser, basetype, &isfunc);
+
+        if (!var)
+            return false;
+
+        if (parser->tok != TOKEN_IDENT) {
+            /* var is not referenced anywhere else yet: release it */
+            ast_value_delete(var);
+            parseerror(parser, "expected variable name\n");
+            return false;
+        }
+
+        if (!localblock && parser_find_global(parser, parser_tokval(parser))) {
+            ast_value_delete(var);
+            parseerror(parser, "global already exists: %s\n", parser_tokval(parser));
+            return false;
+        }
+
+        if (localblock && parser_find_local(parser, parser_tokval(parser), parser->blocklocal)) {
+            ast_value_delete(var);
+            parseerror(parser, "local variable already exists: %s\n", parser_tokval(parser));
+            return false;
+        }
+
+        if (!ast_value_set_name(var, parser_tokval(parser))) {
+            parseerror(parser, "failed to set variable name\n");
+            ast_value_delete(var);
+            return false;
+        }
+
+        if (isfunc) {
+            /* a function was defined */
+            ast_value *fval;
+
+            /* turn var into a value of TYPE_FUNCTION, with the old var
+             * as return type
+             */
+            fval = ast_value_new(ctx, var->name, TYPE_FUNCTION);
+            func = ast_function_new(ctx, var->name, fval);
+            if (!fval || !func) {
+                ast_value_delete(var);
+                if (fval) ast_value_delete(fval);
+                if (func) ast_function_delete(func);
+                return false;
+            }
+
+            fval->expression.next = (ast_expression*)var;
+            MEM_VECTOR_MOVE(&var->expression, params, &fval->expression, params);
+
+            if (!parser_t_functions_add(parser, func)) {
+                ast_value_delete(var);
+                if (fval) ast_value_delete(fval);
+                if (func) ast_function_delete(func);
+                return false;
+            }
+
+            /* from here on the function value is what gets registered */
+            var = fval;
+        }
+
+        if ( (!localblock && !parser_t_globals_add(parser, var)) ||
+             ( localblock && !parser_t_locals_add(parser, var)) )
+        {
+            ast_value_delete(var);
+            return false;
+        }
+        if (localblock && !ast_block_locals_add(localblock, var))
+        {
+            /* undo the registration in the parser's locals */
+            parser->locals_count--;
+            ast_value_delete(var);
+            return false;
+        }
+
+        if (!parser_next(parser)) {
+            /* var is already stored in the parser's (and the block's)
+             * tables; deleting it here would leave dangling pointers
+             */
+            return false;
+        }
+
+        if (parser->tok == ';') {
+            if (!parser_next(parser))
+                return parser->tok == TOKEN_EOF;
+            return true;
+        }
+
+        if (parser->tok == ',') {
+            /* another var */
+            continue;
+        }
+
+        if (parser->tok != '=') {
+            parseerror(parser, "expected '=' or ';'");
+            return false;
+        }
+
+        if (!parser_next(parser))
+            return false;
+
+        if (parser->tok == '#') {
+            if (localblock) {
+                parseerror(parser, "cannot declare builtins within functions");
+                return false;
+            }
+            if (!isfunc || !func) {
+                parseerror(parser, "unexpected builtin number, '%s' is not a function", var->name);
+                return false;
+            }
+            if (!parser_next(parser)) {
+                parseerror(parser, "expected builtin number");
+                return false;
+            }
+            if (parser->tok != TOKEN_INTCONST) {
+                parseerror(parser, "builtin number must be an integer constant");
+                return false;
+            }
+            if (parser_token(parser)->constval.i <= 0) {
+                parseerror(parser, "builtin number must be positive integer greater than zero");
+                return false;
+            }
+
+            /* the builtin number is stored negated */
+            func->builtin = -parser_token(parser)->constval.i;
+        } else if (parser->tok == '{') {
+            /* function body */
+            ast_block *block;
+            ast_function *old = parser->function;
+
+            if (localblock) {
+                parseerror(parser, "cannot declare functions within functions");
+                return false;
+            }
+            if (!isfunc || !func) {
+                /* a body after a non-function declarator would make the
+                 * code below work on a NULL function
+                 */
+                parseerror(parser, "unexpected function body, '%s' is not a function", var->name);
+                return false;
+            }
+
+            parser->function = func;
+            block = parser_parse_block(parser);
+            parser->function = old;
+
+            if (!block)
+                return false;
+
+            if (!ast_function_blocks_add(func, block)) {
+                ast_block_delete(block);
+                return false;
+            }
+            return true;
+        } else {
+            parseerror(parser, "TODO, const assignment");
+        }
+
+        if (!parser_next(parser))
+            return false;
+
+        if (parser->tok == ',') {
+            /* another */
+            continue;
+        }
+
+        if (parser->tok != ';') {
+            parseerror(parser, "expected semicolon");
+            return false;
+        }
+
+        (void)parser_next(parser);
+
+        return true;
+    }
+}
+
+/* Parse one top-level construct starting at the current token.
+ *
+ * Only declarations that begin with a type name are parsed. Keywords and
+ * '.' are rejected without a diagnostic; any other token is reported as
+ * unexpected.
+ *
+ * Returns the result of parser_variable() for declarations, false
+ * otherwise.
+ */
+static bool parser_do(parser_t *parser)
+{
+    /* global variable or function declaration */
+    if (parser->tok == TOKEN_TYPENAME)
+        return parser_variable(parser, NULL);
+
+    /* 'var' / 'const' keywords and entity-member declarations ('.')
+     * are not handled here
+     */
+    if (parser->tok == TOKEN_KEYWORD || parser->tok == '.')
+        return false;
+
+    parseerror(parser, "unexpected token: %s", parser->lex->tok->value);
+    return false;
+}
+
+/* The parser instance used by parser_init(), parser_compile(),
+ * parser_finish() and parser_cleanup().
+ */
+static parser_t *parser;
+
+/* Allocate the parser instance and reset it to an all-zero state.
+ *
+ * Returns false if the allocation fails, true otherwise.
+ */
+bool parser_init()
+{
+    parser = (parser_t*)mem_a(sizeof(parser_t));
+    if (!parser)
+        return false;
+
+    /* clear the whole structure; sizeof(parser) would only cover the
+     * size of the pointer
+     */
+    memset(parser, 0, sizeof(*parser));
+
+    MEM_VECTOR_INIT(parser, globals);
+    MEM_VECTOR_INIT(parser, locals);
+    return true;
+}
+
+/* Open `filename` and parse all of its top-level declarations into the
+ * parser instance.
+ *
+ * The lexer is closed again before returning. The parser instance itself
+ * stays allocated on every path; it is released by parser_cleanup().
+ *
+ * Returns true if the file was parsed without errors, false if it could
+ * not be opened or a parse error occurred.
+ */
+bool parser_compile(const char *filename)
+{
+    parser->lex = lex_open(filename);
+    if (!parser->lex) {
+        printf("failed to open file \"%s\"\n", filename);
+        return false;
+    }
+
+    /* initial lexer/parser state */
+    parser->lex->flags.noops = true;
+
+    if (parser_next(parser))
+    {
+        while (parser->tok != TOKEN_EOF && parser->tok < TOKEN_ERROR)
+        {
+            if (!parser_do(parser)) {
+                if (parser->tok == TOKEN_EOF)
+                    parseerror(parser, "unexpected eof");
+                else
+                    parseerror(parser, "parse error\n");
+                lex_close(parser->lex);
+                parser->lex = NULL;
+                /* the parser is not freed here: the file-scope pointer
+                 * would otherwise dangle for parser_cleanup()
+                 */
+                return false;
+            }
+        }
+    }
+
+    lex_close(parser->lex);
+    parser->lex = NULL; /* do not keep a pointer to the closed lexer */
+
+    return !parser->errors;
+}
+
+/* Destroy everything owned by the parser instance: the functions, the
+ * immediate vector/string/float constants and the globals are deleted,
+ * the storage of every vector is released, and the instance itself is
+ * freed.
+ */
+void parser_cleanup()
+{
+    size_t i;
+    for (i = 0; i < parser->functions_count; ++i) {
+        ast_delete(parser->functions[i]);
+    }
+    for (i = 0; i < parser->imm_vector_count; ++i) {
+        ast_delete(parser->imm_vector[i]);
+    }
+    for (i = 0; i < parser->imm_string_count; ++i) {
+        ast_delete(parser->imm_string[i]);
+    }
+    for (i = 0; i < parser->imm_float_count; ++i) {
+        ast_delete(parser->imm_float[i]);
+    }
+    for (i = 0; i < parser->globals_count; ++i) {
+        ast_delete(parser->globals[i]);
+    }
+
+    /* release the storage of every vector, not only the globals */
+    MEM_VECTOR_CLEAR(parser, functions);
+    MEM_VECTOR_CLEAR(parser, imm_vector);
+    MEM_VECTOR_CLEAR(parser, imm_string);
+    MEM_VECTOR_CLEAR(parser, imm_float);
+    MEM_VECTOR_CLEAR(parser, globals);
+    /* only the vector storage is released for locals; the entries are
+     * not deleted here
+     */
+    MEM_VECTOR_CLEAR(parser, locals);
+
+    mem_d(parser);
+    parser = NULL; /* do not leave the file-scope pointer dangling */
+}
+
+/* Generate IR for everything that was parsed and write it to `output`.
+ *
+ * Code is generated in this order: immediate floats, immediate strings,
+ * immediate vectors, globals, then functions (each function is finalized
+ * right after its code is generated). The IR is dumped via printf before
+ * the output file is produced.
+ *
+ * Returns true on success; false if there were parse errors or any
+ * generation step failed.
+ */
+bool parser_finish(const char *output)
+{
+    ir_builder *builder;
+    size_t      idx;
+
+    /* nothing to generate when parsing already failed */
+    if (parser->errors) {
+        printf("*** there were compile errors\n");
+        return false;
+    }
+
+    builder = ir_builder_new("gmqcc_out");
+    if (!builder) {
+        printf("failed to allocate builder\n");
+        return false;
+    }
+
+    for (idx = 0; idx < parser->imm_float_count; ++idx) {
+        if (ast_global_codegen(parser->imm_float[idx], builder))
+            continue;
+        printf("failed to generate global %s\n", parser->imm_float[idx]->name);
+        goto failed;
+    }
+    for (idx = 0; idx < parser->imm_string_count; ++idx) {
+        if (ast_global_codegen(parser->imm_string[idx], builder))
+            continue;
+        printf("failed to generate global %s\n", parser->imm_string[idx]->name);
+        goto failed;
+    }
+    for (idx = 0; idx < parser->imm_vector_count; ++idx) {
+        if (ast_global_codegen(parser->imm_vector[idx], builder))
+            continue;
+        printf("failed to generate global %s\n", parser->imm_vector[idx]->name);
+        goto failed;
+    }
+    for (idx = 0; idx < parser->globals_count; ++idx) {
+        if (ast_global_codegen(parser->globals[idx], builder))
+            continue;
+        printf("failed to generate global %s\n", parser->globals[idx]->name);
+        goto failed;
+    }
+    for (idx = 0; idx < parser->functions_count; ++idx) {
+        if (!ast_function_codegen(parser->functions[idx], builder)) {
+            printf("failed to generate function %s\n", parser->functions[idx]->name);
+            goto failed;
+        }
+        if (!ir_function_finalize(parser->functions[idx]->ir_func)) {
+            printf("failed to finalize function %s\n", parser->functions[idx]->name);
+            goto failed;
+        }
+    }
+
+    ir_builder_dump(builder, printf);
+
+    if (!ir_builder_generate(builder, output)) {
+        printf("*** failed to generate output file\n");
+        goto failed;
+    }
+
+    ir_builder_delete(builder);
+    return true;
+
+failed:
+    /* every failure after the builder exists releases it here */
+    ir_builder_delete(builder);
+    return false;
+}
diff --git a/warns.def b/warns.def

index 6238e504bbc60d703f4e7b5791bcbe04d59037fc..e7ebda70aa6c2d004fada92c806c20a8599f3189 100644 (file)
--- a/warns.def
+++ b/warns.def
@@ -3,3 +3,4 @@
  #endif
  
  GMQCC_DEFINE_FLAG(UNUSED_VARIABLE)
+GMQCC_DEFINE_FLAG(UNKNOWN_CONTROL_SEQUENCE)
author	Wolfgang (Blub) Bumiller <blub@speed.at>
	Fri, 10 Aug 2012 18:48:42 +0000 (20:48 +0200)
committer	Wolfgang (Blub) Bumiller <blub@speed.at>
	Fri, 10 Aug 2012 18:48:42 +0000 (20:48 +0200)
Makefile		patch \| blob \| history
ast.c		patch \| blob \| history
ast.h		patch \| blob \| history
gmqcc.h		patch \| blob \| history
ir.c		patch \| blob \| history
ir.h		patch \| blob \| history
lexer.c	[new file with mode: 0644]	patch \| blob
lexer.h	[new file with mode: 0644]	patch \| blob
main.c		patch \| blob \| history
parser.c	[new file with mode: 0644]	patch \| blob
warns.def		patch \| blob \| history