Merge branch 'master' into ast-and-ir

[xonotic/gmqcc.git] / lex.c
diff --git a/lex.c b/lex.c

index 48d01c49db4adee40308f55d5a11172e166e7436..fa09f27042391d7d7fd8d013331a8e3192f20a34 100644 (file)
--- a/lex.c
+++ b/lex.c
@@ -1,6 +1,6 @@
  /*
- * Copyright (C) 2012 
- *     Dale Weiler
+ * Copyright (C) 2012
+ *     Dale Weiler
   *
   * Permission is hereby granted, free of charge, to any person obtaining a copy of
   * this software and associated documentation files (the "Software"), to deal in
@@ -20,11 +20,6 @@
   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   * SOFTWARE.
   */
-#include <stdio.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
  #include "gmqcc.h"
  
  /*
@@ -32,51 +27,48 @@
   * than keyword lexing.
   */
  static const char *const lex_keywords[] = {
-       "do",    "else",     "if",     "while",
-       "break", "continue", "return", "goto",
-       "for",   "typedef",
-       
-       /* types */
-       "int",
-       "void",
-       "string",
-       "float",
-       "vector",
-       "entity",
+    "do",    "else",     "if",     "while",
+    "break", "continue", "return", "goto",
+    "for",   "typedef"
  };
  
-struct lex_file *lex_open(FILE *fp) {
-       struct lex_file *lex = mem_a(sizeof(struct lex_file));
-       if (lex) {
-               lex->file = fp;
-               fseek(lex->file, 0, SEEK_END);
-               lex->length = ftell(lex->file);
-               lex->size   = lex->length; /* copy, this is never changed */
-               fseek(lex->file, 0, SEEK_SET);
-               lex->last = 0;
-               
-               memset(lex->peek, 0, sizeof(lex->peek));
-       }
-       return lex;
+void lex_init(const char *file, lex_file **set) {
+    lex_file *lex = mem_a(sizeof(lex_file));
+    if (!lex)
+        return;
+
+    lex->file = fopen(file, "r");
+    if (!lex->file) {
+        mem_d(lex);
+        return;
+    }
+
+    fseek(lex->file, 0, SEEK_END);
+    lex->length = ftell(lex->file);
+    lex->size   = lex->length; /* copy, this is never changed */
+    fseek(lex->file, 0, SEEK_SET);
+    lex->last = 0;
+    lex->line = 1;
+
+    memset(lex->peek, 0, sizeof(lex->peek));
+    *set = lex;
  }
  
-int lex_close(struct lex_file *file) {
-       int ret = -1;
-       if (file) {
-               ret = fclose(file->file);
-               mem_d(file);
-       }
-       return ret;
+void lex_close(lex_file *file) {
+    if (!file) return;
+
+    fclose(file->file); /* may already be closed */
+    mem_d (file);
  }
  
-static void lex_addch(int ch, struct lex_file *file) {
-       if (file->current <  sizeof(file->lastok)-1)
-               file->lastok[file->current++] = (char)ch;
-       if (file->current == sizeof(file->lastok)-1)
-               file->lastok[file->current]   = (char)'\0';
+static void lex_addch(int ch, lex_file *file) {
+    if (file->current <  sizeof(file->lastok)-1)
+        file->lastok[file->current++] = (char)ch;
+    if (file->current == sizeof(file->lastok)-1)
+        file->lastok[file->current]   = (char)'\0';
  }
-static inline void lex_clear(struct lex_file *file) {
-       file->current = 0;
+static inline void lex_clear(lex_file *file) {
+    file->current = 0;
  }
  
  /*
@@ -84,283 +76,280 @@ static inline void lex_clear(struct lex_file *file) {
   * This doesn't play with file streams, the lexer has
   * it's own internal state for this.
   */
-static int lex_inget(struct lex_file *file) {
-       file->length --;
-       if (file->last > 0)
-               return file->peek[--file->last];
-       return fgetc(file->file);
+static int lex_inget(lex_file *file) {
+    char  get;
+    file->length --;
+
+    if (file->last > 0) {
+        if ((get = file->peek[--file->last]) == '\n')
+            file->line ++;
+        return get;
+    }
+    if ((get = fgetc(file->file)) == '\n')
+        file->line++;
+
+    return get;
  }
-static void lex_unget(int ch, struct lex_file *file) {
-       if (file->last < sizeof(file->peek))
-               file->peek[file->last++] = ch;
-       file->length ++;
+static void lex_unget(int ch, lex_file *file) {
+    if (file->last < sizeof(file->peek)) {
+        if (ch == '\n')
+            file->line --;
+        file->peek[file->last++] = ch;
+    }
+    file->length ++;
  }
  
  /*
   * This is trigraph and digraph support, a feature not qc compiler
   * supports.  Moving up in this world!
   */
-static int lex_trigraph(struct lex_file *file) {
-       int  ch;
-       if ((ch = lex_inget(file)) != '?') {
-               lex_unget(ch, file);
-               return '?';
-       }
-       
-       ch = lex_inget(file);
-       switch (ch) {
-               case '(' : return '[' ;
-               case ')' : return ']' ;
-               case '/' : return '\\';
-               case '\'': return '^' ;
-               case '<' : return '{' ;
-               case '>' : return '}' ;
-               case '!' : return '|' ;
-               case '-' : return '~' ;
-               case '=' : return '#' ;
-               default:
-                       lex_unget('?', file);
-                       lex_unget(ch , file);
-                       return '?';
-       }
-       return '?';
+static int lex_trigraph(lex_file *file) {
+    int  ch;
+    if ((ch = lex_inget(file)) != '?') {
+        lex_unget(ch, file);
+        return '?';
+    }
+
+    ch = lex_inget(file);
+    switch (ch) {
+        case '(' : return '[' ;
+        case ')' : return ']' ;
+        case '/' : return '\\';
+        case '\'': return '^' ;
+        case '<' : return '{' ;
+        case '>' : return '}' ;
+        case '!' : return '|' ;
+        case '-' : return '~' ;
+        case '=' : return '#' ;
+        default:
+            lex_unget('?', file);
+            lex_unget(ch , file);
+            return '?';
+    }
+    return '?';
  }
-static int lex_digraph(struct lex_file *file, int first) {
-       int ch = lex_inget(file);
-       switch (first) {
-               case '<':
-                       if (ch == '%') return '{';
-                       if (ch == ':') return '[';
-                       break;
-               case '%':
-                       if (ch == '>') return '}';
-                       if (ch == ':') return '#';
-                       break;
-               case ':':
-                       if (ch == '>') return ']';
-                       break;
-       }
-       
-       lex_unget(ch, file);
-       return first;
+static int lex_digraph(lex_file *file, int first) {
+    int ch = lex_inget(file);
+    switch (first) {
+        case '<':
+            if (ch == '%') return '{';
+            if (ch == ':') return '[';
+            break;
+        case '%':
+            if (ch == '>') return '}';
+            if (ch == ':') return '#';
+            break;
+        case ':':
+            if (ch == '>') return ']';
+            break;
+    }
+
+    lex_unget(ch, file);
+    return first;
  }
  
-static int lex_getch(struct lex_file *file) {
-       int ch = lex_inget(file);
-       if (ch == '?')
-               return lex_trigraph(file);
-       if (ch == '<' || ch == ':' || ch == '%')
-               return lex_digraph (file, ch);
-               
-       return ch;
+static int lex_getch(lex_file *file) {
+    int ch = lex_inget(file);
+    if (ch == '?')
+        return lex_trigraph(file);
+    if (ch == '<' || ch == ':' || ch == '%')
+        return lex_digraph(file, ch);
+    return ch;
  }
  
-static int lex_get(struct lex_file *file) {
-       int ch;
-       if (!isspace(ch = lex_getch(file)))
-               return ch;
-       
-       /* skip over all spaces */
-       while (isspace(ch) && ch != '\n')
-               ch = lex_getch(file);
-               
-       if (ch == '\n') {
-               file->line ++;
-               return ch;
-       }
-               
-       lex_unget(ch, file);
-       return ' ';
+static int lex_get(lex_file *file) {
+    int ch;
+    if (!isspace(ch = lex_getch(file)))
+        return ch;
+
+    /* skip over all spaces */
+    while (isspace(ch) && ch != '\n')
+        ch = lex_getch(file);
+
+    if (ch == '\n')
+        return ch;
+    lex_unget(ch, file);
+    return ' ';
  }
  
-static int lex_skipchr(struct lex_file *file) {
-       int ch;
-       int it;
-       
-       lex_clear(file);
-       lex_addch('\'', file);
-       
-       for (it = 0; it < 2 && ((ch = lex_inget(file)) != '\''); it++) {
-               lex_addch(ch, file);
-               
-               if (ch == '\n')
-                       return ERROR_LEX;
-               if (ch == '\\')
-                       lex_addch(lex_getch(file), file);
-       }
-       lex_addch('\'', file);
-       lex_addch('\0', file);
-       
-       if (it > 2)
-               return ERROR_LEX;
-               
-       return LEX_CHRLIT;
+static int lex_skipchr(lex_file *file) {
+    int ch;
+    int it;
+
+    lex_clear(file);
+    lex_addch('\'', file);
+
+    for (it = 0; it < 2 && ((ch = lex_inget(file)) != '\''); it++) {
+        lex_addch(ch, file);
+
+        if (ch == '\n')
+            return ERROR_LEX;
+        if (ch == '\\')
+            lex_addch(lex_getch(file), file);
+    }
+    lex_addch('\'', file);
+    lex_addch('\0', file);
+
+    if (it > 2)
+        return ERROR_LEX;
+
+    return LEX_CHRLIT;
  }
  
-static int lex_skipstr(struct lex_file *file) {
-       int ch;
-       lex_clear(file);
-       lex_addch('"', file);
-       
-       while ((ch = lex_getch(file)) != '"') {
-               if (ch == '\n' || ch == EOF)
-                       return ERROR_LEX;
-                       
-               lex_addch(ch, file);
-               if (ch == '\\')
-                       lex_addch(lex_inget(file), file);
-       }
-       
-       lex_addch('"', file);
-       lex_addch('\0', file);
-       
-       return LEX_STRLIT;
+static int lex_skipstr(lex_file *file) {
+    int ch;
+    lex_clear(file);
+    lex_addch('"', file);
+
+    while ((ch = lex_getch(file)) != '"') {
+        if (ch == '\n' || ch == EOF)
+            return ERROR_LEX;
+
+        lex_addch(ch, file);
+        if (ch == '\\')
+            lex_addch(lex_inget(file), file);
+    }
+
+    lex_addch('"', file);
+    lex_addch('\0', file);
+
+    return LEX_STRLIT;
  }
-static int lex_skipcmt(struct lex_file *file) {
-       int ch;
-       lex_clear(file);
-       ch = lex_getch(file);
-       
-       if (ch == '/') {
-               lex_addch('/', file);
-               lex_addch('/', file);
-               
-               while ((ch = lex_getch(file)) != '\n') {
-                       if (ch == '\\') {
-                               lex_addch(ch, file);
-                               lex_addch(lex_getch(file), file);
-                       } else {
-                               lex_addch(ch, file);
-                       }
-               }
-               lex_addch('\0', file);
-               return LEX_COMMENT;
-       }
-       
-       if (ch != '*') {
-               lex_unget(ch, file);
-               return '/';
-       }
-       
-       lex_addch('/', file);
-       
-       /* hate this */
-       do {
-               lex_addch(ch, file);
-               while ((ch = lex_getch(file)) != '*') {
-                       if (ch == EOF)
-                               return error(ERROR_LEX, "malformatted comment", " ");
-                       else
-                               lex_addch(ch, file);
-               }
-               lex_addch(ch, file);
-       } while ((ch = lex_getch(file)) != '/');
-       
-       lex_addch('/',  file);
-       lex_addch('\0', file);
-       
-       return LEX_COMMENT;
+static int lex_skipcmt(lex_file *file) {
+    int ch;
+    lex_clear(file);
+    ch = lex_getch(file);
+
+    if (ch == '/') {
+        lex_addch('/', file);
+        lex_addch('/', file);
+
+        while ((ch = lex_getch(file)) != '\n') {
+            if (ch == '\\') {
+                lex_addch(ch, file);
+                lex_addch(lex_getch(file), file);
+            } else {
+                lex_addch(ch, file);
+            }
+        }
+        lex_addch('\0', file);
+        return LEX_COMMENT;
+    }
+
+    if (ch != '*') {
+        lex_unget(ch, file);
+        return '/';
+    }
+
+    lex_addch('/', file);
+
+    /* hate this */
+    do {
+        lex_addch(ch, file);
+        while ((ch = lex_getch(file)) != '*') {
+            if (ch == EOF)
+                return error(file, ERROR_LEX, "malformatted comment");
+            else
+                lex_addch(ch, file);
+        }
+        lex_addch(ch, file);
+    } while ((ch = lex_getch(file)) != '/');
+
+    lex_addch('/',  file);
+    lex_addch('\0', file);
+
+    return LEX_COMMENT;
  }
  
-static int lex_getsource(struct lex_file *file) {
-       int ch = lex_get(file);
-       
-       /* skip char/string/comment */
-       switch (ch) {
-               case '\'': return lex_skipchr(file);
-               case '"':  return lex_skipstr(file);
-               case '/':  return lex_skipcmt(file);
-               default:   return ch;
-       }
+static int lex_getsource(lex_file *file) {
+    int ch = lex_get(file);
+
+    /* skip char/string/comment */
+    switch (ch) {
+        case '\'': return lex_skipchr(file);
+        case '"':  return lex_skipstr(file);
+        case '/':  return lex_skipcmt(file);
+        default:
+            return ch;
+    }
+}
+
+int lex_token(lex_file *file) {
+    int ch = lex_getsource(file);
+    int it;
+
+    /* valid identifier */
+    if (ch > 0 && (ch == '_' || isalpha(ch))) {
+        lex_clear(file);
+
+        while (ch > 0 && (ch == '_' || isalpha(ch))) {
+            lex_addch(ch, file);
+            ch = lex_getsource(file);
+        }
+        lex_unget(ch,   file);
+        lex_addch('\0', file);
+
+        /* look inside the table for a keyword .. */
+        for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++)
+            if (!strncmp(file->lastok, lex_keywords[it], strlen(lex_keywords[it])))
+                return it;
+
+        /* try a type? */
+        #define TEST_TYPE(X)                                 \
+            do {                                             \
+                if (!strncmp(X, "float",  sizeof("float")))  \
+                    return TOKEN_FLOAT;                      \
+                if (!strncmp(X, "vector", sizeof("vector"))) \
+                    return TOKEN_VECTOR;                     \
+                if (!strncmp(X, "string", sizeof("string"))) \
+                    return TOKEN_STRING;                     \
+                if (!strncmp(X, "entity", sizeof("entity"))) \
+                    return TOKEN_ENTITY;                     \
+                if (!strncmp(X, "void"  , sizeof("void")))   \
+                    return TOKEN_VOID;                       \
+            } while(0)
+
+        TEST_TYPE(file->lastok);
+
+        /* try the hashtable for typedefs? */
+        if (typedef_find(file->lastok))
+            TEST_TYPE(typedef_find(file->lastok)->name);
+
+        #undef TEST_TYPE
+        return LEX_IDENT;
+    }
+    return ch;
  }
  
-int lex_token(struct lex_file *file) {
-       int ch = lex_getsource(file);
-       int it;
-       
-       /* valid identifier */
-       if (ch > 0 && (ch == '_' || isalpha(ch))) {
-               lex_clear(file);
-               while (ch > 0 && (isalpha(ch) || ch == '_')) {
-                       lex_addch(ch, file);
-                       ch = lex_getsource(file);
-               }
-               lex_unget(ch,   file);
-               lex_addch('\0', file);
-               
-               /* look inside the table for a keyword .. */
-               for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++)
-                       if (!strncmp(file->lastok, lex_keywords[it], sizeof(lex_keywords[it])))
-                               return it;
-                               
-               /* try the hashtable for typedefs? */
-               if (typedef_find(file->lastok))
-                       for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++)
-                               if (!strncmp(typedef_find(file->lastok)->name, lex_keywords[it], sizeof(lex_keywords[it])))
-                                       return it;
-                               
-               return LEX_IDENT;
-       }
-       return ch;
+void lex_reset(lex_file *file) {
+    file->current = 0;
+    file->last    = 0;
+    file->length  = file->size;
+    fseek(file->file, 0, SEEK_SET);
+
+    memset(file->peek,   0, sizeof(file->peek  ));
+    memset(file->lastok, 0, sizeof(file->lastok));
  }
  
-void lex_reset(struct lex_file *file) {
-       file->current = 0;
-       file->last    = 0;
-       file->length  = file->size;
-       fseek(file->file, 0, SEEK_SET);
-       
-       memset(file->peek,   0, sizeof(file->peek  ));
-       memset(file->lastok, 0, sizeof(file->lastok));
+void lex_parse(lex_file *file) {
+    if (!file) return;
+    parse_gen(file); /* run parser */
  }
  
-int lex_debug(struct lex_file *file) {
-       int list_do       = 0;
-       int list_else     = 0;
-       int list_if       = 0;
-       int list_while    = 0;
-       int list_break    = 0;
-       int list_continue = 0;
-       int list_return   = 0;
-       int list_goto     = 0;
-       int list_for      = 0;
-       int token         = 0;
-       printf("===========================\nTOKENS:   \n===========================\n");
-       while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0) {
-               if (token != -1) {
-                       switch (token) {
-                               case 0: list_do      ++; break;
-                               case 1: list_else    ++; break;
-                               case 2: list_if      ++; break;
-                               case 3: list_while   ++; break;
-                               case 4: list_break   ++; break;
-                               case 5: list_continue++; break;
-                               case 6: list_return  ++; break;
-                               case 7: list_goto    ++; break;
-                               case 8: list_for     ++; break;
-                       }
-               }
-               if (token >= 33 && token <= 126)
-                       putchar(token);
-       }
-       printf("\n===========================\nBRANCHES \n===========================\n");
-       printf("\t if       % 8d\n", list_if);
-       printf("\t else     % 8d\n", list_else);
-       printf("===========================\nLOOPS      \n===========================\n");
-       printf("\t for      % 8d\n", list_for);
-       printf("\t while    % 8d\n", list_while);
-       printf("\t do       % 8d\n", list_do);
-       printf("===========================\nSTATEMENTS \n===========================\n");
-       printf("\t break    % 8d\n", list_break);
-       printf("\t continue % 8d\n", list_continue);
-       printf("\t return   % 8d\n", list_return);
-       printf("\t goto     % 8d\n", list_goto);
-       printf("===========================\nIDENTIFIERS\n===========================\n");
-       lex_reset(file);
-       while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0)
-               if (token == LEX_IDENT)
-                       printf("%s ", file->lastok);
-       fputc('\n', stdout);
-       lex_reset(file);
-       return 1;
+/*
+ * Include a file into the lexer / parsing process:  This really
+ * should check if names are the same to prevent endless include
+ * recrusion.
+ */
+lex_file *lex_include(lex_file *lex, const char *file) {
+    util_strrq(file);
+    if (strncmp(lex->name, file, strlen(lex->name)) == 0) {
+        error(lex, ERROR_LEX, "Source file cannot include itself\n");
+        exit (-1);
+    }
+
+    lex_file *set = NULL;
+    lex_init(file, &set);
+
+    return set;
  }