From bd5ba9e0fe0e9bdaea3f869f55d8f40651a2d0fb Mon Sep 17 00:00:00 2001
From: Dale Weiler <killfieldengine@gmail.com>
Date: Tue, 10 Apr 2012 04:20:15 -0400
Subject: [PATCH] Cleanups

---
 gmqcc.h   |  12 +++---
 lex.c     |  12 +++---
 main.c    |   4 +-
 parse.c   |  68 ++++++++++++++---------------
 typedef.c | 127 +++++-------------------------------------------------
 5 files changed, 59 insertions(+), 164 deletions(-)

diff --git a/gmqcc.h b/gmqcc.h
index 5acfbd5..50fd856 100644
--- a/gmqcc.h
+++ b/gmqcc.h
@@ -156,7 +156,8 @@ struct lex_file {
 #define TOKEN_FOR      8   // extension
 #define TOKEN_TYPEDEF  9   // extension
 
-
+// ensure the token types are out of the
+// bounds of any others that may conflict.
 #define TOKEN_FLOAT    110
 #define TOKEN_VECTOR   111
 #define TOKEN_STRING   112
@@ -166,16 +167,17 @@ struct lex_file {
 /*
  * Lexer state constants, these are numbers for where exactly in
  * the lexing the lexer is at. Or where it decided to stop if a lexer
- * error occurs.
+ * error occurs.  These numbers must be > where the ascii-table ends
+ * and > the last type token which is TOKEN_VOID
  */
-#define LEX_COMMENT    1128 /* higher than ascii */
+#define LEX_COMMENT    1128 
 #define LEX_CHRLIT     1129
 #define LEX_STRLIT     1130
 #define LEX_IDENT      1131
 
 int              lex_token(struct lex_file *);
 void             lex_reset(struct lex_file *);
-int              lex_close(struct lex_file *);
+void             lex_close(struct lex_file *);
 struct lex_file *lex_open (FILE *);
 
 /* errors */
@@ -187,7 +189,7 @@ struct lex_file *lex_open (FILE *);
 int error(int, const char *, ...);
 
 /* parse.c */
-int parse(struct lex_file *);
+int parse_tree(struct lex_file *);
 struct parsenode {
 	struct parsenode *next;
 	int               type; /* some token */
diff --git a/lex.c b/lex.c
index d7c7b00..7b55dff 100644
--- a/lex.c
+++ b/lex.c
@@ -52,13 +52,11 @@ struct lex_file *lex_open(FILE *fp) {
 	return lex;
 }
 
-int lex_close(struct lex_file *file) {
-	int ret = -1;
-	if (file) {
-		ret = fclose(file->file);
-		mem_d(file);
-	}
-	return ret;
+void lex_close(struct lex_file *file) {
+	if (!file) return;
+	
+	fclose(file->file); /* may already be closed */
+	mem_d(file);
 }
 
 static void lex_addch(int ch, struct lex_file *file) {
diff --git a/main.c b/main.c
index c796d7c..52413ca 100644
--- a/main.c
+++ b/main.c
@@ -65,8 +65,8 @@ int main(int argc, char **argv) {
 		return error(ERROR_COMPILER, "Source file: %s not found\n", ifile);
 	} else {
 		struct lex_file *lex = lex_open(fp);
-		parse    (lex);
-		lex_close(lex);
+		parse_tree(lex); /* generate parse tree */
+		lex_close (lex); /* cleanup  lexer      */
 	}
 	return 0;
 }
diff --git a/parse.c b/parse.c
index 4927013..0e1c6a5 100644
--- a/parse.c
+++ b/parse.c
@@ -151,13 +151,15 @@ void parse_debug(struct parsenode *tree) {
 }
 
 /*
- * This just skips the token and throws it in the parse tree for later
- * checking / optimization / codegen, it doesn't do anything with it
- * like syntax check for legal use -- like it should as it's a TODO item
- * which is not implemented
+ * Performs a parse operation:  This is a macro to prevent bugs, if the
+ * calls to lex_token aren't exactly enough to feed to the end of the
+ * actual lexees for the current thing that is being parsed, the state 
+ * of the next iteration in the creation of the parse tree will be wrong
+ * and everything will fail.
  */
-#define PARSE_TODO(X) {          \
+#define PARSE_PERFORM(X,C) {     \
 	token = lex_token(file);     \
+	{ C }                        \
 	while (token != '\n') {      \
 		token = lex_token(file); \
 	}                            \
@@ -178,7 +180,11 @@ void parse_clear(struct parsenode *tree) {
 	typedef_clear();
 }
 
-int parse(struct lex_file *file) {
+/*
+ * Generates a parse tree out of the lexees generated by the lexer.  This
+ * is where the tree is built.  This is where validity checking is performed.
+ */
+int parse_tree(struct lex_file *file) {
 	struct parsenode *parsetree = NULL;
 	struct parsenode *parseroot = NULL;
 	
@@ -203,29 +209,19 @@ int parse(struct lex_file *file) {
 		    token                    != ERROR_PREPRO   && file->length >= 0) {
 		switch (token) {
 			case TOKEN_IF:
-				//token = lex_token(file);
 				while ((token == ' ' || token == '\n') && file->length >= 0)
 					token = lex_token(file);
-					
-				//if (token != '(')
-				//	error(ERROR_PARSE, "Expected `(` after if\n", "");
-				
 				PARSE_TREE_ADD(PARSE_TYPE_IF);
 				break;
 			case TOKEN_ELSE:
 				token = lex_token(file);
-				//while ((token == ' ' || token == '\n') && file->length >= 0)
-				//	token = lex_token(file);
-					
 				PARSE_TREE_ADD(PARSE_TYPE_ELSE);
 				break;
 			case TOKEN_FOR:
-				token = lex_token(file);
+				//token = lex_token(file);
 				while ((token == ' ' || token == '\n') && file->length >= 0)
 					token = lex_token(file);
-					
-				//PARSE_TREE_ADD(PARSE_TYPE_FOR);
-				PARSE_TODO(PARSE_TYPE_FOR);
+				PARSE_TREE_ADD(PARSE_TYPE_FOR);
 				break;
 			
 			/*
@@ -243,9 +239,6 @@ int parse(struct lex_file *file) {
 				
 				typedef_add(f, t);
 				
-				/* free stdup strings */
-				//mem_d(f);
-				//mem_d(t);
 				free(f);
 				free(t);
 				
@@ -253,19 +246,27 @@ int parse(struct lex_file *file) {
 					token = lex_token(file);
 				break;
 			}
+			
+			/*
+			 * Returns are addable as-is, statement checking is during
+			 * the actual parse tree check.
+			 */
+			case TOKEN_RETURN:
+				PARSE_TREE_ADD(PARSE_TYPE_RETURN);
+				break;
+				//PARSE_PERFORM(PARSE_TYPE_RETURN,  {});
+			
 				
-				
-			case TOKEN_DO:        PARSE_TODO(PARSE_TYPE_DO);
-			case TOKEN_WHILE:     PARSE_TODO(PARSE_TYPE_WHILE);
-			case TOKEN_BREAK:     PARSE_TODO(PARSE_TYPE_BREAK);
-			case TOKEN_CONTINUE:  PARSE_TODO(PARSE_TYPE_CONTINUE);
-			case TOKEN_RETURN:    PARSE_TODO(PARSE_TYPE_RETURN);
-			case TOKEN_GOTO:      PARSE_TODO(PARSE_TYPE_GOTO);
-			case TOKEN_VOID:      PARSE_TODO(PARSE_TYPE_VOID);
-			case TOKEN_STRING:    PARSE_TODO(PARSE_TYPE_STRING);
-			case TOKEN_FLOAT:     PARSE_TODO(PARSE_TYPE_FLOAT);
-			case TOKEN_VECTOR:    PARSE_TODO(PARSE_TYPE_VECTOR);
-			case TOKEN_ENTITY:    PARSE_TODO(PARSE_TYPE_ENTITY);
+			case TOKEN_DO:        PARSE_PERFORM(PARSE_TYPE_DO,      {});
+			case TOKEN_WHILE:     PARSE_PERFORM(PARSE_TYPE_WHILE,   {});
+			case TOKEN_BREAK:     PARSE_PERFORM(PARSE_TYPE_BREAK,   {});
+			case TOKEN_CONTINUE:  PARSE_PERFORM(PARSE_TYPE_CONTINUE,{});
+			case TOKEN_GOTO:      PARSE_PERFORM(PARSE_TYPE_GOTO,    {});
+			case TOKEN_VOID:      PARSE_PERFORM(PARSE_TYPE_VOID,    {});
+			case TOKEN_STRING:    PARSE_PERFORM(PARSE_TYPE_STRING,  {});
+			case TOKEN_FLOAT:     PARSE_PERFORM(PARSE_TYPE_FLOAT,   {});
+			case TOKEN_VECTOR:    PARSE_PERFORM(PARSE_TYPE_VECTOR,  {});
+			case TOKEN_ENTITY:    PARSE_PERFORM(PARSE_TYPE_ENTITY,  {});
 				
 			/*
 			 * From here down is all language punctuation:  There is no
@@ -392,6 +393,5 @@ int parse(struct lex_file *file) {
 	parse_debug(parseroot);
 	lex_reset(file);
 	parse_clear(parseroot);
-	
 	return 1;
 }	
diff --git a/typedef.c b/typedef.c
index 9486576..4ee7348 100644
--- a/typedef.c
+++ b/typedef.c
@@ -24,12 +24,6 @@
 #include <stdint.h> /* replace if stdint.h doesn't exist! */
 #include <limits.h>
 #include "gmqcc.h"
-
-/*
- * This implements a hashtable for typedef type keywords which end up
- * being translated to their full-expressed type.  This uses a singly
- * linked list with a fast hash function.
- */
 static typedef_node *typedef_table[1024];
 
 void typedef_init() {
@@ -38,118 +32,19 @@ void typedef_init() {
 		typedef_table[i] = NULL;
 }
 
-/*
- * Fast collisionless hashfunction based off of:
- * http://www.azillionmonkeys.com/qed/hash.html
- * By: Paul Hsieh
- * 
- * The code is licensed under LGPL 2.1 or Paul
- * Hsieh's derivative license. Stated on his page
- * quote:
- * 
- * 	The LGPL 2.1 is not necessarily a more liberal license than my 
- *	derivative license, but this additional licensing makes the code
- * 	available to more developers. Note that this does not give you 
- * 	multi-licensing rights. You can only use the code under one of
- * 	the licenses at a time. 
- * 
- *  Paul Hsieh derivative license
- *
- *	The derivative content includes raw computer source code, ideas, 
- *	opinions, and excerpts whose original source is covered under 
- *	another license and transformations of such derivatives.
- *	Note that mere excerpts by themselves (with the exception of raw
- * 	source code) are not considered derivative works under this license.
- * 	Use and redistribution is limited to the following conditions:
- * 
- *	One may not create a derivative work which, in any way, violates the
- *	Paul Hsieh exposition license described above on the original content.
- *
- *	One may not apply a license to a derivative work that precludes anyone
- *	else from using and redistributing derivative content.
- *
- *	One may not attribute any derivative content to authors not involved
- *	in the creation of the content, though an attribution to the author
- *	is not necessary.
- * 
- *  Paul Hsieh exposition license
- *
- *	The content of all text, figures, tables and displayed layout is
- *	copyrighted by its author and owner Paul Hsieh unless specifically
- *	denoted otherwise. Redistribution is limited to the following conditions:
- *
- *	The redistributor must fully attribute the content's authorship and
- *	make a good faith effort to cite the original location of the original
- *	content.
- *
- *	The content may not be modified via excerpt or otherwise with the
- *	exception of additional citations such as described above without prior
- *	consent of Paul Hsieh.
- *
- *	The content may not be subject to a change in license without prior
- *	consent of Paul Hsieh.
- *
- *	The content may be used for commercial purposes.
- */
-
-#if (defined(__GNUC__) && defined(__i386__)) || defined(_MSC_VER)
-/*
- * Unalligned loads are faster if we can do them, otherwise fall back
- * to safer version below.
- */
-#   define load16(D) (*((const uint16_t*)(D)))
-#else
-#   define load16(D) ((((uint32_t)(((const uint8_t*)(D))[1])) << 8) + \
-                        (uint32_t)(((const uint8_t*)(D))[0]))
-#endif
-unsigned int inline typedef_hash(const char *data) {
-	uint32_t hash = strlen(data);
-	uint32_t size = hash;
-	uint32_t temp = 0;
+unsigned int typedef_hash(const char *s) {
+	unsigned int hash = 0;
+	unsigned int size = strlen(s);
+	unsigned int iter;
 	
-	int last;
-	if (size <= 0|| data == NULL)
-		return -1;
-	
-	last   = size & 3;
-	size >>= 2;
-	
-	/* main loop */
-	for (;size > 0; size--) {
-		hash += (load16(data));
-		temp  = (load16(data+2) << 11) ^ hash;
-		hash  = (hash << 16) ^ temp;
-		data += sizeof(uint16_t) << 1;
-		hash += hash >> 11;
+	for (iter = 0; iter < size; iter++) {
+		hash += s[iter];
+		hash += (hash << 10);
+		hash ^= (hash >> 6);
 	}
-	
-	/* ends */
-	switch (last) {
-		case 3:
-			hash += load16(data);
-			hash ^= hash << 16;
-			hash ^= ((signed char)data[sizeof(uint16_t)]) << 8;
-			hash += hash >> 11;
-			break;
-		case 2:
-			hash += load16(data);
-			hash ^= hash << 11;
-			hash += hash >> 17;
-			break;
-		case 1:
-			hash += (signed char)*data;
-			hash ^= hash << 10;
-			hash += hash >> 1;
-			break;
-	}
-	
-	/* force avalanching of final 127 bits */
-	hash ^= hash << 3;
-	hash += hash >> 5;
-	hash ^= hash << 4;
-	hash += hash >> 17;
-	hash ^= hash << 25;
-	hash += hash >> 6;
+	hash += (hash << 3);
+	hash ^= (hash >> 11);
+	hash += (hash << 15);
 	
 	return hash % 1024;
 }
-- 
2.39.2