/*
- * Copyright (C) 2012
+ * Copyright (C) 2012
* Dale Weiler
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
*/
#include "gmqcc.h"
/*
- * Some assembler keywords not part of the opcodes above: these are
- * for creating functions, or constants.
+ * Following parse states:
+ * ASM_FUNCTION -- in a function accepting input statements
+ * ....
*/
-const char *const asm_keys[] = {
- "FLOAT" , /* define float */
- "VECTOR" , /* define vector */
- "ENTITY" , /* define ent */
- "FIELD" , /* define field */
- "STRING" , /* define string */
- "FUNCTION"
-};
+typedef enum {
+ ASM_NULL,
+ ASM_FUNCTION
+} asm_state;
+typedef struct {
+ char *name; /* name of constant */
+ char type; /* type, float, vector, string */
+ char elem; /* 0=x, 1=y, or 2=Z? */
+ int offset; /* location in globals */
+} globals;
+VECTOR_MAKE(globals, assembly_constants);
+
+/*
+ * Assembly text processing: this handles the internal collection
+ * of text to allow parsing and assemblation.
+ */
static char *const asm_getline(size_t *byte, FILE *fp) {
char *line = NULL;
- ssize_t read = util_getline(&line, byte, fp);
+ size_t read = util_getline(&line, byte, fp);
*byte = read;
if (read == -1) {
mem_d (line);
return line;
}
+/*
+ * Entire external interface for main.c - to perform actual assemblation
+ * of assembly files.
+ */
void asm_init(const char *file, FILE **fp) {
*fp = fopen(file, "r");
code_init();
}
-
void asm_close(FILE *fp) {
fclose(fp);
code_write();
}
-
-/*
- * Following parse states:
- * ASM_FUNCTION -- in a function accepting input statements
- * ....
- */
-typedef enum {
- ASM_NULL,
- ASM_FUNCTION
-} asm_state;
-
-typedef struct {
- char *name; /* name of constant */
- int offset; /* location in globals */
-} globals;
-VECTOR_MAKE(globals, assembly_constants);
-
void asm_clear() {
size_t i = 0;
for (; i < assembly_constants_elements; i++)
mem_d(assembly_constants_data);
}
-int asm_parsetype(const char *key, char **skip, long line) {
- size_t keylen = strlen(key);
- if (!strncmp(key, *skip, keylen)) {
- if ((*skip)[keylen] != ':'){
- printf("%li: Missing `:` after decltype\n", line);
- exit(1);
+/*
+ * Dumps all values of all constants and assembly related
+ * information obtained during the assembly procedure.
+ */
+void asm_dumps() {
+ size_t i = 0;
+ for (; i < assembly_constants_elements; i++) {
+ globals *g = &assembly_constants_data[i];
+ switch (g->type) {
+ case TYPE_VECTOR: {
+ util_debug("ASM", "vector %s %c[%f]\n", g->name,
+ (g->elem == 0) ? 'X' :(
+ (g->elem == 1) ? 'Y' :
+ (g->elem == 2) ? 'Z' :' '),
+ INT2FLT(code_globals_data[g->offset])
+ );
+ break;
+ }
}
- *skip += keylen+1;
- while (**skip == ' ' || **skip == '\t')
- (*skip)++;
-
- if (!isalpha(**skip)) {
- printf("%li: Invalid identififer: %s\n", line, *skip);
- exit(1);
- } else {
- assembly_constants_add((globals) {
- .name = util_strdup("empty"),
- .offset = code_globals_elements
- });
- return 1;
+ }
+}
+
+/*
+ * Parses a type, could be global or not depending on the
+ * assembly state: global scope with assignments are constants.
+ * globals with no assignments are globals. Function body types
+ * are locals.
+ */
+static GMQCC_INLINE bool asm_parse_type(const char *skip, size_t line, asm_state *state) {
+ if (!(strstr(skip, "FLOAT:") == &skip[0]) &&
+ (strstr(skip, "VECTOR:") == &skip[0]) &&
+ (strstr(skip, "ENTITY:") == &skip[0]) &&
+ (strstr(skip, "FIELD:") == &skip[0]) &&
+ (strstr(skip, "STRING:") == &skip[0])) return false;
+
+ /* TODO: determine if constant, global, or local */
+ switch (*skip) {
+ /* VECTOR */ case 'V': {
+ float val1;
+ float val2;
+ float val3;
+ globals global;
+
+ char *find = (char*)skip + 7;
+ char *name = (char*)skip + 7;
+ while (*find == ' ' || *find == '\t') find++;
+
+ /* constant? */
+ if (strchr(find, ',')) {
+ /* strip name */
+ *strchr((name = util_strdup(find)), ',')='\0';
+ /* find data */
+ find += strlen(name) + 1;
+ while (*find == ' ' || *find == '\t') find++;
+ /* valid name */
+ if (util_strupper(name) || isdigit(*name)) {
+ printf("invalid name for vector variable\n");
+ mem_d(name);
+ }
+ /*
+ * Parse all three elements of the vector. This will only
+ * pass the first try if we hit a constant, otherwise it's
+ * a global.
+ */
+ #define PARSE_ELEMENT(X,Y,Z) \
+ if (isdigit(*X) || *X == '-'||*X == '+') { \
+ bool negated = (*X == '-'); \
+ if (negated || *X == '+') { X++; } \
+ Y = (negated)?-atof(X):atof(X); \
+ X = strchr(X, ','); \
+ Z \
+ }
+
+ PARSE_ELEMENT(find, val1, { find ++; while (*find == ' ') { find ++; } });
+ PARSE_ELEMENT(find, val2, { find ++; while (*find == ' ') { find ++; } });
+ PARSE_ELEMENT(find, val3, { find ++; /* no need to do anything here */ });
+ #undef PARSE_ELEMENT
+ #define BUILD_ELEMENT(X,Y) \
+ global.type = TYPE_VECTOR; \
+ global.name = util_strdup(name); \
+ global.elem = (X); \
+ global.offset = code_globals_elements; \
+ assembly_constants_add(global); \
+ code_globals_add(FLT2INT(Y))
+ BUILD_ELEMENT(0, val1);
+ BUILD_ELEMENT(1, val2);
+ BUILD_ELEMENT(2, val3);
+ #undef BUILD_ELEMENT
+ mem_d(name);
+ } else {
+ /* TODO global not constant */
+ }
+ break;
+ }
+ /* ENTITY */ case 'E': {
+ const char *find = skip + 7;
+ while (*find == ' ' || *find == '\t') find++;
+ printf("found ENTITY %s\n", find);
+ break;
+ }
+ /* STRING */ case 'S': {
+ const char *find = skip + 7;
+ while (*find == ' ' || *find == '\t') find++;
+ printf("found STRING %s\n", find);
+ break;
}
}
- return 0;
+
+ return false;
}
-void asm_parse(FILE *fp) {
- char *data = NULL;
- char *skip = NULL;
- long line = 1; /* current line */
- size_t size = 0; /* size of line */
- asm_state state = ASM_NULL;
-
- while ((data = skip = asm_getline(&size, fp)) != NULL) {
- /* remove any whitespace at start */
- while (*skip == ' ' || *skip == '\t')
- skip++;
- /* remove newline at end of string */
- *(skip+*(&size)-1) = '\0';
-
- if (asm_parsetype(asm_keys[5], &skip, line)) {
- if (state != ASM_NULL) {
- printf("%li: Error unfinished function block, expected DONE or RETURN\n", line);
- goto end;
+/*
+ * Parses a function: trivial case, handles occurances of duplicated
+ * names among other things. Ensures valid name as well, and even
+ * internal engine function selection.
+ */
+static GMQCC_INLINE bool asm_parse_func(const char *skip, size_t line, asm_state *state) {
+ if (*state == ASM_FUNCTION)
+ return false;
+
+ if (strstr(skip, "FUNCTION:") == &skip[0]) {
+ char *copy = util_strsws(skip+10);
+ char *name = util_strchp(copy, strchr(copy, '\0'));
+
+ /* TODO: failure system, missing name */
+ if (!name) {
+ printf("expected name on function\n");
+ mem_d(copy);
+ mem_d(name);
+ return false;
+ }
+ /* TODO: failure system, invalid name */
+ if (!isalpha(*name) || util_strupper(name)) {
+ printf("invalid identifer for function name\n");
+ mem_d(copy);
+ mem_d(name);
+ return false;
+ }
+
+ /*
+ * Function could be internal function, look for $
+ * to determine this.
+ */
+ if (strchr(name, ',')) {
+ prog_section_function function;
+ prog_section_def def;
+
+ char *find = strchr(name, ',') + 1;
+
+ /* skip whitespace */
+ while (*find == ' ' || *find == '\t')
+ find++;
+
+ if (*find != '$') {
+ printf("expected $ for internal function selection, got %s instead\n", find);
+ mem_d(copy);
+ mem_d(name);
+ return false;
+ }
+ find ++;
+ if (!isdigit(*find)) {
+ printf("invalid internal identifier, expected valid number\n");
+ mem_d(copy);
+ mem_d(name);
+ return false;
}
- state = ASM_FUNCTION;
- code_defs_add((prog_section_def){
- .type = TYPE_VOID,
- .offset = code_globals_elements,
- .name = code_chars_elements
- });
- code_globals_add(code_functions_elements);
- code_functions_add((prog_section_function) {
- .entry = code_statements_elements,
- .firstlocal = 0,
- .locals = 0,
- .profile = 0,
- .name = code_chars_elements,
- .file = 0,
- .nargs = 0,
- .argsize = {0}
- });
- code_strings_add(skip);
- };
-
- #if 0
- /* if we make it this far then we have statements */
- {
- size_t i = 0; /* counter */
- size_t o = 0; /* operands */
- size_t c = 0; /* copy */
- char *t = NULL; /* token */
-
+ *strchr(name, ',')='\0';
+
/*
- * Most ops a single statement can have is three.
- * lets allocate some space for all of those here.
+ * Now add the following items to the code system:
+ * function
+ * definition (optional)
+ * global (optional)
+ * name
*/
- char op[3][32768] = {{0},{0},{0}};
- for (; i < sizeof(asm_instr)/sizeof(*asm_instr); i++) {
- if (!strncmp(skip, asm_instr[i].m, asm_instr[i].l)) {
- if (state != ASM_FUNCTION) {
- printf("%li: Statement not inside function block\n", line);
- goto end;
- }
-
- /* update parser state */
- if (i == INSTR_DONE || i == INSTR_RETURN) {
- goto end;
- state = ASM_NULL;
- }
-
- /* parse the statement */
- c = i;
- o = asm_instr[i].o; /* operands */
- skip += asm_instr[i].l; /* skip instruction */
- t = strtok(skip, " ,");
- i = 0;
- while (t != NULL && i < 3) {
- strcpy(op[i], t);
- t = strtok(NULL, " ,");
- i ++;
+ function.entry = -atoi(find);
+ function.firstlocal = 0;
+ function.locals = 0;
+ function.profile = 0;
+ function.name = code_chars_elements;
+ function.file = 0;
+ function.nargs = 0;
+ def.type = TYPE_FUNCTION;
+ def.offset = code_globals_elements;
+ def.name = code_chars_elements;
+ memset(function.argsize, 0, sizeof(function.argsize));
+ code_functions_add(function);
+ code_defs_add (def);
+ code_chars_put (name, strlen(name));
+ code_chars_add ('\0');
+
+ util_debug("ASM", "added internal function %s to function table\n", name);
+
+ /*
+ * Sanatize the numerical constant used to select the
+ * internal function. Must ensure it's all numeric, since
+ * atoi can silently drop characters from a string and still
+ * produce a valid constant that would lead to runtime problems.
+ */
+ if (util_strdigit(find))
+ util_debug("ASM", "found internal function %s, -%d\n", name, atoi(find));
+ else
+ printf("invalid internal function identifier, must be all numeric\n");
+
+ } else {
+ /*
+ * The function isn't an internal one. Determine the name and
+ * amount of arguments the function accepts by searching for
+ * the `#` (pound sign).
+ */
+ int args = 0;
+ int size = 0;
+ char *find = strchr(name, '#');
+ char *peek = find;
+
+ /*
+ * Code structures for filling after determining the correct
+ * information to add to the code write system.
+ */
+ prog_section_function function;
+ prog_section_def def;
+ if (find) {
+ find ++;
+
+ /* skip whitespace */
+ if (*find == ' ' || *find == '\t')
+ find++;
+
+ /*
+ * If the input is larger than eight, it's considered
+ * invalid and shouldn't be allowed. The QuakeC VM only
+ * allows a maximum of eight arguments.
+ */
+ if (*find == '9') {
+ printf("invalid number of arguments, must be a valid number from 0-8\n");
+ mem_d(copy);
+ mem_d(name);
+ return false;
+ }
+
+ if (*find != '0') {
+ /*
+ * if we made it this far we have a valid number for the
+ * argument count, so fall through a switch statement and
+ * do it.
+ */
+ switch (*find) {
+ case '8': args++; case '7': args++;
+ case '6': args++; case '5': args++;
+ case '4': args++; case '3': args++;
+ case '2': args++; case '1': args++;
}
-
- /* check */
- if (i != o) {
- printf("not enough operands, expected: %li, got %li\n", o, i);
+ }
+ /*
+ * We need to parse the argument size now by determining
+ * the argument identifer list used after the amount of
+ * arguments.
+ */
+ memset(function.argsize, 0, sizeof(function.argsize));
+ find ++; /* skip the number */
+ while (*find == ' ' || *find == '\t') find++;
+ while (size < args) {
+ switch (*find) {
+ case 'V': case 'v': function.argsize[size]=3; break;
+ case 'S': case 's':
+ case 'F': case 'f':
+ case 'E': case 'e': function.argsize[size]=1; break;
+ case '\0':
+ printf("missing argument identifer, expected %d\n", args);
+ return false;
+ default:
+ printf("error invalid function argument identifier\n");
+ return false;
}
-
- /* TODO: hashtable value LOAD .... etc */
- code_statements_add((prog_section_statement){
- c,
- { atof(op[0]) },
- { atof(op[1]) },
- { atof(op[2]) }
- });
- goto end;
+ size++,find++;
+ }
+ while (*find == ' ' || *find == '\t') find++;
+ if (*find != '\0') {
+ printf("too many function argument identifers expected %d\n", args);
+ return false;
+ }
+ } else {
+ printf("missing number of argument count in function %s\n", name);
+ return false;
+ }
+
+ /*
+ * Now we need to strip the name apart into it's exact size
+ * by working in the peek buffer till we hit the name again.
+ */
+ if (*peek == '#') {
+ peek --; /* '#' */
+ peek --; /* number */
+ }
+ while (*peek == ' ' || *peek == '\t') peek--;
+
+ /*
+ * We're guranteed to be exactly where we need to be in the
+ * peek buffer to null terminate and get our name from name
+ * without any garbage before or after it.
+ */
+ *++peek='\0';
+
+ /*
+ * We got valid function structure information now. Lets add
+ * the function to the code writer function table.
+ */
+ function.entry = code_statements_elements-1;
+ function.firstlocal = 0;
+ function.locals = 0;
+ function.profile = 0;
+ function.name = code_chars_elements;
+ function.file = 0;
+ function.nargs = args;
+ def.type = TYPE_FUNCTION;
+ def.offset = code_globals_elements;
+ def.name = code_chars_elements;
+ code_functions_add(function);
+ code_globals_add(code_statements_elements);
+ code_chars_put (name, strlen(name));
+ code_chars_add ('\0');
+
+ /* update assembly state */
+
+ *state = ASM_FUNCTION;
+ util_debug("ASM", "added context function %s to function table\n", name);
+ }
+
+ mem_d(copy);
+ mem_d(name);
+ return true;
+ }
+ return false;
+}
+
+static GMQCC_INLINE bool asm_parse_stmt(const char *skip, size_t line, asm_state *state) {
+ /*
+ * This parses a valid statement in assembly and adds it to the code
+ * table to be wrote. This needs to handle correct checking of all
+ * statements to ensure the correct amount of operands are passed to
+ * the menomic. This must also check for valid function calls (ensure
+ * the names selected exist in the program scope) and ensure the correct
+ * CALL* is used (depending on the amount of arguments the function
+ * is expected to take)
+ */
+ char *c = (char*)skip;
+ prog_section_statement s;
+ size_t i = 0;
+
+ /*
+ * statements are only allowed when inside a function body
+ * otherwise the assembly is invalid.
+ */
+ if (*state != ASM_FUNCTION)
+ return false;
+
+ /*
+ * Skip any possible whitespace, it's not wanted we're searching
+ * for an instruction. TODO: recrusive decent parser skip on line
+ * entry instead of pre-op.
+ */
+ while (*skip == ' ' || *skip == '\t')
+ skip++;
+
+ for (; i < sizeof(asm_instr)/sizeof(*asm_instr); i++) {
+ /*
+ * Iterate all possible instructions and check if the selected
+ * instructure in the input stream `skip` is actually a valid
+ * instruction.
+ */
+ if (!strncmp(skip, asm_instr[i].m, asm_instr[i].l)) {
+ printf("found statement %s\n", asm_instr[i].m);
+ /*
+ * Parse the operands for `i` (the instruction). The order
+ * of asm_instr is in the order of the menomic encoding so
+ * `i` == menomic encoding.
+ */
+ s.opcode = i;
+ switch (asm_instr[i].o) {
+ /*
+ * Each instruction can have from 0-3 operands; and can
+ * be used with less or more operands depending on it's
+ * selected use.
+ *
+ * DONE for example can use either 0 operands, or 1 (to
+ * emulate the effect of RETURN)
+ *
+ * TODO: parse operands correctly figure out what it is
+ * that the assembly is trying to do, i.e string table
+ * lookup, function calls etc.
+ *
+ * This needs to have a fall state, we start from the
+ * end of the string and work backwards.
+ */
+ #define OPFILL(X) \
+ do { \
+ size_t w = 0; \
+ if (!(c = strrchr(c, ','))) { \
+ printf("error, expected more operands\n"); \
+ return false; \
+ } \
+ c++; \
+ w++; \
+ while (*c == ' ' || *c == '\t') { \
+ c++; \
+ w++; \
+ } \
+ X = (const char*)c; \
+ c -= w; \
+ *c = '\0'; \
+ c = (char*)skip; \
+ } while (0)
+
+ case 3: {
+ const char *data; OPFILL(data);
+ printf("OP3: %s\n", data);
+ s.o3.s1 = 0;
}
+ case 2: {
+ const char *data; OPFILL(data);
+ printf("OP2: %s\n", data);
+ s.o2.s1 = 0;
+ }
+ case 1: {
+ while (*c == ' ' || *c == '\t') c++;
+ c += asm_instr[i].l;
+ while (*c == ' ' || *c == '\t') c++;
+
+ printf("OP1: %s\n", c);
+ s.o1.s1 = 0;
+ }
+ #undef OPFILL
}
+ /* add the statement now */
+ code_statements_add(s);
}
- #endif
-
- /* if we made it this far something is wrong */
- if (*skip != '\0')
- printf("%li: Invalid statement %s, expression, or decleration\n", line, skip);
-
- end:
- mem_d(data);
- line ++;
}
- asm_clear();
+ return true;
+}
+
+void asm_parse(FILE *fp) {
+ char *data = NULL;
+ char *skip = NULL;
+ long line = 1; /* current line */
+ size_t size = 0; /* size of line */
+ asm_state state = ASM_NULL;
+
+ #define asm_end(x) \
+ do { \
+ mem_d(data); \
+ mem_d(copy); \
+ line++; \
+ util_debug("ASM", x); \
+ } while (0); continue
+
+ while ((data = asm_getline (&size, fp)) != NULL) {
+ char *copy = util_strsws(data); /* skip whitespace */
+ skip = util_strrnl(copy); /* delete newline */
+
+ /* TODO: statement END check */
+ if (state == ASM_FUNCTION)
+ state = ASM_NULL;
+
+ if (asm_parse_type(skip, line, &state)){ asm_end("asm_parse_type\n"); }
+ if (asm_parse_func(skip, line, &state)){ asm_end("asm_parse_func\n"); }
+ if (asm_parse_stmt(skip, line, &state)){ asm_end("asm_parse_stmt\n"); }
+ }
+ #undef asm_end
+ asm_dumps();
+ asm_clear();
}