X-Git-Url: https://git.xonotic.org/?p=xonotic%2Fgmqcc.git;a=blobdiff_plain;f=asm.c;h=c73f25cf7956b4121a6f5e2c6c6088ef8a2991db;hp=58a4ff9fb00478b80de8ede26ec7fd567d3f670b;hb=74cb075665a8e389cd4a5492bef158bb2f59de85;hpb=403901d6ee0cbcebb3723f2b8732a6421f85816a diff --git a/asm.c b/asm.c index 58a4ff9..c73f25c 100644 --- a/asm.c +++ b/asm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012 + * Copyright (C) 2012 * Dale Weiler * * Permission is hereby granted, free of charge, to any person obtaining a copy of @@ -22,21 +22,31 @@ */ #include "gmqcc.h" /* - * Some assembler keywords not part of the opcodes above: these are - * for creating functions, or constants. + * Following parse states: + * ASM_FUNCTION -- in a function accepting input statements + * .... */ -const char *const asm_keys[] = { - "FLOAT" , /* define float */ - "VECTOR" , /* define vector */ - "ENTITY" , /* define ent */ - "FIELD" , /* define field */ - "STRING" , /* define string */ - "FUNCTION" -}; - -static char *const asm_getline(size_t *byte, FILE *fp) { +typedef enum { + ASM_NULL, + ASM_FUNCTION +} asm_state; + +typedef struct { + char *name; + char type; /* type, float, vector, string, function*/ + char elem; /* 0=x, 1=y, or 2=Z? */ + int offset; /* location in globals */ + bool isconst; +} asm_sym; +VECTOR_MAKE(asm_sym, asm_symbols); + +/* + * Assembly text processing: this handles the internal collection + * of text to allow parsing and assemblation. + */ +static char* asm_getline(size_t *byte, FILE *fp) { char *line = NULL; - ssize_t read = util_getline(&line, byte, fp); + size_t read = util_getline(&line, byte, fp); *byte = read; if (read == -1) { mem_d (line); @@ -45,37 +55,49 @@ static char *const asm_getline(size_t *byte, FILE *fp) { return line; } +/* + * Entire external interface for main.c - to perform actual assemblation + * of assembly files. + */ void asm_init(const char *file, FILE **fp) { *fp = fopen(file, "r"); code_init(); } - void asm_close(FILE *fp) { fclose(fp); - code_write(); + code_write("program.dat"); +} +void asm_clear() { + size_t i = 0; + for (; i < asm_symbols_elements; i++) + mem_d(asm_symbols_data[i].name); + mem_d(asm_symbols_data); } /* - * Following parse states: - * ASM_FUNCTION -- in a function accepting input statements - * .... + * Dumps all values of all constants and assembly related + * information obtained during the assembly procedure. */ -typedef enum { - ASM_NULL, - ASM_FUNCTION -} asm_state; - -typedef struct { - char *name; /* name of constant */ - int offset; /* location in globals */ -} globals; -VECTOR_MAKE(globals, assembly_constants); - -void asm_clear() { +void asm_dumps() { size_t i = 0; - for (; i < assembly_constants_elements; i++) - mem_d(assembly_constants_data[i].name); - mem_d(assembly_constants_data); + for (; i < asm_symbols_elements; i++) { + asm_sym *g = &asm_symbols_data[i]; + if (!g->isconst) continue; + switch (g->type) { + case TYPE_VECTOR: { + util_debug("ASM", "vector %s %c[%f]\n", g->name, + (g->elem == 0) ? 'X' :( + (g->elem == 1) ? 'Y' : + (g->elem == 2) ? 'Z' :' '), + INT2FLT(code_globals_data[g->offset]) + ); + break; + } + case TYPE_FUNCTION: { + util_debug("ASM", "function %s\n", g->name); + } + } + } } /* @@ -84,12 +106,86 @@ void asm_clear() { * globals with no assignments are globals. Function body types * are locals. */ -static inline bool asm_parse_type(const char *skip, size_t line, asm_state *state) { - if (strstr(skip, "FLOAT:") == &skip[0]) { return true; } - if (strstr(skip, "VECTOR:") == &skip[0]) { return true; } - if (strstr(skip, "ENTITY:") == &skip[0]) { return true; } - if (strstr(skip, "FIELD:") == &skip[0]) { return true; } - if (strstr(skip, "STRING:") == &skip[0]) { return true; } +static GMQCC_INLINE bool asm_parse_type(const char *skip, size_t line, asm_state *state) { + if ((strstr(skip, "FLOAT:") != &skip[0]) && + (strstr(skip, "VECTOR:") != &skip[0]) && + (strstr(skip, "ENTITY:") != &skip[0]) && + (strstr(skip, "FIELD:") != &skip[0]) && + (strstr(skip, "STRING:") != &skip[0])) return false; + + /* TODO: determine if constant, global, or local */ + switch (*skip) { + /* VECTOR */ case 'V': { + float val1; + float val2; + float val3; + asm_sym sym; + + char *find = (char*)skip + 7; + char *name = (char*)skip + 7; + while (*find == ' ' || *find == '\t') find++; + + /* constant? */ + if (strchr(find, ',')) { + /* strip name */ + *strchr((name = util_strdup(find)), ',')='\0'; + /* find data */ + find += strlen(name) + 1; + while (*find == ' ' || *find == '\t') find++; + /* valid name */ + if (util_strupper(name) || isdigit(*name)) { + printf("invalid name for vector variable\n"); + mem_d(name); + } + /* + * Parse all three elements of the vector. This will only + * pass the first try if we hit a constant, otherwise it's + * a global. + */ + #define PARSE_ELEMENT(X,Y,Z) \ + if (isdigit(*X) || *X == '-'||*X == '+') { \ + bool negated = (*X == '-'); \ + if (negated || *X == '+') { X++; } \ + Y = (negated)?-atof(X):atof(X); \ + X = strchr(X, ','); \ + Z \ + } + + PARSE_ELEMENT(find, val1, { find ++; while (*find == ' ') { find ++; } }); + PARSE_ELEMENT(find, val2, { find ++; while (*find == ' ') { find ++; } }); + PARSE_ELEMENT(find, val3, { find ++; /* no need to do anything here */ }); + #undef PARSE_ELEMENT + #define BUILD_ELEMENT(X,Y) \ + sym.type = TYPE_VECTOR; \ + sym.name = util_strdup(name); \ + sym.elem = (X); \ + sym.offset = code_globals_elements; \ + asm_symbols_add(sym); \ + code_globals_add(FLT2INT(Y)) + BUILD_ELEMENT(0, val1); + BUILD_ELEMENT(1, val2); + BUILD_ELEMENT(2, val3); + #undef BUILD_ELEMENT + mem_d(name); + } else { + /* TODO global not constant */ + } + break; + } + /* ENTITY */ case 'E': { + const char *find = skip + 7; + while (*find == ' ' || *find == '\t') find++; + printf("found ENTITY %s\n", find); + break; + } + /* STRING */ case 'S': { + const char *find = skip + 7; + while (*find == ' ' || *find == '\t') find++; + printf("found STRING %s\n", find); + break; + } + } + return false; } @@ -98,13 +194,24 @@ static inline bool asm_parse_type(const char *skip, size_t line, asm_state *stat * names among other things. Ensures valid name as well, and even * internal engine function selection. */ -static inline bool asm_parse_func(const char *skip, size_t line, asm_state *state) { - if (*state == ASM_FUNCTION && (strstr(skip, "FUNCTION:") == &skip[0])) +static GMQCC_INLINE bool asm_parse_func(const char *skip, size_t line, asm_state *state) { + if (*state == ASM_FUNCTION) return false; if (strstr(skip, "FUNCTION:") == &skip[0]) { - char *copy = util_strsws(skip+10); - char *name = util_strchp(copy, strchr(copy, '\0')); + asm_sym sym; + char *look = util_strdup(skip+10); + char *copy = look; + char *name = NULL; + while (*copy == ' ' || *copy == '\t') copy++; + + memset(&sym, 0, sizeof(asm_sym)); + + /* + * Chop the function name out of the string, this allocates + * a new string. + */ + name = util_strchp(copy, strchr(copy, '\0')); /* TODO: failure system, missing name */ if (!name) { @@ -114,14 +221,211 @@ static inline bool asm_parse_func(const char *skip, size_t line, asm_state *stat return false; } /* TODO: failure system, invalid name */ - if (!isalpha(*name) || isupper(*name)) { + if (!isalpha(*name) || util_strupper(name)) { printf("invalid identifer for function name\n"); mem_d(copy); mem_d(name); return false; } - printf("NAME: %s\n", name); + /* + * Function could be internal function, look for $ + * to determine this. + */ + if (strchr(name, ',')) { + char *find = strchr(name, ',') + 1; + prog_section_function function; + prog_section_def def; + memset(&function, 0, sizeof(prog_section_function)); + memset(&def, 0, sizeof(prog_section_def)); + + /* skip whitespace */ + while (*find == ' ' || *find == '\t') + find++; + + if (*find != '$') { + printf("expected $ for internal function selection, got %s instead\n", find); + mem_d(copy); + mem_d(name); + return false; + } + find ++; + if (!isdigit(*find)) { + printf("invalid internal identifier, expected valid number\n"); + mem_d(copy); + mem_d(name); + return false; + } + *strchr(name, ',')='\0'; + + /* + * Now add the following items to the code system: + * function + * definition (optional) + * global (optional) + * name + */ + function.entry = -atoi(find); + function.firstlocal = 0; + function.locals = 0; + function.profile = 0; + function.name = code_chars_elements; + function.file = 0; + function.nargs = 0; + def.type = TYPE_FUNCTION; + def.offset = code_globals_elements; + def.name = code_chars_elements; + code_functions_add(function); + code_defs_add (def); + code_chars_put (name, strlen(name)); + code_chars_add ('\0'); + sym.type = TYPE_FUNCTION; + sym.name = util_strdup(name); + sym.offset = function.entry; + asm_symbols_add(sym); + + util_debug("ASM", "added internal function %s to function table\n", name); + + /* + * Sanatize the numerical constant used to select the + * internal function. Must ensure it's all numeric, since + * atoi can silently drop characters from a string and still + * produce a valid constant that would lead to runtime problems. + */ + if (util_strdigit(find)) + util_debug("ASM", "found internal function %s, -%d\n", name, atoi(find)); + else + printf("invalid internal function identifier, must be all numeric\n"); + + } else { + /* + * The function isn't an internal one. Determine the name and + * amount of arguments the function accepts by searching for + * the `#` (pound sign). + */ + int args = 0; + int size = 0; + char *find = strchr(name, '#'); + char *peek = find; + + /* + * Code structures for filling after determining the correct + * information to add to the code write system. + */ + prog_section_function function; + prog_section_def def; + memset(&function, 0, sizeof(prog_section_function)); + memset(&def, 0, sizeof(prog_section_def)); + if (find) { + find ++; + + /* skip whitespace */ + if (*find == ' ' || *find == '\t') + find++; + + /* + * If the input is larger than eight, it's considered + * invalid and shouldn't be allowed. The QuakeC VM only + * allows a maximum of eight arguments. + */ + if (*find == '9') { + printf("invalid number of arguments, must be a valid number from 0-8\n"); + mem_d(copy); + mem_d(name); + return false; + } + + if (*find != '0') { + /* + * if we made it this far we have a valid number for the + * argument count, so fall through a switch statement and + * do it. + */ + switch (*find) { + case '8': args++; case '7': args++; + case '6': args++; case '5': args++; + case '4': args++; case '3': args++; + case '2': args++; case '1': args++; + } + } + /* + * We need to parse the argument size now by determining + * the argument identifer list used after the amount of + * arguments. + */ + memset(function.argsize, 0, sizeof(function.argsize)); + find ++; /* skip the number */ + while (*find == ' ' || *find == '\t') find++; + while (size < args) { + switch (*find) { + case 'V': case 'v': function.argsize[size]=3; break; + case 'S': case 's': + case 'F': case 'f': + case 'E': case 'e': function.argsize[size]=1; break; + case '\0': + printf("missing argument identifer, expected %d\n", args); + return false; + default: + printf("error invalid function argument identifier\n"); + return false; + } + size++,find++; + } + while (*find == ' ' || *find == '\t') find++; + if (*find != '\0') { + printf("too many function argument identifers expected %d\n", args); + return false; + } + } else { + printf("missing number of argument count in function %s\n", name); + return false; + } + + /* + * Now we need to strip the name apart into it's exact size + * by working in the peek buffer till we hit the name again. + */ + if (*peek == '#') { + peek --; /* '#' */ + peek --; /* number */ + } + while (*peek == ' ' || *peek == '\t') peek--; + + /* + * We're guranteed to be exactly where we need to be in the + * peek buffer to null terminate and get our name from name + * without any garbage before or after it. + */ + *++peek='\0'; + + /* + * We got valid function structure information now. Lets add + * the function to the code writer function table. + */ + function.entry = code_statements_elements; + function.firstlocal = 0; + function.locals = 0; + function.profile = 0; + function.name = code_chars_elements; + function.file = 0; + function.nargs = args; + def.type = TYPE_FUNCTION; + def.offset = code_globals_elements; + def.name = code_chars_elements; + code_functions_add(function); + code_globals_add (code_statements_elements); + code_chars_put (name, strlen(name)); + code_chars_add ('\0'); + sym.type = TYPE_FUNCTION; + sym.name = util_strdup(name); + sym.offset = function.entry; + asm_symbols_add(sym); + + /* update assembly state */ + + *state = ASM_FUNCTION; + util_debug("ASM", "added context function %s to function table\n", name); + } mem_d(copy); mem_d(name); @@ -130,9 +434,163 @@ static inline bool asm_parse_func(const char *skip, size_t line, asm_state *stat return false; } +static GMQCC_INLINE bool asm_parse_stmt(const char *skip, size_t line, asm_state *state) { + /* + * This parses a valid statement in assembly and adds it to the code + * table to be wrote. This needs to handle correct checking of all + * statements to ensure the correct amount of operands are passed to + * the menomic. This must also check for valid function calls (ensure + * the names selected exist in the program scope) and ensure the correct + * CALL* is used (depending on the amount of arguments the function + * is expected to take) + */ + enum { + EXPECT_FUNCTION = 1, + EXPECT_VARIABLE = 2, + EXPECT_VALUE = 3 + }; + + char *c = (char*)skip; + size_t i = 0; + char expect = 0; + prog_section_statement s; + memset(&s, 0, sizeof(prog_section_statement)); + + /* + * statements are only allowed when inside a function body + * otherwise the assembly is invalid. + */ + if (*state != ASM_FUNCTION) + return false; + + /* + * Skip any possible whitespace, it's not wanted we're searching + * for an instruction. TODO: recrusive decent parser skip on line + * entry instead of pre-op. + */ + while (*skip == ' ' || *skip == '\t') + skip++; + + for (; i < sizeof(asm_instr)/sizeof(*asm_instr); i++) { + /* + * Iterate all possible instructions and check if the selected + * instructure in the input stream `skip` is actually a valid + * instruction. + */ + if (!strncmp(skip, asm_instr[i].m, asm_instr[i].l)) { + + /* + * We hit the end of a function scope, retarget the state + * and add a DONE statement to the statment table. + */ + if (i == AINSTR_END) { + s.opcode = i; + code_statements_add(s); + *state = ASM_NULL; + return true; + } + + /* + * Check the instruction type to see what sort of data + * it's expected to have. + */ + if (i >= INSTR_CALL0 && i <= INSTR_CALL8) + expect = EXPECT_FUNCTION; + else + expect = EXPECT_VARIABLE; + + util_debug( + "ASM", + "found statement %s expecting: `%s` (%ld operand(s))\n", + asm_instr[i].m, + (expect == EXPECT_FUNCTION)?"function name":( + (expect == EXPECT_VARIABLE)?"variable name":( + (expect == EXPECT_VALUE ?"value" : "unknown"))), + asm_instr[i].o + ); + /* + * Parse the operands for `i` (the instruction). The order + * of asm_instr is in the order of the menomic encoding so + * `i` == menomic encoding. + */ + s.opcode = i; + switch (asm_instr[i].o) { + /* + * Each instruction can have from 0-3 operands; and can + * be used with less or more operands depending on it's + * selected use. + * + * DONE for example can use either 0 operands, or 1 (to + * emulate the effect of RETURN) + * + * TODO: parse operands correctly figure out what it is + * that the assembly is trying to do, i.e string table + * lookup, function calls etc. + * + * This needs to have a fall state, we start from the + * end of the string and work backwards. + */ + #define OPEATS(X,Y) X##Y + #define OPCCAT(X,Y) OPEATS(X,Y) + #define OPLOAD(X,Y) \ + do { \ + util_debug("ASM", "loading operand data ...\n"); \ + if (expect == EXPECT_VARIABLE) { \ + size_t f=0; \ + for (; f