From 3df51c597935aa4b60da7a27883779ff584209e3 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller
Date: Sat, 18 Oct 2014 13:49:13 +0200
Subject: [PATCH] adding -fsplit-vector-parameters, todo: float-lookup should get optimized as commented

---
Review notes:
 - builder->const_floats is now released in ir_builder_generate() once the
   vectors have been split; it is introduced by this patch and was never
   freed anywhere.
 - The line structure of this patch was restored from a copy in which all
   newlines had been collapsed; indentation and in-line alignment are
   reconstructed, so apply with whitespace differences ignored if the
   context does not match exactly.

 ir.c     | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 ir.h     |  15 +++---
 opts.def |   1 +
 3 files changed, 169 insertions(+), 12 deletions(-)

diff --git a/ir.c b/ir.c
index ef782f9..f4f386d 100644
--- a/ir.c
+++ b/ir.c
@@ -1121,6 +1121,18 @@ ir_value* ir_value_var(const char *name, int storetype, int vtype)
     return self;
 }
 
+/* create a float immediate global; optionally remember it in const_floats */
+static ir_value* ir_builder_imm_float(ir_builder *self, float value, bool add_to_list) {
+    ir_value *v = ir_value_var("#IMMEDIATE", store_global, TYPE_FLOAT);
+    v->hasvalue = true;
+    v->constval.vfloat = value;
+
+    vec_push(self->globals, v);
+    if (add_to_list)
+        vec_push(self->const_floats, v);
+    return v;
+}
+
 ir_value* ir_value_vector_member(ir_value *self, unsigned int member)
 {
     char *name;
@@ -1206,9 +1218,11 @@ void ir_value_delete(ir_value* self)
         if (self->vtype == TYPE_STRING)
             mem_d((void*)self->constval.vstring);
     }
-    for (i = 0; i < 3; ++i) {
-        if (self->members[i])
-            ir_value_delete(self->members[i]);
+    if (!(self->flags & IR_FLAG_SPLIT_VECTOR)) {
+        for (i = 0; i < 3; ++i) {
+            if (self->members[i])
+                ir_value_delete(self->members[i]);
+        }
     }
     vec_free(self->reads);
     vec_free(self->writes);
@@ -3129,7 +3143,21 @@ static bool gen_blocks_recursive(code_t *code, ir_function *func, ir_block *bloc
                     stmt.opcode = type_store_instr[param->vtype];
                 stmt.o1.u1 = ir_value_code_addr(param);
                 stmt.o2.u1 = OFS_PARM0 + 3 * p;
-                code_push_statement(code, &stmt, instr->context);
+
+                if (param->vtype == TYPE_VECTOR && (param->flags & IR_FLAG_SPLIT_VECTOR)) {
+                    /* fetch 3 separate floats */
+                    stmt.opcode = INSTR_STORE_F;
+                    stmt.o1.u1 = ir_value_code_addr(param->members[0]);
+                    code_push_statement(code, &stmt, instr->context);
+                    stmt.o2.u1++;
+                    stmt.o1.u1 = ir_value_code_addr(param->members[1]);
+                    code_push_statement(code, &stmt, instr->context);
+                    stmt.o2.u1++;
+                    stmt.o1.u1 = ir_value_code_addr(param->members[2]);
+                    code_push_statement(code, &stmt, instr->context);
+                }
+                else
+                    code_push_statement(code, &stmt, instr->context);
             }
             /* Now handle extparams */
             first = vec_size(instr->params);
@@ -3158,7 +3186,20 @@ static bool gen_blocks_recursive(code_t *code, ir_function *func, ir_block *bloc
                     stmt.opcode = type_store_instr[param->vtype];
                 stmt.o1.u1 = ir_value_code_addr(param);
                 stmt.o2.u1 = ir_value_code_addr(targetparam);
-                code_push_statement(code, &stmt, instr->context);
+                if (param->vtype == TYPE_VECTOR && (param->flags & IR_FLAG_SPLIT_VECTOR)) {
+                    /* fetch 3 separate floats */
+                    stmt.opcode = INSTR_STORE_F;
+                    stmt.o1.u1 = ir_value_code_addr(param->members[0]);
+                    code_push_statement(code, &stmt, instr->context);
+                    stmt.o2.u1++;
+                    stmt.o1.u1 = ir_value_code_addr(param->members[1]);
+                    code_push_statement(code, &stmt, instr->context);
+                    stmt.o2.u1++;
+                    stmt.o1.u1 = ir_value_code_addr(param->members[2]);
+                    code_push_statement(code, &stmt, instr->context);
+                }
+                else
+                    code_push_statement(code, &stmt, instr->context);
             }
 
             stmt.opcode = INSTR_CALL0 + vec_size(instr->params);
@@ -3635,6 +3676,10 @@ static bool ir_builder_gen_global(ir_builder *self, ir_value *global, bool isloc
     prog_section_def_t def;
     bool pushdef = opts.optimizeoff;
 
+    /* we don't generate split-vectors */
+    if (global->vtype == TYPE_VECTOR && (global->flags & IR_FLAG_SPLIT_VECTOR))
+        return true;
+
     def.type = global->vtype;
     def.offset = vec_size(self->code->globals);
     def.name = 0;
@@ -3885,12 +3930,120 @@ static bool ir_builder_gen_field(ir_builder *self, ir_value *field)
     return field->code.globaladdr >= 0;
 }
 
+/* collect the float constants and immediates that vector splits may reuse */
+static void ir_builder_collect_reusables(ir_builder *builder) {
+    size_t i;
+    ir_value **reusables = NULL;
+    for (i = 0; i < vec_size(builder->globals); ++i) {
+        ir_value *value = builder->globals[i];
+        if (value->vtype != TYPE_FLOAT || !value->hasvalue)
+            continue;
+        if (value->cvq == CV_CONST || (value->name && value->name[0] == '#')) {
+            vec_push(reusables, value);
+        }
+    }
+    builder->const_floats = reusables;
+}
+
+/* Split a vector global that is never written, has no member accesses and is
+ * only read as a call parameter into three float constants (marks it SPLIT_VECTOR). */
+static void ir_builder_split_vector(ir_builder *self, ir_value *vec) {
+    size_t i, count;
+    ir_value* found[3] = { NULL, NULL, NULL };
+
+    /* must not be written to */
+    if (vec_size(vec->writes))
+        return;
+    /* must not be trying to access individual members */
+    if (vec->members[0] || vec->members[1] || vec->members[2])
+        return;
+    /* should be actually used otherwise it won't be generated anyway */
+    count = vec_size(vec->reads);
+    if (!count)
+        return;
+
+    /* may only be used directly as function parameters, so if we find some other instruction cancel */
+    for (i = 0; i != count; ++i) {
+        /* we only split vectors if they're used directly as parameter to a call only! */
+        ir_instr *user = vec->reads[i];
+        if ((user->opcode < INSTR_CALL0 || user->opcode > INSTR_CALL8) && user->opcode != VINSTR_NRCALL)
+            return;
+    }
+
+    vec->flags |= IR_FLAG_SPLIT_VECTOR;
+
+    /* find existing floats making up the split */
+    count = vec_size(self->const_floats);
+    for (i = 0; i != count; ++i) {
+        ir_value *c = self->const_floats[i];
+        if (!found[0] && c->constval.vfloat == vec->constval.vvec.x)
+            found[0] = c;
+        if (!found[1] && c->constval.vfloat == vec->constval.vvec.y)
+            found[1] = c;
+        if (!found[2] && c->constval.vfloat == vec->constval.vvec.z)
+            found[2] = c;
+        if (found[0] && found[1] && found[2])
+            break;
+    }
+
+    /* generate floats for not yet found components */
+    if (!found[0])
+        found[0] = ir_builder_imm_float(self, vec->constval.vvec.x, true);
+    if (!found[1]) {
+        if (vec->constval.vvec.y == vec->constval.vvec.x)
+            found[1] = found[0];
+        else
+            found[1] = ir_builder_imm_float(self, vec->constval.vvec.y, true);
+    }
+    if (!found[2]) {
+        if (vec->constval.vvec.z == vec->constval.vvec.x)
+            found[2] = found[0];
+        else if (vec->constval.vvec.z == vec->constval.vvec.y)
+            found[2] = found[1];
+        else
+            found[2] = ir_builder_imm_float(self, vec->constval.vvec.z, true);
+    }
+
+    /* the .members array should be safe to use here. */
+    vec->members[0] = found[0];
+    vec->members[1] = found[1];
+    vec->members[2] = found[2];
+
+    /* register the readers for these floats */
+    count = vec_size(vec->reads);
+    for (i = 0; i != count; ++i) {
+        vec_push(found[0]->reads, vec->reads[i]);
+        vec_push(found[1]->reads, vec->reads[i]);
+        vec_push(found[2]->reads, vec->reads[i]);
+    }
+}
+
+/* try to split every vector global whose name starts with '#' */
+static void ir_builder_split_vectors(ir_builder *self) {
+    size_t i, count = vec_size(self->globals);
+    for (i = 0; i != count; ++i) {
+        ir_value *v = self->globals[i];
+        if (v->vtype != TYPE_VECTOR || !v->name || v->name[0] != '#')
+            continue;
+        ir_builder_split_vector(self, self->globals[i]);
+    }
+}
+
 bool ir_builder_generate(ir_builder *self, const char *filename)
 {
     prog_section_statement_t stmt;
     size_t i;
     char *lnofile = NULL;
 
+    if (OPTS_FLAG(SPLIT_VECTOR_PARAMETERS)) {
+        ir_builder_collect_reusables(self);
+        if (vec_size(self->const_floats) > 0)
+            ir_builder_split_vectors(self);
+        /* the lookup list is only needed while splitting and nothing else
+         * releases it, so free it here */
+        vec_free(self->const_floats);
+    }
+
     for (i = 0; i < vec_size(self->fields); ++i)
     {
         ir_builder_prepare_field(self->code, self->fields[i]);
@@ -3956,7 +4109,7 @@ bool ir_builder_generate(ir_builder *self, const char *filename)
     }
 
     if (vec_size(self->code->globals) >= 65536) {
-        irerror(vec_last(self->globals)->context, "This progs file would require more globals than the metadata can handle. Bailing out.");
+        irerror(vec_last(self->globals)->context, "This progs file would require more globals than the metadata can handle (%u). Bailing out.", (unsigned int)vec_size(self->code->globals));
         return false;
     }
 
diff --git a/ir.h b/ir.h
index d0fd787..731fbbc 100644
--- a/ir.h
+++ b/ir.h
@@ -43,12 +43,14 @@ typedef struct {
 } ir_life_entry_t;
 
 enum {
-    IR_FLAG_HAS_ARRAYS = 1 << 0,
-    IR_FLAG_HAS_UNINITIALIZED = 1 << 1,
-    IR_FLAG_HAS_GOTO = 1 << 2,
-    IR_FLAG_INCLUDE_DEF = 1 << 3,
-    IR_FLAG_ERASEABLE = 1 << 4,
-    IR_FLAG_BLOCK_COVERAGE = 1 << 5,
+    IR_FLAG_HAS_ARRAYS = 1 << 0,
+    IR_FLAG_HAS_UNINITIALIZED = 1 << 1,
+    IR_FLAG_HAS_GOTO = 1 << 2,
+    IR_FLAG_INCLUDE_DEF = 1 << 3,
+    IR_FLAG_ERASEABLE = 1 << 4,
+    IR_FLAG_BLOCK_COVERAGE = 1 << 5,
+
+    IR_FLAG_SPLIT_VECTOR = 1 << 6,
 
     IR_FLAG_LAST,
     IR_FLAG_MASK_NO_OVERLAP = (IR_FLAG_HAS_ARRAYS | IR_FLAG_HAS_UNINITIALIZED),
@@ -254,6 +256,7 @@ struct ir_builder_s {
     ir_function **functions;
     ir_value **globals;
     ir_value **fields;
+    ir_value **const_floats; /* for reusing them in vector-splits, TODO: sort this or use a radix-tree */
 
     ht htfunctions;
     ht htglobals;
diff --git a/opts.def b/opts.def
index 5a4746c..92cff14 100644
--- a/opts.def
+++ b/opts.def
@@ -59,6 +59,7 @@
     GMQCC_DEFINE_FLAG(SORT_OPERANDS)
     GMQCC_DEFINE_FLAG(EMULATE_STATE)
     GMQCC_DEFINE_FLAG(ARITHMETIC_EXCEPTIONS)
+    GMQCC_DEFINE_FLAG(SPLIT_VECTOR_PARAMETERS)
 #endif
 
 /* warning flags */
-- 
2.39.2