]> git.xonotic.org Git - xonotic/gmqcc.git/blobdiff - ir.c
-Olocaltemps -> -Olocal-temps for consistency; added manpage entry; removed leftover...
[xonotic/gmqcc.git] / ir.c
diff --git a/ir.c b/ir.c
index 48cc067d29b9b89d60c08681fa42cc800bb8e177..ce98e6c252c49bf340f261862c5e597f0f2c313c 100644 (file)
--- a/ir.c
+++ b/ir.c
@@ -45,7 +45,7 @@ const char *type_name[TYPE_COUNT] = {
     "array"
 };
 
-size_t type_sizeof[TYPE_COUNT] = {
+size_t type_sizeof_[TYPE_COUNT] = {
     1, /* TYPE_VOID     */
     1, /* TYPE_STRING   */
     1, /* TYPE_FLOAT    */
@@ -285,6 +285,8 @@ ir_builder* ir_builder_new(const char *modulename)
     self->htfields    = util_htnew(IR_HT_SIZE);
     self->htfunctions = util_htnew(IR_HT_SIZE);
 
+    self->max_locals  = 0;
+
     self->str_immediate = 0;
     self->name = NULL;
     if (!ir_builder_set_name(self, modulename)) {
@@ -433,6 +435,8 @@ ir_function* ir_function_new(ir_builder* owner, int outtype)
         mem_d(self);
         return NULL;
     }
+    self->flags = 0;
+
     self->owner = owner;
     self->context.file = "<@no context>";
     self->context.line = 0;
@@ -527,7 +531,8 @@ static bool instr_is_operation(uint16_t op)
              (op >= INSTR_LOAD_F && op <= INSTR_LOAD_FNC) ||
              (op == INSTR_ADDRESS) ||
              (op >= INSTR_NOT_F  && op <= INSTR_NOT_FNC) ||
-             (op >= INSTR_AND    && op <= INSTR_BITOR) );
+             (op >= INSTR_AND    && op <= INSTR_BITOR) ||
+             (op >= INSTR_CALL0  && op <= INSTR_CALL8) );
 }
 
 bool ir_function_pass_peephole(ir_function *self)
@@ -604,7 +609,8 @@ bool ir_function_pass_peephole(ir_function *self)
                     if (inot->_ops[0] != value ||
                         inot->opcode < INSTR_NOT_F ||
                         inot->opcode > INSTR_NOT_FNC ||
-                        inot->opcode == INSTR_NOT_V) /* can't do this one */
+                        inot->opcode == INSTR_NOT_V || /* can't do these */
+                        inot->opcode == INSTR_NOT_S)
                     {
                         break;
                     }
@@ -638,7 +644,7 @@ bool ir_function_pass_peephole(ir_function *self)
     return true;
 }
 
-bool ir_function_pass_tailcall(ir_function *self)
+bool ir_function_pass_tailrecursion(ir_function *self)
 {
     size_t b, p;
 
@@ -738,8 +744,8 @@ bool ir_function_finalize(ir_function *self)
     }
 
     if (OPTS_OPTIMIZATION(OPTIM_TAIL_RECURSION)) {
-        if (!ir_function_pass_tailcall(self)) {
-            irerror(self->context, "tailcall optimization pass broke something in `%s`", self->name);
+        if (!ir_function_pass_tailrecursion(self)) {
+            irerror(self->context, "tail-recursion optimization pass broke something in `%s`", self->name);
             return false;
         }
     }
@@ -988,6 +994,8 @@ ir_value* ir_value_var(const char *name, int storetype, int vtype)
 
 ir_value* ir_value_vector_member(ir_value *self, unsigned int member)
 {
+    char     *name;
+    size_t    len;
     ir_value *m;
     if (member >= 3)
         return NULL;
@@ -995,9 +1003,22 @@ ir_value* ir_value_vector_member(ir_value *self, unsigned int member)
     if (self->members[member])
         return self->members[member];
 
+    if (self->name) {
+        len = strlen(self->name);
+        name = (char*)mem_a(len + 3);
+        memcpy(name, self->name, len);
+        name[len+0] = '_';
+        name[len+1] = 'x' + member;
+        name[len+2] = '\0';
+    }
+    else
+        name = NULL;
+
     if (self->vtype == TYPE_VECTOR)
     {
-        m = ir_value_var(self->name, self->store, TYPE_FLOAT);
+        m = ir_value_var(name, self->store, TYPE_FLOAT);
+        if (name)
+            mem_d(name);
         if (!m)
             return NULL;
         m->context = self->context;
@@ -1009,7 +1030,9 @@ ir_value* ir_value_vector_member(ir_value *self, unsigned int member)
     {
         if (self->fieldtype != TYPE_VECTOR)
             return NULL;
-        m = ir_value_var(self->name, self->store, TYPE_FIELD);
+        m = ir_value_var(name, self->store, TYPE_FIELD);
+        if (name)
+            mem_d(name);
         if (!m)
             return NULL;
         m->fieldtype = TYPE_FLOAT;
@@ -1028,6 +1051,13 @@ ir_value* ir_value_vector_member(ir_value *self, unsigned int member)
     return m;
 }
 
+static GMQCC_INLINE size_t ir_value_sizeof(const ir_value *self)
+{
+    if (self->vtype == TYPE_FIELD && self->fieldtype == TYPE_VECTOR)
+        return type_sizeof_[TYPE_VECTOR];
+    return type_sizeof_[self->vtype];
+}
+
 ir_value* ir_value_out(ir_function *owner, const char *name, int storetype, int vtype)
 {
     ir_value *v = ir_value_var(name, storetype, vtype);
@@ -1105,7 +1135,7 @@ static char *ir_strdup(const char *str)
 {
     if (str && !*str) {
         /* actually dup empty strings */
-        char *out = mem_a(1);
+        char *out = (char*)mem_a(1);
         *out = 0;
         return out;
     }
@@ -1350,13 +1380,20 @@ bool ir_values_overlap(const ir_value *a, const ir_value *b)
  *IR main operations
  */
 
+static bool ir_check_unreachable(ir_block *self)
+{
+    /* The IR should never have to deal with unreachable code */
+    if (!self->final/* || OPTS_FLAG(ALLOW_UNREACHABLE_CODE)*/)
+        return true;
+    irerror(self->context, "unreachable statement (%s)", self->label);
+    return false;
+}
+
 bool ir_block_create_store_op(ir_block *self, lex_ctx ctx, int op, ir_value *target, ir_value *what)
 {
     ir_instr *in;
-    if (self->final) {
-        irerror(self->context, "unreachable statement (%s)", self->label);
+    if (!ir_check_unreachable(self))
         return false;
-    }
 
     if (target->store == store_value &&
         (op < INSTR_STOREP_F || op > INSTR_STOREP_FNC))
@@ -1371,7 +1408,7 @@ bool ir_block_create_store_op(ir_block *self, lex_ctx ctx, int op, ir_value *tar
     if (!in)
         return false;
 
-    if (!ir_instr_op(in, 0, target, true) ||
+    if (!ir_instr_op(in, 0, target, (op < INSTR_STOREP_F || op > INSTR_STOREP_FNC)) ||
         !ir_instr_op(in, 1, what, false))
     {
         ir_instr_delete(in);
@@ -1431,10 +1468,8 @@ bool ir_block_create_storep(ir_block *self, lex_ctx ctx, ir_value *target, ir_va
 bool ir_block_create_return(ir_block *self, lex_ctx ctx, ir_value *v)
 {
     ir_instr *in;
-    if (self->final) {
-        irerror(self->context, "unreachable statement (%s)", self->label);
+    if (!ir_check_unreachable(self))
         return false;
-    }
     self->final = true;
     self->is_return = true;
     in = ir_instr_new(ctx, self, INSTR_RETURN);
@@ -1454,10 +1489,8 @@ bool ir_block_create_if(ir_block *self, lex_ctx ctx, ir_value *v,
                         ir_block *ontrue, ir_block *onfalse)
 {
     ir_instr *in;
-    if (self->final) {
-        irerror(self->context, "unreachable statement (%s)", self->label);
+    if (!ir_check_unreachable(self))
         return false;
-    }
     self->final = true;
     /*in = ir_instr_new(ctx, self, (v->vtype == TYPE_STRING ? INSTR_IF_S : INSTR_IF_F));*/
     in = ir_instr_new(ctx, self, VINSTR_COND);
@@ -1484,10 +1517,8 @@ bool ir_block_create_if(ir_block *self, lex_ctx ctx, ir_value *v,
 bool ir_block_create_jump(ir_block *self, lex_ctx ctx, ir_block *to)
 {
     ir_instr *in;
-    if (self->final) {
-        irerror(self->context, "unreachable statement (%s)", self->label);
+    if (!ir_check_unreachable(self))
         return false;
-    }
     self->final = true;
     in = ir_instr_new(ctx, self, VINSTR_JUMP);
     if (!in)
@@ -1503,32 +1534,16 @@ bool ir_block_create_jump(ir_block *self, lex_ctx ctx, ir_block *to)
 
 bool ir_block_create_goto(ir_block *self, lex_ctx ctx, ir_block *to)
 {
-    ir_instr *in;
-    if (self->final) {
-        irerror(self->context, "unreachable statement (%s)", self->label);
-        return false;
-    }
-    self->final = true;
-    in = ir_instr_new(ctx, self, INSTR_GOTO);
-    if (!in)
-        return false;
-
-    in->bops[0] = to;
-    vec_push(self->instr, in);
-
-    vec_push(self->exits, to);
-    vec_push(to->entries, self);
-    return true;
+    self->owner->flags |= IR_FLAG_HAS_GOTO;
+    return ir_block_create_jump(self, ctx, to);
 }
 
 ir_instr* ir_block_create_phi(ir_block *self, lex_ctx ctx, const char *label, int ot)
 {
     ir_value *out;
     ir_instr *in;
-    if (self->final) {
-        irerror(self->context, "unreachable statement (%s)", self->label);
-        return false;
-    }
+    if (!ir_check_unreachable(self))
+        return NULL;
     in = ir_instr_new(ctx, self, VINSTR_PHI);
     if (!in)
         return NULL;
@@ -1574,10 +1589,8 @@ ir_instr* ir_block_create_call(ir_block *self, lex_ctx ctx, const char *label, i
 {
     ir_value *out;
     ir_instr *in;
-    if (self->final) {
-        irerror(self->context, "unreachable statement (%s)", self->label);
-        return false;
-    }
+    if (!ir_check_unreachable(self))
+        return NULL;
     in = ir_instr_new(ctx, self, (noreturn ? VINSTR_NRCALL : INSTR_CALL0));
     if (!in)
         return NULL;
@@ -1598,6 +1611,15 @@ ir_instr* ir_block_create_call(ir_block *self, lex_ctx ctx, const char *label, i
         return NULL;
     }
     vec_push(self->instr, in);
+    /*
+    if (noreturn) {
+        if (!ir_block_create_return(self, ctx, NULL)) {
+            compile_error(ctx, "internal error: failed to generate dummy-return instruction");
+            ir_instr_delete(in);
+            return NULL;
+        }
+    }
+    */
     return in;
 }
 
@@ -1817,175 +1839,6 @@ ir_value* ir_block_create_load_from_ent(ir_block *self, lex_ctx ctx, const char
     return ir_block_create_general_instr(self, ctx, label, op, ent, field, outype);
 }
 
-ir_value* ir_block_create_add(ir_block *self, lex_ctx ctx,
-                              const char *label,
-                              ir_value *left, ir_value *right)
-{
-    int op = 0;
-    int l = left->vtype;
-    int r = right->vtype;
-    if (l == r) {
-        switch (l) {
-            default:
-                irerror(self->context, "invalid type for ir_block_create_add: %s", type_name[l]);
-                return NULL;
-            case TYPE_FLOAT:
-                op = INSTR_ADD_F;
-                break;
-#if 0
-            case TYPE_INTEGER:
-                op = INSTR_ADD_I;
-                break;
-#endif
-            case TYPE_VECTOR:
-                op = INSTR_ADD_V;
-                break;
-        }
-    } else {
-#if 0
-        if ( (l == TYPE_FLOAT && r == TYPE_INTEGER) )
-            op = INSTR_ADD_FI;
-        else if ( (l == TYPE_INTEGER && r == TYPE_FLOAT) )
-            op = INSTR_ADD_IF;
-        else
-#endif
-        {
-            irerror(self->context, "invalid type for ir_block_create_add: %s", type_name[l]);
-            return NULL;
-        }
-    }
-    return ir_block_create_binop(self, ctx, label, op, left, right);
-}
-
-ir_value* ir_block_create_sub(ir_block *self, lex_ctx ctx,
-                              const char *label,
-                              ir_value *left, ir_value *right)
-{
-    int op = 0;
-    int l = left->vtype;
-    int r = right->vtype;
-    if (l == r) {
-
-        switch (l) {
-            default:
-                irerror(self->context, "invalid type for ir_block_create_sub: %s", type_name[l]);
-                return NULL;
-            case TYPE_FLOAT:
-                op = INSTR_SUB_F;
-                break;
-#if 0
-            case TYPE_INTEGER:
-                op = INSTR_SUB_I;
-                break;
-#endif
-            case TYPE_VECTOR:
-                op = INSTR_SUB_V;
-                break;
-        }
-    } else {
-#if 0
-        if ( (l == TYPE_FLOAT && r == TYPE_INTEGER) )
-            op = INSTR_SUB_FI;
-        else if ( (l == TYPE_INTEGER && r == TYPE_FLOAT) )
-            op = INSTR_SUB_IF;
-        else
-#endif
-        {
-            irerror(self->context, "invalid type for ir_block_create_sub: %s", type_name[l]);
-            return NULL;
-        }
-    }
-    return ir_block_create_binop(self, ctx, label, op, left, right);
-}
-
-ir_value* ir_block_create_mul(ir_block *self, lex_ctx ctx,
-                              const char *label,
-                              ir_value *left, ir_value *right)
-{
-    int op = 0;
-    int l = left->vtype;
-    int r = right->vtype;
-    if (l == r) {
-
-        switch (l) {
-            default:
-                irerror(self->context, "invalid type for ir_block_create_mul: %s", type_name[l]);
-                return NULL;
-            case TYPE_FLOAT:
-                op = INSTR_MUL_F;
-                break;
-#if 0
-            case TYPE_INTEGER:
-                op = INSTR_MUL_I;
-                break;
-#endif
-            case TYPE_VECTOR:
-                op = INSTR_MUL_V;
-                break;
-        }
-    } else {
-        if ( (l == TYPE_VECTOR && r == TYPE_FLOAT) )
-            op = INSTR_MUL_VF;
-        else if ( (l == TYPE_FLOAT && r == TYPE_VECTOR) )
-            op = INSTR_MUL_FV;
-#if 0
-        else if ( (l == TYPE_VECTOR && r == TYPE_INTEGER) )
-            op = INSTR_MUL_VI;
-        else if ( (l == TYPE_INTEGER && r == TYPE_VECTOR) )
-            op = INSTR_MUL_IV;
-        else if ( (l == TYPE_FLOAT && r == TYPE_INTEGER) )
-            op = INSTR_MUL_FI;
-        else if ( (l == TYPE_INTEGER && r == TYPE_FLOAT) )
-            op = INSTR_MUL_IF;
-#endif
-        else {
-            irerror(self->context, "invalid type for ir_block_create_mul: %s", type_name[l]);
-            return NULL;
-        }
-    }
-    return ir_block_create_binop(self, ctx, label, op, left, right);
-}
-
-ir_value* ir_block_create_div(ir_block *self, lex_ctx ctx,
-                              const char *label,
-                              ir_value *left, ir_value *right)
-{
-    int op = 0;
-    int l = left->vtype;
-    int r = right->vtype;
-    if (l == r) {
-
-        switch (l) {
-            default:
-                irerror(self->context, "invalid type for ir_block_create_div: %s", type_name[l]);
-                return NULL;
-            case TYPE_FLOAT:
-                op = INSTR_DIV_F;
-                break;
-#if 0
-            case TYPE_INTEGER:
-                op = INSTR_DIV_I;
-                break;
-#endif
-        }
-    } else {
-#if 0
-        if ( (l == TYPE_VECTOR && r == TYPE_FLOAT) )
-            op = INSTR_DIV_VF;
-        else if ( (l == TYPE_FLOAT && r == TYPE_INTEGER) )
-            op = INSTR_DIV_FI;
-        else if ( (l == TYPE_INTEGER && r == TYPE_FLOAT) )
-            op = INSTR_DIV_IF;
-        else
-#endif
-        {
-            irerror(self->context, "invalid type for ir_block_create_div: %s", type_name[l]);
-            return NULL;
-        }
-    }
-    return ir_block_create_binop(self, ctx, label, op, left, right);
-}
-
 /* PHI resolving breaks the SSA, and must thus be the last
  * step before life-range calculation.
  */
@@ -2153,7 +2006,7 @@ static void ir_block_enumerate(ir_block *self, size_t *_eid)
 void ir_function_enumerate(ir_function *self)
 {
     size_t i;
-    size_t instruction_id = 0;
+    size_t instruction_id = 1;
     for (i = 0; i < vec_size(self->blocks); ++i)
     {
         self->blocks[i]->eid = i;
@@ -2168,6 +2021,10 @@ bool ir_function_calculate_liferanges(ir_function *self)
     size_t i;
     bool changed;
 
+    /* parameters live at 0 */
+    for (i = 0; i < vec_size(self->params); ++i)
+        ir_value_life_merge(self->locals[i], 0);
+
     do {
         self->run_id++;
         changed = false;
@@ -2185,8 +2042,25 @@ bool ir_function_calculate_liferanges(ir_function *self)
         ir_block *block = self->blocks[0];
         for (i = 0; i < vec_size(block->living); ++i) {
             ir_value *v = block->living[i];
-            if (v->memberof || v->store != store_local)
+            if (v->store != store_local)
                 continue;
+            if ((v->members[0] && v->members[1] && v->members[2])) {
+                /* all vector members have been accessed - only treat this as uninitialized
+                 * if any of them is also uninitialized.
+                 */
+                if (!vec_ir_value_find(block->living, v->members[0], NULL) &&
+                    !vec_ir_value_find(block->living, v->members[1], NULL) &&
+                    !vec_ir_value_find(block->living, v->members[2], NULL))
+                {
+                    continue;
+                }
+            }
+            if (v->memberof) {
+                /* A member is only uninitialized if the whole vector is also uninitialized */
+                if (!vec_ir_value_find(block->living, v->memberof, NULL))
+                    continue;
+            }
+            self->flags |= IR_FLAG_HAS_UNINITIALIZED;
             if (irwarning(v->context, WARN_USED_UNINITIALIZED,
                           "variable `%s` may be used uninitialized in this function", v->name))
             {
@@ -2212,7 +2086,7 @@ typedef struct {
 static bool function_allocator_alloc(function_allocator *alloc, const ir_value *var)
 {
     ir_value *slot;
-    size_t vsize = type_sizeof[var->vtype];
+    size_t vsize = ir_value_sizeof(var);
 
     slot = ir_value_var("reg", store_global, var->vtype);
     if (!slot)
@@ -2253,7 +2127,7 @@ bool ir_function_allocate_locals(ir_function *self)
 
     for (i = 0; i < vec_size(self->locals); ++i)
     {
-        if (!OPTS_OPTIMIZATION(OPTIM_LOCALTEMPS))
+        if (!OPTS_OPTIMIZATION(OPTIM_LOCAL_TEMPS))
             self->locals[i]->unique_life = true;
         if (!function_allocator_alloc(&alloc, self->locals[i]))
             goto error;
@@ -2279,7 +2153,7 @@ bool ir_function_allocate_locals(ir_function *self)
              * will be required later when overlapping temps + locals
              */
             if (a < vec_size(self->params) &&
-                alloc.sizes[a] < type_sizeof[v->vtype])
+                alloc.sizes[a] < ir_value_sizeof(v))
             {
                 continue;
             }
@@ -2291,8 +2165,8 @@ bool ir_function_allocate_locals(ir_function *self)
                 goto error;
 
             /* adjust size for this slot */
-            if (alloc.sizes[a] < type_sizeof[v->vtype])
-                alloc.sizes[a] = type_sizeof[v->vtype];
+            if (alloc.sizes[a] < ir_value_sizeof(v))
+                alloc.sizes[a] = ir_value_sizeof(v);
 
             self->values[i]->code.local = a;
             break;
@@ -2339,6 +2213,7 @@ error:
 cleanup:
     for (i = 0; i < vec_size(alloc.locals); ++i)
         ir_value_delete(alloc.locals[i]);
+    vec_free(alloc.unique);
     vec_free(alloc.locals);
     vec_free(alloc.sizes);
     vec_free(alloc.positions);
@@ -2458,26 +2333,6 @@ static bool ir_block_life_propagate(ir_block *self, ir_block *prev, bool *change
     { --i;
         instr = self->instr[i];
 
-        /* PHI operands are always read operands */
-        for (p = 0; p < vec_size(instr->phi); ++p)
-        {
-            value = instr->phi[p].value;
-            if (value->memberof)
-                value = value->memberof;
-            if (!vec_ir_value_find(self->living, value, NULL))
-                vec_push(self->living, value);
-        }
-
-        /* call params are read operands too */
-        for (p = 0; p < vec_size(instr->params); ++p)
-        {
-            value = instr->params[p];
-            if (value->memberof)
-                value = value->memberof;
-            if (!vec_ir_value_find(self->living, value, NULL))
-                vec_push(self->living, value);
-        }
-
         /* See which operands are read and write operands */
         ir_op_read_write(instr->opcode, &read, &write);
 
@@ -2494,15 +2349,15 @@ static bool ir_block_life_propagate(ir_block *self, ir_block *prev, bool *change
             *changed = *changed || tempbool;
         }
 
-        /* Go through the 3 main operands */
+        /* Go through the 3 main operands
+         * writes first, then reads
+         */
         for (o = 0; o < 3; ++o)
         {
             if (!instr->_ops[o]) /* no such operand */
                 continue;
 
             value = instr->_ops[o];
-            if (value->memberof)
-                value = value->memberof;
 
             /* We only care about locals */
             /* we also calculate parameter liferanges so that locals
@@ -2512,13 +2367,6 @@ static bool ir_block_life_propagate(ir_block *self, ir_block *prev, bool *change
                 value->store != store_param)
                 continue;
 
-            /* read operands */
-            if (read & (1<<o))
-            {
-                if (!vec_ir_value_find(self->living, value, NULL))
-                    vec_push(self->living, value);
-            }
-
             /* write operands */
             /* When we write to a local, we consider it "dead" for the
              * remaining upper part of the function, since in SSA a value
@@ -2551,16 +2399,51 @@ static bool ir_block_life_propagate(ir_block *self, ir_block *prev, bool *change
                      * (A) doesn't.
                      */
                     tempbool = ir_value_life_merge(value, instr->eid);
-                    /*
-                    if (tempbool)
-                        con_err( "value added id %s %i\n", value->name, (int)instr->eid);
-                    */
                     *changed = *changed || tempbool;
                     /* Then remove */
                     vec_remove(self->living, idx, 1);
                 }
             }
         }
+
+        for (o = 0; o < 3; ++o)
+        {
+            if (!instr->_ops[o]) /* no such operand */
+                continue;
+
+            value = instr->_ops[o];
+
+            /* We only care about locals */
+            /* we also calculate parameter liferanges so that locals
+             * can take up parameter slots */
+            if (value->store != store_value &&
+                value->store != store_local &&
+                value->store != store_param)
+                continue;
+
+            /* read operands */
+            if (read & (1<<o))
+            {
+                if (!vec_ir_value_find(self->living, value, NULL))
+                    vec_push(self->living, value);
+            }
+        }
+        /* PHI operands are always read operands */
+        for (p = 0; p < vec_size(instr->phi); ++p)
+        {
+            value = instr->phi[p].value;
+            if (!vec_ir_value_find(self->living, value, NULL))
+                vec_push(self->living, value);
+        }
+
+        /* call params are read operands too */
+        for (p = 0; p < vec_size(instr->params); ++p)
+        {
+            value = instr->params[p];
+            if (!vec_ir_value_find(self->living, value, NULL))
+                vec_push(self->living, value);
+        }
+
         /* (A) */
         tempbool = ir_block_living_add_instr(self, instr->eid);
         /*con_err( "living added values\n");*/
@@ -2597,7 +2480,7 @@ static bool ir_block_life_propagate(ir_block *self, ir_block *prev, bool *change
  *
  * Breaking conventions is annoying...
  */
-static bool ir_builder_gen_global(ir_builder *self, ir_value *global, bool islocal);
+static bool ir_builder_gen_global(ir_builder *self, ir_value *global, bool islocal, bool defs_only);
 
 static bool gen_global_field(ir_value *global)
 {
@@ -2876,11 +2759,12 @@ tailcall:
             code_push_statement(&stmt, instr->context.line);
 
             retvalue = instr->_ops[0];
-            if (retvalue && retvalue->store != store_return && vec_size(retvalue->life))
+            if (retvalue && retvalue->store != store_return &&
+                (retvalue->store == store_global || vec_size(retvalue->life)))
             {
                 /* not to be kept in OFS_RETURN */
-                if (retvalue->vtype == TYPE_FIELD)
-                    stmt.opcode = field_store_instr[retvalue->vtype];
+                if (retvalue->vtype == TYPE_FIELD && OPTS_FLAG(ADJUST_VECTOR_FIELDS))
+                    stmt.opcode = field_store_instr[retvalue->fieldtype];
                 else
                     stmt.opcode = type_store_instr[retvalue->vtype];
                 stmt.o1.u1 = OFS_RETURN;
@@ -2997,9 +2881,6 @@ static bool gen_global_function(ir_builder *ir, ir_value *global)
     ir_function          *irfun;
 
     size_t i;
-#ifndef NEW_ALLOC_STRAT
-    size_t local_var_end;
-#endif
 
     if (!global->hasvalue || (!global->constval.vfunc))
     {
@@ -3020,58 +2901,11 @@ static bool gen_global_function(ir_builder *ir, ir_value *global)
         if ((int32_t)i >= fun.nargs)
             fun.argsize[i] = 0;
         else
-            fun.argsize[i] = type_sizeof[irfun->params[i]];
+            fun.argsize[i] = type_sizeof_[irfun->params[i]];
     }
 
-    fun.firstlocal = vec_size(code_globals);
-
-#ifndef NEW_ALLOC_STRAT
-    local_var_end = fun.firstlocal;
-    for (i = 0; i < vec_size(irfun->locals); ++i) {
-        if (!ir_builder_gen_global(ir, irfun->locals[i], true)) {
-            irerror(irfun->locals[i]->context, "Failed to generate local %s", irfun->locals[i]->name);
-            return false;
-        }
-    }
-    if (vec_size(irfun->locals)) {
-        ir_value *last = vec_last(irfun->locals);
-        local_var_end = last->code.globaladdr;
-        if (last->vtype == TYPE_FIELD && last->fieldtype == TYPE_VECTOR)
-            local_var_end += type_sizeof[TYPE_VECTOR];
-        else
-            local_var_end += type_sizeof[last->vtype];
-    }
-    for (i = 0; i < vec_size(irfun->values); ++i)
-    {
-        /* generate code.globaladdr for ssa values */
-        ir_value *v = irfun->values[i];
-        ir_value_code_setaddr(v, local_var_end + v->code.local);
-    }
-    for (i = 0; i < irfun->allocated_locals; ++i) {
-        /* fill the locals with zeros */
-        vec_push(code_globals, 0);
-    }
-
-    fun.locals = vec_size(code_globals) - fun.firstlocal;
-#else
-    fun.locals = irfun->allocated_locals;
-    for (i = 0; i < vec_size(irfun->locals); ++i) {
-        if (!ir_builder_gen_global(ir, irfun->locals[i], true)) {
-            irerror(irfun->locals[i]->context, "Failed to generate local %s", irfun->locals[i]->name);
-            return false;
-        }
-        ir_value_code_setaddr(irfun->locals[i], fun.firstlocal + irfun->locals[i]->code.local);
-    }
-    for (i = vec_size(code_globals) - fun.firstlocal; i < fun.locals; ++i) {
-        vec_push(code_globals, 0);
-    }
-    for (i = 0; i < vec_size(irfun->values); ++i)
-    {
-        /* generate code.globaladdr for ssa values */
-        ir_value *v = irfun->values[i];
-        ir_value_code_setaddr(v, fun.firstlocal + v->code.local);
-    }
-#endif
+    fun.firstlocal = 0;
+    fun.locals     = irfun->allocated_locals;
 
     if (irfun->builtin)
         fun.entry = irfun->builtin+1;
@@ -3141,6 +2975,40 @@ static bool gen_function_extparam_copy(ir_function *self)
     return true;
 }
 
+static bool gen_function_locals(ir_builder *ir, ir_value *global)
+{
+    prog_section_function *def;
+    ir_function           *irfun;
+    size_t                 i;
+    uint32_t               firstlocal;
+
+    irfun = global->constval.vfunc;
+    def   = code_functions + irfun->code_function_def;
+
+    if (opts.g || !OPTS_OPTIMIZATION(OPTIM_OVERLAP_LOCALS) || (irfun->flags & IR_FLAG_MASK_NO_OVERLAP))
+        firstlocal = def->firstlocal = vec_size(code_globals);
+    else {
+        firstlocal = def->firstlocal = ir->first_common_local;
+        ++opts_optimizationcount[OPTIM_OVERLAP_LOCALS];
+    }
+
+    for (i = vec_size(code_globals); i < firstlocal + irfun->allocated_locals; ++i)
+        vec_push(code_globals, 0);
+    for (i = 0; i < vec_size(irfun->locals); ++i) {
+        ir_value_code_setaddr(irfun->locals[i], firstlocal + irfun->locals[i]->code.local);
+        if (!ir_builder_gen_global(ir, irfun->locals[i], true, true)) {
+            irerror(irfun->locals[i]->context, "failed to generate local %s", irfun->locals[i]->name);
+            return false;
+        }
+    }
+    for (i = 0; i < vec_size(irfun->values); ++i)
+    {
+        ir_value *v = irfun->values[i];
+        ir_value_code_setaddr(v, firstlocal + v->code.local);
+    }
+    return true;
+}
+
 static bool gen_global_function_code(ir_builder *ir, ir_value *global)
 {
     prog_section_function *fundef;
@@ -3168,6 +3036,10 @@ static bool gen_global_function_code(ir_builder *ir, ir_value *global)
     fundef = &code_functions[irfun->code_function_def];
 
     fundef->entry = vec_size(code_statements);
+    if (!gen_function_locals(ir, global)) {
+        irerror(irfun->context, "Failed to generate locals for function %s", irfun->name);
+        return false;
+    }
     if (!gen_function_extparam_copy(irfun)) {
         irerror(irfun->context, "Failed to generate extparam-copy code for function %s", irfun->name);
         return false;
@@ -3184,7 +3056,7 @@ static void gen_vector_defs(prog_section_def def, const char *name)
     char  *component;
     size_t len, i;
 
-    if (!name || OPTS_FLAG(SINGLE_VECTOR_DEFS))
+    if (!name || name[0] == '#' || OPTS_FLAG(SINGLE_VECTOR_DEFS))
         return;
 
     def.type = TYPE_FLOAT;
@@ -3235,26 +3107,42 @@ static void gen_vector_fields(prog_section_field fld, const char *name)
     }
 }
 
-static bool ir_builder_gen_global(ir_builder *self, ir_value *global, bool islocal)
+static bool ir_builder_gen_global(ir_builder *self, ir_value *global, bool islocal, bool defs_only)
 {
     size_t           i;
     int32_t         *iptr;
     prog_section_def def;
+    bool             pushdef = false;
 
-    def.type   = global->vtype;
-    def.offset = vec_size(code_globals);
-
-    if (global->name) {
-        if (global->name[0] == '#') {
-            if (!self->str_immediate)
-                self->str_immediate = code_genstring("IMMEDIATE");
-            def.name = global->code.name = self->str_immediate;
+    if (opts.g || !islocal)
+    {
+        pushdef = true;
+        def.type   = global->vtype;
+        def.offset = vec_size(code_globals);
+
+        if (global->name) {
+            if (global->name[0] == '#') {
+                if (!self->str_immediate)
+                    self->str_immediate = code_genstring("IMMEDIATE");
+                def.name = global->code.name = self->str_immediate;
+            }
+            else
+                def.name = global->code.name = code_genstring(global->name);
         }
         else
-            def.name = global->code.name = code_genstring(global->name);
+            def.name   = 0;
+        if (defs_only) {
+            def.offset = ir_value_code_addr(global);
+            vec_push(code_defs, def);
+            if (global->vtype == TYPE_VECTOR)
+                gen_vector_defs(def, global->name);
+            else if (global->vtype == TYPE_FIELD && global->fieldtype == TYPE_VECTOR)
+                gen_vector_defs(def, global->name);
+            return true;
+        }
     }
-    else
-        def.name   = 0;
+    if (defs_only)
+        return true;
 
     switch (global->vtype)
     {
@@ -3279,14 +3167,17 @@ static bool ir_builder_gen_global(ir_builder *self, ir_value *global, bool isloc
         ir_value_code_setaddr(global, vec_size(code_globals));
         vec_push(code_globals, 0);
         /* Add the def */
-        vec_push(code_defs, def);
+        if (pushdef) vec_push(code_defs, def);
         return true;
     case TYPE_POINTER:
-        vec_push(code_defs, def);
+        if (pushdef) vec_push(code_defs, def);
         return gen_global_pointer(global);
     case TYPE_FIELD:
-        vec_push(code_defs, def);
-        gen_vector_defs(def, global->name);
+        if (pushdef) {
+            vec_push(code_defs, def);
+            if (global->fieldtype == TYPE_VECTOR)
+                gen_vector_defs(def, global->name);
+        }
         return gen_global_field(global);
     case TYPE_ENTITY:
         /* fall through */
@@ -3301,7 +3192,7 @@ static bool ir_builder_gen_global(ir_builder *self, ir_value *global, bool isloc
         }
         if (!islocal && global->cvq != CV_CONST)
             def.type |= DEF_SAVEGLOBAL;
-        vec_push(code_defs, def);
+        if (pushdef) vec_push(code_defs, def);
 
         return global->code.globaladdr >= 0;
     }
@@ -3315,7 +3206,7 @@ static bool ir_builder_gen_global(ir_builder *self, ir_value *global, bool isloc
         }
         if (!islocal && global->cvq != CV_CONST)
             def.type |= DEF_SAVEGLOBAL;
-        vec_push(code_defs, def);
+        if (pushdef) vec_push(code_defs, def);
         return global->code.globaladdr >= 0;
     }
     case TYPE_VECTOR:
@@ -3327,23 +3218,25 @@ static bool ir_builder_gen_global(ir_builder *self, ir_value *global, bool isloc
             vec_push(code_globals, iptr[0]);
             if (global->code.globaladdr < 0)
                 return false;
-            for (d = 1; d < type_sizeof[global->vtype]; ++d) {
+            for (d = 1; d < type_sizeof_[global->vtype]; ++d) {
                 vec_push(code_globals, iptr[d]);
             }
         } else {
             vec_push(code_globals, 0);
             if (global->code.globaladdr < 0)
                 return false;
-            for (d = 1; d < type_sizeof[global->vtype]; ++d) {
+            for (d = 1; d < type_sizeof_[global->vtype]; ++d) {
                 vec_push(code_globals, 0);
             }
         }
         if (!islocal && global->cvq != CV_CONST)
             def.type |= DEF_SAVEGLOBAL;
 
-        vec_push(code_defs, def);
-        def.type &= ~DEF_SAVEGLOBAL;
-        gen_vector_defs(def, global->name);
+        if (pushdef) {
+            vec_push(code_defs, def);
+            def.type &= ~DEF_SAVEGLOBAL;
+            gen_vector_defs(def, global->name);
+        }
         return global->code.globaladdr >= 0;
     }
     case TYPE_FUNCTION:
@@ -3359,13 +3252,13 @@ static bool ir_builder_gen_global(ir_builder *self, ir_value *global, bool isloc
         }
         if (!islocal && global->cvq != CV_CONST)
             def.type |= DEF_SAVEGLOBAL;
-        vec_push(code_defs, def);
+        if (pushdef) vec_push(code_defs, def);
         return true;
     case TYPE_VARIANT:
         /* assume biggest type */
             ir_value_code_setaddr(global, vec_size(code_globals));
             vec_push(code_globals, 0);
-            for (i = 1; i < type_sizeof[TYPE_VARIANT]; ++i)
+            for (i = 1; i < type_sizeof_[TYPE_VARIANT]; ++i)
                 vec_push(code_globals, 0);
             return true;
     default:
@@ -3378,7 +3271,7 @@ static bool ir_builder_gen_global(ir_builder *self, ir_value *global, bool isloc
 
 static void ir_builder_prepare_field(ir_value *field)
 {
-    field->code.fieldaddr = code_alloc_field(type_sizeof[field->fieldtype]);
+    field->code.fieldaddr = code_alloc_field(type_sizeof_[field->fieldtype]);
 }
 
 static bool ir_builder_gen_field(ir_builder *self, ir_value *field)
@@ -3455,7 +3348,7 @@ bool ir_builder_generate(ir_builder *self, const char *filename)
 {
     prog_section_statement stmt;
     size_t i;
-    char   *lnofile = NULL;
+    char  *lnofile = NULL;
 
     code_init();
 
@@ -3466,9 +3359,17 @@ bool ir_builder_generate(ir_builder *self, const char *filename)
 
     for (i = 0; i < vec_size(self->globals); ++i)
     {
-        if (!ir_builder_gen_global(self, self->globals[i], false)) {
+        if (!ir_builder_gen_global(self, self->globals[i], false, false)) {
             return false;
         }
+        if (self->globals[i]->vtype == TYPE_FUNCTION) {
+            ir_function *func = self->globals[i]->constval.vfunc;
+            if (func && self->max_locals < func->allocated_locals &&
+                !(func->flags & IR_FLAG_MASK_NO_OVERLAP))
+            {
+                self->max_locals = func->allocated_locals;
+            }
+        }
     }
 
     for (i = 0; i < vec_size(self->fields); ++i)
@@ -3478,6 +3379,12 @@ bool ir_builder_generate(ir_builder *self, const char *filename)
         }
     }
 
+    /* generate common locals */
+    self->first_common_local = vec_size(code_globals);
+    for (i = 0; i < self->max_locals; ++i) {
+        vec_push(code_globals, 0);
+    }
+
     /* generate function code */
     for (i = 0; i < vec_size(self->globals); ++i)
     {
@@ -3542,8 +3449,8 @@ bool ir_builder_generate(ir_builder *self, const char *filename)
 
 #define IND_BUFSZ 1024
 
-#ifdef WIN32
-# define strncat(dst, src, sz) strncat_s(dst, sz, src, _TRUNCATE)
+#ifdef _MSC_VER
+#   define strncat(dst, src, sz) strncat_s(dst, sz, src, _TRUNCATE)
 #endif
 
 const char *qc_opname(int op)