+++ /dev/null
-/*\r
-when I say JIT, I mean load time, not execution time.\r
-\r
-notes:\r
- qc jump offsets are all constants. we have no variable offset jumps (other than function calls/returns)\r
- field remapping... fields are in place, and cannot be adjusted. if a field is not set to 0, it's assumed to be a constant.\r
-\r
-optimisations:\r
- none at the moment...\r
- instructions need to be chained. stuff that writes to C should be cacheable, etc. maybe we don't even need to do the write to C\r
- it should also be possible to fold in eq+ifnot, so none of this silly storing of floats in equality tests\r
-\r
- this means that we need to track which vars are cached and in what form: fpreg, ireg+floatasint, ireg+float.\r
- certain qccx hacks can use fpu operations on ints, so do what the instruction says, rather than considering an add an add regardless of types.\r
-\r
- OP_AND_F, OP_OR_F etc will generally result in ints, and we should be able to keep them as ints if they combine with other ints.\r
-\r
- some instructions are jump sites. any cache must be flushed before the start of the instruction.\r
- some variables are locals, and will only ever be written by a single instruction, then read by the following instruction. such temps do not need to be written, or are overwritten later in the function anyway.\r
- such locals need to be calculated PER FUNCTION as (fte)qcc can overlap locals making multiple distinct locals on a single offset.\r
-\r
- store locals on a proper stack instead of the current absurd mechanism.\r
-\r
- eax - tmp\r
- ebx - prinst->edicttable\r
- ecx - tmp\r
- edx - tmp\r
- esi - debug opcode number\r
- edi - tmp (because it's preserved by subfunctions)\r
- ebp -\r
-\r
- to use gas to provide binary opcodes:\r
- vim -N blob.s && as blob.s && objdump.exe -d a.out\r
-\r
-\r
- notable mods to test:\r
- prydon gate, due to fpu mangling to carry values between maps\r
-*/\r
-\r
-#define PROGSUSED\r
-#include "progsint.h"\r
-\r
-#ifdef QCJIT\r
-\r
-#ifndef _WIN32\r
-#include <sys/mman.h>\r
-#endif\r
-\r
-static float ta, tb, nullfloat=0; /*ta/tb: scratch slots that the generated code uses for fistp/fild in the float bit ops (they briefly hold integer bits despite the float type); nullfloat: the zero operand for the generated fcomps tests*/\r
-\r
-struct jitstate /*book-keeping for generating, patching up and later freeing a buffer of native code*/\r
-{\r
- unsigned int *statementjumps; //pending jump fixups, three entries per jump: code offset of the 4-byte field, target statement number, bias (written by Emit4ByteJump, consumed by FixupJumps). [MAX_STATEMENTS*3]\r
- unsigned char **statementoffsets; //native code address at which each statement starts. [MAX_STATEMENTS]\r
- unsigned int numjumps; /*number of entries used in statementjumps (grows by 3 per recorded jump)*/\r
- unsigned char *code; /*output buffer that the Emit* helpers append to*/\r
- unsigned int codesize; /*number of bytes emitted into code so far*/\r
- unsigned int jitstatements; /*statement count; PR_CloseJit derives the munmap length from it*/\r
-\r
- float *glob; /*base of the globals; the GCache_* helpers add an offset to this to form absolute addresses*/\r
- unsigned int cachedglobal; /*global offset currently held in a register (only touched by the disabled USECACHE code)*/\r
- unsigned int cachereg; /*register holding cachedglobal. NOTE(review): the disabled USECACHE code spells this 'cachedreg' - confirm which name is intended*/\r
-};\r
-\r
-/*\r
-Appends one byte of machine code to the end of the jit's output buffer.\r
-No bounds check is made here: the buffer must already be big enough.\r
-*/\r
-static void EmitByte(struct jitstate *jit, unsigned char byte)\r
-{\r
-	unsigned char *out = jit->code + jit->codesize;\r
-\r
-	*out = byte;\r
-	jit->codesize += 1;\r
-}\r
-/*\r
-Appends a 32bit value to the output buffer, least significant byte first.\r
-*/\r
-static void Emit4Byte(struct jitstate *jit, unsigned int value)\r
-{\r
-	unsigned int shift;\r
-\r
-	for (shift = 0; shift < 32; shift += 8)\r
-	{\r
-		jit->code[jit->codesize] = (value>>shift)&0xff;\r
-		jit->codesize++;\r
-	}\r
-}\r
-/*\r
-Appends an absolute address as a 32bit immediate.\r
-The pointer is squeezed into an unsigned int first, so anything above 32 bits is lost.\r
-*/\r
-static void EmitAdr(struct jitstate *jit, void *value)\r
-{\r
-	unsigned int address = (unsigned int)value;\r
-\r
-	Emit4Byte(jit, address);\r
-}\r
-/*\r
-Appends the raw 32bit representation of a float (as an immediate operand).\r
-The bits are reinterpreted through a union, not converted numerically.\r
-*/\r
-static void EmitFloat(struct jitstate *jit, float value)\r
-{\r
-	union\r
-	{\r
-		float asfloat;\r
-		unsigned int asbits;\r
-	} pun;\r
-\r
-	pun.asfloat = value;\r
-	Emit4Byte(jit, pun.asbits);\r
-}\r
-/*\r
-Appends a 16bit value to the output buffer, low byte first.\r
-*/\r
-static void Emit2Byte(struct jitstate *jit, unsigned short value)\r
-{\r
-	unsigned char *out = &jit->code[jit->codesize];\r
-\r
-	out[0] = value & 0xff;\r
-	out[1] = (value>>8) & 0xff;\r
-	jit->codesize += 2;\r
-}\r
-\r
-/*\r
-Appends the 32bit distance from a position in the output buffer to 'func'.\r
-The position is the current write position plus 'bias'. The call sites in this file emit the\r
-0xe8 call opcode and then pass bias=4, which makes the distance relative to the byte that\r
-follows the 4-byte field being written.\r
-Both addresses are handled as 32bit quantities.\r
-*/\r
-static void EmitFOffset(struct jitstate *jit, void *func, int bias)\r
-{\r
-	union {void *f; unsigned int i;} target;\r
-	unsigned int origin;\r
-\r
-	target.f = func;\r
-	origin = (unsigned int)&jit->code[jit->codesize+bias];\r
-	Emit4Byte(jit, target.i - origin);\r
-}\r
-\r
-/*\r
-Reserves a 4-byte jump displacement whose value is not known yet.\r
-A (code position, target statement, offset) triple is appended to statementjumps so that\r
-FixupJumps can fill the field in once every statement's address is known.\r
-*/\r
-static void Emit4ByteJump(struct jitstate *jit, int statementnum, int offset)\r
-{\r
-	unsigned int *rec = &jit->statementjumps[jit->numjumps];\r
-\r
-	rec[0] = jit->codesize;\r
-	rec[1] = statementnum;\r
-	rec[2] = offset;\r
-	jit->numjumps += 3;\r
-\r
-	//skip over the field itself, its contents get written later\r
-	jit->codesize += 4;\r
-}\r
-\r
-enum /*register identifiers. the first eight are listed in x86 register-number order, which LOADREG/STOREREG rely on when shifting them into the ModRM byte*/\r
-{\r
- REG_EAX,\r
- REG_ECX,\r
- REG_EDX,\r
- REG_EBX,\r
- REG_ESP,\r
- REG_EBP,\r
- REG_ESI,\r
- REG_EDI,\r
-\r
- /*I'm not going to list S1 here, as that makes things too awkward*/\r
- REG_S0, /*the x87 stack top: the GCache_* helpers use flds/fstps for this one*/\r
- REG_NONE /*no register: the GCache_* helpers emit nothing for it*/\r
-};\r
-/*\r
-Code-emitting helper macros.\r
-They expand to several statements and use the single-argument Emit* wrapper macros, so they\r
-need a local 'jit' in scope and must not be used as the unbraced body of an if/else/loop.\r
-They are deliberately not wrapped in do{}while(0): some call sites omit the trailing semicolon.\r
-*/\r
-//xor %sr,%dr\r
-#define XOR(sr,dr) EmitByte(0x31);EmitByte(0xc0 | ((sr)<<3) | (dr));\r
-#define CLEARREG(reg) XOR(reg,reg)\r
-//mov addr,%reg (eax has a shorter dedicated encoding)\r
-#define LOADREG(addr, reg) if ((reg) == REG_EAX) {EmitByte(0xa1);} else {EmitByte(0x8b); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);\r
-//mov %reg,addr (eax has a shorter dedicated encoding)\r
-#define STOREREG(reg, addr) if ((reg) == REG_EAX) {EmitByte(0xa3);} else {EmitByte(0x89); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);\r
-//movl $imm,addr with a float / integer immediate\r
-#define STOREF(f, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);EmitFloat(f);\r
-#define STOREI(i, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);Emit4Byte(i);\r
-//mov $val,%reg. the opcode is 0xb8+reg, so the reg argument really selects the destination (0xbe for esi)\r
-#define SETREGI(val,reg) EmitByte(0xb8 | (reg));Emit4Byte(val);\r
-\r
-//the parameters must not be called a/b/c: the preprocessor would also replace the member names in op[i].a etc\r
-#define ARGREGS(ra,rb,rc) GCache_Load(jit, op[i].a, ra, op[i].b, rb, op[i].c, rc)\r
-#define RESULTREG(r) GCache_Store(jit, op[i].c, r)\r
-\r
-//for the purposes of the cache, 'temp' offsets are only read when they have been written only within the preceding control block.\r
-//if they were read at any other time, then we must write them out in full.\r
-//this logic applies only to locals of a function.\r
-//#define USECACHE\r
-\r
-static void GCache_Load(struct jitstate *jit, int ao, int ar, int bo, int br, int co, int cr)\r
-{\r
-#if USECACHE\r
- if (jit->cachedreg != REG_NONE)\r
- {\r
- /*something is cached, if its one of the input offsets then can chain the instruction*/\r
-\r
- if (jit->cachedglobal === ao && ar != REG_NONE)\r
- {\r
- if (jit->cachedreg == ar)\r
- ar = REG_NONE;\r
- }\r
- if (jit->cachedglobal === bo && br != REG_NONE)\r
- {\r
- if (jit->cachedreg == br)\r
- br = REG_NONE;\r
- }\r
- if (jit->cachedglobal === co && cr != REG_NONE)\r
- {\r
- if (jit->cachedreg == cr)\r
- cr = REG_NONE;\r
- }\r
-\r
- if (!istemp(ao))\r
- {\r
- /*purge the old cache*/\r
- switch(jit->cachedreg)\r
- {\r
- case REG_NONE:\r
- break;\r
- case REG_S0:\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + jit->cachedglobal);\r
- break;\r
- default:\r
- STOREREG(jit->cachedreg, jit->glob + jit->cachedglobal);\r
- break;\r
- }\r
- jit->cachedglobal = -1;\r
- jit->cachedreg = REG_NONE;\r
- }\r
-\r
-#endif\r
- switch(ar)\r
- {\r
- case REG_NONE:\r
- break;\r
- case REG_S0:\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].a);\r
- break;\r
- default:\r
- LOADREG(jit->glob + ao, ar);\r
- break;\r
- }\r
-\r
- switch(br)\r
- {\r
- case REG_NONE:\r
- break;\r
- case REG_S0:\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].b);\r
- break;\r
- default:\r
- LOADREG(jit->glob + bo, br);\r
- break;\r
- }\r
-\r
- switch(cr)\r
- {\r
- case REG_NONE:\r
- break;\r
- case REG_S0:\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].c);\r
- break;\r
- default:\r
- LOADREG(jit->glob + co, cr);\r
- break;\r
- }\r
-}\r
-static void GCache_Store(struct jitstate *jit, int ofs, int reg)\r
-{\r
-#if USECACHE\r
- jit->cachedglobal = ofs;\r
- jit->cachedreg = reg;\r
-#else\r
- switch(reg)\r
- {\r
- case REG_NONE:\r
- break;\r
- case REG_S0:\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + ofs);\r
- break;\r
- default:\r
- STOREREG(reg, jit->glob + ofs);\r
- break;\r
- }\r
-#endif\r
-}\r
-\r
-/*\r
-Returns the address at which the next emitted byte will land.\r
-*/\r
-static void *LocalLoc(struct jitstate *jit)\r
-{\r
-	return jit->code + jit->codesize;\r
-}\r
-/*\r
-Emits a short (8bit displacement) jump whose destination is not known yet.\r
-'cond' is a QC opcode that selects the kind of jump: OP_GOTO gives an unconditional jmp, the\r
-comparison opcodes give the matching conditional jump.\r
-The displacement is emitted as 0 and the address just past it is returned; hand that to\r
-LocalJmpLoc once the destination is known.\r
-An unrecognised cond returns NULL in DEBUG win32 builds; in other builds only the zero byte is emitted.\r
-*/\r
-static void *LocalJmp(struct jitstate *jit, int cond)\r
-{\r
-	int opcode = -1;\r
-\r
-	/*the float compares leave their result in the carry/zero flags rather than the sign flag, hence 'above/below' instead of 'greater/less' for them*/\r
-	if (cond == OP_GOTO)\r
-		opcode = 0xeb; //jmp\r
-	else if (cond == OP_LE_F)\r
-		opcode = 0x76; //jbe\r
-	else if (cond == OP_GE_F)\r
-		opcode = 0x73; //jae\r
-	else if (cond == OP_LT_F)\r
-		opcode = 0x72; //jb\r
-	else if (cond == OP_GT_F)\r
-		opcode = 0x77; //ja\r
-	else if (cond == OP_LE_I)\r
-		opcode = 0x7e; //jle\r
-	else if (cond == OP_LT_I)\r
-		opcode = 0x7c; //jl\r
-	else if ((cond >= OP_NE_F && cond <= OP_NE_FNC) || cond == OP_NE_I)\r
-		opcode = 0x75; //jne\r
-	else if ((cond >= OP_EQ_F && cond <= OP_EQ_FNC) || cond == OP_EQ_I)\r
-		opcode = 0x74; //je\r
-\r
-	if (opcode >= 0)\r
-		EmitByte(jit, opcode);\r
-#if defined(DEBUG) && defined(_WIN32)\r
-	else\r
-	{\r
-		OutputDebugString("oh noes!\n");\r
-		return NULL;\r
-	}\r
-#endif\r
-\r
-	//placeholder displacement\r
-	EmitByte(jit, 0);\r
-\r
-	return LocalLoc(jit);\r
-}\r
-/*\r
-Fills in the 8bit displacement of a short jump emitted by LocalJmp.\r
-'jmp' is the value LocalJmp returned (the address just past the jump) and 'loc' is the\r
-destination, typically obtained from LocalLoc. The displacement byte lives at jmp[-1].\r
-Only DEBUG win32 builds check that the distance fits; elsewhere an oversized distance is\r
-silently truncated to its low byte.\r
-*/\r
-static void LocalJmpLoc(void *jmp, void *loc)\r
-{\r
-	int offs;\r
-	unsigned char *a = jmp;\r
-	offs = (char *)loc - (char *)jmp;\r
-#if defined(DEBUG) && defined(_WIN32)\r
-	//a signed 8bit displacement covers -128..127 inclusive\r
-	if (offs > 127 || offs < -128)\r
-	{\r
-		OutputDebugStringA("bad jump\n");\r
-		//overwrite the whole jump with 'int $0xcc' so that reaching it traps instead of jumping somewhere random\r
-		a[-2] = 0xcd;\r
-		a[-1] = 0xcc;\r
-		return;\r
-	}\r
-#endif\r
-	a[-1] = offs;\r
-}\r
-\r
-/*\r
-Resolves every jump recorded by Emit4ByteJump.\r
-Each record is a (code position, target statement, bias) triple. The 4-byte field at the code\r
-position receives the distance to the target statement's native code, written low byte first.\r
-*/\r
-static void FixupJumps(struct jitstate *jit)\r
-{\r
-	unsigned int idx;\r
-	unsigned int b;\r
-\r
-	for (idx = 0; idx < jit->numjumps; idx += 3)\r
-	{\r
-		unsigned int *rec = &jit->statementjumps[idx];\r
-		unsigned char *patch = &jit->code[rec[0]];\r
-		unsigned char *target = jit->statementoffsets[rec[1]];\r
-		//the bias was recorded from a signed int (the call sites in this file pass -4) but is subtracted as an unsigned value\r
-		//NOTE(review): presumably this relies on 32bit pointer wraparound to mean patch+4 - confirm\r
-		unsigned int bias = rec[2];\r
-		unsigned int rel = (int)(target - (patch-bias));\r
-\r
-		for (b = 0; b < 4; b++)\r
-			patch[b] = (rel>>(b*8))&0xff;\r
-	}\r
-}\r
-\r
-int ASMCALL PR_LeaveFunction (progfuncs_t *progfuncs); /*called by the generated return sequence, which then uses the result to index statementoffsets*/\r
-int ASMCALL PR_EnterFunction (progfuncs_t *progfuncs, dfunction_t *f, int progsnum); /*called by the generated call sequence, which then uses the result as the statement to jump to*/\r
-\r
-/*\r
-Releases everything owned by a jitstate: the jump fixup table, the statement address table,\r
-the code buffer (munmap'ed where it was mmap'ed, free'd on win32) and finally the state itself.\r
-Passing NULL is allowed and does nothing.\r
-*/\r
-void PR_CloseJit(struct jitstate *jit)\r
-{\r
-	if (!jit)\r
-		return;\r
-\r
-	free(jit->statementjumps);\r
-	free(jit->statementoffsets);\r
-#ifndef _WIN32\r
-	//the length has to match what was handed to mmap when the buffer was created\r
-	munmap(jit->code, jit->jitstatements * 500);\r
-#else\r
-	free(jit->code);\r
-#endif\r
-	free(jit);\r
-}\r
-\r
-#define EmitByte(v) EmitByte(jit, v) /*from here on the emitters are shadowed by macros that pass the local 'jit' implicitly. a macro's own name is not expanded again inside its expansion, so these still reach the real functions*/\r
-#define EmitAdr(v) EmitAdr(jit, v)\r
-#define EmitFOffset(a,b) EmitFOffset(jit, a, b)\r
-#define Emit4ByteJump(a,b) Emit4ByteJump(jit, a, b)\r
-#define Emit4Byte(v) Emit4Byte(jit, v)\r
-#define EmitFloat(v) EmitFloat(jit, v)\r
-#define LocalJmp(v) LocalJmp(jit, v)\r
-#define LocalLoc() LocalLoc(jit)\r
-\r
-\r
-struct jitstate *PR_GenerateJit(progfuncs_t *progfuncs)\r
-{\r
- struct jitstate *jit;\r
-\r
- void *j0, *l0;\r
- void *j1, *l1;\r
- void *j2, *l2;\r
- unsigned int i;\r
- dstatement16_t *op = (dstatement16_t*)current_progstate->statements;\r
- unsigned int numstatements = current_progstate->progs->numstatements;\r
- int *glob = (int*)current_progstate->globals;\r
-\r
- if (current_progstate->numbuiltins)\r
- return NULL;\r
- jit = malloc(sizeof(*jit));\r
- jit->jitstatements = numstatements;\r
-\r
- jit->statementjumps = malloc(numstatements*12);\r
- jit->statementoffsets = malloc(numstatements*4);\r
-#ifndef _WIN32\r
- jit->code = mmap(NULL, numstatements*500, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);\r
-#else\r
- jit->code = malloc(numstatements*500);\r
-#endif\r
- if (!jit->code)\r
- return NULL;\r
-\r
- jit->numjumps = 0;\r
- jit->codesize = 0;\r
-\r
-\r
-\r
- for (i = 0; i < numstatements; i++)\r
- {\r
- jit->statementoffsets[i] = &jit->code[jit->codesize];\r
-\r
- /*DEBUG*/\r
- SETREGI(op[i].op, REG_ESI);\r
-\r
- switch(op[i].op)\r
- {\r
- //jumps\r
- case OP_IF_I:\r
- //integer compare\r
- //if a, goto b\r
-\r
- //cmpl $0,glob[A]\r
- EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);\r
- //jne B\r
- EmitByte(0x0f);EmitByte(0x85);Emit4ByteJump(i + (signed short)op[i].b, -4);\r
- break;\r
-\r
- case OP_IFNOT_I:\r
- //integer compare\r
- //if !a, goto b\r
-\r
- //cmpl $0,glob[A]\r
- EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);\r
- //je B\r
- EmitByte(0x0f);EmitByte(0x84);Emit4ByteJump(i + (signed short)op[i].b, -4);\r
- break;\r
-\r
- case OP_GOTO:\r
- EmitByte(0xE9);Emit4ByteJump(i + (signed short)op[i].a, -4);\r
- break;\r
- \r
- //function returns\r
- case OP_DONE:\r
- case OP_RETURN:\r
- //done and return are the same\r
-\r
- //part 1: store A into OFS_RETURN\r
-\r
- if (!op[i].a)\r
- {\r
- //assumption: anything that returns address 0 is a void or zero return.\r
- //thus clear eax and copy that to the return vector.\r
- CLEARREG(REG_EAX);\r
- STOREREG(REG_EAX, glob + OFS_RETURN+0);\r
- STOREREG(REG_EAX, glob + OFS_RETURN+1);\r
- STOREREG(REG_EAX, glob + OFS_RETURN+2);\r
- }\r
- else\r
- {\r
- LOADREG(glob + op[i].a+0, REG_EAX);\r
- LOADREG(glob + op[i].a+1, REG_EDX);\r
- LOADREG(glob + op[i].a+2, REG_ECX);\r
- STOREREG(REG_EAX, glob + OFS_RETURN+0);\r
- STOREREG(REG_EDX, glob + OFS_RETURN+1);\r
- STOREREG(REG_ECX, glob + OFS_RETURN+2);\r
- }\r
- \r
- //call leavefunction to get the return address\r
- \r
-// pushl progfuncs\r
- EmitByte(0x68);EmitAdr(progfuncs);\r
-// call PR_LeaveFunction\r
- EmitByte(0xe8);EmitFOffset(PR_LeaveFunction, 4);\r
-// add $4,%esp\r
- EmitByte(0x83);EmitByte(0xc4);EmitByte(0x04);\r
-// movl pr_depth,%edx\r
- EmitByte(0x8b);EmitByte(0x15);EmitAdr(&pr_depth);\r
-// cmp prinst->exitdepth,%edx\r
- EmitByte(0x3b);EmitByte(0x15);EmitAdr(&prinst->exitdepth);\r
-// je returntoc\r
- j1 = LocalJmp(OP_EQ_E);\r
-// mov statementoffsets[%eax*4],%eax\r
- EmitByte(0x8b);EmitByte(0x04);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);\r
-// jmp *eax\r
- EmitByte(0xff);EmitByte(0xe0);\r
-// returntoc:\r
- l1 = LocalLoc();\r
-// ret\r
- EmitByte(0xc3);\r
-\r
- LocalJmpLoc(j1,l1);\r
- break;\r
-\r
- //function calls\r
- case OP_CALL0:\r
- case OP_CALL1:\r
- case OP_CALL2:\r
- case OP_CALL3:\r
- case OP_CALL4:\r
- case OP_CALL5:\r
- case OP_CALL6:\r
- case OP_CALL7:\r
- case OP_CALL8:\r
- //FIXME: the size of this instruction is going to hurt cache performance if every single function call is expanded into this HUGE CHUNK of gibberish!\r
- //FIXME: consider the feasability of just calling a C function and just jumping to the address it returns.\r
-\r
- //save the state in place the rest of the engine can cope with\r
- //movl $i, pr_xstatement\r
- EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_xstatement);Emit4Byte(i);\r
- //movl $(op[i].op-OP_CALL0), pr_argc\r
- EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_argc);Emit4Byte(op[i].op-OP_CALL0);\r
-\r
- //figure out who we're calling, and what that involves\r
- //%eax = glob[A]\r
- LOADREG(glob + op[i].a, REG_EAX);\r
- //eax is now the func num\r
-\r
- //mov %eax,%ecx\r
- EmitByte(0x89); EmitByte(0xc1);\r
- //shr $24,%ecx\r
- EmitByte(0xc1); EmitByte(0xe9); EmitByte(0x18);\r
- //ecx is now the progs num for the new func\r
-\r
- //cmp %ecx,pr_typecurrent\r
- EmitByte(0x39); EmitByte(0x0d); EmitAdr(&pr_typecurrent);\r
- //je sameprogs\r
- j1 = LocalJmp(OP_EQ_I);\r
- {\r
- //can't handle switching progs\r
-\r
- //FIXME: recurse though PR_ExecuteProgram\r
- //push eax\r
- //push progfuncs\r
- //call PR_ExecuteProgram\r
- //add $8,%esp\r
- //remember to change the je above\r
-\r
- //err... exit depth? no idea\r
- EmitByte(0xcd);EmitByte(op[i].op); //int $X\r
-\r
-\r
- //ret\r
- EmitByte(0xc3);\r
- }\r
- //sameprogs:\r
- l1 = LocalLoc();\r
- LocalJmpLoc(j1,l1);\r
-\r
- //andl $0x00ffffff, %eax\r
- EmitByte(0x25);Emit4Byte(0x00ffffff);\r
- \r
- //mov $sizeof(dfunction_t),%edx\r
- EmitByte(0xba);Emit4Byte(sizeof(dfunction_t));\r
- //mul %edx\r
- EmitByte(0xf7); EmitByte(0xe2);\r
- //add pr_functions,%eax\r
- EmitByte(0x05); EmitAdr(pr_functions);\r
-\r
- //eax is now the dfunction_t to be called\r
- //edx is clobbered.\r
-\r
- //mov (%eax),%edx\r
- EmitByte(0x8b);EmitByte(0x10);\r
- //edx is now the first statement number\r
- //cmp $0,%edx\r
- EmitByte(0x83);EmitByte(0xfa);EmitByte(0x00);\r
- //jl isabuiltin\r
- j1 = LocalJmp(OP_LT_I);\r
- {\r
- /* call the function*/\r
- //push %ecx\r
- EmitByte(0x51);\r
- //push %eax\r
- EmitByte(0x50);\r
- //pushl progfuncs\r
- EmitByte(0x68);EmitAdr(progfuncs);\r
- //call PR_EnterFunction\r
- EmitByte(0xe8);EmitFOffset(PR_EnterFunction, 4);\r
- //sub $12,%esp\r
- EmitByte(0x83);EmitByte(0xc4);EmitByte(0xc);\r
- //eax is now the next statement number (first of the new function, usually equal to ecx, but not always)\r
-\r
- //jmp statementoffsets[%eax*4]\r
- EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);\r
- }\r
- /*its a builtin, figure out which, and call it*/\r
- //isabuiltin:\r
- l1 = LocalLoc();\r
- LocalJmpLoc(j1,l1);\r
-\r
- //push current_progstate->globals\r
- EmitByte(0x68);EmitAdr(current_progstate->globals);\r
- //push progfuncs\r
- EmitByte(0x68);EmitAdr(progfuncs);\r
- //neg %edx\r
- EmitByte(0xf7);EmitByte(0xda);\r
- //call externs->globalbuiltins[%edx,4]\r
-//FIXME: make sure this dereferences\r
- EmitByte(0xff);EmitByte(0x14);EmitByte(0x95);EmitAdr(externs->globalbuiltins);\r
- //add $8,%esp\r
- EmitByte(0x83);EmitByte(0xc4);EmitByte(0x8);\r
-\r
- //but that builtin might have been Abort()\r
-\r
- LOADREG(&prinst->continuestatement, REG_EAX);\r
- //cmp $-1,%eax\r
- EmitByte(0x83);EmitByte(0xf8);EmitByte(0xff);\r
- //je donebuiltincall\r
- j1 = LocalJmp(OP_EQ_I);\r
- {\r
- //mov $-1,prinst->continuestatement\r
- EmitByte(0xc7);EmitByte(0x05);EmitAdr(&prinst->continuestatement);Emit4Byte((unsigned int)-1);\r
-\r
- //jmp statementoffsets[%eax*4]\r
- EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets);\r
- }\r
- //donebuiltincall:\r
- l1 = LocalLoc();\r
- LocalJmpLoc(j1,l1);\r
- break;\r
-\r
- case OP_MUL_F:\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
- //fmuls glob[B]\r
- EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
- break;\r
- case OP_DIV_F:\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
- //fdivs glob[B]\r
- EmitByte(0xd8);EmitByte(0x35);EmitAdr(glob + op[i].b);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
- break;\r
- case OP_ADD_F:\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
- //fadds glob[B]\r
- EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
- break;\r
- case OP_SUB_F:\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
- //fsubs glob[B]\r
- EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
- break;\r
-\r
- case OP_NOT_F:\r
- //fldz\r
- EmitByte(0xd9);EmitByte(0xee);\r
- //fcomps glob[A]\r
- EmitByte(0xd8); EmitByte(0x1d); EmitAdr(glob + op[i].a);\r
- //fnstsw %ax\r
- EmitByte(0xdf);EmitByte(0xe0);\r
- //testb 0x40,%ah\r
- EmitByte(0xf6);EmitByte(0xc4);EmitByte(0x40);\r
- \r
- j1 = LocalJmp(OP_NE_F);\r
- {\r
- STOREF(0.0f, glob + op[i].c);\r
- j2 = LocalJmp(OP_GOTO);\r
- }\r
- {\r
- //noteq:\r
- l1 = LocalLoc();\r
- STOREF(1.0f, glob + op[i].c);\r
- }\r
- //end:\r
- l2 = LocalLoc();\r
- LocalJmpLoc(j1,l1);\r
- LocalJmpLoc(j2,l2);\r
- break;\r
-\r
- case OP_STORE_F:\r
- case OP_STORE_S:\r
- case OP_STORE_ENT:\r
- case OP_STORE_FLD:\r
- case OP_STORE_FNC:\r
- LOADREG(glob + op[i].a, REG_EAX);\r
- STOREREG(REG_EAX, glob + op[i].b);\r
- break;\r
-\r
- case OP_STORE_V:\r
- LOADREG(glob + op[i].a+0, REG_EAX);\r
- LOADREG(glob + op[i].a+1, REG_EDX);\r
- LOADREG(glob + op[i].a+2, REG_ECX);\r
- STOREREG(REG_EAX, glob + op[i].b+0);\r
- STOREREG(REG_EDX, glob + op[i].b+1);\r
- STOREREG(REG_ECX, glob + op[i].b+2);\r
- break;\r
-\r
- case OP_LOAD_F:\r
- case OP_LOAD_S:\r
- case OP_LOAD_ENT:\r
- case OP_LOAD_FLD:\r
- case OP_LOAD_FNC:\r
- case OP_LOAD_V:\r
- //a is the ent number, b is the field\r
- //c is the dest\r
-\r
- LOADREG(glob + op[i].a, REG_EAX);\r
- LOADREG(glob + op[i].b, REG_ECX);\r
-\r
- //FIXME: bound eax (ent number)\r
- //FIXME: bound ecx (field index)\r
- //mov (ebx,eax,4).%eax\r
- EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);\r
- //eax is now an edictrun_t\r
- //mov fields(,%eax,4),%edx\r
- EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);\r
- //edx is now the field array for that ent\r
-\r
- //mov fieldajust(%edx,%ecx,4),%eax\r
- EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);\r
-\r
- STOREREG(REG_EAX, glob + op[i].c)\r
-\r
- if (op[i].op == OP_LOAD_V)\r
- {\r
- //mov fieldajust+4(%edx,%ecx,4),%eax\r
- EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(4+progfuncs->fieldadjust*4);\r
- STOREREG(REG_EAX, glob + op[i].c+1)\r
-\r
- //mov fieldajust+8(%edx,%ecx,4),%eax\r
- EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(8+progfuncs->fieldadjust*4);\r
- STOREREG(REG_EAX, glob + op[i].c+2)\r
- }\r
- break;\r
-\r
- case OP_ADDRESS:\r
- //a is the ent number, b is the field\r
- //c is the dest\r
-\r
- LOADREG(glob + op[i].a, REG_EAX);\r
- LOADREG(glob + op[i].b, REG_ECX);\r
-\r
- //FIXME: bound eax (ent number)\r
- //FIXME: bound ecx (field index)\r
- //mov (ebx,eax,4).%eax\r
- EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);\r
- //eax is now an edictrun_t\r
- //mov fields(,%eax,4),%edx\r
- EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);\r
- //edx is now the field array for that ent\r
- //mov fieldajust(%edx,%ecx,4),%eax //offset = progfuncs->fieldadjust\r
- //EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); EmitByte(progfuncs->fieldadjust*4);\r
- EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);\r
- STOREREG(REG_EAX, glob + op[i].c);\r
- break;\r
-\r
- case OP_STOREP_F:\r
- case OP_STOREP_S:\r
- case OP_STOREP_ENT:\r
- case OP_STOREP_FLD:\r
- case OP_STOREP_FNC:\r
- LOADREG(glob + op[i].a, REG_EAX);\r
- LOADREG(glob + op[i].b, REG_ECX);\r
- //mov %eax,(%ecx)\r
- EmitByte(0x89);EmitByte(0x01);\r
- break;\r
-\r
- case OP_STOREP_V:\r
- LOADREG(glob + op[i].b, REG_ECX);\r
-\r
- LOADREG(glob + op[i].a+0, REG_EAX);\r
- //mov %eax,0(%ecx)\r
- EmitByte(0x89);EmitByte(0x01);\r
-\r
- LOADREG(glob + op[i].a+1, REG_EAX);\r
- //mov %eax,4(%ecx)\r
- EmitByte(0x89);EmitByte(0x41);EmitByte(0x04);\r
-\r
- LOADREG(glob + op[i].a+2, REG_EAX);\r
- //mov %eax,8(%ecx)\r
- EmitByte(0x89);EmitByte(0x41);EmitByte(0x08);\r
- break;\r
-\r
- case OP_NE_I:\r
- case OP_NE_E:\r
- case OP_NE_FNC:\r
- case OP_EQ_I:\r
- case OP_EQ_E:\r
- case OP_EQ_FNC:\r
- //integer equality\r
- LOADREG(glob + op[i].a, REG_EAX);\r
-\r
- //cmp glob[B],%eax\r
- EmitByte(0x3b); EmitByte(0x04); EmitByte(0x25); EmitAdr(glob + op[i].b);\r
- j1 = LocalJmp(op[i].op);\r
- {\r
- STOREF(0.0f, glob + op[i].c);\r
- j2 = LocalJmp(OP_GOTO);\r
- }\r
- {\r
- l1 = LocalLoc();\r
- STOREF(1.0f, glob + op[i].c);\r
- }\r
- l2 = LocalLoc();\r
- LocalJmpLoc(j1,l1);\r
- LocalJmpLoc(j2,l2);\r
- break;\r
-\r
- case OP_NOT_I:\r
- case OP_NOT_ENT:\r
- case OP_NOT_FNC:\r
- //cmp glob[B],$0\r
- EmitByte(0x83); EmitByte(0x3d); EmitAdr(glob + op[i].a); EmitByte(0x00); \r
- j1 = LocalJmp(OP_NE_I);\r
- {\r
- STOREF(1.0f, glob + op[i].c);\r
- j2 = LocalJmp(OP_GOTO);\r
- }\r
- {\r
- l1 = LocalLoc();\r
- STOREF(0.0f, glob + op[i].c);\r
- }\r
- l2 = LocalLoc();\r
- LocalJmpLoc(j1,l1);\r
- LocalJmpLoc(j2,l2);\r
- break;\r
-\r
- case OP_BITOR_F: //floats...\r
- //flds glob[A]\r
- EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
- //flds glob[B]\r
- EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
- //fistp tb\r
- EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);\r
- //fistp ta\r
- EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
- LOADREG(&ta, REG_EAX)\r
- //or %eax,tb\r
- EmitByte(0x09); EmitByte(0x05);EmitAdr(&tb);\r
- //fild tb\r
- EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);\r
- //fstps glob[C]\r
- EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
- break;\r
-\r
- case OP_BITAND_F:\r
- //flds glob[A]\r
- EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
- //flds glob[B]\r
- EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
- //fistp tb\r
- EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);\r
- //fistp ta\r
- EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
- /*two args are now at ta and tb*/\r
- LOADREG(&ta, REG_EAX)\r
- //and tb,%eax\r
- EmitByte(0x21); EmitByte(0x05);EmitAdr(&tb);\r
- /*we just wrote the int value to tb, convert that to a float and store it at c*/\r
- //fild tb\r
- EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);\r
- //fstps glob[C]\r
- EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
- break;\r
-\r
- case OP_AND_F:\r
- //test floats properly, so we don't get confused with -0.0\r
- //FIXME: is it feasable to grab the value as an int and test it against 0x7fffffff?\r
-\r
- //flds glob[A]\r
- EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);\r
- //fcomps nullfloat\r
- EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
- //fnstsw %ax\r
- EmitByte(0xdf); EmitByte(0xe0);\r
- //test $0x40,%ah\r
- EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
- //jz onefalse\r
- EmitByte(0x75); EmitByte(0x1f);\r
-\r
- //flds glob[B]\r
- EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);\r
- //fcomps nullfloat\r
- EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
- //fnstsw %ax\r
- EmitByte(0xdf); EmitByte(0xe0);\r
- //test $0x40,%ah\r
- EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
- //jnz onefalse\r
- EmitByte(0x75); EmitByte(0x0c);\r
-\r
- //mov float0,glob[C]\r
- EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);\r
- //jmp done\r
- EmitByte(0xeb); EmitByte(0x0a);\r
-\r
- //onefalse:\r
- //mov float1,glob[C]\r
- EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);\r
- //done:\r
- break;\r
- case OP_OR_F:\r
- //test floats properly, so we don't get confused with -0.0\r
-\r
- //flds glob[A]\r
- EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);\r
- //fcomps nullfloat\r
- EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
- //fnstsw %ax\r
- EmitByte(0xdf); EmitByte(0xe0);\r
- //test $0x40,%ah\r
- EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
- //je onetrue\r
- EmitByte(0x74); EmitByte(0x1f);\r
-\r
- //flds glob[B]\r
- EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);\r
- //fcomps nullfloat\r
- EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
- //fnstsw %ax\r
- EmitByte(0xdf); EmitByte(0xe0);\r
- //test $0x40,%ah\r
- EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
- //je onetrue\r
- EmitByte(0x74); EmitByte(0x0c);\r
-\r
- //mov float0,glob[C]\r
- EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);\r
- //jmp done\r
- EmitByte(0xeb); EmitByte(0x0a);\r
-\r
- //onetrue:\r
- //mov float1,glob[C]\r
- EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);\r
- //done:\r
- break;\r
-\r
- case OP_EQ_S:\r
- case OP_NE_S:\r
- {\r
- //put a in ecx\r
- LOADREG(glob + op[i].a, REG_ECX);\r
- //put b in edi\r
- LOADREG(glob + op[i].b, REG_EDI);\r
-/*\r
- //early out if they're equal\r
- //cmp %ecx,%edi\r
- EmitByte(0x39); EmitByte(0xc0 | (REG_EDI<<3) | REG_ECX);\r
- j1c = LocalJmp(OP_EQ_S);\r
-\r
- //if a is 0, check if b is ""\r
- //jecxz ais0\r
- EmitByte(0xe3); EmitByte(0x1a);\r
-\r
- //if b is 0, check if a is ""\r
- //cmp $0,%edi\r
- EmitByte(0x83); EmitByte(0xff); EmitByte(0x00);\r
- //jne bnot0\r
- EmitByte(0x75); EmitByte(0x2a);\r
- {\r
- //push a\r
- EmitByte(0x51);\r
- //push progfuncs\r
- EmitByte(0x68); EmitAdr(progfuncs);\r
- //call PR_StringToNative\r
- EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
- //add $8,%esp\r
- EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
- //cmpb $0,(%eax)\r
- EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
- j1b = LocalJmp(OP_EQ_S);\r
- j0b = LocalJmp(OP_GOTO);\r
- }\r
-\r
- //ais0:\r
- {\r
- //push edi\r
- EmitByte(0x57);\r
- //push progfuncs\r
- EmitByte(0x68); EmitAdr(progfuncs);\r
- //call PR_StringToNative\r
- EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
- //add $8,%esp\r
- EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
- //cmpb $0,(%eax)\r
- EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
- //je _true\r
- EmitByte(0x74); EmitByte(0x36);\r
- //jmp _false\r
- EmitByte(0xeb); EmitByte(0x28);\r
- }\r
- //bnot0:\r
-*/\r
-LOADREG(glob + op[i].a, REG_ECX);\r
- //push ecx\r
- EmitByte(0x51);\r
- //push progfuncs\r
- EmitByte(0x68); EmitAdr(progfuncs);\r
- //call PR_StringToNative\r
- EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
- //push %eax\r
- EmitByte(0x50);\r
-\r
-LOADREG(glob + op[i].b, REG_EDI);\r
- //push %edi\r
- EmitByte(0x57);\r
- //push progfuncs\r
- EmitByte(0x68); EmitAdr(progfuncs);\r
- //call PR_StringToNative\r
- EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
- //add $8,%esp\r
- EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
-\r
-\r
- //push %eax\r
- EmitByte(0x50);\r
- //call strcmp\r
- EmitByte(0xe8); EmitFOffset(strcmp,4);\r
- //add $16,%esp\r
- EmitByte(0x83); EmitByte(0xc4); EmitByte(0x10);\r
-\r
- //cmp $0,%eax\r
- EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);\r
- j1 = LocalJmp(OP_EQ_S);\r
- {\r
- l0 = LocalLoc();\r
- STOREF((op[i].op == OP_NE_S)?1.0f:0.0f, glob + op[i].c);\r
- j2 = LocalJmp(OP_GOTO);\r
- }\r
- {\r
- l1 = LocalLoc();\r
- STOREF((op[i].op == OP_NE_S)?0.0f:1.0f, glob + op[i].c);\r
- }\r
- l2 = LocalLoc();\r
-\r
-// LocalJmpLoc(j0b, l0);\r
- LocalJmpLoc(j1, l1);\r
-// LocalJmpLoc(j1b, l1);\r
- LocalJmpLoc(j2, l2);\r
- }\r
- break;\r
-\r
- case OP_NOT_S:\r
- LOADREG(glob + op[i].a, REG_EAX)\r
-\r
- //cmp $0,%eax\r
- EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);\r
- j2 = LocalJmp(OP_EQ_S);\r
-\r
- //push %eax\r
- EmitByte(0x50);\r
- //push progfuncs\r
- EmitByte(0x68); EmitAdr(progfuncs);\r
- //call PR_StringToNative\r
- EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
- //add $8,%esp\r
- EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
-\r
- //cmpb $0,(%eax)\r
- EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
- j1 = LocalJmp(OP_EQ_S);\r
- {\r
- STOREF(0.0f, glob + op[i].c);\r
- j0 = LocalJmp(OP_GOTO);\r
- }\r
- {\r
- l1 = LocalLoc();\r
- STOREF(1.0f, glob + op[i].c);\r
- }\r
- l2 = LocalLoc();\r
- LocalJmpLoc(j2, l1);\r
- LocalJmpLoc(j1, l1);\r
- LocalJmpLoc(j0, l2);\r
- break;\r
-\r
- case OP_ADD_V:\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
- //fadds glob[B]\r
- EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+0);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);\r
-\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
- //fadds glob[B]\r
- EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+1);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);\r
-\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
- //fadds glob[B]\r
- EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+2);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);\r
- break;\r
- case OP_SUB_V:\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
- //fsubs glob[B]\r
- EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+0);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);\r
-\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
- //fsubs glob[B]\r
- EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+1);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);\r
-\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
- //fsubs glob[B]\r
- EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+2);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);\r
- break;\r
-\r
- case OP_MUL_V:\r
- //this is actually a dotproduct\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
- //fmuls glob[B]\r
- EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+0);\r
-\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
- //fmuls glob[B]\r
- EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+1);\r
-\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
- //fmuls glob[B]\r
- EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+2);\r
-\r
- //faddp\r
- EmitByte(0xde);EmitByte(0xc1);\r
- //faddp\r
- EmitByte(0xde);EmitByte(0xc1);\r
-\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
- break;\r
-\r
- case OP_EQ_F:\r
- case OP_NE_F:\r
- case OP_LE_F:\r
- case OP_GE_F:\r
- case OP_LT_F:\r
- case OP_GT_F:\r
- //flds glob[B]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b);\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
- //fcomip %st(1),%st\r
- EmitByte(0xdf);EmitByte(0xe9);\r
- //fstp %st(0) (aka: pop)\r
- EmitByte(0xdd);EmitByte(0xd8);\r
-\r
- j1 = LocalJmp(op[i].op);\r
- {\r
- STOREF(0.0f, glob + op[i].c);\r
- j2 = LocalJmp(OP_GOTO);\r
- }\r
- {\r
- l1 = LocalLoc();\r
- STOREF(1.0f, glob + op[i].c);\r
- }\r
- l2 = LocalLoc();\r
- LocalJmpLoc(j1,l1);\r
- LocalJmpLoc(j2,l2);\r
- break;\r
-\r
- case OP_MUL_FV:\r
- case OP_MUL_VF:\r
- //\r
- {\r
- int v;\r
- int f;\r
- if (op[i].op == OP_MUL_FV)\r
- {\r
- f = op[i].a;\r
- v = op[i].b;\r
- }\r
- else\r
- {\r
- v = op[i].a;\r
- f = op[i].b;\r
- }\r
-\r
- //flds glob[F]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + f);\r
-\r
- //flds glob[V0]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+0);\r
- //fmul st(1)\r
- EmitByte(0xd8);EmitByte(0xc9);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);\r
-\r
- //flds glob[V1]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+1);\r
- //fmul st(1)\r
- EmitByte(0xd8);EmitByte(0xc9);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);\r
-\r
- //flds glob[V2]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+2);\r
- //fmul st(1)\r
- EmitByte(0xd8);EmitByte(0xc9);\r
- //fstps glob[C]\r
- EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);\r
-\r
- //fstp %st(0) (aka: pop)\r
- EmitByte(0xdd);EmitByte(0xd8);\r
- }\r
- break;\r
-\r
- case OP_STATE:\r
- //externs->stateop(progfuncs, OPA->_float, OPB->function);\r
- //push b\r
- EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].b);\r
- //push a\r
- EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].a);\r
- //push $progfuncs\r
- EmitByte(0x68); EmitAdr(progfuncs);\r
- //call externs->stateop\r
- EmitByte(0xe8); EmitFOffset(externs->stateop, 4);\r
- //add $12,%esp\r
- EmitByte(0x83); EmitByte(0xc4); EmitByte(0x0c);\r
- break;\r
-#if 1\r
-/* case OP_NOT_V:\r
- //flds 0\r
- //flds glob[A+0]\r
- //fcomip %st(1),%st\r
- //jne _true\r
- //flds glob[A+1]\r
- //fcomip %st(1),%st\r
- //jne _true\r
- //flds glob[A+2]\r
- //fcomip %st(1),%st\r
- //jne _true\r
- //mov 1,C\r
- //jmp done\r
- //_true:\r
- //mov 0,C\r
- //done:\r
- break;\r
-*/\r
- \r
- case OP_NOT_V:\r
- EmitByte(0xcd);EmitByte(op[i].op);\r
- printf("QCJIT: instruction %i is not implemented\n", op[i].op);\r
- break;\r
-#endif\r
- case OP_NE_V:\r
- case OP_EQ_V:\r
- {\r
- void *f0, *f1, *f2, *floc;\r
-//compare v[0]\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
- //flds glob[B]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+0);\r
- //fcomip %st(1),%st\r
- EmitByte(0xdf);EmitByte(0xe9);\r
- //fstp %st(0) (aka: pop)\r
- EmitByte(0xdd);EmitByte(0xd8);\r
-\r
- /*if the condition is true, don't fail*/\r
- j1 = LocalJmp(op[i].op);\r
- {\r
- STOREF(0.0f, glob + op[i].c);\r
- f0 = LocalJmp(OP_GOTO);\r
- }\r
- l1 = LocalLoc();\r
- LocalJmpLoc(j1,l1);\r
-\r
-//compare v[1]\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
- //flds glob[B]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+1);\r
- //fcomip %st(1),%st\r
- EmitByte(0xdf);EmitByte(0xe9);\r
- //fstp %st(0) (aka: pop)\r
- EmitByte(0xdd);EmitByte(0xd8);\r
-\r
- /*if the condition is true, don't fail*/\r
- j1 = LocalJmp(op[i].op);\r
- {\r
- STOREF(0.0f, glob + op[i].c);\r
- f1 = LocalJmp(OP_GOTO);\r
- }\r
- l1 = LocalLoc();\r
- LocalJmpLoc(j1,l1);\r
-\r
-//compare v[2]\r
- //flds glob[A]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
- //flds glob[B]\r
- EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+2);\r
- //fcomip %st(1),%st\r
- EmitByte(0xdf);EmitByte(0xe9);\r
- //fstp %st(0) (aka: pop)\r
- EmitByte(0xdd);EmitByte(0xd8);\r
-\r
- /*if the condition is true, don't fail*/\r
- j1 = LocalJmp(op[i].op);\r
- {\r
- STOREF(0.0f, glob + op[i].c);\r
- f2 = LocalJmp(OP_GOTO);\r
- }\r
- l1 = LocalLoc();\r
- LocalJmpLoc(j1,l1);\r
-\r
-//success!\r
- STOREF(1.0f, glob + op[i].c);\r
-\r
- floc = LocalLoc();\r
- LocalJmpLoc(f0,floc);\r
- LocalJmpLoc(f1,floc);\r
- LocalJmpLoc(f2,floc);\r
- break;\r
- }\r
-\r
- /*fteqcc generates these from reading 'fast arrays', and are part of hexenc extras*/\r
- case OP_FETCH_GBL_F:\r
- case OP_FETCH_GBL_S:\r
- case OP_FETCH_GBL_E:\r
- case OP_FETCH_GBL_FNC:\r
- case OP_FETCH_GBL_V:\r
- {\r
- unsigned int max = ((unsigned int*)glob)[op[i].a-1];\r
- unsigned int base = op[i].a;\r
- //flds glob[B]\r
- EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
- //fistp ta\r
- EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
- LOADREG(&ta, REG_EAX)\r
- //FIXME: if eax >= $max, abort\r
-\r
- if (op[i].op == OP_FETCH_GBL_V)\r
- {\r
- /*scale the index by 3*/\r
- SETREGI(3, REG_EDX)\r
- //mul %edx\r
- EmitByte(0xf7); EmitByte(0xe2);\r
- }\r
-\r
- //lookup global\r
- //mov &glob[base](,%eax,4),%edx\r
- EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+0));\r
- STOREREG(REG_EDX, glob + op[i].c+0)\r
- if (op[i].op == OP_FETCH_GBL_V)\r
- {\r
- //mov &glob[base+1](,%eax,4),%edx\r
- EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+1));\r
- STOREREG(REG_EDX, glob + op[i].c+1)\r
- //mov &glob[base+2](,%eax,4),%edx\r
- EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+2));\r
- STOREREG(REG_EDX, glob + op[i].c+2)\r
- }\r
- break;\r
- }\r
-\r
- /*fteqcc generates these from writing 'fast arrays'*/\r
- case OP_GLOBALADDRESS:\r
- LOADREG(glob + op[i].b, REG_EAX);\r
- //lea &glob[A](, %eax, 4),%eax\r
- EmitByte(0x8d);EmitByte(0x04);EmitByte(0x85);EmitAdr(glob + op[i].b+2);\r
- STOREREG(REG_EAX, glob + op[i].c);\r
- break;\r
-// case OP_BOUNDCHECK:\r
- //FIXME: assert b <= a < c\r
- break;\r
- case OP_CONV_FTOI:\r
- //flds glob[A]\r
- EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
- //fistp glob[C]\r
- EmitByte(0xdb); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
- break;\r
- case OP_MUL_I:\r
- LOADREG(glob + op[i].a, REG_EAX);\r
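- //mull glob[B] (arg*eax => edx:eax). FIXME: mull m32 encodes as 0xf7 0x25; the 0xfc 0x25 emitted below is cld followed by and $imm32,%eax\r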
- EmitByte(0xfc); EmitByte(0x25);EmitAdr(glob + op[i].b);\r
- STOREREG(REG_EAX, glob + op[i].c);\r
- break;\r
-\r
- /*other extended opcodes*/\r
- case OP_BITOR_I:\r
- LOADREG(glob + op[i].a, REG_EAX)\r
- //or glob[B],%eax\r
- EmitByte(0x0b); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
- STOREREG(REG_EAX, glob + op[i].c);\r
- break;\r
-\r
-\r
- default:\r
- {\r
- enum qcop_e e = op[i].op;\r
- printf("QCJIT: Extended instruction set %i is not supported, not using jit.\n", e);\r
- }\r
-\r
-\r
- free(jit->statementjumps); //[MAX_STATEMENTS]\r
- free(jit->statementoffsets); //[MAX_STATEMENTS]\r
- free(jit->code);\r
- free(jit);\r
- return NULL;\r
- }\r
- }\r
-\r
- FixupJumps(jit);\r
-\r
- /* most likely want executable memory calls somewhere else more common */\r
-#ifdef _WIN32\r
- {\r
- DWORD old;\r
-\r
- //this memory is on the heap.\r
- //this means that we must maintain read/write protection, or libc will crash us\r
- VirtualProtect(jit->code, jit->codesize, PAGE_EXECUTE_READWRITE, &old);\r
- }\r
-#else\r
- mprotect(jit->code, jit->codesize, PROT_READ|PROT_EXEC);\r
-#endif\r
-\r
-// externs->WriteFile("jit.x86", jit->code, jit->codesize);\r
-\r
- return jit;\r
-}\r
-\r
-/*\r
-logical not for a float: gives 1 when arg compares equal to zero (either sign), 0 for anything else.\r
-*/\r
-float foo(float arg)\r
-{\r
- return (arg == 0) ? 1.0f : 0.0f;\r
-}\r
-\r
-/*\r
-calls into the generated native code for one qc statement.\r
- progfuncs: not referenced by name in this body. NOTE(review): presumably the prinst macro expands through it - confirm in progsint.h\r
- jit: jit state; its statementoffsets table supplies the address that gets called\r
- statement: qc statement number. the slot used is statementoffsets[statement+1]. NOTE(review): the reason for the +1 is not visible here - confirm against the code that fills the table\r
-nothing is returned, and no result is read back from the generated code here.\r
-building with anything other than gcc-style or msvc-style inline asm is a compile error.\r
-*/\r
-void PR_EnterJIT(progfuncs_t *progfuncs, struct jitstate *jit, int statement)\r
-{\r
-#ifdef __GNUC__\r
- //call, it clobbers pretty much everything.\r
- //inputs: the target address in whichever register the compiler picks, and prinst->edicttable pinned to ebx ("b").\r
- //declared clobbers: flags, memory, eax, ecx, edx.\r
- //NOTE(review): esi and edi are not in the clobber list, yet the register notes at the top of this file list both as used by generated code - confirm the generated code saves/restores them.\r
- asm("call *%0" :: "r"(jit->statementoffsets[statement+1]),"b"(prinst->edicttable):"cc","memory","eax","ecx","edx");\r
-#elif defined(_MSC_VER)\r
- //same call for msvc: copy both values into locals so the asm block can name them.\r
- void *entry = jit->statementoffsets[statement+1];\r
- void *edicttable = prinst->edicttable;\r
- __asm {\r
- pushad\r //save all general purpose registers around the call
- mov eax,entry\r
- mov ebx,edicttable\r //generated code expects the edict table in ebx
- call eax\r
- popad\r //restore the registers saved by pushad
- }\r
-#else\r
- #error "Sorry, no idea how to enter assembler safely for your compiler"\r
-#endif\r
-}\r
-#endif\r