2 when I say JIT, I mean load time, not execution time.
\r
5 qc jump offsets are all constants. we have no variable offset jumps (other than function calls/returns)
\r
6 field remapping... fields are in place, and cannot be adjusted. if a field is not set to 0, its assumed to be a constant.
\r
9 none at the moment...
\r
10 instructions need to be chained. stuff that writes to C should be cacheable, etc. maybe we don't even need to do the write to C
\r
11 it should also be possible to fold in eq+ifnot, so none of this silly storing of floats in equality tests
\r
13 this means that we need to track which vars are cached and in what form: fpreg, ireg+floatasint, ireg+float.
\r
14 certain qccx hacks can use fpu operations on ints, so do what the instruction says, rather than considering an add an add regardless of types.
\r
16 OP_AND_F, OP_OR_F etc will generally result in ints, and we should be able to keep them as ints if they combine with other ints.
\r
18 some instructions are jump sites. any cache must be flushed before the start of the instruction.
\r
19 some variables are locals, and will only ever be written by a single instruction, then read by the following instruction. such temps do not need to be written, or are overwritten later in the function anyway.
\r
20 such locals need to be calculated PER FUNCTION as (fte)qcc can overlap locals making multiple distinct locals on a single offset.
\r
22 store locals on a proper stack instead of the current absurd mechanism.
\r
25 ebx - prinst->edicttable
\r
28 esi - debug opcode number
\r
29 edi - tmp (because it's preserved by subfunctions)
\r
32 to use gas to provide binary opcodes:
\r
33 vim -N blob.s && as blob.s && objdump.exe -d a.out
\r
36 notable mods to test:
\r
37 prydon gate, due to fpu mangling to carry values between maps
\r
41 #include "progsint.h"
\r
46 #include <sys/mman.h>
\r
49 static float ta, tb, nullfloat=0;
\r
53 unsigned int *statementjumps; //[MAX_STATEMENTS*3]
\r
54 unsigned char **statementoffsets; //[MAX_STATEMENTS]
\r
55 unsigned int numjumps;
\r
56 unsigned char *code;
\r
57 unsigned int codesize;
\r
58 unsigned int jitstatements;
\r
61 unsigned int cachedglobal;
\r
//register that currently holds the value of cachedglobal; spelled to match the jit->cachedreg accesses in the cache helpers
unsigned int cachedreg;
\r
65 static void EmitByte(struct jitstate *jit, unsigned char byte)
\r
67 jit->code[jit->codesize++] = byte;
\r
69 static void Emit4Byte(struct jitstate *jit, unsigned int value)
\r
71 jit->code[jit->codesize++] = (value>> 0)&0xff;
\r
72 jit->code[jit->codesize++] = (value>> 8)&0xff;
\r
73 jit->code[jit->codesize++] = (value>>16)&0xff;
\r
74 jit->code[jit->codesize++] = (value>>24)&0xff;
\r
76 static void EmitAdr(struct jitstate *jit, void *value)
\r
78 Emit4Byte(jit, (unsigned int)value);
\r
80 static void EmitFloat(struct jitstate *jit, float value)
\r
82 union {float f; unsigned int i;} u;
\r
84 Emit4Byte(jit, u.i);
\r
86 static void Emit2Byte(struct jitstate *jit, unsigned short value)
\r
88 jit->code[jit->codesize++] = (value>> 0)&0xff;
\r
89 jit->code[jit->codesize++] = (value>> 8)&0xff;
\r
92 static void EmitFOffset(struct jitstate *jit, void *func, int bias)
\r
94 union {void *f; unsigned int i;} u;
\r
96 u.i -= (unsigned int)&jit->code[jit->codesize+bias];
\r
97 Emit4Byte(jit, u.i);
\r
100 static void Emit4ByteJump(struct jitstate *jit, int statementnum, int offset)
\r
102 jit->statementjumps[jit->numjumps++] = jit->codesize;
\r
103 jit->statementjumps[jit->numjumps++] = statementnum;
\r
104 jit->statementjumps[jit->numjumps++] = offset;
\r
106 //the offset is filled in later
\r
107 jit->codesize += 4;
\r
121 /*I'm not going to list S1 here, as that makes things too awkward*/
\r
/*
 * Raw x86 emit helpers for register and absolute-address operands.
 * Register arguments are placed straight into the ModRM reg/rm fields, so
 * they must be x86 register numbers.
 * Every argument is parenthesised so that compound expressions expand safely.
 * The expansions deliberately keep their trailing ';' because some call sites
 * invoke these macros without one.
 */
//xor %sr,%dr
#define XOR(sr,dr) EmitByte(0x31);EmitByte(0xc0 | ((sr)<<3) | (dr));
//xor a register with itself
#define CLEARREG(reg) XOR(reg,reg)
//mov addr,%reg (eax uses its own opcode with no ModRM byte)
#define LOADREG(addr, reg) if ((reg) == REG_EAX) {EmitByte(0xa1);} else {EmitByte(0x8b); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
//mov %reg,addr (eax uses its own opcode with no ModRM byte)
#define STOREREG(reg, addr) if ((reg) == REG_EAX) {EmitByte(0xa3);} else {EmitByte(0x89); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
//movl $f,addr (the float's bit pattern is the immediate)
#define STOREF(f, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);EmitFloat(f);
//movl $i,addr
#define STOREI(i, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);Emit4Byte(i);
\r
//mov $val,%reg
//the opcode is 0xb8 plus the register number, so the requested register is actually the one loaded
//(previously 0xbe was hard-coded, which always targeted esi no matter what 'reg' was).
//assumes the REG_* values are x86 register numbers, as the ModRM-building macros in this file already do.
//the trailing ';' is kept because some call sites omit their own.
#define SETREGI(val,reg) EmitByte(0xb8 | (reg));Emit4Byte(val);
\r
//route the current statement's operands through the global cache.
//the parameters are named ra/rb/rc so that they cannot capture the .a/.b/.c member names of op[i]
//(with parameters called a/b/c, 'op[i].a' itself was rewritten by the preprocessor).
#define ARGREGS(ra,rb,rc) GCache_Load(jit, op[i].a, (ra), op[i].b, (rb), op[i].c, (rc))
//record that register r now holds the value destined for operand c
#define RESULTREG(r) GCache_Store(jit, op[i].c, (r))
\r
136 //for the purposes of the cache, 'temp' offsets are only read when they have been written only within the preceding control block.
\r
137 //if they were read at any other time, then we must write them out in full.
\r
138 //this logic applies only to locals of a function.
\r
141 static void GCache_Load(struct jitstate *jit, int ao, int ar, int bo, int br, int co, int cr)
\r
144 if (jit->cachedreg != REG_NONE)
\r
146 /*something is cached, if its one of the input offsets then can chain the instruction*/
\r
148 if (jit->cachedglobal === ao && ar != REG_NONE)
\r
150 if (jit->cachedreg == ar)
\r
153 if (jit->cachedglobal === bo && br != REG_NONE)
\r
155 if (jit->cachedreg == br)
\r
158 if (jit->cachedglobal === co && cr != REG_NONE)
\r
160 if (jit->cachedreg == cr)
\r
166 /*purge the old cache*/
\r
167 switch(jit->cachedreg)
\r
173 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + jit->cachedglobal);
\r
176 STOREREG(jit->cachedreg, jit->glob + jit->cachedglobal);
\r
179 jit->cachedglobal = -1;
\r
180 jit->cachedreg = REG_NONE;
\r
190 EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].a);
\r
193 LOADREG(jit->glob + ao, ar);
\r
203 EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].b);
\r
206 LOADREG(jit->glob + bo, br);
\r
216 EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].c);
\r
219 LOADREG(jit->glob + co, cr);
\r
223 static void GCache_Store(struct jitstate *jit, int ofs, int reg)
\r
226 jit->cachedglobal = ofs;
\r
227 jit->cachedreg = reg;
\r
235 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + ofs);
\r
238 STOREREG(reg, jit->glob + ofs);
\r
244 static void *LocalLoc(struct jitstate *jit)
\r
246 return &jit->code[jit->codesize];
\r
248 static void *LocalJmp(struct jitstate *jit, int cond)
\r
250 /*floating point ops don't set the sign flag, thus we use the 'above/below' instructions instead of 'greater/less' instructions*/
\r
251 if (cond == OP_GOTO)
\r
252 EmitByte(jit, 0xeb); //jmp
\r
253 else if (cond == OP_LE_F)
\r
254 EmitByte(jit, 0x76); //jbe
\r
255 else if (cond == OP_GE_F)
\r
256 EmitByte(jit, 0x73); //jae
\r
257 else if (cond == OP_LT_F)
\r
258 EmitByte(jit, 0x72); //jb
\r
259 else if (cond == OP_GT_F)
\r
260 EmitByte(jit, 0x77); //ja
\r
261 else if (cond == OP_LE_I)
\r
262 EmitByte(jit, 0x7e); //jle
\r
263 else if (cond == OP_LT_I)
\r
264 EmitByte(jit, 0x7c); //jl
\r
265 else if ((cond >= OP_NE_F && cond <= OP_NE_FNC) || cond == OP_NE_I)
\r
266 EmitByte(jit, 0x75); //jne
\r
267 else if ((cond >= OP_EQ_F && cond <= OP_EQ_FNC) || cond == OP_EQ_I)
\r
268 EmitByte(jit, 0x74); //je
\r
269 #if defined(DEBUG) && defined(_WIN32)
\r
272 OutputDebugString("oh noes!\n");
\r
279 return LocalLoc(jit);
\r
281 static void LocalJmpLoc(void *jmp, void *loc)
\r
284 unsigned char *a = jmp;
\r
285 offs = (char *)loc - (char *)jmp;
\r
286 #if defined(DEBUG) && defined(_WIN32)
\r
287 if (offs > 127 || offs <= -128)
\r
289 OutputDebugStringA("bad jump\n");
\r
298 static void FixupJumps(struct jitstate *jit)
\r
301 unsigned char *codesrc;
\r
302 unsigned char *codedst;
\r
303 unsigned int offset;
\r
307 for (j = 0; j < jit->numjumps;)
\r
309 v = jit->statementjumps[j++];
\r
310 codesrc = &jit->code[v];
\r
312 v = jit->statementjumps[j++];
\r
313 codedst = jit->statementoffsets[v];
\r
315 v = jit->statementjumps[j++];
\r
316 offset = (int)(codedst - (codesrc-v)); //3rd term because the jump is relative to the instruction start, not the instruction's offset
\r
318 codesrc[0] = (offset>> 0)&0xff;
\r
319 codesrc[1] = (offset>> 8)&0xff;
\r
320 codesrc[2] = (offset>>16)&0xff;
\r
321 codesrc[3] = (offset>>24)&0xff;
\r
325 int ASMCALL PR_LeaveFunction (progfuncs_t *progfuncs);
\r
326 int ASMCALL PR_EnterFunction (progfuncs_t *progfuncs, dfunction_t *f, int progsnum);
\r
328 void PR_CloseJit(struct jitstate *jit)
\r
332 free(jit->statementjumps);
\r
333 free(jit->statementoffsets);
\r
335 munmap(jit->code, jit->jitstatements * 500);
\r
343 #define EmitByte(v) EmitByte(jit, v)
\r
344 #define EmitAdr(v) EmitAdr(jit, v)
\r
345 #define EmitFOffset(a,b) EmitFOffset(jit, a, b)
\r
346 #define Emit4ByteJump(a,b) Emit4ByteJump(jit, a, b)
\r
347 #define Emit4Byte(v) Emit4Byte(jit, v)
\r
348 #define EmitFloat(v) EmitFloat(jit, v)
\r
349 #define LocalJmp(v) LocalJmp(jit, v)
\r
350 #define LocalLoc() LocalLoc(jit)
\r
353 struct jitstate *PR_GenerateJit(progfuncs_t *progfuncs)
\r
355 struct jitstate *jit;
\r
361 dstatement16_t *op = (dstatement16_t*)current_progstate->statements;
\r
362 unsigned int numstatements = current_progstate->progs->numstatements;
\r
363 int *glob = (int*)current_progstate->globals;
\r
365 if (current_progstate->numbuiltins)
\r
367 jit = malloc(sizeof(*jit));
\r
368 jit->jitstatements = numstatements;
\r
370 jit->statementjumps = malloc(numstatements*12);
\r
371 jit->statementoffsets = malloc(numstatements*4);
\r
373 jit->code = mmap(NULL, numstatements*500, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
\r
375 jit->code = malloc(numstatements*500);
\r
385 for (i = 0; i < numstatements; i++)
\r
387 jit->statementoffsets[i] = &jit->code[jit->codesize];
\r
390 SETREGI(op[i].op, REG_ESI);
\r
400 EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);
\r
402 EmitByte(0x0f);EmitByte(0x85);Emit4ByteJump(i + (signed short)op[i].b, -4);
\r
410 EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);
\r
412 EmitByte(0x0f);EmitByte(0x84);Emit4ByteJump(i + (signed short)op[i].b, -4);
\r
416 EmitByte(0xE9);Emit4ByteJump(i + (signed short)op[i].a, -4);
\r
422 //done and return are the same
\r
424 //part 1: store A into OFS_RETURN
\r
428 //assumption: anything that returns address 0 is a void or zero return.
\r
429 //thus clear eax and copy that to the return vector.
\r
431 STOREREG(REG_EAX, glob + OFS_RETURN+0);
\r
432 STOREREG(REG_EAX, glob + OFS_RETURN+1);
\r
433 STOREREG(REG_EAX, glob + OFS_RETURN+2);
\r
437 LOADREG(glob + op[i].a+0, REG_EAX);
\r
438 LOADREG(glob + op[i].a+1, REG_EDX);
\r
439 LOADREG(glob + op[i].a+2, REG_ECX);
\r
440 STOREREG(REG_EAX, glob + OFS_RETURN+0);
\r
441 STOREREG(REG_EDX, glob + OFS_RETURN+1);
\r
442 STOREREG(REG_ECX, glob + OFS_RETURN+2);
\r
445 //call leavefunction to get the return address
\r
448 EmitByte(0x68);EmitAdr(progfuncs);
\r
449 // call PR_LeaveFunction
\r
450 EmitByte(0xe8);EmitFOffset(PR_LeaveFunction, 4);
\r
452 EmitByte(0x83);EmitByte(0xc4);EmitByte(0x04);
\r
453 // movl pr_depth,%edx
\r
454 EmitByte(0x8b);EmitByte(0x15);EmitAdr(&pr_depth);
\r
455 // cmp prinst->exitdepth,%edx
\r
456 EmitByte(0x3b);EmitByte(0x15);EmitAdr(&prinst->exitdepth);
\r
458 j1 = LocalJmp(OP_EQ_E);
\r
459 // mov statementoffsets[%eax*4],%eax
\r
460 EmitByte(0x8b);EmitByte(0x04);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);
\r
462 EmitByte(0xff);EmitByte(0xe0);
\r
468 LocalJmpLoc(j1,l1);
\r
481 //FIXME: the size of this instruction is going to hurt cache performance if every single function call is expanded into this HUGE CHUNK of gibberish!
\r
482 //FIXME: consider the feasability of just calling a C function and just jumping to the address it returns.
\r
484 //save the state in place the rest of the engine can cope with
\r
485 //movl $i, pr_xstatement
\r
486 EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_xstatement);Emit4Byte(i);
\r
487 //movl $(op[i].op-OP_CALL0), pr_argc
\r
488 EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_argc);Emit4Byte(op[i].op-OP_CALL0);
\r
490 //figure out who we're calling, and what that involves
\r
492 LOADREG(glob + op[i].a, REG_EAX);
\r
493 //eax is now the func num
\r
496 EmitByte(0x89); EmitByte(0xc1);
\r
498 EmitByte(0xc1); EmitByte(0xe9); EmitByte(0x18);
\r
499 //ecx is now the progs num for the new func
\r
501 //cmp %ecx,pr_typecurrent
\r
502 EmitByte(0x39); EmitByte(0x0d); EmitAdr(&pr_typecurrent);
\r
504 j1 = LocalJmp(OP_EQ_I);
\r
506 //can't handle switching progs
\r
508 //FIXME: recurse though PR_ExecuteProgram
\r
511 //call PR_ExecuteProgram
\r
513 //remember to change the je above
\r
515 //err... exit depth? no idea
\r
516 EmitByte(0xcd);EmitByte(op[i].op); //int $X
\r
524 LocalJmpLoc(j1,l1);
\r
526 //andl $0x00ffffff, %eax
\r
527 EmitByte(0x25);Emit4Byte(0x00ffffff);
\r
529 //mov $sizeof(dfunction_t),%edx
\r
530 EmitByte(0xba);Emit4Byte(sizeof(dfunction_t));
\r
532 EmitByte(0xf7); EmitByte(0xe2);
\r
533 //add pr_functions,%eax
\r
534 EmitByte(0x05); EmitAdr(pr_functions);
\r
536 //eax is now the dfunction_t to be called
\r
537 //edx is clobbered.
\r
540 EmitByte(0x8b);EmitByte(0x10);
\r
541 //edx is now the first statement number
\r
543 EmitByte(0x83);EmitByte(0xfa);EmitByte(0x00);
\r
545 j1 = LocalJmp(OP_LT_I);
\r
547 /* call the function*/
\r
553 EmitByte(0x68);EmitAdr(progfuncs);
\r
554 //call PR_EnterFunction
\r
555 EmitByte(0xe8);EmitFOffset(PR_EnterFunction, 4);
\r
557 EmitByte(0x83);EmitByte(0xc4);EmitByte(0xc);
\r
558 //eax is now the next statement number (first of the new function, usually equal to ecx, but not always)
\r
560 //jmp statementoffsets[%eax*4]
\r
561 EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);
\r
563 /*its a builtin, figure out which, and call it*/
\r
566 LocalJmpLoc(j1,l1);
\r
568 //push current_progstate->globals
\r
569 EmitByte(0x68);EmitAdr(current_progstate->globals);
\r
571 EmitByte(0x68);EmitAdr(progfuncs);
\r
573 EmitByte(0xf7);EmitByte(0xda);
\r
574 //call externs->globalbuiltins[%edx,4]
\r
575 //FIXME: make sure this dereferences
\r
576 EmitByte(0xff);EmitByte(0x14);EmitByte(0x95);EmitAdr(externs->globalbuiltins);
\r
578 EmitByte(0x83);EmitByte(0xc4);EmitByte(0x8);
\r
580 //but that builtin might have been Abort()
\r
582 LOADREG(&prinst->continuestatement, REG_EAX);
\r
584 EmitByte(0x83);EmitByte(0xf8);EmitByte(0xff);
\r
585 //je donebuiltincall
\r
586 j1 = LocalJmp(OP_EQ_I);
\r
588 //mov $-1,prinst->continuestatement
\r
589 EmitByte(0xc7);EmitByte(0x05);EmitAdr(&prinst->continuestatement);Emit4Byte((unsigned int)-1);
\r
591 //jmp statementoffsets[%eax*4]
\r
592 EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets);
\r
596 LocalJmpLoc(j1,l1);
\r
601 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
603 EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b);
\r
605 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
609 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
611 EmitByte(0xd8);EmitByte(0x35);EmitAdr(glob + op[i].b);
\r
613 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
617 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
619 EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
621 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
625 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
627 EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b);
\r
629 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
634 EmitByte(0xd9);EmitByte(0xee);
\r
636 EmitByte(0xd8); EmitByte(0x1d); EmitAdr(glob + op[i].a);
\r
638 EmitByte(0xdf);EmitByte(0xe0);
\r
640 EmitByte(0xf6);EmitByte(0xc4);EmitByte(0x40);
\r
642 j1 = LocalJmp(OP_NE_F);
\r
644 STOREF(0.0f, glob + op[i].c);
\r
645 j2 = LocalJmp(OP_GOTO);
\r
650 STOREF(1.0f, glob + op[i].c);
\r
654 LocalJmpLoc(j1,l1);
\r
655 LocalJmpLoc(j2,l2);
\r
663 LOADREG(glob + op[i].a, REG_EAX);
\r
664 STOREREG(REG_EAX, glob + op[i].b);
\r
668 LOADREG(glob + op[i].a+0, REG_EAX);
\r
669 LOADREG(glob + op[i].a+1, REG_EDX);
\r
670 LOADREG(glob + op[i].a+2, REG_ECX);
\r
671 STOREREG(REG_EAX, glob + op[i].b+0);
\r
672 STOREREG(REG_EDX, glob + op[i].b+1);
\r
673 STOREREG(REG_ECX, glob + op[i].b+2);
\r
682 //a is the ent number, b is the field
\r
685 LOADREG(glob + op[i].a, REG_EAX);
\r
686 LOADREG(glob + op[i].b, REG_ECX);
\r
688 //FIXME: bound eax (ent number)
\r
689 //FIXME: bound ecx (field index)
\r
690 //mov (%ebx,%eax,4),%eax
\r
691 EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);
\r
692 //eax is now an edictrun_t
\r
693 //mov fields(%eax),%edx
\r
694 EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);
\r
695 //edx is now the field array for that ent
\r
697 //mov fieldajust(%edx,%ecx,4),%eax
\r
698 EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);
\r
700 STOREREG(REG_EAX, glob + op[i].c)
\r
702 if (op[i].op == OP_LOAD_V)
\r
704 //mov fieldajust+4(%edx,%ecx,4),%eax
\r
705 EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(4+progfuncs->fieldadjust*4);
\r
706 STOREREG(REG_EAX, glob + op[i].c+1)
\r
708 //mov fieldajust+8(%edx,%ecx,4),%eax
\r
709 EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(8+progfuncs->fieldadjust*4);
\r
710 STOREREG(REG_EAX, glob + op[i].c+2)
\r
715 //a is the ent number, b is the field
\r
718 LOADREG(glob + op[i].a, REG_EAX);
\r
719 LOADREG(glob + op[i].b, REG_ECX);
\r
721 //FIXME: bound eax (ent number)
\r
722 //FIXME: bound ecx (field index)
\r
723 //mov (%ebx,%eax,4),%eax
\r
724 EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);
\r
725 //eax is now an edictrun_t
\r
726 //mov fields(%eax),%edx
\r
727 EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);
\r
728 //edx is now the field array for that ent
\r
729 //mov fieldajust(%edx,%ecx,4),%eax //offset = progfuncs->fieldadjust
\r
730 //EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); EmitByte(progfuncs->fieldadjust*4);
\r
731 EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);
\r
732 STOREREG(REG_EAX, glob + op[i].c);
\r
737 case OP_STOREP_ENT:
\r
738 case OP_STOREP_FLD:
\r
739 case OP_STOREP_FNC:
\r
740 LOADREG(glob + op[i].a, REG_EAX);
\r
741 LOADREG(glob + op[i].b, REG_ECX);
\r
743 EmitByte(0x89);EmitByte(0x01);
\r
747 LOADREG(glob + op[i].b, REG_ECX);
\r
749 LOADREG(glob + op[i].a+0, REG_EAX);
\r
751 EmitByte(0x89);EmitByte(0x01);
\r
753 LOADREG(glob + op[i].a+1, REG_EAX);
\r
755 EmitByte(0x89);EmitByte(0x41);EmitByte(0x04);
\r
757 LOADREG(glob + op[i].a+2, REG_EAX);
\r
759 EmitByte(0x89);EmitByte(0x41);EmitByte(0x08);
\r
769 LOADREG(glob + op[i].a, REG_EAX);
\r
772 EmitByte(0x3b); EmitByte(0x04); EmitByte(0x25); EmitAdr(glob + op[i].b);
\r
773 j1 = LocalJmp(op[i].op);
\r
775 STOREF(0.0f, glob + op[i].c);
\r
776 j2 = LocalJmp(OP_GOTO);
\r
780 STOREF(1.0f, glob + op[i].c);
\r
783 LocalJmpLoc(j1,l1);
\r
784 LocalJmpLoc(j2,l2);
\r
791 EmitByte(0x83); EmitByte(0x3d); EmitAdr(glob + op[i].a); EmitByte(0x00);
\r
792 j1 = LocalJmp(OP_NE_I);
\r
794 STOREF(1.0f, glob + op[i].c);
\r
795 j2 = LocalJmp(OP_GOTO);
\r
799 STOREF(0.0f, glob + op[i].c);
\r
802 LocalJmpLoc(j1,l1);
\r
803 LocalJmpLoc(j2,l2);
\r
806 case OP_BITOR_F: //floats...
\r
808 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
810 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
812 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);
\r
814 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
\r
815 LOADREG(&ta, REG_EAX)
\r
817 EmitByte(0x09); EmitByte(0x05);EmitAdr(&tb);
\r
819 EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);
\r
821 EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
826 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
828 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
830 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);
\r
832 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
\r
833 /*two args are now at ta and tb*/
\r
834 LOADREG(&ta, REG_EAX)
\r
836 EmitByte(0x21); EmitByte(0x05);EmitAdr(&tb);
\r
837 /*we just wrote the int value to tb, convert that to a float and store it at c*/
\r
839 EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);
\r
841 EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
845 //test floats properly, so we don't get confused with -0.0
\r
846 //FIXME: is it feasable to grab the value as an int and test it against 0x7fffffff?
\r
849 EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);
\r
851 EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
\r
853 EmitByte(0xdf); EmitByte(0xe0);
\r
855 EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
\r
857 EmitByte(0x75); EmitByte(0x1f);
\r
860 EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);
\r
862 EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
\r
864 EmitByte(0xdf); EmitByte(0xe0);
\r
866 EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
\r
868 EmitByte(0x75); EmitByte(0x0c);
\r
870 //mov float1,glob[C]
\r
871 EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);
\r
873 EmitByte(0xeb); EmitByte(0x0a);
\r
876 //mov float0,glob[C]
\r
877 EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);
\r
881 //test floats properly, so we don't get confused with -0.0
\r
884 EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);
\r
886 EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
\r
888 EmitByte(0xdf); EmitByte(0xe0);
\r
890 EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
\r
892 EmitByte(0x74); EmitByte(0x1f);
\r
895 EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);
\r
897 EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
\r
899 EmitByte(0xdf); EmitByte(0xe0);
\r
901 EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
\r
903 EmitByte(0x74); EmitByte(0x0c);
\r
905 //mov float0,glob[C]
\r
906 EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);
\r
908 EmitByte(0xeb); EmitByte(0x0a);
\r
911 //mov float1,glob[C]
\r
912 EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);
\r
920 LOADREG(glob + op[i].a, REG_ECX);
\r
922 LOADREG(glob + op[i].b, REG_EDI);
\r
924 //early out if they're equal
\r
926 EmitByte(0x39); EmitByte(0xc0 | (REG_EDI<<3) | REG_ECX);
\r
927 j1c = LocalJmp(OP_EQ_S);
\r
929 //if a is 0, check if b is ""
\r
931 EmitByte(0xe3); EmitByte(0x1a);
\r
933 //if b is 0, check if a is ""
\r
935 EmitByte(0x83); EmitByte(0xff); EmitByte(0x00);
\r
937 EmitByte(0x75); EmitByte(0x2a);
\r
942 EmitByte(0x68); EmitAdr(progfuncs);
\r
943 //call PR_StringToNative
\r
944 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
\r
946 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
\r
948 EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
\r
949 j1b = LocalJmp(OP_EQ_S);
\r
950 j0b = LocalJmp(OP_GOTO);
\r
958 EmitByte(0x68); EmitAdr(progfuncs);
\r
959 //call PR_StringToNative
\r
960 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
\r
962 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
\r
964 EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
\r
966 EmitByte(0x74); EmitByte(0x36);
\r
968 EmitByte(0xeb); EmitByte(0x28);
\r
972 LOADREG(glob + op[i].a, REG_ECX);
\r
976 EmitByte(0x68); EmitAdr(progfuncs);
\r
977 //call PR_StringToNative
\r
978 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
\r
982 LOADREG(glob + op[i].b, REG_EDI);
\r
986 EmitByte(0x68); EmitAdr(progfuncs);
\r
987 //call PR_StringToNative
\r
988 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
\r
990 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
\r
996 EmitByte(0xe8); EmitFOffset(strcmp,4);
\r
998 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x10);
\r
1001 EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);
\r
1002 j1 = LocalJmp(OP_EQ_S);
\r
1005 STOREF((op[i].op == OP_NE_S)?1.0f:0.0f, glob + op[i].c);
\r
1006 j2 = LocalJmp(OP_GOTO);
\r
1010 STOREF((op[i].op == OP_NE_S)?0.0f:1.0f, glob + op[i].c);
\r
1014 // LocalJmpLoc(j0b, l0);
\r
1015 LocalJmpLoc(j1, l1);
\r
1016 // LocalJmpLoc(j1b, l1);
\r
1017 LocalJmpLoc(j2, l2);
\r
1022 LOADREG(glob + op[i].a, REG_EAX)
\r
1025 EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);
\r
1026 j2 = LocalJmp(OP_EQ_S);
\r
1031 EmitByte(0x68); EmitAdr(progfuncs);
\r
1032 //call PR_StringToNative
\r
1033 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
\r
1035 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
\r
1038 EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
\r
1039 j1 = LocalJmp(OP_EQ_S);
\r
1041 STOREF(0.0f, glob + op[i].c);
\r
1042 j0 = LocalJmp(OP_GOTO);
\r
1046 STOREF(1.0f, glob + op[i].c);
\r
1049 LocalJmpLoc(j2, l1);
\r
1050 LocalJmpLoc(j1, l1);
\r
1051 LocalJmpLoc(j0, l2);
\r
1056 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
\r
1058 EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+0);
\r
1060 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
\r
1063 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
\r
1065 EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+1);
\r
1067 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
\r
1070 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
\r
1072 EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+2);
\r
1074 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
\r
1078 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
\r
1080 EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+0);
\r
1082 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
\r
1085 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
\r
1087 EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+1);
\r
1089 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
\r
1092 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
\r
1094 EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+2);
\r
1096 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
\r
1100 //this is actually a dotproduct
\r
1102 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
\r
1104 EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+0);
\r
1107 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
\r
1109 EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+1);
\r
1112 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
\r
1114 EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+2);
\r
1117 EmitByte(0xde);EmitByte(0xc1);
\r
1119 EmitByte(0xde);EmitByte(0xc1);
\r
1122 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
1132 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
1134 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
1135 //fucomip %st(1),%st
\r
1136 EmitByte(0xdf);EmitByte(0xe9);
\r
1137 //fstp %st(0) (aka: pop)
\r
1138 EmitByte(0xdd);EmitByte(0xd8);
\r
1140 j1 = LocalJmp(op[i].op);
\r
1142 STOREF(0.0f, glob + op[i].c);
\r
1143 j2 = LocalJmp(OP_GOTO);
\r
1147 STOREF(1.0f, glob + op[i].c);
\r
1150 LocalJmpLoc(j1,l1);
\r
1151 LocalJmpLoc(j2,l2);
\r
1160 if (op[i].op == OP_MUL_FV)
\r
1172 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + f);
\r
1175 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+0);
\r
1177 EmitByte(0xd8);EmitByte(0xc9);
\r
1179 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
\r
1182 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+1);
\r
1184 EmitByte(0xd8);EmitByte(0xc9);
\r
1186 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
\r
1189 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+2);
\r
1191 EmitByte(0xd8);EmitByte(0xc9);
\r
1193 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
\r
1195 //fstp %st(0) (aka: pop)
\r
1196 EmitByte(0xdd);EmitByte(0xd8);
\r
1201 //externs->stateop(progfuncs, OPA->_float, OPB->function);
\r
1203 EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].b);
\r
1205 EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].a);
\r
1207 EmitByte(0x68); EmitAdr(progfuncs);
\r
1208 //call externs->stateop
\r
1209 EmitByte(0xe8); EmitFOffset(externs->stateop, 4);
\r
1211 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x0c);
\r
1217 //fcomip %st(1),%st
\r
1220 //fcomip %st(1),%st
\r
1223 //fcomip %st(1),%st
\r
1234 EmitByte(0xcd);EmitByte(op[i].op);
\r
1235 printf("QCJIT: instruction %i is not implemented\n", op[i].op);
\r
1241 void *f0, *f1, *f2, *floc;
\r
1244 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
\r
1246 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+0);
\r
1247 //fucomip %st(1),%st
\r
1248 EmitByte(0xdf);EmitByte(0xe9);
\r
1249 //fstp %st(0) (aka: pop)
\r
1250 EmitByte(0xdd);EmitByte(0xd8);
\r
1252 /*if the condition is true, don't fail*/
\r
1253 j1 = LocalJmp(op[i].op);
\r
1255 STOREF(0.0f, glob + op[i].c);
\r
1256 f0 = LocalJmp(OP_GOTO);
\r
1259 LocalJmpLoc(j1,l1);
\r
1263 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
\r
1265 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+1);
\r
1266 //fucomip %st(1),%st
\r
1267 EmitByte(0xdf);EmitByte(0xe9);
\r
1268 //fstp %st(0) (aka: pop)
\r
1269 EmitByte(0xdd);EmitByte(0xd8);
\r
1271 /*if the condition is true, don't fail*/
\r
1272 j1 = LocalJmp(op[i].op);
\r
1274 STOREF(0.0f, glob + op[i].c);
\r
1275 f1 = LocalJmp(OP_GOTO);
\r
1278 LocalJmpLoc(j1,l1);
\r
1282 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
\r
1284 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+2);
\r
1285 //fucomip %st(1),%st
\r
1286 EmitByte(0xdf);EmitByte(0xe9);
\r
1287 //fstp %st(0) (aka: pop)
\r
1288 EmitByte(0xdd);EmitByte(0xd8);
\r
1290 /*if the condition is true, don't fail*/
\r
1291 j1 = LocalJmp(op[i].op);
\r
1293 STOREF(0.0f, glob + op[i].c);
\r
1294 f2 = LocalJmp(OP_GOTO);
\r
1297 LocalJmpLoc(j1,l1);
\r
1300 STOREF(1.0f, glob + op[i].c);
\r
1302 floc = LocalLoc();
\r
1303 LocalJmpLoc(f0,floc);
\r
1304 LocalJmpLoc(f1,floc);
\r
1305 LocalJmpLoc(f2,floc);
\r
1309 /*fteqcc generates these from reading 'fast arrays', and are part of hexenc extras*/
\r
1310 case OP_FETCH_GBL_F:
\r
1311 case OP_FETCH_GBL_S:
\r
1312 case OP_FETCH_GBL_E:
\r
1313 case OP_FETCH_GBL_FNC:
\r
1314 case OP_FETCH_GBL_V:
\r
1316 unsigned int max = ((unsigned int*)glob)[op[i].a-1];
\r
1317 unsigned int base = op[i].a;
\r
1319 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
1321 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
\r
1322 LOADREG(&ta, REG_EAX)
\r
1323 //FIXME: if eax >= $max, abort
\r
1325 if (op[i].op == OP_FETCH_GBL_V)
\r
1327 /*scale the index by 3*/
\r
1328 SETREGI(3, REG_EDX)
\r
1330 EmitByte(0xf7); EmitByte(0xe2);
\r
1334 //mov &glob[base](,%eax,4),%edx
\r
1335 EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+0));
\r
1336 STOREREG(REG_EDX, glob + op[i].c+0)
\r
1337 if (op[i].op == OP_FETCH_GBL_V)
\r
1339 //mov &glob[base+1](,%eax,4),%edx
\r
1340 EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+1));
\r
1341 STOREREG(REG_EDX, glob + op[i].c+1)
\r
1342 //mov &glob[base+2](,%eax,4),%edx
\r
1343 EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+2));
\r
1344 STOREREG(REG_EDX, glob + op[i].c+2)
\r
1349 /*fteqcc generates these from writing 'fast arrays'*/
\r
1350 case OP_GLOBALADDRESS:
\r
1351 LOADREG(glob + op[i].b, REG_EAX);
\r
//lea &glob[A](, %eax, 4),%eax
//eax already holds the index read from B, so the base of the address calculation must be A's slot
EmitByte(0x8d);EmitByte(0x04);EmitByte(0x85);EmitAdr(glob + op[i].a);
\r
1354 STOREREG(REG_EAX, glob + op[i].c);
\r
1356 // case OP_BOUNDCHECK:
\r
1357 //FIXME: assert b <= a < c
\r
1359 case OP_CONV_FTOI:
\r
1361 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
1363 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
1366 LOADREG(glob + op[i].a, REG_EAX);
\r
//mull glob[B] (arg*eax => edx:eax)
//0xf7 with ModRM 0x25 (/4, absolute disp32) is 'mul m32'; the 0xfc emitted before is 'cld'
EmitByte(0xf7); EmitByte(0x25);EmitAdr(glob + op[i].b);
\r
1369 STOREREG(REG_EAX, glob + op[i].c);
\r
1372 /*other extended opcodes*/
\r
1374 LOADREG(glob + op[i].a, REG_EAX)
\r
1376 EmitByte(0x0b); EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
1377 STOREREG(REG_EAX, glob + op[i].c);
\r
1383 enum qcop_e e = op[i].op;
\r
1384 printf("QCJIT: Extended instruction set %i is not supported, not using jit.\n", e);
\r
1388 free(jit->statementjumps); //[MAX_STATEMENTS]
\r
1389 free(jit->statementoffsets); //[MAX_STATEMENTS]
\r
1398 /* most likely want executable memory calls somewhere else more common */
\r
1403 //this memory is on the heap.
\r
1404 //this means that we must maintain read/write protection, or libc will crash us
\r
1405 VirtualProtect(jit->code, jit->codesize, PAGE_EXECUTE_READWRITE, &old);
\r
1408 mprotect(jit->code, jit->codesize, PROT_READ|PROT_EXEC);
\r
1411 // externs->WriteFile("jit.x86", jit->code, jit->codesize);
\r
1416 float foo(float arg)
\r
1426 void PR_EnterJIT(progfuncs_t *progfuncs, struct jitstate *jit, int statement)
\r
1429 //call, it clobbers pretty much everything.
\r
1430 asm("call *%0" :: "r"(jit->statementoffsets[statement+1]),"b"(prinst->edicttable):"cc","memory","eax","ecx","edx");
\r
1431 #elif defined(_MSC_VER)
\r
1432 void *entry = jit->statementoffsets[statement+1];
\r
1433 void *edicttable = prinst->edicttable;
\r
1437 mov ebx,edicttable
\r
1442 #error "Sorry, no idea how to enter assembler safely for your compiler"
\r