2 when I say JIT, I mean load time, not execution time.
\r
5 qc jump offsets are all constants. we have no variable offset jumps (other than function calls/returns)
\r
6 field remapping... fields are in place, and cannot be adjusted. if a field is not set to 0, it's assumed to be a constant.
\r
9 none at the moment...
\r
10 instructions need to be chained. stuff that writes to C should be cacheable, etc. maybe we don't even need to do the write to C
\r
11 it should also be possible to fold in eq+ifnot, so none of this silly storing of floats in equality tests
\r
13 this means that we need to track which vars are cached and in what form: fpreg, ireg+floatasint, ireg+float.
\r
14 certain qccx hacks can use fpu operations on ints, so do what the instruction says, rather than treating every add as the same operation regardless of operand types.
\r
16 OP_AND_F, OP_OR_F etc will generally result in ints, and we should be able to keep them as ints if they combine with other ints.
\r
18 some instructions are jump sites. any cache must be flushed before the start of the instruction.
\r
19 some variables are locals, and will only ever be written by a single instruction, then read by the following instruction. such temps do not need to be written, or are overwritten later in the function anyway.
\r
20 such locals need to be calculated PER FUNCTION as (fte)qcc can overlap locals making multiple distinct locals on a single offset.
\r
22 store locals on a proper stack instead of the current absurd mechanism.
\r
25 ebx - prinst->edicttable
\r
28 esi - debug opcode number
\r
29 edi - tmp (because it's preserved by subfunctions)
\r
32 to use gas to provide binary opcodes:
\r
33 vim -N blob.s && as blob.s && objdump.exe -d a.out
\r
36 notable mods to test:
\r
37 prydon gate, due to fpu mangling to carry values between maps
\r
41 #include "progsint.h"
\r
45 static float ta, tb, nullfloat=0;
\r
// Fields of the JIT state record that the emit helpers and PR_GenerateJit operate on.
// Jump fixup records, stored as consecutive triples: code offset of a 4-byte displacement field,
// target statement number, and a bias value (written by Emit4ByteJump, consumed by FixupJumps).
49 unsigned int *statementjumps; //[MAX_STATEMENTS*3]
\r
// For each QC statement, a pointer to the first byte of native code generated for it.
50 unsigned char **statementoffsets; //[MAX_STATEMENTS]
\r
// Number of unsigned ints currently stored in statementjumps (three per recorded jump).
51 unsigned int numjumps;
\r
// Output buffer that receives the generated machine code.
52 unsigned char *code;
\r
// Number of bytes emitted into code so far; doubles as the write cursor.
53 unsigned int codesize;
\r
// Statement count the code was generated for (assigned from numstatements in PR_GenerateJit).
54 unsigned int jitstatements;
\r
// Appends a single byte to jit->code and advances jit->codesize by one.
// No capacity check is visible here; the caller must have sized the buffer.
57 static void EmitByte(struct jitstate *jit, unsigned char byte)
\r
59 jit->code[jit->codesize++] = byte;
\r
// Appends a 32-bit value to the code buffer, least significant byte first
// (the little-endian layout x86 uses for immediates and displacements).
61 static void Emit4Byte(struct jitstate *jit, unsigned int value)
\r
63 jit->code[jit->codesize++] = (value>> 0)&0xff;
\r
64 jit->code[jit->codesize++] = (value>> 8)&0xff;
\r
65 jit->code[jit->codesize++] = (value>>16)&0xff;
\r
66 jit->code[jit->codesize++] = (value>>24)&0xff;
\r
// Emits an absolute address as a 4-byte little-endian value.
// The pointer is cast to unsigned int, so only the low 32 bits survive where pointers are wider than that.
68 static void EmitAdr(struct jitstate *jit, void *value)
\r
70 Emit4Byte(jit, (unsigned int)value);
\r
// Emits the raw 32-bit pattern of a float as four little-endian bytes.
72 static void EmitFloat(struct jitstate *jit, float value)
\r
// The union lets the float's bits be read back as an unsigned int.
// NOTE(review): the assignment of value into u is not visible in this excerpt; presumably u.f is set before u.i is read - confirm.
74 union {float f; unsigned int i;} u;
\r
76 Emit4Byte(jit, u.i);
\r
// Appends a 16-bit value to the code buffer, least significant byte first.
78 static void Emit2Byte(struct jitstate *jit, unsigned short value)
\r
80 jit->code[jit->codesize++] = (value>> 0)&0xff;
\r
81 jit->code[jit->codesize++] = (value>> 8)&0xff;
\r
// Emits a 32-bit displacement from a position in the code buffer to func.
// bias is added to the current write position to pick the reference point; callers in this file pass 4,
// i.e. the byte just past the 4-byte field being written.
84 static void EmitFOffset(struct jitstate *jit, void *func, int bias)
\r
// NOTE(review): the store of func into u is not visible in this excerpt; presumably u.f = func precedes the subtraction - confirm.
86 union {void *f; unsigned int i;} u;
\r
// Subtract the address of code[codesize+bias]; the casts assume addresses fit in 32 bits.
88 u.i -= (unsigned int)&jit->code[jit->codesize+bias];
\r
89 Emit4Byte(jit, u.i);
\r
// Records a pending jump to another QC statement, to be resolved by FixupJumps.
// Three values are appended to statementjumps: the current code offset (where the displacement will live),
// the target statement number, and the caller-supplied offset bias.
92 static void Emit4ByteJump(struct jitstate *jit, int statementnum, int offset)
\r
94 jit->statementjumps[jit->numjumps++] = jit->codesize;
\r
95 jit->statementjumps[jit->numjumps++] = statementnum;
\r
96 jit->statementjumps[jit->numjumps++] = offset;
\r
// NOTE(review): the code that reserves the 4 displacement bytes is not visible in this excerpt - confirm it follows.
98 //the offset is filled in later
\r
/*
 * Instruction-emitting helper macros. They expand to calls of the one-argument
 * EmitByte/Emit4Byte/EmitAdr/EmitFloat wrappers, so they can only be used where
 * those wrappers (and the 'jit' variable they capture) are in scope.
 * reg/sr/dr are x86 register encodings (REG_EAX is special-cased for the short
 * accumulator forms). Each expansion ends in ';' because several call sites
 * invoke these macros without a trailing semicolon.
 */
/*xor %sr,%dr*/
#define XOR(sr,dr) EmitByte(0x31);EmitByte(0xc0 | ((sr)<<3) | (dr));
/*zero a register by xoring it with itself*/
#define CLEARREG(reg) XOR(reg,reg)
/*mov addr,%reg (absolute 32-bit address); eax uses the short 0xa1 form*/
#define LOADREG(addr, reg) if ((reg) == REG_EAX) {EmitByte(0xa1);} else {EmitByte(0x8b); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
/*mov %reg,addr (absolute 32-bit address); eax uses the short 0xa3 form*/
#define STOREREG(reg, addr) if ((reg) == REG_EAX) {EmitByte(0xa3);} else {EmitByte(0x89); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
/*movl $<bits of float f>,addr*/
#define STOREF(f, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);EmitFloat(f);
/*movl $i,addr*/
#define STOREI(i, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);Emit4Byte(i);
/*mov $val,%reg. The opcode is 0xb8 plus the register number; a hard-coded 0xbe would always target esi whatever reg was requested*/
#define SETREGI(val,reg) EmitByte(0xb8 | (reg));Emit4Byte(val);
\r
// Returns the address of the next byte that will be written to the code buffer.
121 static void *LocalLoc(struct jitstate *jit)
\r
123 return &jit->code[jit->codesize];
\r
// Emits the opcode byte of a short (8-bit displacement) jump chosen from a QC opcode:
// OP_GOTO gives an unconditional jmp, comparison opcodes give the matching conditional jump.
// Returns the code position after emission so LocalJmpLoc can patch the displacement later.
// NOTE(review): the fallback branch for unknown conditions and the emission of the displacement
// placeholder byte are not visible in this excerpt - confirm.
125 static void *LocalJmp(struct jitstate *jit, int cond)
\r
127 /*floating point ops don't set the sign flag, thus we use the 'above/below' instructions instead of 'greater/less' instructions*/
\r
128 if (cond == OP_GOTO)
\r
129 EmitByte(jit, 0xeb); //jmp
\r
130 else if (cond == OP_LE_F)
\r
131 EmitByte(jit, 0x76); //jbe
\r
132 else if (cond == OP_GE_F)
\r
133 EmitByte(jit, 0x73); //jae
\r
134 else if (cond == OP_LT_F)
\r
135 EmitByte(jit, 0x72); //jb
\r
136 else if (cond == OP_GT_F)
\r
137 EmitByte(jit, 0x77); //ja
\r
138 else if (cond == OP_LE_I)
\r
139 EmitByte(jit, 0x7e); //jle
\r
140 else if (cond == OP_LT_I)
\r
141 EmitByte(jit, 0x7c); //jl
\r
// Every not-equal opcode in the OP_NE_F..OP_NE_FNC range, plus the integer form, maps to jne.
142 else if ((cond >= OP_NE_F && cond <= OP_NE_FNC) || cond == OP_NE_I)
\r
143 EmitByte(jit, 0x75); //jne
\r
// Likewise every equality opcode in OP_EQ_F..OP_EQ_FNC, plus the integer form, maps to je.
// Callers pass OP_EQ_E and OP_EQ_S; presumably both lie inside that range - verify against the opcode enum.
144 else if ((cond >= OP_EQ_F && cond <= OP_EQ_FNC) || cond == OP_EQ_I)
\r
145 EmitByte(jit, 0x74); //je
\r
146 #if defined(DEBUG) && defined(_WIN32)
\r
149 OutputDebugString("oh noes!\n");
\r
156 return LocalLoc(jit);
\r
// Resolves a short jump created by LocalJmp: jmp is the value LocalJmp returned, loc is the destination.
// NOTE(review): the store of the displacement through 'a' is not visible in this excerpt.
158 static void LocalJmpLoc(void *jmp, void *loc)
\r
161 unsigned char *a = jmp;
\r
// Byte distance from the jump's reference position to the destination.
162 offs = (char *)loc - (char *)jmp;
\r
163 #if defined(DEBUG) && defined(_WIN32)
\r
// Debug-only range check for the 8-bit displacement.
// NOTE(review): -128 is representable in a signed byte but is rejected by '<='; confirm whether that is intended.
164 if (offs > 127 || offs <= -128)
\r
166 OutputDebugStringA("bad jump\n");
\r
// Walks the (code offset, statement number, bias) triples recorded by Emit4ByteJump and writes the
// final 32-bit relative displacement for each one into the code buffer.
175 static void FixupJumps(struct jitstate *jit)
\r
178 unsigned char *codesrc;
\r
179 unsigned char *codedst;
\r
180 unsigned int offset;
\r
// j is advanced inside the body, three entries per iteration.
184 for (j = 0; j < jit->numjumps;)
\r
// 1st entry: where the 4-byte displacement field lives.
186 v = jit->statementjumps[j++];
\r
187 codesrc = &jit->code[v];
\r
// 2nd entry: target statement; its native code address is the jump destination.
// NOTE(review): no range check on the statement number is visible here.
189 v = jit->statementjumps[j++];
\r
190 codedst = jit->statementoffsets[v];
\r
// 3rd entry: bias. The call sites in this file pass -4, which makes (codesrc-v) the byte just past the displacement field.
192 v = jit->statementjumps[j++];
\r
193 offset = (int)(codedst - (codesrc-v)); //3rd term because the jump is relative to the instruction start, not the instruction's offset
\r
// Store the displacement little-endian.
195 codesrc[0] = (offset>> 0)&0xff;
\r
196 codesrc[1] = (offset>> 8)&0xff;
\r
197 codesrc[2] = (offset>>16)&0xff;
\r
198 codesrc[3] = (offset>>24)&0xff;
\r
202 int ASMCALL PR_LeaveFunction (progfuncs_t *progfuncs);
\r
203 int ASMCALL PR_EnterFunction (progfuncs_t *progfuncs, dfunction_t *f, int progsnum);
\r
// Releases the bookkeeping arrays owned by a jitstate.
// NOTE(review): only these two frees are visible in this excerpt; confirm jit->code and jit itself are released as well.
205 void PR_CloseJit(struct jitstate *jit)
\r
207 free(jit->statementjumps);
\r
208 free(jit->statementoffsets);
\r
// From here on, the emit helpers are shadowed by same-named macros that pass 'jit' implicitly,
// so a variable named jit must be in scope wherever they are used.
// A function-like macro is not re-expanded inside its own expansion, so each one resolves to the real function.
212 #define EmitByte(v) EmitByte(jit, v)
\r
213 #define EmitAdr(v) EmitAdr(jit, v)
\r
214 #define EmitFOffset(a,b) EmitFOffset(jit, a, b)
\r
// Two-argument form: target statement number and bias.
215 #define Emit4ByteJump(a,b) Emit4ByteJump(jit, a, b)
\r
216 #define Emit4Byte(v) Emit4Byte(jit, v)
\r
217 #define EmitFloat(v) EmitFloat(jit, v)
\r
218 #define LocalJmp(v) LocalJmp(jit, v)
\r
219 #define LocalLoc() LocalLoc(jit)
\r
// Generates native x86 code for every statement of the current progs and returns the jitstate describing it.
// Statements are read through a dstatement16_t pointer and globals are addressed as an array of ints.
222 struct jitstate *PR_GenerateJit(progfuncs_t *progfuncs)
\r
224 struct jitstate *jit;
\r
230 dstatement16_t *op = (dstatement16_t*)current_progstate->statements;
\r
231 unsigned int numstatements = current_progstate->progs->numstatements;
\r
232 int *glob = (int*)current_progstate->globals;
\r
// NOTE(review): the body of this check is not visible in this excerpt.
234 if (current_progstate->numbuiltins)
\r
// NOTE(review): none of the allocations below is checked for failure in the visible lines.
236 jit = malloc(sizeof(*jit));
\r
237 jit->jitstatements = numstatements;
\r
// 12 bytes per statement: room for one three-entry jump record each, assuming 4-byte unsigned ints.
239 jit->statementjumps = malloc(numstatements*12);
\r
// 4 bytes per statement: one code pointer each, assuming 4-byte pointers.
240 jit->statementoffsets = malloc(numstatements*4);
\r
// Fixed budget of 500 code bytes per statement; the emit helpers do not check against it.
241 jit->code = malloc(numstatements*500);
\r
250 for (i = 0; i < numstatements; i++)
\r
// Remember where this statement's native code starts so jumps and calls can target it.
252 jit->statementoffsets[i] = &jit->code[jit->codesize];
\r
// Load the QC opcode number into esi (the file's register notes list esi as "debug opcode number").
255 SETREGI(op[i].op, REG_ESI);
\r
// Branch statements. Targets are statement-relative; the rel32 fields are recorded via Emit4ByteJump with bias -4.
// cmpl $0x0,glob[A]
265 EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);
\r
// jne (0f 85 rel32) to statement i+B, with B treated as a signed 16-bit offset
267 EmitByte(0x0f);EmitByte(0x85);Emit4ByteJump(i + (signed short)op[i].b, -4);
\r
// cmpl $0x0,glob[A]
275 EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);
\r
// je (0f 84 rel32) to statement i+B
277 EmitByte(0x0f);EmitByte(0x84);Emit4ByteJump(i + (signed short)op[i].b, -4);
\r
// jmp (e9 rel32) to statement i+A
281 EmitByte(0xE9);Emit4ByteJump(i + (signed short)op[i].a, -4);
\r
// Function return: copy the return value into OFS_RETURN, call PR_LeaveFunction, then either
// leave the generated code or continue at the statement PR_LeaveFunction reported.
287 //done and return are the same
\r
289 //part 1: store A into OFS_RETURN
\r
293 //assumption: anything that returns address 0 is a void or zero return.
\r
294 //thus clear eax and copy that to the return vector.
\r
// NOTE(review): the instruction that zeroes eax is not visible in this excerpt.
296 STOREREG(REG_EAX, glob + OFS_RETURN+0);
\r
297 STOREREG(REG_EAX, glob + OFS_RETURN+1);
\r
298 STOREREG(REG_EAX, glob + OFS_RETURN+2);
\r
// Otherwise copy three consecutive globals (enough for a vector) starting at A.
302 LOADREG(glob + op[i].a+0, REG_EAX);
\r
303 LOADREG(glob + op[i].a+1, REG_EDX);
\r
304 LOADREG(glob + op[i].a+2, REG_ECX);
\r
305 STOREREG(REG_EAX, glob + OFS_RETURN+0);
\r
306 STOREREG(REG_EDX, glob + OFS_RETURN+1);
\r
307 STOREREG(REG_ECX, glob + OFS_RETURN+2);
\r
310 //call leavefunction to get the return address
\r
// push $progfuncs
313 EmitByte(0x68);EmitAdr(progfuncs);
\r
314 // call PR_LeaveFunction
\r
315 EmitByte(0xe8);EmitFOffset(PR_LeaveFunction, 4);
\r
// add $4,%esp (drop the pushed argument)
317 EmitByte(0x83);EmitByte(0xc4);EmitByte(0x04);
\r
318 // movl pr_depth,%edx
\r
319 EmitByte(0x8b);EmitByte(0x15);EmitAdr(&pr_depth);
\r
320 // cmp prinst->exitdepth,%edx
\r
321 EmitByte(0x3b);EmitByte(0x15);EmitAdr(&prinst->exitdepth);
\r
// Short je over the indirect jump when the two depths are equal.
323 j1 = LocalJmp(OP_EQ_E);
\r
// Note the table base is statementoffsets+1, so the entry read is the one after the statement number in eax.
324 // mov statementoffsets[%eax*4],%eax
\r
325 EmitByte(0x8b);EmitByte(0x04);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);
\r
// jmp *%eax
327 EmitByte(0xff);EmitByte(0xe0);
\r
// NOTE(review): the assignment of l1 and whatever is emitted at that label are not visible in this excerpt.
333 LocalJmpLoc(j1,l1);
\r
// Function call statements: publish the call state, locate the callee's dfunction_t, then either
// enter a QC function (PR_EnterFunction plus an indirect jump) or invoke a builtin through externs->globalbuiltins.
346 //save the state in place the rest of the engine can cope with
\r
347 //movl $i, pr_xstatement
\r
348 EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_xstatement);Emit4Byte(i);
\r
349 //movl $(op[i].op-OP_CALL0), pr_argc
\r
350 EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_argc);Emit4Byte(op[i].op-OP_CALL0);
\r
352 //figure out who we're calling, and what that involves
\r
354 LOADREG(glob + op[i].a, REG_EAX);
\r
355 //eax is now the func num
\r
// mov %eax,%ecx
358 EmitByte(0x89); EmitByte(0xc1);
\r
// shr $24,%ecx (keep only the top byte of the function value)
360 EmitByte(0xc1); EmitByte(0xe9); EmitByte(0x18);
\r
361 //ecx is now the progs num for the new func
\r
363 //cmp %ecx,pr_typecurrent
\r
364 EmitByte(0x39); EmitByte(0x0d); EmitAdr(&pr_typecurrent);
\r
// je: skip the trap below when the callee belongs to the current progs.
366 j1 = LocalJmp(OP_EQ_I);
\r
368 //can't handle switching progs
\r
370 //FIXME: recurse though PR_ExecuteProgram
\r
373 //call PR_ExecuteProgram
\r
375 //remember to change the je above
\r
377 //err... exit depth? no idea
\r
// Cross-progs calls are not implemented: a software interrupt numbered after the opcode is emitted instead.
378 EmitByte(0xcd);EmitByte(op[i].op); //int $X
\r
386 LocalJmpLoc(j1,l1);
\r
388 //andl $0x00ffffff, %eax
\r
389 EmitByte(0x25);Emit4Byte(0x00ffffff);
\r
391 //mov $sizeof(dfunction_t),%edx
\r
392 EmitByte(0xba);Emit4Byte(sizeof(dfunction_t));
\r
// mul %edx (eax = function index * sizeof(dfunction_t); edx receives the high half)
394 EmitByte(0xf7); EmitByte(0xe2);
\r
// The value of the pr_functions pointer at generation time is baked in as the immediate.
395 //add pr_functions,%eax
\r
396 EmitByte(0x05); EmitAdr(pr_functions);
\r
398 //eax is now the dfunction_t to be called
\r
399 //edx is clobbered.
\r
// mov (%eax),%edx (first 32-bit member of the dfunction_t)
402 EmitByte(0x8b);EmitByte(0x10);
\r
403 //edx is now the first statement number
\r
// cmp $0,%edx
405 EmitByte(0x83);EmitByte(0xfa);EmitByte(0x00);
\r
// jl: a negative first statement takes the builtin path further down.
407 j1 = LocalJmp(OP_LT_I);
\r
409 /* call the function*/
\r
// push $progfuncs
// NOTE(review): the pushes of the other two arguments are not visible in this excerpt; 12 bytes are popped after the call.
415 EmitByte(0x68);EmitAdr(progfuncs);
\r
416 //call PR_EnterFunction
\r
417 EmitByte(0xe8);EmitFOffset(PR_EnterFunction, 4);
\r
// add $12,%esp
419 EmitByte(0x83);EmitByte(0xc4);EmitByte(0xc);
\r
420 //eax is now the next statement number (first of the new function, usually equal to ecx, but not always)
\r
// As in the return path, the table base used here is statementoffsets+1.
422 //jmp statementoffsets[%eax*4]
\r
423 EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);
\r
425 /*its a builtin, figure out which, and call it*/
\r
428 LocalJmpLoc(j1,l1);
\r
430 //push current_progstate->globals
\r
431 EmitByte(0x68);EmitAdr(current_progstate->globals);
\r
// push $progfuncs
433 EmitByte(0x68);EmitAdr(progfuncs);
\r
// neg %edx (turn the negative statement number into a positive builtin index)
435 EmitByte(0xf7);EmitByte(0xda);
\r
436 //call externs->globalbuiltins[%edx,4]
\r
437 //FIXME: make sure this dereferences
\r
438 EmitByte(0xff);EmitByte(0x14);EmitByte(0x95);EmitAdr(externs->globalbuiltins);
\r
// add $8,%esp
440 EmitByte(0x83);EmitByte(0xc4);EmitByte(0x8);
\r
442 //but that builtin might have been Abort()
\r
444 LOADREG(&prinst->continuestatement, REG_EAX);
\r
// cmp $-1,%eax
446 EmitByte(0x83);EmitByte(0xf8);EmitByte(0xff);
\r
447 //je donebuiltincall
\r
448 j1 = LocalJmp(OP_EQ_I);
\r
450 //mov $-1,prinst->continuestatement
\r
451 EmitByte(0xc7);EmitByte(0x05);EmitAdr(&prinst->continuestatement);Emit4Byte((unsigned int)-1);
\r
// Here the table base is statementoffsets itself (no +1), unlike the jumps above.
453 //jmp statementoffsets[%eax*4]
\r
454 EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets);
\r
458 LocalJmpLoc(j1,l1);
\r
// Scalar float arithmetic on the x87 FPU: load glob[A], combine with glob[B], store and pop into glob[C].
// Multiply: flds glob[A]
463 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
// fmuls glob[B]
465 EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b);
\r
// fstps glob[C]
467 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
// Divide: flds glob[A]
471 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
// fdivs glob[B]
473 EmitByte(0xd8);EmitByte(0x35);EmitAdr(glob + op[i].b);
\r
// fstps glob[C]
475 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
// Add: flds glob[A]
479 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
// fadds glob[B]
481 EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
// fstps glob[C]
483 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
// Subtract: flds glob[A]
487 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
// fsubs glob[B]
489 EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b);
\r
// fstps glob[C]
491 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
// Float logical NOT: glob[C] = 1.0 when glob[A] compares equal to zero, otherwise 0.0.
// fldz
496 EmitByte(0xd9);EmitByte(0xee);
\r
// fcomps glob[A] (compare 0.0 with A and pop)
498 EmitByte(0xd8); EmitByte(0x1d); EmitAdr(glob + op[i].a);
\r
// fnstsw %ax
500 EmitByte(0xdf);EmitByte(0xe0);
\r
// test $0x40,%ah (isolates the FPU "equal" condition bit)
502 EmitByte(0xf6);EmitByte(0xc4);EmitByte(0x40);
\r
// jne: taken when that bit is set, i.e. A compared equal to zero
504 j1 = LocalJmp(OP_NE_F);
\r
506 STOREF(0.0f, glob + op[i].c);
\r
507 j2 = LocalJmp(OP_GOTO);
\r
512 STOREF(1.0f, glob + op[i].c);
\r
// NOTE(review): the LocalLoc() assignments of l1 and l2 are not visible in this excerpt.
516 LocalJmpLoc(j1,l1);
\r
517 LocalJmpLoc(j2,l2);
\r
// Global-to-global stores. Single-word form: copy glob[A] to glob[B] through eax.
525 LOADREG(glob + op[i].a, REG_EAX);
\r
526 STOREREG(REG_EAX, glob + op[i].b);
\r
// Three-word (vector) form: load all three words first, then store them, using eax/edx/ecx.
530 LOADREG(glob + op[i].a+0, REG_EAX);
\r
531 LOADREG(glob + op[i].a+1, REG_EDX);
\r
532 LOADREG(glob + op[i].a+2, REG_ECX);
\r
533 STOREREG(REG_EAX, glob + op[i].b+0);
\r
534 STOREREG(REG_EDX, glob + op[i].b+1);
\r
535 STOREREG(REG_ECX, glob + op[i].b+2);
\r
// Entity field loads: read field B of entity A into glob[C] (three words for OP_LOAD_V).
// ebx is expected to hold the edict table (see PR_EnterJIT and the register notes at the top of the file).
544 //a is the ent number, b is the field
\r
547 LOADREG(glob + op[i].a, REG_EAX);
\r
548 LOADREG(glob + op[i].b, REG_ECX);
\r
550 //FIXME: bound eax (ent number)
\r
551 //FIXME: bound ecx (field index)
\r
552 //mov (ebx,eax,4).%eax
\r
553 EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);
\r
554 //eax is now an edictrun_t
\r
// The bytes below encode mov disp8(%eax),%edx; the displacement is the offset of 'fields' in edictrun_t,
// emitted as a single byte, so that offset must fit in a signed 8-bit value.
555 //mov fields(,%eax,4),%edx
\r
556 EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);
\r
557 //edx is now the field array for that ent
\r
559 //mov fieldajust(%edx,%ecx,4),%eax
\r
560 EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);
\r
562 STOREREG(REG_EAX, glob + op[i].c)
\r
// Vectors copy two more words from the next two field slots.
564 if (op[i].op == OP_LOAD_V)
\r
566 //mov fieldajust+4(%edx,%ecx,4),%eax
\r
567 EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(4+progfuncs->fieldadjust*4);
\r
568 STOREREG(REG_EAX, glob + op[i].c+1)
\r
570 //mov fieldajust+8(%edx,%ecx,4),%eax
\r
571 EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(8+progfuncs->fieldadjust*4);
\r
572 STOREREG(REG_EAX, glob + op[i].c+2)
\r
// Entity field address: same addressing as the field loads, but lea stores the field's address in glob[C] instead of its value.
577 //a is the ent number, b is the field
\r
580 LOADREG(glob + op[i].a, REG_EAX);
\r
581 LOADREG(glob + op[i].b, REG_ECX);
\r
583 //FIXME: bound eax (ent number)
\r
584 //FIXME: bound ecx (field index)
\r
585 //mov (ebx,eax,4).%eax
\r
586 EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);
\r
587 //eax is now an edictrun_t
\r
// Encoded as mov disp8(%eax),%edx with the offset of 'fields' as a one-byte displacement.
588 //mov fields(,%eax,4),%edx
\r
589 EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);
\r
590 //edx is now the field array for that ent
\r
// The live instruction below is lea (opcode 0x8d) with a 32-bit displacement; the commented-out line used a 1-byte displacement.
591 //mov fieldajust(%edx,%ecx,4),%eax //offset = progfuncs->fieldadjust
\r
592 //EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); EmitByte(progfuncs->fieldadjust*4);
\r
593 EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);
\r
594 STOREREG(REG_EAX, glob + op[i].c);
\r
// Stores through a pointer: glob[B] holds the destination address, glob[A] the value.
599 case OP_STOREP_ENT:
\r
600 case OP_STOREP_FLD:
\r
601 case OP_STOREP_FNC:
\r
602 LOADREG(glob + op[i].a, REG_EAX);
\r
603 LOADREG(glob + op[i].b, REG_ECX);
\r
// mov %eax,(%ecx)
605 EmitByte(0x89);EmitByte(0x01);
\r
// Three-word variant: the pointer stays in ecx while each word of A is moved through eax.
609 LOADREG(glob + op[i].b, REG_ECX);
\r
611 LOADREG(glob + op[i].a+0, REG_EAX);
\r
// mov %eax,(%ecx)
613 EmitByte(0x89);EmitByte(0x01);
\r
615 LOADREG(glob + op[i].a+1, REG_EAX);
\r
// mov %eax,4(%ecx)
617 EmitByte(0x89);EmitByte(0x41);EmitByte(0x04);
\r
619 LOADREG(glob + op[i].a+2, REG_EAX);
\r
// mov %eax,8(%ecx)
621 EmitByte(0x89);EmitByte(0x41);EmitByte(0x08);
\r
// 32-bit equality-style comparison of glob[A] with glob[B]; glob[C] becomes 1.0 when the condition
// selected by the opcode holds, 0.0 otherwise.
631 LOADREG(glob + op[i].a, REG_EAX);
\r
// cmp glob[B],%eax (absolute address encoded through a SIB byte with no base or index)
634 EmitByte(0x3b); EmitByte(0x04); EmitByte(0x25); EmitAdr(glob + op[i].b);
\r
// The QC opcode itself selects je or jne inside LocalJmp.
635 j1 = LocalJmp(op[i].op);
\r
637 STOREF(0.0f, glob + op[i].c);
\r
638 j2 = LocalJmp(OP_GOTO);
\r
642 STOREF(1.0f, glob + op[i].c);
\r
// NOTE(review): the LocalLoc() assignments of l1 and l2 are not visible in this excerpt.
645 LocalJmpLoc(j1,l1);
\r
646 LocalJmpLoc(j2,l2);
\r
// Logical NOT on a 32-bit value: glob[C] = 1.0 when glob[A] is zero, otherwise 0.0.
// cmpl $0x0,glob[A]
653 EmitByte(0x83); EmitByte(0x3d); EmitAdr(glob + op[i].a); EmitByte(0x00);
\r
// jne: a non-zero value skips to the 0.0 store
654 j1 = LocalJmp(OP_NE_I);
\r
656 STOREF(1.0f, glob + op[i].c);
\r
657 j2 = LocalJmp(OP_GOTO);
\r
661 STOREF(0.0f, glob + op[i].c);
\r
// NOTE(review): the LocalLoc() assignments of l1 and l2 are not visible in this excerpt.
664 LocalJmpLoc(j1,l1);
\r
665 LocalJmpLoc(j2,l2);
\r
// Bitwise OR / AND on float operands: both are converted to integers with fistp into the scratch
// variables ta and tb, combined as integers, and the result is converted back to float in glob[C].
// ta and tb are declared as floats but are used here as raw 32-bit integer storage.
// NOTE(review): fistp rounds according to the FPU control word rather than truncating like a C cast - confirm this matches the interpreter.
668 case OP_BITOR_F: //floats...
\r
// flds glob[A]
670 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
// flds glob[B]
672 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
// fistpl tb (B is on top of the FPU stack)
674 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);
\r
// fistpl ta
676 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
\r
677 LOADREG(&ta, REG_EAX)
\r
// or %eax,tb
679 EmitByte(0x09); EmitByte(0x05);EmitAdr(&tb);
\r
// fildl tb
681 EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);
\r
// fstps glob[C]
683 EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
// Bitwise AND: identical sequence with 'and' in place of 'or'.
// flds glob[A]
688 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
// flds glob[B]
690 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
// fistpl tb
692 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);
\r
// fistpl ta
694 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
\r
695 /*two args are now at ta and tb*/
\r
696 LOADREG(&ta, REG_EAX)
\r
// and %eax,tb
698 EmitByte(0x21); EmitByte(0x05);EmitAdr(&tb);
\r
699 /*we just wrote the int value to tb, convert that to a float and store it at c*/
\r
// fildl tb
701 EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);
\r
// fstps glob[C]
703 EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
// Float logical AND followed by logical OR. Both use hand-computed 8-bit jump distances, so the byte
// lengths of the instructions between a jump and its target must not change.
// First sequence (AND): result is 1.0 only when neither A nor B compares equal to zero.
707 //test floats properly, so we don't get confused with -0.0
\r
// flds glob[A]
710 EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);
\r
// fcomps nullfloat
712 EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
\r
// fnstsw %ax
714 EmitByte(0xdf); EmitByte(0xe0);
\r
// test $0x40,%ah
716 EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
\r
// jne +0x1f: A == 0, skip ahead to the final store (0.0)
718 EmitByte(0x75); EmitByte(0x1f);
\r
// flds glob[B]
721 EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);
\r
// fcomps nullfloat
723 EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
\r
// fnstsw %ax
725 EmitByte(0xdf); EmitByte(0xe0);
\r
// test $0x40,%ah
727 EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
\r
// jne +0x0c: B == 0, skip ahead to the final store (0.0)
729 EmitByte(0x75); EmitByte(0x0c);
\r
// The value stored on the next line is 1.0f, despite the "float0" wording below.
731 //mov float0,glob[C]
\r
732 EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);
\r
// jmp +0x0a: step over the 10-byte store that follows
734 EmitByte(0xeb); EmitByte(0x0a);
\r
// The value stored on the next line is 0.0f, despite the "float1" wording below.
737 //mov float1,glob[C]
\r
738 EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);
\r
// Second sequence (OR): same shape with je, so the result is 1.0 when either A or B is non-zero.
742 //test floats properly, so we don't get confused with -0.0
\r
// flds glob[A]
745 EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);
\r
// fcomps nullfloat
747 EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
\r
// fnstsw %ax
749 EmitByte(0xdf); EmitByte(0xe0);
\r
// test $0x40,%ah
751 EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
\r
// je +0x1f: A != 0, skip ahead to the final store (1.0)
753 EmitByte(0x74); EmitByte(0x1f);
\r
// flds glob[B]
756 EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);
\r
// fcomps nullfloat
758 EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
\r
// fnstsw %ax
760 EmitByte(0xdf); EmitByte(0xe0);
\r
// test $0x40,%ah
762 EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
\r
// je +0x0c: B != 0, skip ahead to the final store (1.0)
764 EmitByte(0x74); EmitByte(0x0c);
\r
766 //mov float0,glob[C]
\r
767 EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);
\r
// jmp +0x0a: step over the 10-byte store that follows
769 EmitByte(0xeb); EmitByte(0x0a);
\r
772 //mov float1,glob[C]
\r
773 EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);
\r
// String equality / inequality. The two string references are loaded into ecx and edi, identical
// references short-circuit, null references are compared against the empty string, and otherwise both
// are converted with PR_StringToNative and compared with strcmp.
// NOTE(review): several pushes and label assignments of this sequence are not visible in this excerpt,
// and the hard-coded 8-bit jump distances depend on the exact bytes emitted in between.
781 LOADREG(glob + op[i].a, REG_ECX);
\r
783 LOADREG(glob + op[i].b, REG_EDI);
\r
785 //early out if they're equal
\r
// cmp %edi,%ecx
787 EmitByte(0x39); EmitByte(0xc0 | (REG_EDI<<3) | REG_ECX);
\r
// NOTE(review): no LocalJmpLoc call for j1c is visible in this excerpt.
788 j1c = LocalJmp(OP_EQ_S);
\r
790 //if a is 0, check if b is ""
\r
// jecxz +0x1a
792 EmitByte(0xe3); EmitByte(0x1a);
\r
794 //if b is 0, check if a is ""
\r
// cmp $0,%edi
796 EmitByte(0x83); EmitByte(0xff); EmitByte(0x00);
\r
// jne +0x2a
798 EmitByte(0x75); EmitByte(0x2a);
\r
// push $progfuncs
803 EmitByte(0x68); EmitAdr(progfuncs);
\r
804 //call PR_StringToNative
\r
805 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
\r
// add $8,%esp
807 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
\r
// cmpb $0,(%eax) (is the returned string empty?)
809 EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
\r
// NOTE(review): the LocalJmpLoc calls for j1b and j0b are commented out near the end of this sequence.
810 j1b = LocalJmp(OP_EQ_S);
\r
811 j0b = LocalJmp(OP_GOTO);
\r
// push $progfuncs
819 EmitByte(0x68); EmitAdr(progfuncs);
\r
820 //call PR_StringToNative
\r
821 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
\r
// add $8,%esp
823 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
\r
// cmpb $0,(%eax)
825 EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
\r
// je +0x36
827 EmitByte(0x74); EmitByte(0x36);
\r
// jmp +0x28
829 EmitByte(0xeb); EmitByte(0x28);
\r
// General case: convert both strings, then strcmp.
833 LOADREG(glob + op[i].a, REG_ECX);
\r
// push $progfuncs
837 EmitByte(0x68); EmitAdr(progfuncs);
\r
838 //call PR_StringToNative
\r
839 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
\r
843 LOADREG(glob + op[i].b, REG_EDI);
\r
// push $progfuncs
847 EmitByte(0x68); EmitAdr(progfuncs);
\r
848 //call PR_StringToNative
\r
849 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
\r
// add $8,%esp
851 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
\r
// call strcmp
857 EmitByte(0xe8); EmitFOffset(strcmp,4);
\r
// add $16,%esp
859 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x10);
\r
// cmp $0,%eax (strcmp result)
862 EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);
\r
// je: strings matched
863 j1 = LocalJmp(OP_EQ_S);
\r
// Mismatch result: 1.0 for OP_NE_S, 0.0 otherwise.
866 STOREF((op[i].op == OP_NE_S)?1.0f:0.0f, glob + op[i].c);
\r
867 j2 = LocalJmp(OP_GOTO);
\r
// Match result: 0.0 for OP_NE_S, 1.0 otherwise.
871 STOREF((op[i].op == OP_NE_S)?0.0f:1.0f, glob + op[i].c);
\r
875 // LocalJmpLoc(j0b, l0);
\r
876 LocalJmpLoc(j1, l1);
\r
877 // LocalJmpLoc(j1b, l1);
\r
878 LocalJmpLoc(j2, l2);
\r
// String logical NOT: glob[C] = 1.0 when the string reference in glob[A] is zero or resolves to an
// empty string, otherwise 0.0.
883 LOADREG(glob + op[i].a, REG_EAX)
\r
// cmp $0,%eax
886 EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);
\r
// je: a zero reference goes straight to the 1.0 store
887 j2 = LocalJmp(OP_EQ_S);
\r
// push $progfuncs
// NOTE(review): the push of the string argument is not visible in this excerpt; 8 bytes are popped after the call.
892 EmitByte(0x68); EmitAdr(progfuncs);
\r
893 //call PR_StringToNative
\r
894 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
\r
// add $8,%esp
896 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
\r
// cmpb $0,(%eax) (first character of the native string)
899 EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
\r
// je: an empty string also goes to the 1.0 store
900 j1 = LocalJmp(OP_EQ_S);
\r
902 STOREF(0.0f, glob + op[i].c);
\r
903 j0 = LocalJmp(OP_GOTO);
\r
907 STOREF(1.0f, glob + op[i].c);
\r
// Both "is empty" jumps share the l1 target; j0 skips the 1.0 store.
910 LocalJmpLoc(j2, l1);
\r
911 LocalJmpLoc(j1, l1);
\r
912 LocalJmpLoc(j0, l2);
\r
// Vector add, then vector subtract: three independent flds / fadds-or-fsubs / fstps triples, one per component.
// Add, component 0: flds A+0; fadds B+0; fstps C+0
917 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
\r
919 EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+0);
\r
921 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
\r
// Add, component 1
924 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
\r
926 EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+1);
\r
928 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
\r
// Add, component 2
931 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
\r
933 EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+2);
\r
935 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
\r
// Subtract, component 0: flds A+0; fsubs B+0; fstps C+0
939 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
\r
941 EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+0);
\r
943 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
\r
// Subtract, component 1
946 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
\r
948 EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+1);
\r
950 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
\r
// Subtract, component 2
953 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
\r
955 EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+2);
\r
957 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
\r
// Vector * vector: the three component products are left on the FPU stack, summed with two faddp
// instructions, and the scalar result is stored to glob[C].
961 //this is actually a dotproduct
\r
// flds A+0; fmuls B+0
963 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
\r
965 EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+0);
\r
// flds A+1; fmuls B+1
968 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
\r
970 EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+1);
\r
// flds A+2; fmuls B+2
973 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
\r
975 EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+2);
\r
// faddp %st,%st(1)
978 EmitByte(0xde);EmitByte(0xc1);
\r
// faddp %st,%st(1)
980 EmitByte(0xde);EmitByte(0xc1);
\r
// fstps glob[C]
983 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
// Float comparisons: B is pushed first and A second, so A is st(0) and B is st(1) when compared.
// glob[C] becomes 1.0 when the jump chosen by the opcode is taken, 0.0 otherwise.
// flds glob[B]
993 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
// flds glob[A]
995 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
// The bytes df e9 encode the unordered form (fucomip %st(1),%st): compares, sets EFLAGS, and pops A.
996 //fcomip %st(1),%st
\r
997 EmitByte(0xdf);EmitByte(0xe9);
\r
998 //fstp %st(0) (aka: pop)
\r
999 EmitByte(0xdd);EmitByte(0xd8);
\r
// The QC opcode selects the above/below style conditional jump inside LocalJmp.
1001 j1 = LocalJmp(op[i].op);
\r
1003 STOREF(0.0f, glob + op[i].c);
\r
1004 j2 = LocalJmp(OP_GOTO);
\r
1008 STOREF(1.0f, glob + op[i].c);
\r
// NOTE(review): the LocalLoc() assignments of l1 and l2 are not visible in this excerpt.
1011 LocalJmpLoc(j1,l1);
\r
1012 LocalJmpLoc(j2,l2);
\r
// Scalar * vector (either operand order): the scalar stays on the FPU stack while each vector
// component is loaded, multiplied by it, and stored to the matching component of C.
// NOTE(review): the assignments of f and v (which operand is the float, which the vector) are not visible in this excerpt.
1021 if (op[i].op == OP_MUL_FV)
\r
// flds glob[f] (the scalar)
1033 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + f);
\r
// flds glob[v+0]
1036 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+0);
\r
// fmul %st(1),%st
1038 EmitByte(0xd8);EmitByte(0xc9);
\r
// fstps glob[C+0]
1040 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
\r
// flds glob[v+1]
1043 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+1);
\r
// fmul %st(1),%st
1045 EmitByte(0xd8);EmitByte(0xc9);
\r
// fstps glob[C+1]
1047 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
\r
// flds glob[v+2]
1050 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+2);
\r
// fmul %st(1),%st
1052 EmitByte(0xd8);EmitByte(0xc9);
\r
// fstps glob[C+2]
1054 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
\r
// Discard the scalar that was kept on the stack.
1056 //fstp %st(0) (aka: pop)
\r
1057 EmitByte(0xdd);EmitByte(0xd8);
\r
// State statement: calls externs->stateop with three stacked arguments pushed right to left.
1062 //externs->stateop(progfuncs, OPA->_float, OPB->function);
\r
// pushl glob[B]
1064 EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].b);
\r
// pushl glob[A] (the raw 32-bit float is pushed as one stack slot)
1066 EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].a);
\r
// push $progfuncs
1068 EmitByte(0x68); EmitAdr(progfuncs);
\r
// The call target is the value of externs->stateop at generation time.
1069 //call externs->stateop
\r
1070 EmitByte(0xe8); EmitFOffset(externs->stateop, 4);
\r
// add $12,%esp
1072 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x0c);
\r
// NOTE(review): the three comments below are remnants of a sequence whose code lines are not visible in this excerpt.
1078 //fcomip %st(1),%st
\r
1081 //fcomip %st(1),%st
\r
1084 //fcomip %st(1),%st
\r
// Fallback for opcodes without a native translation: emit "int $opcode" so execution traps if the
// statement is ever reached, and report the opcode at generation time.
1095 EmitByte(0xcd);EmitByte(op[i].op);
\r
1096 printf("QCJIT: instruction %i is not implemented\n", op[i].op);
\r
// Component-wise vector comparison: each of the three components is compared in turn; the first
// component whose condition is not met stores 0.0 to glob[C] and jumps to the common exit (floc),
// and only when all three pass is 1.0 stored.
// NOTE(review): the enclosing case label and the l1 assignments are not visible in this excerpt.
1102 void *f0, *f1, *f2, *floc;
\r
// Component 0: flds A+0; flds B+0 (B ends up in st(0), A in st(1))
1105 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
\r
1107 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+0);
\r
// The bytes df e9 encode the unordered form, fucomip %st(1),%st.
1108 //fcomip %st(1),%st
\r
1109 EmitByte(0xdf);EmitByte(0xe9);
\r
1110 //fstp %st(0) (aka: pop)
\r
1111 EmitByte(0xdd);EmitByte(0xd8);
\r
1113 /*if the condition is true, don't fail*/
\r
1114 j1 = LocalJmp(op[i].op);
\r
1116 STOREF(0.0f, glob + op[i].c);
\r
1117 f0 = LocalJmp(OP_GOTO);
\r
1120 LocalJmpLoc(j1,l1);
\r
// Component 1
1124 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
\r
1126 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+1);
\r
1127 //fcomip %st(1),%st
\r
1128 EmitByte(0xdf);EmitByte(0xe9);
\r
1129 //fstp %st(0) (aka: pop)
\r
1130 EmitByte(0xdd);EmitByte(0xd8);
\r
1132 /*if the condition is true, don't fail*/
\r
1133 j1 = LocalJmp(op[i].op);
\r
1135 STOREF(0.0f, glob + op[i].c);
\r
1136 f1 = LocalJmp(OP_GOTO);
\r
1139 LocalJmpLoc(j1,l1);
\r
// Component 2
1143 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
\r
1145 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+2);
\r
1146 //fcomip %st(1),%st
\r
1147 EmitByte(0xdf);EmitByte(0xe9);
\r
1148 //fstp %st(0) (aka: pop)
\r
1149 EmitByte(0xdd);EmitByte(0xd8);
\r
1151 /*if the condition is true, don't fail*/
\r
1152 j1 = LocalJmp(op[i].op);
\r
1154 STOREF(0.0f, glob + op[i].c);
\r
1155 f2 = LocalJmp(OP_GOTO);
\r
1158 LocalJmpLoc(j1,l1);
\r
// All three components passed.
1161 STOREF(1.0f, glob + op[i].c);
\r
// Common exit: the three failure jumps land here, after the 1.0 store.
1163 floc = LocalLoc();
\r
1164 LocalJmpLoc(f0,floc);
\r
1165 LocalJmpLoc(f1,floc);
\r
1166 LocalJmpLoc(f2,floc);
\r
// Indexed read from a global array: glob[B] (a float) is converted to an integer index, and the
// element at glob[base + index] is copied to glob[C] (three words, with the index scaled by 3, for vectors).
1170 /*fteqcc generates these from reading 'fast arrays', and are part of hexenc extras*/
\r
1171 case OP_FETCH_GBL_F:
\r
1172 case OP_FETCH_GBL_S:
\r
1173 case OP_FETCH_GBL_E:
\r
1174 case OP_FETCH_GBL_FNC:
\r
1175 case OP_FETCH_GBL_V:
\r
// Read at generation time from the global just before A; not used by any visible emitted code (see the FIXME below).
1177 unsigned int max = ((unsigned int*)glob)[op[i].a-1];
\r
1178 unsigned int base = op[i].a;
\r
// flds glob[B]
1180 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
// fistpl ta (index as an integer, rounded per the FPU control word)
1182 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
\r
1183 LOADREG(&ta, REG_EAX)
\r
1184 //FIXME: if eax >= $max, abort
\r
1186 if (op[i].op == OP_FETCH_GBL_V)
\r
1188 /*scale the index by 3*/
\r
// NOTE(review): the mul below multiplies eax by edx, so this depends on SETREGI really loading the
// register named in its second argument - verify against the macro definition.
1189 SETREGI(3, REG_EDX)
\r
// mul %edx
1191 EmitByte(0xf7); EmitByte(0xe2);
\r
1195 //mov &glob[base](,%eax,4),%edx
\r
1196 EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+0));
\r
1197 STOREREG(REG_EDX, glob + op[i].c+0)
\r
1198 if (op[i].op == OP_FETCH_GBL_V)
\r
1200 //mov &glob[base+1](,%eax,4),%edx
\r
1201 EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+1));
\r
1202 STOREREG(REG_EDX, glob + op[i].c+1)
\r
1203 //mov &glob[base+2](,%eax,4),%edx
\r
1204 EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+2));
\r
1205 STOREREG(REG_EDX, glob + op[i].c+2)
\r
// Computes the address of an indexed global and stores it in glob[C]: glob[B] is loaded as the
// index and lea scales it by 4 onto a constant base address.
1210 /*fteqcc generates these from writing 'fast arrays'*/
\r
1211 case OP_GLOBALADDRESS:
\r
1212 LOADREG(glob + op[i].b, REG_EAX);
\r
// NOTE(review): the comment below names &glob[A] as the base, but the emitted displacement is
// glob + op[i].b+2; one of the two is presumably wrong - confirm against the interpreter's OP_GLOBALADDRESS.
1213 //lea &glob[A](, %eax, 4),%eax
\r
1214 EmitByte(0x8d);EmitByte(0x04);EmitByte(0x85);EmitAdr(glob + op[i].b+2);
\r
1215 STOREREG(REG_EAX, glob + op[i].c);
\r
// The bound check opcode is left untranslated (its case label is commented out).
1217 // case OP_BOUNDCHECK:
\r
1218 //FIXME: assert b <= a < c
\r
// Float to integer conversion: load glob[A] as a float and store it to glob[C] as a 32-bit integer.
// NOTE(review): fistp rounds according to the FPU control word rather than truncating like a C cast - confirm this matches the interpreter.
1220 case OP_CONV_FTOI:
\r
// flds glob[A]
1222 EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
\r
// fistpl glob[C]
1224 EmitByte(0xdb); EmitByte(0x1d);EmitAdr(glob + op[i].c);
\r
//integer multiply: glob[C] = low 32 bits of glob[A] * glob[B]
LOADREG(glob + op[i].a, REG_EAX);
//mull glob[B] (arg*eax => edx:eax)
//MUL r/m32 is opcode 0xf7 with /4 in the modrm byte (0x25 = /4 with an absolute 32-bit address).
//A leading 0xfc would instead be cld, and the 0x25 after it would decode as and $imm32,%eax.
EmitByte(0xf7); EmitByte(0x25);EmitAdr(glob + op[i].b);
STOREREG(REG_EAX, glob + op[i].c);
\r
// Integer bitwise OR: glob[C] = glob[A] | glob[B].
1233 /*other extended opcodes*/
\r
1235 LOADREG(glob + op[i].a, REG_EAX)
\r
// or glob[B],%eax
1237 EmitByte(0x0b); EmitByte(0x05);EmitAdr(glob + op[i].b);
\r
1238 STOREREG(REG_EAX, glob + op[i].c);
\r
// Unsupported-opcode bail-out followed by the end of generation.
// An opcode with no translation at all abandons the JIT: it is reported and the bookkeeping arrays are freed.
1244 enum qcop_e e = op[i].op;
\r
1245 printf("QCJIT: Extended instruction set %i is not supported, not using jit.\n", e);
\r
// NOTE(review): only these two frees are visible in this excerpt; confirm jit->code and jit are released on this path too.
1249 free(jit->statementjumps); //[MAX_STATEMENTS]
\r
1250 free(jit->statementoffsets); //[MAX_STATEMENTS]
\r
// On success the code buffer, which came from malloc, is made executable while staying readable and writable.
1263 //this memory is on the heap.
\r
1264 //this means that we must maintain read/write protection, or libc will crash us
\r
// VirtualProtect is a Windows API; NOTE(review): any platform guard around this call is not visible in this excerpt.
1265 VirtualProtect(jit->code, jit->codesize, PAGE_EXECUTE_READWRITE, &old);
\r
1269 // externs->WriteFile("jit.x86", jit->code, jit->codesize);
\r
1274 float foo(float arg)
\r
// Transfers control into the generated code for a statement, with ebx preloaded with prinst->edicttable.
// The entry point used is statementoffsets[statement+1], i.e. the code of the statement after the one named.
1284 void PR_EnterJIT(progfuncs_t *progfuncs, struct jitstate *jit, int statement)
\r
// GCC-style inline assembly path.
// NOTE(review): the clobber list names eax, ecx and edx only, while the generated code also writes esi
// (opcode number) and edi (string compares) - confirm those registers are preserved somewhere.
1287 //call, it clobbers pretty much everything.
\r
1288 asm("call *%0" :: "r"(jit->statementoffsets[statement+1]),"b"(prinst->edicttable):"cc","memory","eax","ecx","edx");
\r
// MSVC inline assembly path: same entry point and edict table, staged in locals first.
1289 #elif defined(_MSC_VER)
\r
1290 void *entry = jit->statementoffsets[statement+1];
\r
1291 void *edicttable = prinst->edicttable;
\r
1295 mov ebx,edicttable
\r
// Any other compiler is rejected at build time.
1300 #error "Sorry, no idea how to enter assembler safely for your compiler"
\r