]> git.xonotic.org Git - voretournament/voretournament.git/blob - misc/source/fteqcc-src/pr_x86.c
Update fteqcc source
[voretournament/voretournament.git] / misc / source / fteqcc-src / pr_x86.c
1 /*\r
2 when I say JIT, I mean load time, not execution time.\r
3 \r
4 notes:\r
5         qc jump offsets are all constants. we have no variable offset jumps (other than function calls/returns)\r
6         field remapping... fields are in place, and cannot be adjusted. if a field is not set to 0, its assumed to be a constant.\r
7 \r
8 optimisations:\r
9         none at the moment...\r
10         instructions need to be chained. stuff that writes to C should be cacheable, etc. maybe we don't even need to do the write to C\r
        it should also be possible to fold in eq+ifnot, so none of this silly storing of floats in equality tests
12 \r
13         this means that we need to track which vars are cached and in what form: fpreg, ireg+floatasint, ireg+float.\r
14         certain qccx hacks can use fpu operations on ints, so do what the instruction says, rather than considering an add an add regardless of types.\r
15 \r
16         OP_AND_F, OP_OR_F etc will generally result in ints, and we should be able to keep them as ints if they combine with other ints.\r
17 \r
18         some instructions are jump sites. any cache must be flushed before the start of the instruction.\r
19         some variables are locals, and will only ever be written by a single instruction, then read by the following instruction. such temps do not need to be written, or are overwritten later in the function anyway.\r
20         such locals need to be calculated PER FUNCTION as (fte)qcc can overlap locals making multiple distinct locals on a single offset.\r
21 \r
22         store locals on a proper stack instead of the current absurd mechanism.\r
23 \r
24         eax - tmp\r
25         ebx - prinst->edicttable\r
26         ecx     - tmp\r
27         edx - tmp\r
28         esi - debug opcode number\r
        edi - tmp (because it's preserved by subfunctions)
30         ebp -\r
31 \r
32   to use gas to provide binary opcodes:\r
33   vim -N blob.s && as blob.s && objdump.exe -d a.out\r
34 \r
35 \r
36   notable mods to test:\r
37   prydon gate, due to fpu mangling to carry values between maps\r
38 */\r
39 \r
40 #define PROGSUSED\r
41 #include "progsint.h"\r
42 \r
43 #ifdef QCJIT\r
44 \r
45 #ifndef _WIN32\r
46 #include <sys/mman.h>\r
47 #endif\r
48 \r
49 static float ta, tb, nullfloat=0;\r
50 \r
/*
working state for one progs while it is translated to x86.
the Emit* helpers append to 'code', jumps between statements are recorded and patched afterwards by FixupJumps.
*/
struct jitstate
{
        /*pending jump fixups. Emit4ByteJump appends three values per jump: code offset to patch, target statement, bias*/
        unsigned int *statementjumps;   //[MAX_STATEMENTS*3]
        /*where in 'code' the native code of each statement begins*/
        unsigned char **statementoffsets; //[MAX_STATEMENTS]
        /*count of values used in statementjumps (three per recorded jump)*/
        unsigned int numjumps;

        /*output buffer and the number of bytes emitted into it so far*/
        unsigned char *code;
        unsigned int codesize;
        /*number of statements, PR_CloseJit sizes its munmap of 'code' from this*/
        unsigned int jitstatements;

        /*base address used by the GCache_* helpers when addressing a global*/
        float *glob;
        /*bookkeeping for the (disabled) register cache*/
        /*NOTE(review): the USECACHE code calls the second field 'cachedreg', one of the two names needs changing before that can be enabled*/
        unsigned int cachedglobal;
        unsigned int cachereg;
};
64 \r
65 static void EmitByte(struct jitstate *jit, unsigned char byte)\r
66 {\r
67         jit->code[jit->codesize++] = byte;\r
68 }\r
69 static void Emit4Byte(struct jitstate *jit, unsigned int value)\r
70 {\r
71         jit->code[jit->codesize++] = (value>> 0)&0xff;\r
72         jit->code[jit->codesize++] = (value>> 8)&0xff;\r
73         jit->code[jit->codesize++] = (value>>16)&0xff;\r
74         jit->code[jit->codesize++] = (value>>24)&0xff;\r
75 }\r
/*appends an absolute address as a 32bit immediate. the pointer is cast to unsigned int, so only its low 32 bits survive.*/
static void EmitAdr(struct jitstate *jit, void *value)
{
        unsigned int adr = (unsigned int)value;

        Emit4Byte(jit, adr);
}
/*appends the raw bit pattern of a float as a 32bit immediate*/
static void EmitFloat(struct jitstate *jit, float value)
{
        union
        {
                unsigned int bits;
                float real;
        } pun;

        pun.real = value;
        Emit4Byte(jit, pun.bits);
}
86 static void Emit2Byte(struct jitstate *jit, unsigned short value)\r
87 {\r
88         jit->code[jit->codesize++] = (value>> 0)&0xff;\r
89         jit->code[jit->codesize++] = (value>> 8)&0xff;\r
90 }\r
91 \r
92 static void EmitFOffset(struct jitstate *jit, void *func, int bias)\r
93 {\r
94         union {void *f; unsigned int i;} u;\r
95         u.f = func;\r
96         u.i -= (unsigned int)&jit->code[jit->codesize+bias];\r
97         Emit4Byte(jit, u.i);\r
98 }\r
99 \r
100 static void Emit4ByteJump(struct jitstate *jit, int statementnum, int offset)\r
101 {\r
102         jit->statementjumps[jit->numjumps++] = jit->codesize;\r
103         jit->statementjumps[jit->numjumps++] = statementnum;\r
104         jit->statementjumps[jit->numjumps++] = offset;\r
105 \r
106         //the offset is filled in later\r
107         jit->codesize += 4;\r
108 }\r
109 \r
/*
register identifiers.
the first eight are used directly as register numbers when building opcode bytes (see LOADREG/STOREREG), so their values must not change.
*/
enum
{
        REG_EAX = 0,
        REG_ECX = 1,
        REG_EDX = 2,
        REG_EBX = 3,
        REG_ESP = 4,
        REG_EBP = 5,
        REG_ESI = 6,
        REG_EDI = 7,

        /*I'm not going to list S1 here, as that makes things too awkward*/
        REG_S0 = 8,     /*top of the fpu stack*/
        REG_NONE = 9    /*no register*/
};
/*
raw instruction emitters.
these expand to the single-argument Emit* wrappers, so they only work where a 'jit' variable is in scope and after those wrappers have been defined.
register arguments are REG_EAX..REG_EDI.
each one expands to a plain statement sequence (some callers omit the trailing semicolon), so never use one as the unbraced body of an if/else/loop.
*/
//xor %sr,%dr
#define XOR(sr,dr) EmitByte(0x31);EmitByte(0xc0 | ((sr)<<3) | (dr));
//xor %reg,%reg
#define CLEARREG(reg) XOR(reg,reg)
//mov addr,%reg (eax has a shorter encoding)
#define LOADREG(addr, reg) if ((reg) == REG_EAX) {EmitByte(0xa1);} else {EmitByte(0x8b); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
//mov %reg,addr (eax has a shorter encoding)
#define STOREREG(reg, addr) if ((reg) == REG_EAX) {EmitByte(0xa3);} else {EmitByte(0x89); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
//movl $f,addr
#define STOREF(f, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);EmitFloat(f);
//movl $i,addr
#define STOREI(i, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);Emit4Byte(i);
//mov $val,%reg. this used to emit 0xbe (esi) no matter which register was asked for. 0xb8|REG_ESI is still 0xbe.
#define SETREGI(val,reg) EmitByte(0xb8 | (reg));Emit4Byte(val);

//the parameters must not be named a/b/c, or they would also replace the member names in op[i].a etc
#define ARGREGS(ra,rb,rc)  GCache_Load(jit, op[i].a, ra, op[i].b, rb, op[i].c, rc)
#define RESULTREG(r) GCache_Store(jit, op[i].c, r)
135 \r
//for the purposes of the cache, 'temp' offsets are those that are only read after having been written within the preceding control block.
//if they are read at any other time, then we must write them out in full.
//this logic applies only to locals of a function.
139 //#define USECACHE\r
140 \r
141 static void GCache_Load(struct jitstate *jit, int ao, int ar, int bo, int br, int co, int cr)\r
142 {\r
143 #if USECACHE\r
144         if (jit->cachedreg != REG_NONE)\r
145         {\r
146                 /*something is cached, if its one of the input offsets then can chain the instruction*/\r
147 \r
148                 if (jit->cachedglobal === ao && ar != REG_NONE)\r
149                 {\r
150                         if (jit->cachedreg == ar)\r
151                                 ar = REG_NONE;\r
152                 }\r
153                 if (jit->cachedglobal === bo && br != REG_NONE)\r
154                 {\r
155                         if (jit->cachedreg == br)\r
156                                 br = REG_NONE;\r
157                 }\r
158                 if (jit->cachedglobal === co && cr != REG_NONE)\r
159                 {\r
160                         if (jit->cachedreg == cr)\r
161                                 cr = REG_NONE;\r
162                 }\r
163 \r
164                 if (!istemp(ao))\r
165                 {\r
166                         /*purge the old cache*/\r
167                         switch(jit->cachedreg)\r
168                         {\r
169                         case REG_NONE:\r
170                                 break;\r
171                         case REG_S0:\r
172                                 //fstps glob[C]\r
173                                 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + jit->cachedglobal);\r
174                                 break;\r
175                         default:\r
176                                 STOREREG(jit->cachedreg, jit->glob + jit->cachedglobal);\r
177                                 break;\r
178                 }\r
179                 jit->cachedglobal = -1;\r
180                 jit->cachedreg = REG_NONE;\r
181         }\r
182 \r
183 #endif\r
184         switch(ar)\r
185         {\r
186         case REG_NONE:\r
187                 break;\r
188         case REG_S0:\r
189                 //flds glob[A]\r
190                 EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].a);\r
191                 break;\r
192         default:\r
193                 LOADREG(jit->glob + ao, ar);\r
194                 break;\r
195         }\r
196 \r
197         switch(br)\r
198         {\r
199         case REG_NONE:\r
200                 break;\r
201         case REG_S0:\r
202                 //flds glob[A]\r
203                 EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].b);\r
204                 break;\r
205         default:\r
206                 LOADREG(jit->glob + bo, br);\r
207                 break;\r
208         }\r
209 \r
210         switch(cr)\r
211         {\r
212         case REG_NONE:\r
213                 break;\r
214         case REG_S0:\r
215                 //flds glob[A]\r
216                 EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].c);\r
217                 break;\r
218         default:\r
219                 LOADREG(jit->glob + co, cr);\r
220                 break;\r
221         }\r
222 }\r
223 static void GCache_Store(struct jitstate *jit, int ofs, int reg)\r
224 {\r
225 #if USECACHE\r
226         jit->cachedglobal = ofs;\r
227         jit->cachedreg = reg;\r
228 #else\r
229         switch(reg)\r
230         {\r
231         case REG_NONE:\r
232                 break;\r
233         case REG_S0:\r
234                 //fstps glob[C]\r
235                 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + ofs);\r
236                 break;\r
237         default:\r
238                 STOREREG(reg, jit->glob + ofs);\r
239                 break;\r
240         }\r
241 #endif\r
242 }\r
243 \r
244 static void *LocalLoc(struct jitstate *jit)\r
245 {\r
246         return &jit->code[jit->codesize];\r
247 }\r
248 static void *LocalJmp(struct jitstate *jit, int cond)\r
249 {\r
250         /*floating point ops don't set the sign flag, thus we use the 'above/below' instructions instead of 'greater/less' instructions*/\r
251         if (cond == OP_GOTO)\r
252                 EmitByte(jit, 0xeb);    //jmp\r
253         else if (cond == OP_LE_F)\r
254                 EmitByte(jit, 0x76);    //jbe\r
255         else if (cond == OP_GE_F)\r
256                 EmitByte(jit, 0x73);    //jae\r
257         else if (cond == OP_LT_F)\r
258                 EmitByte(jit, 0x72);    //jb\r
259         else if (cond == OP_GT_F)\r
260                 EmitByte(jit, 0x77);    //ja\r
261         else if (cond == OP_LE_I)\r
262                 EmitByte(jit, 0x7e);    //jle\r
263         else if (cond == OP_LT_I)\r
264                 EmitByte(jit, 0x7c);    //jl\r
265         else if ((cond >= OP_NE_F && cond <= OP_NE_FNC) || cond == OP_NE_I)\r
266                 EmitByte(jit, 0x75);    //jne\r
267         else if ((cond >= OP_EQ_F && cond <= OP_EQ_FNC) || cond == OP_EQ_I)\r
268                 EmitByte(jit, 0x74);    //je\r
269 #if defined(DEBUG) && defined(_WIN32)\r
270         else\r
271         {\r
272                 OutputDebugString("oh noes!\n");\r
273                 return NULL;\r
274         }\r
275 #endif\r
276 \r
277         EmitByte(jit, 0);\r
278 \r
279         return LocalLoc(jit);\r
280 }\r
/*
resolves a short jump.
jmp is the value LocalJmp returned (the address just past the jump), loc is the destination.
the displacement byte is the last byte of the jump, so it lives at jmp[-1].
*/
static void LocalJmpLoc(void *jmp, void *loc)
{
        unsigned char *a = jmp;
        int offs = (int)((char *)loc - (char *)jmp);

#if defined(DEBUG) && defined(_WIN32)
        //a signed byte holds -128 to 127 inclusive. -128 used to be rejected here.
        if (offs > 127 || offs < -128)
        {
                OutputDebugStringA("bad jump\n");
                //replace the whole jump with 'int $0xcc'
                a[-2] = 0xcd;
                a[-1] = 0xcc;
                return;
        }
#endif
        a[-1] = offs;
}
297 \r
298 static void FixupJumps(struct jitstate *jit)\r
299 {\r
300         unsigned int j;\r
301         unsigned char *codesrc;\r
302         unsigned char *codedst;\r
303         unsigned int offset;\r
304 \r
305         unsigned int v;\r
306 \r
307         for (j = 0; j < jit->numjumps;)\r
308         {\r
309                 v = jit->statementjumps[j++];\r
310                 codesrc = &jit->code[v];\r
311 \r
312                 v = jit->statementjumps[j++];\r
313                 codedst = jit->statementoffsets[v];\r
314 \r
315                 v = jit->statementjumps[j++];\r
316                 offset = (int)(codedst - (codesrc-v));  //3rd term because the jump is relative to the instruction start, not the instruction's offset\r
317 \r
318                 codesrc[0] = (offset>> 0)&0xff;\r
319                 codesrc[1] = (offset>> 8)&0xff;\r
320                 codesrc[2] = (offset>>16)&0xff;\r
321                 codesrc[3] = (offset>>24)&0xff;\r
322         }\r
323 }\r
324 \r
325 int ASMCALL PR_LeaveFunction (progfuncs_t *progfuncs);\r
326 int ASMCALL PR_EnterFunction (progfuncs_t *progfuncs, dfunction_t *f, int progsnum);\r
327 \r
328 void PR_CloseJit(struct jitstate *jit)\r
329 {\r
330         if (jit)\r
331         {\r
332                 free(jit->statementjumps);\r
333                 free(jit->statementoffsets);\r
334 #ifndef _WIN32\r
335                 munmap(jit->code, jit->jitstatements * 500);\r
336 #else\r
337                 free(jit->code);\r
338 #endif\r
339                 free(jit)\r
340         }\r
341 }\r
342 \r
/*
from here on the emit helpers are called without naming the jit state, these wrappers pass the local 'jit' variable for you.
a function-like macro is not re-expanded inside its own expansion, so the inner name still refers to the real function.
*/
#define EmitByte(byte) EmitByte(jit, byte)
#define EmitAdr(ptr) EmitAdr(jit, ptr)
#define EmitFOffset(func,bias) EmitFOffset(jit, func, bias)
#define Emit4ByteJump(statementnum,offset) Emit4ByteJump(jit, statementnum, offset)
#define Emit4Byte(value) Emit4Byte(jit, value)
#define EmitFloat(value) EmitFloat(jit, value)
#define LocalJmp(cond) LocalJmp(jit, cond)
#define LocalLoc() LocalLoc(jit)
351 \r
352 \r
353 struct jitstate *PR_GenerateJit(progfuncs_t *progfuncs)\r
354 {\r
355         struct jitstate *jit;\r
356 \r
357         void *j0, *l0;\r
358         void *j1, *l1;\r
359         void *j2, *l2;\r
360         unsigned int i;\r
361         dstatement16_t *op = (dstatement16_t*)current_progstate->statements;\r
362         unsigned int numstatements = current_progstate->progs->numstatements;\r
363         int *glob = (int*)current_progstate->globals;\r
364 \r
365         if (current_progstate->numbuiltins)\r
366                 return NULL;\r
367         jit = malloc(sizeof(*jit));\r
368         jit->jitstatements = numstatements;\r
369 \r
370         jit->statementjumps = malloc(numstatements*12);\r
371         jit->statementoffsets = malloc(numstatements*4);\r
372 #ifndef _WIN32\r
373         jit->code = mmap(NULL, numstatements*500, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);\r
374 #else\r
375         jit->code = malloc(numstatements*500);\r
376 #endif\r
377         if (!jit->code)\r
378                 return NULL;\r
379 \r
380         jit->numjumps = 0;\r
381         jit->codesize = 0;\r
382 \r
383 \r
384 \r
385         for (i = 0; i < numstatements; i++)\r
386         {\r
387                 jit->statementoffsets[i] = &jit->code[jit->codesize];\r
388 \r
389                 /*DEBUG*/\r
390                 SETREGI(op[i].op, REG_ESI);\r
391 \r
392                 switch(op[i].op)\r
393                 {\r
394                 //jumps\r
395                 case OP_IF_I:\r
396                         //integer compare\r
397                         //if a, goto b\r
398 \r
399                         //cmpl $0,glob[A]\r
400                         EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);\r
401                         //jne B\r
402                         EmitByte(0x0f);EmitByte(0x85);Emit4ByteJump(i + (signed short)op[i].b, -4);\r
403                         break;\r
404 \r
405                 case OP_IFNOT_I:\r
406                         //integer compare\r
407                         //if !a, goto b\r
408 \r
409                         //cmpl $0,glob[A]\r
410                         EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);\r
411                         //je B\r
412                         EmitByte(0x0f);EmitByte(0x84);Emit4ByteJump(i + (signed short)op[i].b, -4);\r
413                         break;\r
414 \r
415                 case OP_GOTO:\r
416                         EmitByte(0xE9);Emit4ByteJump(i + (signed short)op[i].a, -4);\r
417                         break;\r
418                         \r
419                 //function returns\r
420                 case OP_DONE:\r
421                 case OP_RETURN:\r
422                         //done and return are the same\r
423 \r
424                         //part 1: store A into OFS_RETURN\r
425 \r
426                         if (!op[i].a)\r
427                         {\r
428                                 //assumption: anything that returns address 0 is a void or zero return.\r
429                                 //thus clear eax and copy that to the return vector.\r
430                                 CLEARREG(REG_EAX);\r
431                                 STOREREG(REG_EAX, glob + OFS_RETURN+0);\r
432                                 STOREREG(REG_EAX, glob + OFS_RETURN+1);\r
433                                 STOREREG(REG_EAX, glob + OFS_RETURN+2);\r
434                         }\r
435                         else\r
436                         {\r
437                                 LOADREG(glob + op[i].a+0, REG_EAX);\r
438                                 LOADREG(glob + op[i].a+1, REG_EDX);\r
439                                 LOADREG(glob + op[i].a+2, REG_ECX);\r
440                                 STOREREG(REG_EAX, glob + OFS_RETURN+0);\r
441                                 STOREREG(REG_EDX, glob + OFS_RETURN+1);\r
442                                 STOREREG(REG_ECX, glob + OFS_RETURN+2);\r
443                         }\r
444                         \r
445                         //call leavefunction to get the return address\r
446                         \r
447 //                      pushl progfuncs\r
448                         EmitByte(0x68);EmitAdr(progfuncs);\r
449 //                      call PR_LeaveFunction\r
450                         EmitByte(0xe8);EmitFOffset(PR_LeaveFunction, 4);\r
451 //                      add $4,%esp\r
452                         EmitByte(0x83);EmitByte(0xc4);EmitByte(0x04);\r
453 //                      movl pr_depth,%edx\r
454                         EmitByte(0x8b);EmitByte(0x15);EmitAdr(&pr_depth);\r
455 //                      cmp prinst->exitdepth,%edx\r
456                         EmitByte(0x3b);EmitByte(0x15);EmitAdr(&prinst->exitdepth);\r
457 //                      je returntoc\r
458                         j1 = LocalJmp(OP_EQ_E);\r
459 //                              mov statementoffsets[%eax*4],%eax\r
460                                 EmitByte(0x8b);EmitByte(0x04);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);\r
461 //                              jmp *eax\r
462                                 EmitByte(0xff);EmitByte(0xe0);\r
463 //                      returntoc:\r
464                         l1 = LocalLoc();\r
465 //                      ret\r
466                         EmitByte(0xc3);\r
467 \r
468                         LocalJmpLoc(j1,l1);\r
469                         break;\r
470 \r
471                 //function calls\r
472                 case OP_CALL0:\r
473                 case OP_CALL1:\r
474                 case OP_CALL2:\r
475                 case OP_CALL3:\r
476                 case OP_CALL4:\r
477                 case OP_CALL5:\r
478                 case OP_CALL6:\r
479                 case OP_CALL7:\r
480                 case OP_CALL8:\r
481                         //FIXME: the size of this instruction is going to hurt cache performance if every single function call is expanded into this HUGE CHUNK of gibberish!\r
482                         //FIXME: consider the feasability of just calling a C function and just jumping to the address it returns.\r
483 \r
484                 //save the state in place the rest of the engine can cope with\r
485                         //movl $i, pr_xstatement\r
486                         EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_xstatement);Emit4Byte(i);\r
487                         //movl $(op[i].op-OP_CALL0), pr_argc\r
488                         EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_argc);Emit4Byte(op[i].op-OP_CALL0);\r
489 \r
490                 //figure out who we're calling, and what that involves\r
491                         //%eax = glob[A]\r
492                         LOADREG(glob + op[i].a, REG_EAX);\r
493                 //eax is now the func num\r
494 \r
495                         //mov %eax,%ecx\r
496                         EmitByte(0x89); EmitByte(0xc1);\r
497                         //shr $24,%ecx\r
498                         EmitByte(0xc1); EmitByte(0xe9); EmitByte(0x18);\r
499                 //ecx is now the progs num for the new func\r
500 \r
501                         //cmp %ecx,pr_typecurrent\r
502                         EmitByte(0x39); EmitByte(0x0d); EmitAdr(&pr_typecurrent);\r
503                         //je sameprogs\r
504                         j1 = LocalJmp(OP_EQ_I);\r
505                         {\r
506                                 //can't handle switching progs\r
507 \r
508                                 //FIXME: recurse though PR_ExecuteProgram\r
509                                 //push eax\r
510                                 //push progfuncs\r
511                                 //call PR_ExecuteProgram\r
512                                 //add $8,%esp\r
513                                 //remember to change the je above\r
514 \r
515                                 //err... exit depth? no idea\r
516                                 EmitByte(0xcd);EmitByte(op[i].op);      //int $X\r
517 \r
518 \r
519                                 //ret\r
520                                 EmitByte(0xc3);\r
521                         }\r
522                         //sameprogs:\r
523                         l1 = LocalLoc();\r
524                         LocalJmpLoc(j1,l1);\r
525 \r
526                         //andl $0x00ffffff, %eax\r
527                         EmitByte(0x25);Emit4Byte(0x00ffffff);\r
528                         \r
529                         //mov $sizeof(dfunction_t),%edx\r
530                         EmitByte(0xba);Emit4Byte(sizeof(dfunction_t));\r
531                         //mul %edx\r
532                         EmitByte(0xf7); EmitByte(0xe2);\r
533                         //add pr_functions,%eax\r
534                         EmitByte(0x05); EmitAdr(pr_functions);\r
535 \r
536                 //eax is now the dfunction_t to be called\r
537                 //edx is clobbered.\r
538 \r
539                         //mov (%eax),%edx\r
540                         EmitByte(0x8b);EmitByte(0x10);\r
541                 //edx is now the first statement number\r
542                         //cmp $0,%edx\r
543                         EmitByte(0x83);EmitByte(0xfa);EmitByte(0x00);\r
544                         //jl isabuiltin\r
545                         j1 = LocalJmp(OP_LT_I);\r
546                         {\r
547                                 /* call the function*/\r
548                                 //push %ecx\r
549                                 EmitByte(0x51);\r
550                                 //push %eax\r
551                                 EmitByte(0x50);\r
552                                 //pushl progfuncs\r
553                                 EmitByte(0x68);EmitAdr(progfuncs);\r
554                                 //call PR_EnterFunction\r
555                                 EmitByte(0xe8);EmitFOffset(PR_EnterFunction, 4);\r
556                                 //sub $12,%esp\r
557                                 EmitByte(0x83);EmitByte(0xc4);EmitByte(0xc);\r
558                 //eax is now the next statement number (first of the new function, usually equal to ecx, but not always)\r
559 \r
560                                 //jmp statementoffsets[%eax*4]\r
561                                 EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);\r
562                         }\r
563                         /*its a builtin, figure out which, and call it*/\r
564                         //isabuiltin:\r
565                         l1 = LocalLoc();\r
566                         LocalJmpLoc(j1,l1);\r
567 \r
568                         //push current_progstate->globals\r
569                         EmitByte(0x68);EmitAdr(current_progstate->globals);\r
570                         //push progfuncs\r
571                         EmitByte(0x68);EmitAdr(progfuncs);\r
572                         //neg %edx\r
573                         EmitByte(0xf7);EmitByte(0xda);\r
574                         //call externs->globalbuiltins[%edx,4]\r
575 //FIXME: make sure this dereferences\r
576                         EmitByte(0xff);EmitByte(0x14);EmitByte(0x95);EmitAdr(externs->globalbuiltins);\r
577                         //add $8,%esp\r
578                         EmitByte(0x83);EmitByte(0xc4);EmitByte(0x8);\r
579 \r
580                 //but that builtin might have been Abort()\r
581 \r
582                         LOADREG(&prinst->continuestatement, REG_EAX);\r
583                         //cmp $-1,%eax\r
584                         EmitByte(0x83);EmitByte(0xf8);EmitByte(0xff);\r
585                         //je donebuiltincall\r
586                         j1 = LocalJmp(OP_EQ_I);\r
587                         {\r
588                                 //mov $-1,prinst->continuestatement\r
589                                 EmitByte(0xc7);EmitByte(0x05);EmitAdr(&prinst->continuestatement);Emit4Byte((unsigned int)-1);\r
590 \r
591                                 //jmp statementoffsets[%eax*4]\r
592                                 EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets);\r
593                         }\r
594                         //donebuiltincall:\r
595                         l1 = LocalLoc();\r
596                         LocalJmpLoc(j1,l1);\r
597                         break;\r
598 \r
599                 case OP_MUL_F:\r
600                         //flds glob[A]\r
601                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
602                         //fmuls glob[B]\r
603                         EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b);\r
604                         //fstps glob[C]\r
605                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
606                         break;\r
607                 case OP_DIV_F:\r
608                         //flds glob[A]\r
609                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
610                         //fdivs glob[B]\r
611                         EmitByte(0xd8);EmitByte(0x35);EmitAdr(glob + op[i].b);\r
612                         //fstps glob[C]\r
613                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
614                         break;\r
615                 case OP_ADD_F:\r
616                         //flds glob[A]\r
617                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
618                         //fadds glob[B]\r
619                         EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b);\r
620                         //fstps glob[C]\r
621                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
622                         break;\r
623                 case OP_SUB_F:\r
624                         //flds glob[A]\r
625                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
626                         //fsubs glob[B]\r
627                         EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b);\r
628                         //fstps glob[C]\r
629                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
630                         break;\r
631 \r
632                 case OP_NOT_F:\r
633                         //fldz\r
634                         EmitByte(0xd9);EmitByte(0xee);\r
635                         //fcomps        glob[A]\r
636                         EmitByte(0xd8); EmitByte(0x1d); EmitAdr(glob + op[i].a);\r
637                         //fnstsw %ax\r
638                         EmitByte(0xdf);EmitByte(0xe0);\r
639                         //testb 0x40,%ah\r
640                         EmitByte(0xf6);EmitByte(0xc4);EmitByte(0x40);\r
641                         \r
642                         j1 = LocalJmp(OP_NE_F);\r
643                         {\r
644                                 STOREF(0.0f, glob + op[i].c);\r
645                                 j2 = LocalJmp(OP_GOTO);\r
646                         }\r
647                         {\r
648                                 //noteq:\r
649                                 l1 = LocalLoc();\r
650                                 STOREF(1.0f, glob + op[i].c);\r
651                         }\r
652                         //end:\r
653                         l2 = LocalLoc();\r
654                         LocalJmpLoc(j1,l1);\r
655                         LocalJmpLoc(j2,l2);\r
656                         break;\r
657 \r
658                 case OP_STORE_F:\r
659                 case OP_STORE_S:\r
660                 case OP_STORE_ENT:\r
661                 case OP_STORE_FLD:\r
662                 case OP_STORE_FNC:\r
663                         LOADREG(glob + op[i].a, REG_EAX);\r
664                         STOREREG(REG_EAX, glob + op[i].b);\r
665                         break;\r
666 \r
667                 case OP_STORE_V:\r
668                         LOADREG(glob + op[i].a+0, REG_EAX);\r
669                         LOADREG(glob + op[i].a+1, REG_EDX);\r
670                         LOADREG(glob + op[i].a+2, REG_ECX);\r
671                         STOREREG(REG_EAX, glob + op[i].b+0);\r
672                         STOREREG(REG_EDX, glob + op[i].b+1);\r
673                         STOREREG(REG_ECX, glob + op[i].b+2);\r
674                         break;\r
675 \r
676                 case OP_LOAD_F:\r
677                 case OP_LOAD_S:\r
678                 case OP_LOAD_ENT:\r
679                 case OP_LOAD_FLD:\r
680                 case OP_LOAD_FNC:\r
681                 case OP_LOAD_V:\r
682                 //a is the ent number, b is the field\r
683                 //c is the dest\r
684 \r
685                         LOADREG(glob + op[i].a, REG_EAX);\r
686                         LOADREG(glob + op[i].b, REG_ECX);\r
687 \r
688                 //FIXME: bound eax (ent number)\r
689                 //FIXME: bound ecx (field index)\r
690                         //mov (ebx,eax,4).%eax\r
691                         EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);\r
692                 //eax is now an edictrun_t\r
693                         //mov fields(,%eax,4),%edx\r
694                         EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);\r
695                 //edx is now the field array for that ent\r
696 \r
697                         //mov fieldajust(%edx,%ecx,4),%eax\r
698                         EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);\r
699 \r
700                         STOREREG(REG_EAX, glob + op[i].c)\r
701 \r
702                         if (op[i].op == OP_LOAD_V)\r
703                         {\r
704                                 //mov fieldajust+4(%edx,%ecx,4),%eax\r
705                                 EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(4+progfuncs->fieldadjust*4);\r
706                                 STOREREG(REG_EAX, glob + op[i].c+1)\r
707 \r
708                                 //mov fieldajust+8(%edx,%ecx,4),%eax\r
709                                 EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(8+progfuncs->fieldadjust*4);\r
710                                 STOREREG(REG_EAX, glob + op[i].c+2)\r
711                         }\r
712                         break;\r
713 \r
714                 case OP_ADDRESS:\r
715                         //a is the ent number, b is the field\r
716                 //c is the dest\r
717 \r
718                         LOADREG(glob + op[i].a, REG_EAX);\r
719                         LOADREG(glob + op[i].b, REG_ECX);\r
720 \r
721                 //FIXME: bound eax (ent number)\r
722                 //FIXME: bound ecx (field index)\r
723                         //mov (%ebx,%eax,4),%eax\r
724                         EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);\r
725                 //eax is now an edictrun_t\r
726                         //mov fields(%eax),%edx\r
727                         EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);\r
728                 //edx is now the field array for that ent\r
729                         //lea fieldadjust(%edx,%ecx,4),%eax      //offset = progfuncs->fieldadjust\r
730                         //EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); EmitByte(progfuncs->fieldadjust*4);\r
731                         EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);\r
732                         STOREREG(REG_EAX, glob + op[i].c);\r
733                         break;\r
734 \r
735                 case OP_STOREP_F:\r
736                 case OP_STOREP_S:\r
737                 case OP_STOREP_ENT:\r
738                 case OP_STOREP_FLD:\r
739                 case OP_STOREP_FNC:\r
740                         LOADREG(glob + op[i].a, REG_EAX);\r
741                         LOADREG(glob + op[i].b, REG_ECX);\r
742                         //mov %eax,(%ecx)\r
743                         EmitByte(0x89);EmitByte(0x01);\r
744                         break;\r
745 \r
746                 case OP_STOREP_V:\r
747                         LOADREG(glob + op[i].b, REG_ECX);\r
748 \r
749                         LOADREG(glob + op[i].a+0, REG_EAX);\r
750                         //mov %eax,0(%ecx)\r
751                         EmitByte(0x89);EmitByte(0x01);\r
752 \r
753                         LOADREG(glob + op[i].a+1, REG_EAX);\r
754                         //mov %eax,4(%ecx)\r
755                         EmitByte(0x89);EmitByte(0x41);EmitByte(0x04);\r
756 \r
757                         LOADREG(glob + op[i].a+2, REG_EAX);\r
758                         //mov %eax,8(%ecx)\r
759                         EmitByte(0x89);EmitByte(0x41);EmitByte(0x08);\r
760                         break;\r
761 \r
762                 case OP_NE_I:\r
763                 case OP_NE_E:\r
764                 case OP_NE_FNC:\r
765                 case OP_EQ_I:\r
766                 case OP_EQ_E:\r
767                 case OP_EQ_FNC:\r
768                         //integer equality\r
769                         LOADREG(glob + op[i].a, REG_EAX);\r
770 \r
771                         //cmp glob[B],%eax\r
772                         EmitByte(0x3b); EmitByte(0x04); EmitByte(0x25); EmitAdr(glob + op[i].b);\r
773                         j1 = LocalJmp(op[i].op);\r
774                         {\r
775                                 STOREF(0.0f, glob + op[i].c);\r
776                                 j2 = LocalJmp(OP_GOTO);\r
777                         }\r
778                         {\r
779                                 l1 = LocalLoc();\r
780                                 STOREF(1.0f, glob + op[i].c);\r
781                         }\r
782                         l2 = LocalLoc();\r
783                         LocalJmpLoc(j1,l1);\r
784                         LocalJmpLoc(j2,l2);\r
785                         break;\r
786 \r
787                 case OP_NOT_I:\r
788                 case OP_NOT_ENT:\r
789                 case OP_NOT_FNC:\r
790                         //cmpl $0,glob[A]\r
791                         EmitByte(0x83); EmitByte(0x3d); EmitAdr(glob + op[i].a); EmitByte(0x00); \r
792                         j1 = LocalJmp(OP_NE_I);\r
793                         {\r
794                                 STOREF(1.0f, glob + op[i].c);\r
795                                 j2 = LocalJmp(OP_GOTO);\r
796                         }\r
797                         {\r
798                                 l1 = LocalLoc();\r
799                                 STOREF(0.0f, glob + op[i].c);\r
800                         }\r
801                         l2 = LocalLoc();\r
802                         LocalJmpLoc(j1,l1);\r
803                         LocalJmpLoc(j2,l2);\r
804                         break;\r
805 \r
806                 case OP_BITOR_F:        //floats...\r
807                         //flds glob[A]\r
808                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
809                         //flds glob[B]\r
810                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
811                         //fistp tb\r
812                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);\r
813                         //fistp ta\r
814                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
815                         LOADREG(&ta, REG_EAX)\r
816                         //or %eax,tb\r
817                         EmitByte(0x09); EmitByte(0x05);EmitAdr(&tb);\r
818                         //fild tb\r
819                         EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);\r
820                         //fstps glob[C]\r
821                         EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
822                         break;\r
823 \r
824                 case OP_BITAND_F:\r
825                         //flds glob[A]\r
826                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
827                         //flds glob[B]\r
828                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
829                         //fistp tb\r
830                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);\r
831                         //fistp ta\r
832                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
833                         /*two args are now at ta and tb*/\r
834                         LOADREG(&ta, REG_EAX)\r
835                         //and %eax,tb\r
836                         EmitByte(0x21); EmitByte(0x05);EmitAdr(&tb);\r
837                         /*we just wrote the int value to tb, convert that to a float and store it at c*/\r
838                         //fild tb\r
839                         EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);\r
840                         //fstps glob[C]\r
841                         EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
842                         break;\r
843 \r
844                 case OP_AND_F:\r
845                         //test floats properly, so we don't get confused with -0.0\r
846                         //FIXME: is it feasible to grab the value as an int and test it against 0x7fffffff?\r
847 \r
848                         //flds  glob[A]\r
849                         EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);\r
850                         //fcomps        nullfloat\r
851                         EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
852                         //fnstsw        %ax\r
853                         EmitByte(0xdf); EmitByte(0xe0);\r
854                         //test  $0x40,%ah\r
855                         EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
856                         //jnz onefalse\r
857                         EmitByte(0x75); EmitByte(0x1f);\r
858 \r
859                         //flds  glob[B]\r
860                         EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);\r
861                         //fcomps        nullfloat\r
862                         EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
863                         //fnstsw        %ax\r
864                         EmitByte(0xdf); EmitByte(0xe0);\r
865                         //test  $0x40,%ah\r
866                         EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
867                         //jnz onefalse\r
868                         EmitByte(0x75); EmitByte(0x0c);\r
869 \r
870                         //mov float1,glob[C]\r
871                         EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);\r
872                         //jmp done\r
873                         EmitByte(0xeb); EmitByte(0x0a);\r
874 \r
875                         //onefalse:\r
876                         //mov float0,glob[C]\r
877                         EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);\r
878                         //done:\r
879                         break;\r
880                 case OP_OR_F:\r
881                         //test floats properly, so we don't get confused with -0.0\r
882 \r
883                         //flds  glob[A]\r
884                         EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);\r
885                         //fcomps        nullfloat\r
886                         EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
887                         //fnstsw        %ax\r
888                         EmitByte(0xdf); EmitByte(0xe0);\r
889                         //test  $0x40,%ah\r
890                         EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
891                         //je onetrue\r
892                         EmitByte(0x74); EmitByte(0x1f);\r
893 \r
894                         //flds  glob[B]\r
895                         EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);\r
896                         //fcomps        nullfloat\r
897                         EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);\r
898                         //fnstsw        %ax\r
899                         EmitByte(0xdf); EmitByte(0xe0);\r
900                         //test  $0x40,%ah\r
901                         EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);\r
902                         //je onetrue\r
903                         EmitByte(0x74); EmitByte(0x0c);\r
904 \r
905                         //mov float0,glob[C]\r
906                         EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);\r
907                         //jmp done\r
908                         EmitByte(0xeb); EmitByte(0x0a);\r
909 \r
910                         //onetrue:\r
911                         //mov float1,glob[C]\r
912                         EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);\r
913                         //done:\r
914                         break;\r
915 \r
916                 case OP_EQ_S:\r
917                 case OP_NE_S:\r
918                         {\r
919                         //put a in ecx\r
920                         LOADREG(glob + op[i].a, REG_ECX);\r
921                         //put b in edi\r
922                         LOADREG(glob + op[i].b, REG_EDI);\r
923 /*\r
924                         //early out if they're equal\r
925                         //cmp %ecx,%edi\r
926                         EmitByte(0x39); EmitByte(0xc0 | (REG_EDI<<3) | REG_ECX);\r
927                         j1c = LocalJmp(OP_EQ_S);\r
928 \r
929                         //if a is 0, check if b is ""\r
930                         //jecxz ais0\r
931                         EmitByte(0xe3); EmitByte(0x1a);\r
932 \r
933                         //if b is 0, check if a is ""\r
934                         //cmp $0,%edi\r
935                         EmitByte(0x83); EmitByte(0xff); EmitByte(0x00);\r
936                         //jne bnot0\r
937                         EmitByte(0x75); EmitByte(0x2a);\r
938                         {\r
939                                 //push a\r
940                                 EmitByte(0x51);\r
941                                 //push progfuncs\r
942                                 EmitByte(0x68); EmitAdr(progfuncs);\r
943                                 //call PR_StringToNative\r
944                                 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
945                                 //add $8,%esp\r
946                                 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
947                                 //cmpb $0,(%eax)\r
948                                 EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
949                                 j1b = LocalJmp(OP_EQ_S);\r
950                                 j0b = LocalJmp(OP_GOTO);\r
951                         }\r
952 \r
953                         //ais0:\r
954                         {\r
955                                 //push edi\r
956                                 EmitByte(0x57);\r
957                                 //push progfuncs\r
958                                 EmitByte(0x68); EmitAdr(progfuncs);\r
959                                 //call PR_StringToNative\r
960                                 EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
961                                 //add $8,%esp\r
962                                 EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
963                                 //cmpb $0,(%eax)\r
964                                 EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
965                                 //je _true\r
966                                 EmitByte(0x74); EmitByte(0x36);\r
967                                 //jmp _false\r
968                                 EmitByte(0xeb); EmitByte(0x28);\r
969                         }\r
970                         //bnot0:\r
971 */\r
972 LOADREG(glob + op[i].a, REG_ECX);\r
973                         //push ecx\r
974                         EmitByte(0x51);\r
975                         //push progfuncs\r
976                         EmitByte(0x68); EmitAdr(progfuncs);\r
977                         //call PR_StringToNative\r
978                         EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
979                         //push %eax\r
980                         EmitByte(0x50);\r
981 \r
982 LOADREG(glob + op[i].b, REG_EDI);\r
983                         //push %edi\r
984                         EmitByte(0x57);\r
985                         //push progfuncs\r
986                         EmitByte(0x68); EmitAdr(progfuncs);\r
987                         //call PR_StringToNative\r
988                         EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
989                         //add $8,%esp\r
990                         EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
991 \r
992 \r
993                         //push %eax\r
994                         EmitByte(0x50);\r
995                         //call strcmp\r
996                         EmitByte(0xe8); EmitFOffset(strcmp,4);\r
997                         //add $16,%esp\r
998                         EmitByte(0x83); EmitByte(0xc4); EmitByte(0x10);\r
999 \r
1000                         //cmp $0,%eax\r
1001                         EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);\r
1002                         j1 = LocalJmp(OP_EQ_S);\r
1003                         {\r
1004                                 l0 = LocalLoc();\r
1005                                 STOREF((op[i].op == OP_NE_S)?1.0f:0.0f, glob + op[i].c);\r
1006                                 j2 = LocalJmp(OP_GOTO);\r
1007                         }\r
1008                         {\r
1009                                 l1 = LocalLoc();\r
1010                                 STOREF((op[i].op == OP_NE_S)?0.0f:1.0f, glob + op[i].c);\r
1011                         }\r
1012                         l2 = LocalLoc();\r
1013 \r
1014 //                      LocalJmpLoc(j0b, l0);\r
1015                         LocalJmpLoc(j1, l1);\r
1016 //                      LocalJmpLoc(j1b, l1);\r
1017                         LocalJmpLoc(j2, l2);\r
1018                         }\r
1019                         break;\r
1020 \r
1021                 case OP_NOT_S:\r
1022                         LOADREG(glob + op[i].a, REG_EAX)\r
1023 \r
1024                         //cmp $0,%eax\r
1025                         EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);\r
1026                         j2 = LocalJmp(OP_EQ_S);\r
1027 \r
1028                         //push %eax\r
1029                         EmitByte(0x50);\r
1030                         //push progfuncs\r
1031                         EmitByte(0x68); EmitAdr(progfuncs);\r
1032                         //call PR_StringToNative\r
1033                         EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);\r
1034                         //add $8,%esp\r
1035                         EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);\r
1036 \r
1037                         //cmpb $0,(%eax)\r
1038                         EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);\r
1039                         j1 = LocalJmp(OP_EQ_S);\r
1040                         {\r
1041                                 STOREF(0.0f, glob + op[i].c);\r
1042                                 j0 = LocalJmp(OP_GOTO);\r
1043                         }\r
1044                         {\r
1045                                 l1 = LocalLoc();\r
1046                                 STOREF(1.0f, glob + op[i].c);\r
1047                         }\r
1048                         l2 = LocalLoc();\r
1049                         LocalJmpLoc(j2, l1);\r
1050                         LocalJmpLoc(j1, l1);\r
1051                         LocalJmpLoc(j0, l2);\r
1052                         break;\r
1053 \r
1054                 case OP_ADD_V:\r
1055                         //flds glob[A]\r
1056                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
1057                         //fadds glob[B]\r
1058                         EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+0);\r
1059                         //fstps glob[C]\r
1060                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);\r
1061 \r
1062                         //flds glob[A]\r
1063                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
1064                         //fadds glob[B]\r
1065                         EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+1);\r
1066                         //fstps glob[C]\r
1067                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);\r
1068 \r
1069                         //flds glob[A]\r
1070                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
1071                         //fadds glob[B]\r
1072                         EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+2);\r
1073                         //fstps glob[C]\r
1074                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);\r
1075                         break;\r
1076                 case OP_SUB_V:\r
1077                         //flds glob[A]\r
1078                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
1079                         //fsubs glob[B]\r
1080                         EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+0);\r
1081                         //fstps glob[C]\r
1082                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);\r
1083 \r
1084                         //flds glob[A]\r
1085                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
1086                         //fsubs glob[B]\r
1087                         EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+1);\r
1088                         //fstps glob[C]\r
1089                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);\r
1090 \r
1091                         //flds glob[A]\r
1092                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
1093                         //fsubs glob[B]\r
1094                         EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+2);\r
1095                         //fstps glob[C]\r
1096                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);\r
1097                         break;\r
1098 \r
1099                 case OP_MUL_V:\r
1100                         //this is actually a dotproduct\r
1101                         //flds glob[A]\r
1102                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
1103                         //fmuls glob[B]\r
1104                         EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+0);\r
1105 \r
1106                         //flds glob[A]\r
1107                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
1108                         //fmuls glob[B]\r
1109                         EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+1);\r
1110 \r
1111                         //flds glob[A]\r
1112                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
1113                         //fmuls glob[B]\r
1114                         EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+2);\r
1115 \r
1116                         //faddp\r
1117                         EmitByte(0xde);EmitByte(0xc1);\r
1118                         //faddp\r
1119                         EmitByte(0xde);EmitByte(0xc1);\r
1120 \r
1121                         //fstps glob[C]\r
1122                         EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
1123                         break;\r
1124 \r
1125                 case OP_EQ_F:\r
1126                 case OP_NE_F:\r
1127                 case OP_LE_F:\r
1128                 case OP_GE_F:\r
1129                 case OP_LT_F:\r
1130                 case OP_GT_F:\r
1131                         //flds glob[B]\r
1132                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b);\r
1133                         //flds glob[A]\r
1134                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);\r
1135                         //fucomip %st(1),%st\r
1136                         EmitByte(0xdf);EmitByte(0xe9);\r
1137                         //fstp %st(0)   (aka: pop)\r
1138                         EmitByte(0xdd);EmitByte(0xd8);\r
1139 \r
1140                         j1 = LocalJmp(op[i].op);\r
1141                         {\r
1142                                 STOREF(0.0f, glob + op[i].c);\r
1143                                 j2 = LocalJmp(OP_GOTO);\r
1144                         }\r
1145                         {\r
1146                                 l1 = LocalLoc();\r
1147                                 STOREF(1.0f, glob + op[i].c);\r
1148                         }\r
1149                         l2 = LocalLoc();\r
1150                         LocalJmpLoc(j1,l1);\r
1151                         LocalJmpLoc(j2,l2);\r
1152                         break;\r
1153 \r
1154                 case OP_MUL_FV:\r
1155                 case OP_MUL_VF:\r
1156                         //\r
1157                         {\r
1158                                 int v;\r
1159                                 int f;\r
1160                                 if (op[i].op == OP_MUL_FV)\r
1161                                 {\r
1162                                         f = op[i].a;\r
1163                                         v = op[i].b;\r
1164                                 }\r
1165                                 else\r
1166                                 {\r
1167                                         v = op[i].a;\r
1168                                         f = op[i].b;\r
1169                                 }\r
1170 \r
1171                                 //flds glob[F]\r
1172                                 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + f);\r
1173 \r
1174                                 //flds glob[V0]\r
1175                                 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+0);\r
1176                                 //fmul st(1)\r
1177                                 EmitByte(0xd8);EmitByte(0xc9);\r
1178                                 //fstps glob[C]\r
1179                                 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);\r
1180 \r
1181                                 //flds glob[V1]\r
1182                                 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+1);\r
1183                                 //fmul st(1)\r
1184                                 EmitByte(0xd8);EmitByte(0xc9);\r
1185                                 //fstps glob[C]\r
1186                                 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);\r
1187 \r
1188                                 //flds glob[V2]\r
1189                                 EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+2);\r
1190                                 //fmul st(1)\r
1191                                 EmitByte(0xd8);EmitByte(0xc9);\r
1192                                 //fstps glob[C]\r
1193                                 EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);\r
1194 \r
1195                                 //fstp %st(0)   (aka: pop)\r
1196                                 EmitByte(0xdd);EmitByte(0xd8);\r
1197                         }\r
1198                         break;\r
1199 \r
1200                 case OP_STATE:\r
1201                         //externs->stateop(progfuncs, OPA->_float, OPB->function);\r
1202                         //push b\r
1203                         EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].b);\r
1204                         //push a\r
1205                         EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].a);\r
1206                         //push $progfuncs\r
1207                         EmitByte(0x68); EmitAdr(progfuncs);\r
1208                         //call externs->stateop\r
1209                         EmitByte(0xe8); EmitFOffset(externs->stateop, 4);\r
1210                         //add $12,%esp\r
1211                         EmitByte(0x83); EmitByte(0xc4); EmitByte(0x0c);\r
1212                         break;\r
1213 #if 1\r
1214 /*              case OP_NOT_V:\r
1215                         //flds 0\r
1216                         //flds glob[A+0]\r
1217                         //fcomip %st(1),%st\r
1218                         //jne _true\r
1219                         //flds glob[A+1]\r
1220                         //fcomip %st(1),%st\r
1221                         //jne _true\r
1222                         //flds glob[A+2]\r
1223                         //fcomip %st(1),%st\r
1224                         //jne _true\r
1225                         //mov 1,C\r
1226                         //jmp done\r
1227                         //_true:\r
1228                         //mov 0,C\r
1229                         //done:\r
1230                         break;\r
1231 */\r
1232                         \r
1233                 case OP_NOT_V:\r
1234                         EmitByte(0xcd);EmitByte(op[i].op);\r
1235                         printf("QCJIT: instruction %i is not implemented\n", op[i].op);\r
1236                         break;\r
1237 #endif\r
1238                 case OP_NE_V:\r
1239                 case OP_EQ_V:\r
1240                 {\r
1241                         void *f0, *f1, *f2, *floc;\r
1242 //compare v[0]\r
1243                         //flds glob[A]\r
1244                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);\r
1245                         //flds glob[B]\r
1246                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+0);\r
1247                         //fucomip %st(1),%st\r
1248                         EmitByte(0xdf);EmitByte(0xe9);\r
1249                         //fstp %st(0)   (aka: pop)\r
1250                         EmitByte(0xdd);EmitByte(0xd8);\r
1251 \r
1252                         /*if the condition is true, don't fail*/\r
1253                         j1 = LocalJmp(op[i].op);\r
1254                         {\r
1255                                 STOREF(0.0f, glob + op[i].c);\r
1256                                 f0 = LocalJmp(OP_GOTO);\r
1257                         }\r
1258                         l1 = LocalLoc();\r
1259                         LocalJmpLoc(j1,l1);\r
1260 \r
1261 //compare v[1]\r
1262                         //flds glob[A]\r
1263                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);\r
1264                         //flds glob[B]\r
1265                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+1);\r
1266                         //fucomip %st(1),%st\r
1267                         EmitByte(0xdf);EmitByte(0xe9);\r
1268                         //fstp %st(0)   (aka: pop)\r
1269                         EmitByte(0xdd);EmitByte(0xd8);\r
1270 \r
1271                         /*if the condition is true, don't fail*/\r
1272                         j1 = LocalJmp(op[i].op);\r
1273                         {\r
1274                                 STOREF(0.0f, glob + op[i].c);\r
1275                                 f1 = LocalJmp(OP_GOTO);\r
1276                         }\r
1277                         l1 = LocalLoc();\r
1278                         LocalJmpLoc(j1,l1);\r
1279 \r
1280 //compare v[2]\r
1281                         //flds glob[A]\r
1282                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);\r
1283                         //flds glob[B]\r
1284                         EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+2);\r
1285                         //fucomip %st(1),%st\r
1286                         EmitByte(0xdf);EmitByte(0xe9);\r
1287                         //fstp %st(0)   (aka: pop)\r
1288                         EmitByte(0xdd);EmitByte(0xd8);\r
1289 \r
1290                         /*if the condition is true, don't fail*/\r
1291                         j1 = LocalJmp(op[i].op);\r
1292                         {\r
1293                                 STOREF(0.0f, glob + op[i].c);\r
1294                                 f2 = LocalJmp(OP_GOTO);\r
1295                         }\r
1296                         l1 = LocalLoc();\r
1297                         LocalJmpLoc(j1,l1);\r
1298 \r
1299 //success!\r
1300                         STOREF(1.0f, glob + op[i].c);\r
1301 \r
1302                         floc = LocalLoc();\r
1303                         LocalJmpLoc(f0,floc);\r
1304                         LocalJmpLoc(f1,floc);\r
1305                         LocalJmpLoc(f2,floc);\r
1306                         break;\r
1307                 }\r
1308 \r
1309                 /*fteqcc generates these from reading 'fast arrays', and are part of hexenc extras*/\r
1310                 case OP_FETCH_GBL_F:\r
1311                 case OP_FETCH_GBL_S:\r
1312                 case OP_FETCH_GBL_E:\r
1313                 case OP_FETCH_GBL_FNC:\r
1314                 case OP_FETCH_GBL_V:\r
1315                 {\r
1316                         unsigned int max = ((unsigned int*)glob)[op[i].a-1];\r
1317                         unsigned int base = op[i].a;\r
1318                         //flds glob[B]\r
1319                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
1320                         //fistp ta\r
1321                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);\r
1322                         LOADREG(&ta, REG_EAX)\r
1323                         //FIXME: if eax >= $max, abort\r
1324 \r
1325                         if (op[i].op == OP_FETCH_GBL_V)\r
1326                         {\r
1327                                 /*scale the index by 3*/\r
1328                                 SETREGI(3, REG_EDX)\r
1329                                 //mul %edx\r
1330                                 EmitByte(0xf7); EmitByte(0xe2);\r
1331                         }\r
1332 \r
1333                         //lookup global\r
1334                         //mov &glob[base](,%eax,4),%edx\r
1335                         EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+0));\r
1336                         STOREREG(REG_EDX, glob + op[i].c+0)\r
1337                         if (op[i].op == OP_FETCH_GBL_V)\r
1338                         {\r
1339                                 //mov &glob[base+1](,%eax,4),%edx\r
1340                                 EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+1));\r
1341                                 STOREREG(REG_EDX, glob + op[i].c+1)\r
1342                                 //mov &glob[base+2](,%eax,4),%edx\r
1343                                 EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+2));\r
1344                                 STOREREG(REG_EDX, glob + op[i].c+2)\r
1345                         }\r
1346                         break;\r
1347                 }\r
1348 \r
1349                 /*fteqcc generates these from writing 'fast arrays'*/\r
1350                 case OP_GLOBALADDRESS:\r
1351                         LOADREG(glob + op[i].b, REG_EAX);\r
1352                         //lea &glob[A](, %eax, 4),%eax\r
1353                         EmitByte(0x8d);EmitByte(0x04);EmitByte(0x85);EmitAdr(glob + op[i].b+2);\r
1354                         STOREREG(REG_EAX, glob + op[i].c);\r
1355                         break;\r
1356 //              case OP_BOUNDCHECK:\r
1357                         //FIXME: assert b <= a < c\r
1358                         break;\r
1359                 case OP_CONV_FTOI:\r
1360                         //flds glob[A]\r
1361                         EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);\r
1362                         //fistp glob[C]\r
1363                         EmitByte(0xdb); EmitByte(0x1d);EmitAdr(glob + op[i].c);\r
1364                         break;\r
1365                 case OP_MUL_I:\r
1366                         LOADREG(glob + op[i].a, REG_EAX);\r
1367                         //mull glob[C]       (arg*eax => edx:eax)\r
1368                         EmitByte(0xfc); EmitByte(0x25);EmitAdr(glob + op[i].b);\r
1369                         STOREREG(REG_EAX, glob + op[i].c);\r
1370                         break;\r
1371 \r
1372                 /*other extended opcodes*/\r
1373                 case OP_BITOR_I:\r
1374                         LOADREG(glob + op[i].a, REG_EAX)\r
1375                         //or %eax,tb\r
1376                         EmitByte(0x0b); EmitByte(0x05);EmitAdr(glob + op[i].b);\r
1377                         STOREREG(REG_EAX, glob + op[i].c);\r
1378                         break;\r
1379 \r
1380 \r
1381                 default:\r
1382                         {\r
1383                                 enum qcop_e e = op[i].op;\r
1384                         printf("QCJIT: Extended instruction set %i is not supported, not using jit.\n", e);\r
1385                         }\r
1386 \r
1387 \r
1388                         free(jit->statementjumps);      //[MAX_STATEMENTS]\r
1389                         free(jit->statementoffsets); //[MAX_STATEMENTS]\r
1390                         free(jit->code);\r
1391                         free(jit);\r
1392                         return NULL;\r
1393                 }\r
1394         }\r
1395 \r
1396         FixupJumps(jit);\r
1397 \r
1398         /* most likely want executable memory calls somewhere else more common */\r
1399 #ifdef _WIN32\r
1400         {\r
1401                 DWORD old;\r
1402 \r
1403                 //this memory is on the heap.\r
1404                 //this means that we must maintain read/write protection, or libc will crash us\r
1405                 VirtualProtect(jit->code, jit->codesize, PAGE_EXECUTE_READWRITE, &old);\r
1406         }\r
1407 #else\r
1408         mprotect(jit->code, jit->codesize, PROT_READ|PROT_EXEC);\r
1409 #endif\r
1410 \r
1411 //      externs->WriteFile("jit.x86", jit->code, jit->codesize);\r
1412 \r
1413         return jit;\r
1414 }\r
1415 \r
/*
Returns 1 when arg compares equal to zero (negative zero included), and 0 for every other value.
NOTE(review): nothing in the visible part of this file calls this function - confirm whether it is still needed.
*/
float foo(float arg)
{
	//a float is 'false' exactly when it compares equal to zero
	return (arg == 0) ? 1.0f : 0.0f;
}
1425 \r
1426 void PR_EnterJIT(progfuncs_t *progfuncs, struct jitstate *jit, int statement)\r
1427 {\r
1428 #ifdef __GNUC__\r
1429         //call, it clobbers pretty much everything.\r
1430         asm("call *%0" :: "r"(jit->statementoffsets[statement+1]),"b"(prinst->edicttable):"cc","memory","eax","ecx","edx");\r
1431 #elif defined(_MSC_VER)\r
1432         void *entry = jit->statementoffsets[statement+1];\r
1433         void *edicttable = prinst->edicttable;\r
1434         __asm {\r
1435                 pushad\r
1436                 mov eax,entry\r
1437                 mov ebx,edicttable\r
1438                 call eax\r
1439                 popad\r
1440         }\r
1441 #else\r
1442         #error "Sorry, no idea how to enter assembler safely for your compiler"\r
1443 #endif\r
1444 }\r
1445 #endif\r