1 // Pentium+ specific opcodes
3 extern flag float32_is_nan( float32 a ); // since its not defined in softfloat.h
// Common entry for every MMX op: clear the x87 tag word (all registers
// tagged "valid"), since MMX instructions alias the FPU register stack.
INLINE void MMXPROLOG(i386_state *cpustate)
	//cpustate->x87_sw &= ~(X87_SW_TOP_MASK << X87_SW_TOP_SHIFT); // top = 0
	cpustate->x87_tw = 0; // tag word = 0
// Load a 64-bit MMX operand from memory at ea into r.
INLINE void READMMX(i386_state *cpustate,UINT32 ea,MMX_REG &r)
	r.q=READ64(cpustate, ea);
// Store a 64-bit MMX operand r to memory at ea.
INLINE void WRITEMMX(i386_state *cpustate,UINT32 ea,MMX_REG &r)
	WRITE64(cpustate, ea, r.q);
// Load a full 128-bit XMM operand from memory at ea into r (two 64-bit reads,
// low quadword first).
INLINE void READXMM(i386_state *cpustate,UINT32 ea,XMM_REG &r)
	r.q[0]=READ64(cpustate, ea);
	r.q[1]=READ64(cpustate, ea+8);
// Store a full 128-bit XMM operand r to memory at ea (low quadword first).
INLINE void WRITEXMM(i386_state *cpustate,UINT32 ea,XMM_REG &r)
	WRITE64(cpustate, ea, r.q[0]);
	WRITE64(cpustate, ea+8, r.q[1]);
// Load only the low 64 bits of an XMM operand; the high half of r is untouched.
INLINE void READXMM_LO64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
	r.q[0]=READ64(cpustate, ea);
// Store only the low 64 bits of an XMM operand to memory at ea.
INLINE void WRITEXMM_LO64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
	WRITE64(cpustate, ea, r.q[0]);
// Load 64 bits from memory at ea into the HIGH half of r; low half untouched.
INLINE void READXMM_HI64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
	r.q[1]=READ64(cpustate, ea);
// Store the HIGH 64 bits of an XMM operand to memory at ea.
INLINE void WRITEXMM_HI64(i386_state *cpustate,UINT32 ea,XMM_REG &r)
	WRITE64(cpustate, ea, r.q[1]);
// RDMSR (0F 32): read MSR[ECX] into EDX:EAX.
// NOTE(review): the MSR is read and EDX:EAX are written BEFORE the CPL /
// validity check, so a #GP here leaves them already clobbered -- confirm intent.
static void PENTIUMOP(rdmsr)(i386_state *cpustate) // Opcode 0x0f 32
	data = MSR_READ(cpustate,REG32(ECX),&valid_msr);
	REG32(EDX) = data >> 32;
	REG32(EAX) = data & 0xffffffff;
	if(cpustate->CPL != 0 || valid_msr == 0) // if current privilege level isn't 0 or the register isn't recognized ...
		FAULT(FAULT_GP,0) // ... throw a general exception fault
	CYCLES(cpustate,CYCLES_RDMSR);
// WRMSR (0F 30): write EDX:EAX into MSR[ECX].
// NOTE(review): the MSR write happens BEFORE the CPL / validity check; a #GP
// here does not undo it -- confirm intent.
static void PENTIUMOP(wrmsr)(i386_state *cpustate) // Opcode 0x0f 30
	data = (UINT64)REG32(EAX);
	data |= (UINT64)(REG32(EDX)) << 32;
	MSR_WRITE(cpustate,REG32(ECX),data,&valid_msr);
	if(cpustate->CPL != 0 || valid_msr == 0) // if current privilege level isn't 0 or the register isn't recognized
		FAULT(FAULT_GP,0) // ... throw a general exception fault
	CYCLES(cpustate,1); // TODO: correct cycle count (~30-45)
// RDTSC (0F 31): return the time-stamp counter in EDX:EAX, computed as the
// base TSC plus the cycles consumed so far in the current timeslice.
static void PENTIUMOP(rdtsc)(i386_state *cpustate) // Opcode 0x0f 31
	UINT64 ts = cpustate->tsc + (cpustate->base_cycles - cpustate->cycles);
	REG32(EAX) = (UINT32)(ts);
	REG32(EDX) = (UINT32)(ts >> 32);
	CYCLES(cpustate,CYCLES_RDTSC);
// UD2 (0F 0B): architecturally-defined undefined opcode; raise #UD (vector 6).
static void PENTIUMOP(ud2)(i386_state *cpustate) // Opcode 0x0f 0b
	i386_trap(cpustate, 6, 0, 0);
98 static void PENTIUMOP(rsm)(i386_state *cpustate)
100 UINT32 smram_state = cpustate->smbase + 0xfe00;
103 logerror("i386: Invalid RSM outside SMM at %08X\n", cpustate->pc - 1);
104 i386_trap(cpustate, 6, 0, 0);
108 // load state, no sanity checks anywhere
109 cpustate->smbase = READ32(cpustate, smram_state+SMRAM_SMBASE);
110 cpustate->cr[4] = READ32(cpustate, smram_state+SMRAM_IP5_CR4);
111 cpustate->sreg[ES].limit = READ32(cpustate, smram_state+SMRAM_IP5_ESLIM);
112 cpustate->sreg[ES].base = READ32(cpustate, smram_state+SMRAM_IP5_ESBASE);
113 cpustate->sreg[ES].flags = READ32(cpustate, smram_state+SMRAM_IP5_ESACC);
114 cpustate->sreg[CS].limit = READ32(cpustate, smram_state+SMRAM_IP5_CSLIM);
115 cpustate->sreg[CS].base = READ32(cpustate, smram_state+SMRAM_IP5_CSBASE);
116 cpustate->sreg[CS].flags = READ32(cpustate, smram_state+SMRAM_IP5_CSACC);
117 cpustate->sreg[SS].limit = READ32(cpustate, smram_state+SMRAM_IP5_SSLIM);
118 cpustate->sreg[SS].base = READ32(cpustate, smram_state+SMRAM_IP5_SSBASE);
119 cpustate->sreg[SS].flags = READ32(cpustate, smram_state+SMRAM_IP5_SSACC);
120 cpustate->sreg[DS].limit = READ32(cpustate, smram_state+SMRAM_IP5_DSLIM);
121 cpustate->sreg[DS].base = READ32(cpustate, smram_state+SMRAM_IP5_DSBASE);
122 cpustate->sreg[DS].flags = READ32(cpustate, smram_state+SMRAM_IP5_DSACC);
123 cpustate->sreg[FS].limit = READ32(cpustate, smram_state+SMRAM_IP5_FSLIM);
124 cpustate->sreg[FS].base = READ32(cpustate, smram_state+SMRAM_IP5_FSBASE);
125 cpustate->sreg[FS].flags = READ32(cpustate, smram_state+SMRAM_IP5_FSACC);
126 cpustate->sreg[GS].limit = READ32(cpustate, smram_state+SMRAM_IP5_GSLIM);
127 cpustate->sreg[GS].base = READ32(cpustate, smram_state+SMRAM_IP5_GSBASE);
128 cpustate->sreg[GS].flags = READ32(cpustate, smram_state+SMRAM_IP5_GSACC);
129 cpustate->ldtr.flags = READ32(cpustate, smram_state+SMRAM_IP5_LDTACC);
130 cpustate->ldtr.limit = READ32(cpustate, smram_state+SMRAM_IP5_LDTLIM);
131 cpustate->ldtr.base = READ32(cpustate, smram_state+SMRAM_IP5_LDTBASE);
132 cpustate->gdtr.limit = READ32(cpustate, smram_state+SMRAM_IP5_GDTLIM);
133 cpustate->gdtr.base = READ32(cpustate, smram_state+SMRAM_IP5_GDTBASE);
134 cpustate->idtr.limit = READ32(cpustate, smram_state+SMRAM_IP5_IDTLIM);
135 cpustate->idtr.base = READ32(cpustate, smram_state+SMRAM_IP5_IDTBASE);
136 cpustate->task.limit = READ32(cpustate, smram_state+SMRAM_IP5_TRLIM);
137 cpustate->task.base = READ32(cpustate, smram_state+SMRAM_IP5_TRBASE);
138 cpustate->task.flags = READ32(cpustate, smram_state+SMRAM_IP5_TRACC);
140 cpustate->sreg[ES].selector = READ32(cpustate, smram_state+SMRAM_ES);
141 cpustate->sreg[CS].selector = READ32(cpustate, smram_state+SMRAM_CS);
142 cpustate->sreg[SS].selector = READ32(cpustate, smram_state+SMRAM_SS);
143 cpustate->sreg[DS].selector = READ32(cpustate, smram_state+SMRAM_DS);
144 cpustate->sreg[FS].selector = READ32(cpustate, smram_state+SMRAM_FS);
145 cpustate->sreg[GS].selector = READ32(cpustate, smram_state+SMRAM_GS);
146 cpustate->ldtr.segment = READ32(cpustate, smram_state+SMRAM_LDTR);
147 cpustate->task.segment = READ32(cpustate, smram_state+SMRAM_TR);
149 cpustate->dr[7] = READ32(cpustate, smram_state+SMRAM_DR7);
150 cpustate->dr[6] = READ32(cpustate, smram_state+SMRAM_DR6);
151 REG32(EAX) = READ32(cpustate, smram_state+SMRAM_EAX);
152 REG32(ECX) = READ32(cpustate, smram_state+SMRAM_ECX);
153 REG32(EDX) = READ32(cpustate, smram_state+SMRAM_EDX);
154 REG32(EBX) = READ32(cpustate, smram_state+SMRAM_EBX);
155 REG32(ESP) = READ32(cpustate, smram_state+SMRAM_ESP);
156 REG32(EBP) = READ32(cpustate, smram_state+SMRAM_EBP);
157 REG32(ESI) = READ32(cpustate, smram_state+SMRAM_ESI);
158 REG32(EDI) = READ32(cpustate, smram_state+SMRAM_EDI);
159 cpustate->eip = READ32(cpustate, smram_state+SMRAM_EIP);
160 cpustate->eflags = READ32(cpustate, smram_state+SMRAM_EAX);
161 cpustate->cr[3] = READ32(cpustate, smram_state+SMRAM_CR3);
162 cpustate->cr[0] = READ32(cpustate, smram_state+SMRAM_CR0);
164 cpustate->CPL = (cpustate->sreg[SS].flags >> 13) & 3; // cpl == dpl of ss
166 for(int i = 0; i < GS; i++)
168 if(PROTECTED_MODE && !V8086_MODE)
170 cpustate->sreg[i].valid = cpustate->sreg[i].selector ? true : false;
171 cpustate->sreg[i].d = (cpustate->sreg[i].flags & 0x4000) ? 1 : 0;
174 cpustate->sreg[i].valid = true;
177 // if(!cpustate->smiact.isnull())
178 // cpustate->smiact(false);
179 cpustate->smm = false;
181 CHANGE_PC(cpustate,cpustate->eip);
182 cpustate->nmi_masked = false;
183 if(cpustate->smi_latched)
185 pentium_smi(cpustate);
188 if(cpustate->nmi_latched)
190 cpustate->nmi_latched = false;
191 i386_trap(cpustate, 2, 1, 0);
// PREFETCHh m8 (0F 18): no cache is emulated, so only the effective address
// is computed and cycles are charged; the prefetch itself is a no-op.
static void PENTIUMOP(prefetch_m8)(i386_state *cpustate) // Opcode 0x0f 18
	UINT8 modrm = FETCH(cpustate);
	UINT32 ea = GetEA(cpustate,modrm,0);
	CYCLES(cpustate,1+(ea & 1)); // TODO: correct cycle count
// CMOVO r16, r/m16: move the 16-bit source into the destination register
// only when OF is set (register and memory source paths).
static void PENTIUMOP(cmovo_r16_rm16)(i386_state *cpustate)    // Opcode 0x0f 40
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->OF == 1)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->OF == 1)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVO r32, r/m32: move the 32-bit source into the destination register
// only when OF is set.
static void PENTIUMOP(cmovo_r32_rm32)(i386_state *cpustate)    // Opcode 0x0f 40
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->OF == 1)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->OF == 1)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVNO r16, r/m16: move only when OF is clear.
static void PENTIUMOP(cmovno_r16_rm16)(i386_state *cpustate)    // Opcode 0x0f 41
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->OF == 0)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->OF == 0)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVNO r32, r/m32: move only when OF is clear.
static void PENTIUMOP(cmovno_r32_rm32)(i386_state *cpustate)    // Opcode 0x0f 41
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->OF == 0)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->OF == 0)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVB/CMOVC r16, r/m16: move only when CF is set (unsigned below).
static void PENTIUMOP(cmovb_r16_rm16)(i386_state *cpustate)    // Opcode 0x0f 42
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->CF == 1)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->CF == 1)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVB/CMOVC r32, r/m32: move only when CF is set (unsigned below).
static void PENTIUMOP(cmovb_r32_rm32)(i386_state *cpustate)    // Opcode 0x0f 42
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->CF == 1)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->CF == 1)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVAE/CMOVNC r16, r/m16: move only when CF is clear (unsigned above-or-equal).
static void PENTIUMOP(cmovae_r16_rm16)(i386_state *cpustate)    // Opcode 0x0f 43
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->CF == 0)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->CF == 0)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVAE/CMOVNC r32, r/m32: move only when CF is clear (unsigned above-or-equal).
static void PENTIUMOP(cmovae_r32_rm32)(i386_state *cpustate)    // Opcode 0x0f 43
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->CF == 0)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->CF == 0)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVE/CMOVZ r16, r/m16: move only when ZF is set.
static void PENTIUMOP(cmove_r16_rm16)(i386_state *cpustate)    // Opcode 0x0f 44
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->ZF == 1)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->ZF == 1)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVE/CMOVZ r32, r/m32: move only when ZF is set.
static void PENTIUMOP(cmove_r32_rm32)(i386_state *cpustate)    // Opcode 0x0f 44
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->ZF == 1)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->ZF == 1)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVNE/CMOVNZ r16, r/m16: move only when ZF is clear.
static void PENTIUMOP(cmovne_r16_rm16)(i386_state *cpustate)    // Opcode 0x0f 45
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->ZF == 0)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->ZF == 0)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVNE/CMOVNZ r32, r/m32: move only when ZF is clear.
static void PENTIUMOP(cmovne_r32_rm32)(i386_state *cpustate)    // Opcode 0x0f 45
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->ZF == 0)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->ZF == 0)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVBE r16, r/m16: move when CF or ZF is set (unsigned below-or-equal).
static void PENTIUMOP(cmovbe_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 46
	UINT8 modrm = FETCH(cpustate);
	if ((cpustate->CF == 1) || (cpustate->ZF == 1))
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if ((cpustate->CF == 1) || (cpustate->ZF == 1))
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVBE r32, r/m32: move when CF or ZF is set (unsigned below-or-equal).
static void PENTIUMOP(cmovbe_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 46
	UINT8 modrm = FETCH(cpustate);
	if ((cpustate->CF == 1) || (cpustate->ZF == 1))
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if ((cpustate->CF == 1) || (cpustate->ZF == 1))
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVA r16, r/m16: move when CF and ZF are both clear (unsigned above).
static void PENTIUMOP(cmova_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 47
	UINT8 modrm = FETCH(cpustate);
	if ((cpustate->CF == 0) && (cpustate->ZF == 0))
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if ((cpustate->CF == 0) && (cpustate->ZF == 0))
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVA r32, r/m32: move when CF and ZF are both clear (unsigned above).
static void PENTIUMOP(cmova_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 47
	UINT8 modrm = FETCH(cpustate);
	if ((cpustate->CF == 0) && (cpustate->ZF == 0))
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if ((cpustate->CF == 0) && (cpustate->ZF == 0))
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVS r16, r/m16: move only when SF is set.
static void PENTIUMOP(cmovs_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 48
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->SF == 1)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->SF == 1)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVS r32, r/m32: move only when SF is set.
static void PENTIUMOP(cmovs_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 48
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->SF == 1)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->SF == 1)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVNS r16, r/m16: move only when SF is clear.
static void PENTIUMOP(cmovns_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 49
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->SF == 0)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->SF == 0)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVNS r32, r/m32: move only when SF is clear.
static void PENTIUMOP(cmovns_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 49
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->SF == 0)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->SF == 0)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVP/CMOVPE r16, r/m16: move only when PF is set (parity even).
static void PENTIUMOP(cmovp_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4a
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->PF == 1)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->PF == 1)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVP/CMOVPE r32, r/m32: move only when PF is set (parity even).
static void PENTIUMOP(cmovp_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4a
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->PF == 1)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->PF == 1)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVNP/CMOVPO r16, r/m16: move only when PF is clear (parity odd).
static void PENTIUMOP(cmovnp_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4b
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->PF == 0)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->PF == 0)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVNP/CMOVPO r32, r/m32: move only when PF is clear (parity odd).
static void PENTIUMOP(cmovnp_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4b
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->PF == 0)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->PF == 0)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVL r16, r/m16: move when SF != OF (signed less).
static void PENTIUMOP(cmovl_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4c
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->SF != cpustate->OF)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->SF != cpustate->OF)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVL r32, r/m32: move when SF != OF (signed less).
static void PENTIUMOP(cmovl_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4c
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->SF != cpustate->OF)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->SF != cpustate->OF)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVGE r16, r/m16: move when SF == OF (signed greater-or-equal).
static void PENTIUMOP(cmovge_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4d
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->SF == cpustate->OF)
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->SF == cpustate->OF)
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVGE r32, r/m32: move when SF == OF (signed greater-or-equal).
static void PENTIUMOP(cmovge_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4d
	UINT8 modrm = FETCH(cpustate);
	if (cpustate->SF == cpustate->OF)
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if (cpustate->SF == cpustate->OF)
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVLE r16, r/m16: move when ZF is set or SF != OF (signed less-or-equal).
static void PENTIUMOP(cmovle_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4e
	UINT8 modrm = FETCH(cpustate);
	if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVLE r32, r/m32: move when ZF is set or SF != OF (signed less-or-equal).
static void PENTIUMOP(cmovle_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4e
	UINT8 modrm = FETCH(cpustate);
	if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVG r16, r/m16: move when ZF is clear and SF == OF (signed greater).
static void PENTIUMOP(cmovg_r16_rm16)(i386_state *cpustate) // Opcode 0x0f 4f
	UINT8 modrm = FETCH(cpustate);
	if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
		src = LOAD_RM16(modrm);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
		src = READ16(cpustate,ea);
		STORE_REG16(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
// CMOVG r32, r/m32: move when ZF is clear and SF == OF (signed greater).
static void PENTIUMOP(cmovg_r32_rm32)(i386_state *cpustate) // Opcode 0x0f 4f
	UINT8 modrm = FETCH(cpustate);
	if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
		src = LOAD_RM32(modrm);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
	UINT32 ea = GetEA(cpustate,modrm,0);
	if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
		src = READ32(cpustate,ea);
		STORE_REG32(modrm, src);
	CYCLES(cpustate,1); // TODO: correct cycle count
1034 static void PENTIUMOP(movnti_m16_r16)(i386_state *cpustate) // Opcode 0f c3
1036 UINT8 modrm = FETCH(cpustate);
1037 if( modrm >= 0xc0 ) {
1038 // unsupported by cpu
1039 CYCLES(cpustate,1); // TODO: correct cycle count
1041 // since cache is not implemented
1042 UINT32 ea = GetEA(cpustate, modrm, 0);
1043 WRITE16(cpustate,ea,LOAD_RM16(modrm));
1044 CYCLES(cpustate,1); // TODO: correct cycle count
1048 static void PENTIUMOP(movnti_m32_r32)(i386_state *cpustate) // Opcode 0f c3
1050 UINT8 modrm = FETCH(cpustate);
1051 if( modrm >= 0xc0 ) {
1052 // unsupported by cpu
1053 CYCLES(cpustate,1); // TODO: correct cycle count
1055 // since cache is not implemented
1056 UINT32 ea = GetEA(cpustate, modrm, 0);
1057 WRITE32(cpustate,ea,LOAD_RM32(modrm));
1058 CYCLES(cpustate,1); // TODO: correct cycle count
// Placeholder for an unemulated Cyrix-specific 0F 74 encoding; only logs.
static void I386OP(cyrix_unknown)(i386_state *cpustate) // Opcode 0x0f 74
	logerror("Unemulated 0x0f 0x74 opcode called\n");
// CMPXCHG8B m64 (0F C7 /1): compare EDX:EAX with the 64-bit memory operand;
// if equal, store ECX:EBX to memory, otherwise load the memory value into
// EDX:EAX. A register operand (mod == 11) is an invalid encoding.
static void PENTIUMOP(cmpxchg8b_m64)(i386_state *cpustate) // Opcode 0x0f c7
	UINT8 modm = FETCH(cpustate);
	if( modm >= 0xc0 ) {
		report_invalid_modrm(cpustate, "cmpxchg8b_m64", modm);
	UINT32 ea = GetEA(cpustate, modm, 0);
	UINT64 value = READ64(cpustate,ea);
	UINT64 edx_eax = (((UINT64) REG32(EDX)) << 32) | REG32(EAX);
	UINT64 ecx_ebx = (((UINT64) REG32(ECX)) << 32) | REG32(EBX);
	if( value == edx_eax ) {
		// matched: store ECX:EBX into the memory operand
		WRITE64(cpustate,ea, ecx_ebx);
		CYCLES(cpustate,CYCLES_CMPXCHG_REG_MEM_T);
	// mismatch: return the memory value in EDX:EAX
	REG32(EDX) = (UINT32) (value >> 32);
	REG32(EAX) = (UINT32) (value >> 0);
	CYCLES(cpustate,CYCLES_CMPXCHG_REG_MEM_F);
// MOVNTQ m64, mm (0F E7): non-temporal store of an MMX register; emulated as
// a plain 64-bit write since no cache is modelled. Source is the reg field.
static void PENTIUMOP(movntq_m64_r64)(i386_state *cpustate) // Opcode 0f e7
	//MMXPROLOG(cpustate); // TODO: check if needed
	UINT8 modrm = FETCH(cpustate);
	if( modrm >= 0xc0 ) {
		CYCLES(cpustate,1); // unsupported
	// since cache is not implemented
	UINT32 ea = GetEA(cpustate, modrm, 0);
	WRITEMMX(cpustate, ea, MMX((modrm >> 3) & 0x7));
	CYCLES(cpustate,1); // TODO: correct cycle count
1107 static void PENTIUMOP(maskmovq_r64_r64)(i386_state *cpustate) // Opcode 0f f7
1110 UINT8 modm = FETCH(cpustate);
1111 UINT32 ea = GetEA(cpustate, 7, 0); // ds:di/edi/rdi register
1112 MMXPROLOG(cpustate);
1115 for (n=0;n <= 7;n++)
1116 if (MMX(m).b[n] & 127)
1117 WRITE8(cpustate, ea+n, MMX(s).b[n]);
// POPCNT r16, r/m16 (F3 0F B8): count the set bits of the 16-bit source.
// NOTE(review): hardware POPCNT also updates the flags (OF/SF/AF/CF/PF
// cleared, ZF reflects a zero result); not modelled here -- confirm intent.
static void PENTIUMOP(popcnt_r16_rm16)(i386_state *cpustate) // Opcode f3 0f b8
	UINT8 modrm = FETCH(cpustate);
	if( modrm >= 0xc0 ) {
		src = LOAD_RM16(modrm);
	UINT32 ea = GetEA(cpustate,modrm,0);
	src = READ16(cpustate,ea);
	for (n=0;n < 16;n++) {
		count=count+(src & 1);
	STORE_REG16(modrm, count);
	CYCLES(cpustate,1); // TODO: correct cycle count
// POPCNT r32, r/m32 (F3 0F B8): count the set bits of the 32-bit source.
// NOTE(review): hardware POPCNT also updates the flags (OF/SF/AF/CF/PF
// cleared, ZF reflects a zero result); not modelled here -- confirm intent.
static void PENTIUMOP(popcnt_r32_rm32)(i386_state *cpustate) // Opcode f3 0f b8
	UINT8 modrm = FETCH(cpustate);
	if( modrm >= 0xc0 ) {
		src = LOAD_RM32(modrm);
	UINT32 ea = GetEA(cpustate,modrm,0);
	src = READ32(cpustate,ea);
	for (n=0;n < 32;n++) {
		count=count+(src & 1);
	STORE_REG32(modrm, count);
	CYCLES(cpustate,1); // TODO: correct cycle count
// TZCNT r16, r/m16 (F3 0F BC): on CPUs without BMI1 this encoding executes
// as BSF, so delegate to the BSF handler (differs for a zero source).
static void PENTIUMOP(tzcnt_r16_rm16)(i386_state *cpustate)
	// for CPUs that don't support TZCNT, fall back to BSF
	i386_bsf_r16_rm16(cpustate);
	// TODO: actually implement TZCNT
// TZCNT r32, r/m32 (F3 0F BC): on CPUs without BMI1 this encoding executes
// as BSF, so delegate to the BSF handler (differs for a zero source).
static void PENTIUMOP(tzcnt_r32_rm32)(i386_state *cpustate)
	// for CPUs that don't support TZCNT, fall back to BSF
	i386_bsf_r32_rm32(cpustate);
	// TODO: actually implement TZCNT
1176 INLINE INT8 SaturatedSignedWordToSignedByte(INT16 word)
1185 INLINE UINT8 SaturatedSignedWordToUnsignedByte(INT16 word)
1194 INLINE INT16 SaturatedSignedDwordToSignedWord(INT32 dword)
1200 return (INT16)dword;
// MMX shift-word-by-immediate group (0F 71): /2 PSRLW, /4 PSRAW, /6 PSLLW on
// the four 16-bit lanes of an MMX register; register-direct encodings only.
// NOTE(review): imm8 is not clamped -- per the ISA, counts > 15 should zero
// each lane (sign-fill for PSRAW), and in C a shift >= the promoted width is
// undefined behaviour; confirm/clamp.
static void MMXOP(group_0f71)(i386_state *cpustate) // Opcode 0f 71
	UINT8 modm = FETCH(cpustate);
	UINT8 imm8 = FETCH(cpustate);
	MMXPROLOG(cpustate);
	if( modm >= 0xc0 ) {
		switch ( (modm & 0x38) >> 3 )
			// /2 PSRLW: logical right shift of each unsigned word lane
			MMX(modm & 7).w[0]=MMX(modm & 7).w[0] >> imm8;
			MMX(modm & 7).w[1]=MMX(modm & 7).w[1] >> imm8;
			MMX(modm & 7).w[2]=MMX(modm & 7).w[2] >> imm8;
			MMX(modm & 7).w[3]=MMX(modm & 7).w[3] >> imm8;
			// /4 PSRAW: arithmetic right shift of each signed word lane
			MMX(modm & 7).s[0]=MMX(modm & 7).s[0] >> imm8;
			MMX(modm & 7).s[1]=MMX(modm & 7).s[1] >> imm8;
			MMX(modm & 7).s[2]=MMX(modm & 7).s[2] >> imm8;
			MMX(modm & 7).s[3]=MMX(modm & 7).s[3] >> imm8;
			// /6 PSLLW: left shift of each word lane
			MMX(modm & 7).w[0]=MMX(modm & 7).w[0] << imm8;
			MMX(modm & 7).w[1]=MMX(modm & 7).w[1] << imm8;
			MMX(modm & 7).w[2]=MMX(modm & 7).w[2] << imm8;
			MMX(modm & 7).w[3]=MMX(modm & 7).w[3] << imm8;
			report_invalid_modrm(cpustate, "mmx_group0f71", modm);
// MMX shift-dword-by-immediate group (0F 72): /2 PSRLD, /4 PSRAD, /6 PSLLD on
// the two 32-bit lanes of an MMX register; register-direct encodings only.
// NOTE(review): imm8 is not clamped -- counts > 31 should zero each lane
// (sign-fill for PSRAD), and a C shift >= 32 on a 32-bit lane is undefined
// behaviour; confirm/clamp.
static void MMXOP(group_0f72)(i386_state *cpustate) // Opcode 0f 72
	UINT8 modm = FETCH(cpustate);
	UINT8 imm8 = FETCH(cpustate);
	MMXPROLOG(cpustate);
	if( modm >= 0xc0 ) {
		switch ( (modm & 0x38) >> 3 )
			// /2 PSRLD: logical right shift of each unsigned dword lane
			MMX(modm & 7).d[0]=MMX(modm & 7).d[0] >> imm8;
			MMX(modm & 7).d[1]=MMX(modm & 7).d[1] >> imm8;
			// /4 PSRAD: arithmetic right shift of each signed dword lane
			MMX(modm & 7).i[0]=MMX(modm & 7).i[0] >> imm8;
			MMX(modm & 7).i[1]=MMX(modm & 7).i[1] >> imm8;
			// /6 PSLLD: left shift of each dword lane
			MMX(modm & 7).d[0]=MMX(modm & 7).d[0] << imm8;
			MMX(modm & 7).d[1]=MMX(modm & 7).d[1] << imm8;
			report_invalid_modrm(cpustate, "mmx_group0f72", modm);
1261 static void MMXOP(group_0f73)(i386_state *cpustate) // Opcode 0f 73
1263 UINT8 modm = FETCH(cpustate);
1264 UINT8 imm8 = FETCH(cpustate);
1265 MMXPROLOG(cpustate);
1266 if( modm >= 0xc0 ) {
1267 switch ( (modm & 0x38) >> 3 )
1270 if (cpustate->xmm_operand_size)
1272 XMM(modm & 7).q[0] = imm8 > 63 ? 0 : XMM(modm & 7).q[0] >> imm8;
1273 XMM(modm & 7).q[1] = imm8 > 63 ? 0 : XMM(modm & 7).q[1] >> imm8;
1276 MMX(modm & 7).q = imm8 > 63 ? 0 : MMX(modm & 7).q >> imm8;
1281 XMM(modm & 7).q[0] = 0;
1282 XMM(modm & 7).q[1] = 0;
1286 imm8 = (imm8 & 7) << 3;
1287 XMM(modm & 7).q[0] = XMM(modm & 7).q[1] >> imm8;
1288 XMM(modm & 7).q[1] = 0;
1293 XMM(modm & 7).q[0] = (XMM(modm & 7).q[1] << (64 - imm8)) | (XMM(modm & 7).q[0] >> imm8);
1294 XMM(modm & 7).q[1] = XMM(modm & 7).q[0] >> imm8;
1298 if (cpustate->xmm_operand_size)
1300 XMM(modm & 7).q[0] = imm8 > 63 ? 0 : XMM(modm & 7).q[0] << imm8;
1301 XMM(modm & 7).q[1] = imm8 > 63 ? 0 : XMM(modm & 7).q[1] << imm8;
1304 MMX(modm & 7).q = imm8 > 63 ? 0 : MMX(modm & 7).q << imm8;
1309 XMM(modm & 7).q[0] = 0;
1310 XMM(modm & 7).q[1] = 0;
1314 imm8 = (imm8 & 7) << 3;
1315 XMM(modm & 7).q[1] = XMM(modm & 7).q[0] << imm8;
1316 XMM(modm & 7).q[0] = 0;
1321 XMM(modm & 7).q[1] = (XMM(modm & 7).q[0] >> (64 - imm8)) | (XMM(modm & 7).q[1] << imm8);
1322 XMM(modm & 7).q[0] = XMM(modm & 7).q[0] << imm8;
1326 report_invalid_modrm(cpustate, "mmx_group0f73", modm);
// PSRLW mm, mm/m64 (0F D1): logical right shift of each 16-bit lane by the
// count taken from the 64-bit source operand.
// NOTE(review): count is not clamped -- per the ISA, counts > 15 should zero
// the lanes, and in C a shift >= the promoted width is undefined behaviour;
// confirm/clamp.
static void MMXOP(psrlw_r64_rm64)(i386_state *cpustate) // Opcode 0f d1
	MMXPROLOG(cpustate);
	UINT8 modrm = FETCH(cpustate);
	if( modrm >= 0xc0 ) {
		int count=(int)MMX(modrm & 7).q;
		MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] >> count;
		MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] >> count;
		MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] >> count;
		MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] >> count;
	UINT32 ea = GetEA(cpustate, modrm, 0);
	READMMX(cpustate, ea, src);
	int count=(int)src.q;
	MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] >> count;
	MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] >> count;
	MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] >> count;
	MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] >> count;
	CYCLES(cpustate,1); // TODO: correct cycle count
// PSRLD mm, mm/m64 (0F D2): logical right shift of each 32-bit lane by the
// count taken from the 64-bit source operand.
// NOTE(review): count is not clamped -- counts > 31 should zero the lanes,
// and a C shift >= 32 on a 32-bit lane is undefined behaviour; confirm/clamp.
static void MMXOP(psrld_r64_rm64)(i386_state *cpustate) // Opcode 0f d2
	MMXPROLOG(cpustate);
	UINT8 modrm = FETCH(cpustate);
	if( modrm >= 0xc0 ) {
		int count=(int)MMX(modrm & 7).q;
		MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] >> count;
		MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] >> count;
	UINT32 ea = GetEA(cpustate, modrm, 0);
	READMMX(cpustate, ea, src);
	int count=(int)src.q;
	MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] >> count;
	MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] >> count;
	CYCLES(cpustate,1); // TODO: correct cycle count
// PSRLQ mm, mm/m64 (0F D3): logical right shift of the 64-bit value by the
// count taken from the 64-bit source operand.
// NOTE(review): count is not clamped -- counts > 63 should zero the result,
// and a C shift >= 64 is undefined behaviour; confirm/clamp.
static void MMXOP(psrlq_r64_rm64)(i386_state *cpustate) // Opcode 0f d3
	MMXPROLOG(cpustate);
	UINT8 modrm = FETCH(cpustate);
	if( modrm >= 0xc0 ) {
		int count=(int)MMX(modrm & 7).q;
		MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q >> count;
	UINT32 ea = GetEA(cpustate, modrm, 0);
	READMMX(cpustate, ea, src);
	int count=(int)src.q;
	MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q >> count;
	CYCLES(cpustate,1); // TODO: correct cycle count
// PADDQ mm, mm/m64 (0F D4): 64-bit add with wraparound (no saturation).
static void MMXOP(paddq_r64_rm64)(i386_state *cpustate) // Opcode 0f d4
	MMXPROLOG(cpustate);
	UINT8 modrm = FETCH(cpustate);
	if( modrm >= 0xc0 ) {
		MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q+MMX(modrm & 7).q;
	UINT32 ea = GetEA(cpustate, modrm, 0);
	READMMX(cpustate, ea, src);
	MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q+src.q;
	CYCLES(cpustate,1); // TODO: correct cycle count
// PMULLW mm, mm/m64 (0F D5): signed 16x16 multiply of each word lane,
// keeping the LOW 16 bits of each 32-bit product.
static void MMXOP(pmullw_r64_rm64)(i386_state *cpustate) // Opcode 0f d5
	MMXPROLOG(cpustate);
	UINT8 modrm = FETCH(cpustate);
	if( modrm >= 0xc0 ) {
		MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]) & 0xffff;
		MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1]) & 0xffff;
		MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]) & 0xffff;
		MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3]) & 0xffff;
	UINT32 ea = GetEA(cpustate, modrm, 0);
	READMMX(cpustate, ea, src);
	MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)src.s[0]) & 0xffff;
	MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)src.s[1]) & 0xffff;
	MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)src.s[2]) & 0xffff;
	MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)src.s[3]) & 0xffff;
	CYCLES(cpustate,1); // TODO: correct cycle count
1426 static void MMXOP(psubusb_r64_rm64)(i386_state *cpustate) // Opcode 0f d8
1429 MMXPROLOG(cpustate);
1430 UINT8 modrm = FETCH(cpustate);
1431 if( modrm >= 0xc0 ) {
1433 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] < MMX(modrm & 7).b[n] ? 0 : MMX((modrm >> 3) & 0x7).b[n]-MMX(modrm & 7).b[n];
1436 UINT32 ea = GetEA(cpustate, modrm, 0);
1437 READMMX(cpustate, ea, src);
1439 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] < src.b[n] ? 0 : MMX((modrm >> 3) & 0x7).b[n]-src.b[n];
1441 CYCLES(cpustate,1); // TODO: correct cycle count
1444 static void MMXOP(psubusw_r64_rm64)(i386_state *cpustate) // Opcode 0f d9
1447 MMXPROLOG(cpustate);
1448 UINT8 modrm = FETCH(cpustate);
1449 if( modrm >= 0xc0 ) {
1451 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] < MMX(modrm & 7).w[n] ? 0 : MMX((modrm >> 3) & 0x7).w[n]-MMX(modrm & 7).w[n];
1454 UINT32 ea = GetEA(cpustate, modrm, 0);
1455 READMMX(cpustate, ea, src);
1457 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] < src.w[n] ? 0 : MMX((modrm >> 3) & 0x7).w[n]-src.w[n];
1459 CYCLES(cpustate,1); // TODO: correct cycle count
1462 static void MMXOP(pand_r64_rm64)(i386_state *cpustate) // Opcode 0f db
1464 MMXPROLOG(cpustate);
1465 UINT8 modrm = FETCH(cpustate);
1466 if( modrm >= 0xc0 ) {
1467 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q & MMX(modrm & 7).q;
1470 UINT32 ea = GetEA(cpustate, modrm, 0);
1471 READMMX(cpustate, ea, src);
1472 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q & src.q;
1474 CYCLES(cpustate,1); // TODO: correct cycle count
1477 static void MMXOP(paddusb_r64_rm64)(i386_state *cpustate) // Opcode 0f dc
1480 MMXPROLOG(cpustate);
1481 UINT8 modrm = FETCH(cpustate);
1482 if( modrm >= 0xc0 ) {
1484 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] > (0xff-MMX(modrm & 7).b[n]) ? 0xff : MMX((modrm >> 3) & 0x7).b[n]+MMX(modrm & 7).b[n];
1487 UINT32 ea = GetEA(cpustate, modrm, 0);
1488 READMMX(cpustate, ea, src);
1490 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] > (0xff-src.b[n]) ? 0xff : MMX((modrm >> 3) & 0x7).b[n]+src.b[n];
1492 CYCLES(cpustate,1); // TODO: correct cycle count
1495 static void MMXOP(paddusw_r64_rm64)(i386_state *cpustate) // Opcode 0f dd
1498 MMXPROLOG(cpustate);
1499 UINT8 modrm = FETCH(cpustate);
1500 if( modrm >= 0xc0 ) {
1502 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] > (0xffff-MMX(modrm & 7).w[n]) ? 0xffff : MMX((modrm >> 3) & 0x7).w[n]+MMX(modrm & 7).w[n];
1505 UINT32 ea = GetEA(cpustate, modrm, 0);
1506 READMMX(cpustate, ea, src);
1508 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] > (0xffff-src.w[n]) ? 0xffff : MMX((modrm >> 3) & 0x7).w[n]+src.w[n];
1510 CYCLES(cpustate,1); // TODO: correct cycle count
1513 static void MMXOP(pandn_r64_rm64)(i386_state *cpustate) // Opcode 0f df
1515 MMXPROLOG(cpustate);
1516 UINT8 modrm = FETCH(cpustate);
1517 if( modrm >= 0xc0 ) {
1518 MMX((modrm >> 3) & 0x7).q=(~MMX((modrm >> 3) & 0x7).q) & MMX(modrm & 7).q;
1521 UINT32 ea = GetEA(cpustate, modrm, 0);
1522 READMMX(cpustate, ea, src);
1523 MMX((modrm >> 3) & 0x7).q=(~MMX((modrm >> 3) & 0x7).q) & src.q;
1525 CYCLES(cpustate,1); // TODO: correct cycle count
1528 static void MMXOP(psraw_r64_rm64)(i386_state *cpustate) // Opcode 0f e1
1530 MMXPROLOG(cpustate);
1531 UINT8 modrm = FETCH(cpustate);
1532 if( modrm >= 0xc0 ) {
1533 int count=(int)MMX(modrm & 7).q;
1534 MMX((modrm >> 3) & 0x7).s[0]=MMX((modrm >> 3) & 0x7).s[0] >> count;
1535 MMX((modrm >> 3) & 0x7).s[1]=MMX((modrm >> 3) & 0x7).s[1] >> count;
1536 MMX((modrm >> 3) & 0x7).s[2]=MMX((modrm >> 3) & 0x7).s[2] >> count;
1537 MMX((modrm >> 3) & 0x7).s[3]=MMX((modrm >> 3) & 0x7).s[3] >> count;
1540 UINT32 ea = GetEA(cpustate, modrm, 0);
1541 READMMX(cpustate, ea, src);
1542 int count=(int)src.q;
1543 MMX((modrm >> 3) & 0x7).s[0]=MMX((modrm >> 3) & 0x7).s[0] >> count;
1544 MMX((modrm >> 3) & 0x7).s[1]=MMX((modrm >> 3) & 0x7).s[1] >> count;
1545 MMX((modrm >> 3) & 0x7).s[2]=MMX((modrm >> 3) & 0x7).s[2] >> count;
1546 MMX((modrm >> 3) & 0x7).s[3]=MMX((modrm >> 3) & 0x7).s[3] >> count;
1548 CYCLES(cpustate,1); // TODO: correct cycle count
1551 static void MMXOP(psrad_r64_rm64)(i386_state *cpustate) // Opcode 0f e2
1553 MMXPROLOG(cpustate);
1554 UINT8 modrm = FETCH(cpustate);
1555 if( modrm >= 0xc0 ) {
1556 int count=(int)MMX(modrm & 7).q;
1557 MMX((modrm >> 3) & 0x7).i[0]=MMX((modrm >> 3) & 0x7).i[0] >> count;
1558 MMX((modrm >> 3) & 0x7).i[1]=MMX((modrm >> 3) & 0x7).i[1] >> count;
1561 UINT32 ea = GetEA(cpustate, modrm, 0);
1562 READMMX(cpustate, ea, src);
1563 int count=(int)src.q;
1564 MMX((modrm >> 3) & 0x7).i[0]=MMX((modrm >> 3) & 0x7).i[0] >> count;
1565 MMX((modrm >> 3) & 0x7).i[1]=MMX((modrm >> 3) & 0x7).i[1] >> count;
1567 CYCLES(cpustate,1); // TODO: correct cycle count
1570 static void MMXOP(pmulhw_r64_rm64)(i386_state *cpustate) // Opcode 0f e5
1572 MMXPROLOG(cpustate);
1573 UINT8 modrm = FETCH(cpustate);
1574 if( modrm >= 0xc0 ) {
1575 MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]) >> 16;
1576 MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1]) >> 16;
1577 MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]) >> 16;
1578 MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3]) >> 16;
1581 UINT32 ea = GetEA(cpustate, modrm, 0);
1582 READMMX(cpustate, ea, src);
1583 MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)src.s[0]) >> 16;
1584 MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)src.s[1]) >> 16;
1585 MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)src.s[2]) >> 16;
1586 MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)src.s[3]) >> 16;
1588 CYCLES(cpustate,1); // TODO: correct cycle count
1591 static void MMXOP(psubsb_r64_rm64)(i386_state *cpustate) // Opcode 0f e8
1594 MMXPROLOG(cpustate);
1595 UINT8 modrm = FETCH(cpustate);
1596 if( modrm >= 0xc0 ) {
1598 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] - (INT16)MMX(modrm & 7).c[n]);
1601 UINT32 ea = GetEA(cpustate, modrm, 0);
1602 READMMX(cpustate, ea, s);
1604 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] - (INT16)s.c[n]);
1606 CYCLES(cpustate,1); // TODO: correct cycle count
1609 static void MMXOP(psubsw_r64_rm64)(i386_state *cpustate) // Opcode 0f e9
1612 MMXPROLOG(cpustate);
1613 UINT8 modrm = FETCH(cpustate);
1614 if( modrm >= 0xc0 ) {
1616 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] - (INT32)MMX(modrm & 7).s[n]);
1619 UINT32 ea = GetEA(cpustate, modrm, 0);
1620 READMMX(cpustate, ea, s);
1622 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] - (INT32)s.s[n]);
1624 CYCLES(cpustate,1); // TODO: correct cycle count
1627 static void MMXOP(por_r64_rm64)(i386_state *cpustate) // Opcode 0f eb
1629 MMXPROLOG(cpustate);
1630 UINT8 modrm = FETCH(cpustate);
1631 if( modrm >= 0xc0 ) {
1632 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q | MMX(modrm & 7).q;
1635 UINT32 ea = GetEA(cpustate, modrm, 0);
1636 READMMX(cpustate, ea, s);
1637 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q | s.q;
1639 CYCLES(cpustate,1); // TODO: correct cycle count
1642 static void MMXOP(paddsb_r64_rm64)(i386_state *cpustate) // Opcode 0f ec
1645 MMXPROLOG(cpustate);
1646 UINT8 modrm = FETCH(cpustate);
1647 if( modrm >= 0xc0 ) {
1649 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] + (INT16)MMX(modrm & 7).c[n]);
1652 UINT32 ea = GetEA(cpustate, modrm, 0);
1653 READMMX(cpustate, ea, s);
1655 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] + (INT16)s.c[n]);
1657 CYCLES(cpustate,1); // TODO: correct cycle count
1660 static void MMXOP(paddsw_r64_rm64)(i386_state *cpustate) // Opcode 0f ed
1663 MMXPROLOG(cpustate);
1664 UINT8 modrm = FETCH(cpustate);
1665 if( modrm >= 0xc0 ) {
1667 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] + (INT32)MMX(modrm & 7).s[n]);
1670 UINT32 ea = GetEA(cpustate, modrm, 0);
1671 READMMX(cpustate, ea, s);
1673 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] + (INT32)s.s[n]);
1675 CYCLES(cpustate,1); // TODO: correct cycle count
1678 static void MMXOP(pxor_r64_rm64)(i386_state *cpustate) // Opcode 0f ef
1680 MMXPROLOG(cpustate);
1681 UINT8 modrm = FETCH(cpustate);
1682 if( modrm >= 0xc0 ) {
1683 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q ^ MMX(modrm & 7).q;
1686 UINT32 ea = GetEA(cpustate, modrm, 0);
1687 READMMX(cpustate, ea, s);
1688 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q ^ s.q;
1690 CYCLES(cpustate,1); // TODO: correct cycle count
1693 static void MMXOP(psllw_r64_rm64)(i386_state *cpustate) // Opcode 0f f1
1695 MMXPROLOG(cpustate);
1696 UINT8 modrm = FETCH(cpustate);
1697 if( modrm >= 0xc0 ) {
1698 int count=(int)MMX(modrm & 7).q;
1699 MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] << count;
1700 MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] << count;
1701 MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] << count;
1702 MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] << count;
1705 UINT32 ea = GetEA(cpustate, modrm, 0);
1706 READMMX(cpustate, ea, s);
1708 MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] << count;
1709 MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] << count;
1710 MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] << count;
1711 MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] << count;
1713 CYCLES(cpustate,1); // TODO: correct cycle count
1716 static void MMXOP(pslld_r64_rm64)(i386_state *cpustate) // Opcode 0f f2
1718 MMXPROLOG(cpustate);
1719 UINT8 modrm = FETCH(cpustate);
1720 if( modrm >= 0xc0 ) {
1721 int count=(int)MMX(modrm & 7).q;
1722 MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] << count;
1723 MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] << count;
1726 UINT32 ea = GetEA(cpustate, modrm, 0);
1727 READMMX(cpustate, ea, s);
1729 MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] << count;
1730 MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] << count;
1732 CYCLES(cpustate,1); // TODO: correct cycle count
1735 static void MMXOP(psllq_r64_rm64)(i386_state *cpustate) // Opcode 0f f3
1737 MMXPROLOG(cpustate);
1738 UINT8 modrm = FETCH(cpustate);
1739 if( modrm >= 0xc0 ) {
1740 int count=(int)MMX(modrm & 7).q;
1741 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q << count;
1744 UINT32 ea = GetEA(cpustate, modrm, 0);
1745 READMMX(cpustate, ea, s);
1747 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q << count;
1749 CYCLES(cpustate,1); // TODO: correct cycle count
1752 static void MMXOP(pmaddwd_r64_rm64)(i386_state *cpustate) // Opcode 0f f5
1754 MMXPROLOG(cpustate);
1755 UINT8 modrm = FETCH(cpustate);
1756 if( modrm >= 0xc0 ) {
1757 MMX((modrm >> 3) & 0x7).i[0]=(INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]+
1758 (INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1];
1759 MMX((modrm >> 3) & 0x7).i[1]=(INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]+
1760 (INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3];
1763 UINT32 ea = GetEA(cpustate, modrm, 0);
1764 READMMX(cpustate, ea, s);
1765 MMX((modrm >> 3) & 0x7).i[0]=(INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)s.s[0]+
1766 (INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)s.s[1];
1767 MMX((modrm >> 3) & 0x7).i[1]=(INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)s.s[2]+
1768 (INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)s.s[3];
1770 CYCLES(cpustate,1); // TODO: correct cycle count
1773 static void MMXOP(psubb_r64_rm64)(i386_state *cpustate) // Opcode 0f f8
1776 MMXPROLOG(cpustate);
1777 UINT8 modrm = FETCH(cpustate);
1778 if( modrm >= 0xc0 ) {
1780 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] - MMX(modrm & 7).b[n];
1783 UINT32 ea = GetEA(cpustate, modrm, 0);
1784 READMMX(cpustate, ea, s);
1786 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] - s.b[n];
1788 CYCLES(cpustate,1); // TODO: correct cycle count
1791 static void MMXOP(psubw_r64_rm64)(i386_state *cpustate) // Opcode 0f f9
1794 MMXPROLOG(cpustate);
1795 UINT8 modrm = FETCH(cpustate);
1796 if( modrm >= 0xc0 ) {
1798 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] - MMX(modrm & 7).w[n];
1801 UINT32 ea = GetEA(cpustate, modrm, 0);
1802 READMMX(cpustate, ea, s);
1804 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] - s.w[n];
1806 CYCLES(cpustate,1); // TODO: correct cycle count
1809 static void MMXOP(psubd_r64_rm64)(i386_state *cpustate) // Opcode 0f fa
1812 MMXPROLOG(cpustate);
1813 UINT8 modrm = FETCH(cpustate);
1814 if( modrm >= 0xc0 ) {
1816 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] - MMX(modrm & 7).d[n];
1819 UINT32 ea = GetEA(cpustate, modrm, 0);
1820 READMMX(cpustate, ea, s);
1822 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] - s.d[n];
1824 CYCLES(cpustate,1); // TODO: correct cycle count
1827 static void MMXOP(paddb_r64_rm64)(i386_state *cpustate) // Opcode 0f fc
1830 MMXPROLOG(cpustate);
1831 UINT8 modrm = FETCH(cpustate);
1832 if( modrm >= 0xc0 ) {
1834 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] + MMX(modrm & 7).b[n];
1837 UINT32 ea = GetEA(cpustate, modrm, 0);
1838 READMMX(cpustate, ea, s);
1840 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] + s.b[n];
1842 CYCLES(cpustate,1); // TODO: correct cycle count
1845 static void MMXOP(paddw_r64_rm64)(i386_state *cpustate) // Opcode 0f fd
1848 MMXPROLOG(cpustate);
1849 UINT8 modrm = FETCH(cpustate);
1850 if( modrm >= 0xc0 ) {
1852 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] + MMX(modrm & 7).w[n];
1855 UINT32 ea = GetEA(cpustate, modrm, 0);
1856 READMMX(cpustate, ea, s);
1858 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] + s.w[n];
1860 CYCLES(cpustate,1); // TODO: correct cycle count
1863 static void MMXOP(paddd_r64_rm64)(i386_state *cpustate) // Opcode 0f fe
1866 MMXPROLOG(cpustate);
1867 UINT8 modrm = FETCH(cpustate);
1868 if( modrm >= 0xc0 ) {
1870 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] + MMX(modrm & 7).d[n];
1873 UINT32 ea = GetEA(cpustate, modrm, 0);
1874 READMMX(cpustate, ea, s);
1876 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] + s.d[n];
1878 CYCLES(cpustate,1); // TODO: correct cycle count
1881 static void MMXOP(emms)(i386_state *cpustate) // Opcode 0f 77
1883 cpustate->x87_tw = 0xffff; // tag word = 0xffff
1885 CYCLES(cpustate,1); // TODO: correct cycle count
1888 static void MMXOP(movd_r64_rm32)(i386_state *cpustate) // Opcode 0f 6e
1890 MMXPROLOG(cpustate);
1891 UINT8 modrm = FETCH(cpustate);
1892 if( modrm >= 0xc0 ) {
1893 if (cpustate->xmm_operand_size)
1894 XMM((modrm >> 3) & 0x7).d[0]=LOAD_RM32(modrm);
1896 MMX((modrm >> 3) & 0x7).d[0]=LOAD_RM32(modrm);
1898 UINT32 ea = GetEA(cpustate, modrm, 0);
1899 if (cpustate->xmm_operand_size)
1900 XMM((modrm >> 3) & 0x7).d[0]=READ32(cpustate, ea);
1902 MMX((modrm >> 3) & 0x7).d[0]=READ32(cpustate, ea);
1904 MMX((modrm >> 3) & 0x7).d[1]=0;
1905 CYCLES(cpustate,1); // TODO: correct cycle count
1908 static void MMXOP(movq_r64_rm64)(i386_state *cpustate) // Opcode 0f 6f
1910 MMXPROLOG(cpustate);
1911 UINT8 modrm = FETCH(cpustate);
1912 if( modrm >= 0xc0 ) {
1913 if (cpustate->xmm_operand_size)
1914 XMM((modrm >> 3) & 0x7).l[0]=XMM(modrm & 0x7).l[0];
1916 MMX((modrm >> 3) & 0x7).l=MMX(modrm & 0x7).l;
1918 UINT32 ea = GetEA(cpustate, modrm, 0);
1919 if (cpustate->xmm_operand_size)
1920 READXMM_LO64(cpustate, ea, XMM((modrm >> 3) & 0x7));
1922 READMMX(cpustate, ea, MMX((modrm >> 3) & 0x7));
1924 CYCLES(cpustate,1); // TODO: correct cycle count
1927 static void MMXOP(movd_rm32_r64)(i386_state *cpustate) // Opcode 0f 7e
1929 MMXPROLOG(cpustate);
1930 UINT8 modrm = FETCH(cpustate);
1931 if( modrm >= 0xc0 ) {
1932 if (cpustate->xmm_operand_size)
1933 STORE_RM32(modrm, XMM((modrm >> 3) & 0x7).d[0]);
1935 STORE_RM32(modrm, MMX((modrm >> 3) & 0x7).d[0]);
1937 UINT32 ea = GetEA(cpustate, modrm, 0);
1938 if (cpustate->xmm_operand_size)
1939 WRITE32(cpustate, ea, XMM((modrm >> 3) & 0x7).d[0]);
1941 WRITE32(cpustate, ea, MMX((modrm >> 3) & 0x7).d[0]);
1943 CYCLES(cpustate,1); // TODO: correct cycle count
1946 static void MMXOP(movq_rm64_r64)(i386_state *cpustate) // Opcode 0f 7f
1948 MMXPROLOG(cpustate);
1949 UINT8 modrm = FETCH(cpustate);
1950 if( modrm >= 0xc0 ) {
1951 if (cpustate->xmm_operand_size)
1952 XMM(modrm & 0x7).l[0]=XMM((modrm >> 3) & 0x7).l[0];
1954 MMX(modrm & 0x7)=MMX((modrm >> 3) & 0x7);
1956 UINT32 ea = GetEA(cpustate, modrm, 0);
1957 WRITEMMX(cpustate, ea, MMX((modrm >> 3) & 0x7));
1959 CYCLES(cpustate,1); // TODO: correct cycle count
1962 static void MMXOP(pcmpeqb_r64_rm64)(i386_state *cpustate) // Opcode 0f 74
1965 MMXPROLOG(cpustate);
1966 UINT8 modrm = FETCH(cpustate);
1967 if( modrm >= 0xc0 ) {
1970 d=(modrm >> 3) & 0x7;
1971 for (c=0;c <= 7;c++)
1972 MMX(d).b[c]=(MMX(d).b[c] == MMX(s).b[c]) ? 0xff : 0;
1975 int d=(modrm >> 3) & 0x7;
1976 UINT32 ea = GetEA(cpustate, modrm, 0);
1977 READMMX(cpustate, ea, s);
1978 for (c=0;c <= 7;c++)
1979 MMX(d).b[c]=(MMX(d).b[c] == s.b[c]) ? 0xff : 0;
1981 CYCLES(cpustate,1); // TODO: correct cycle count
1984 static void MMXOP(pcmpeqw_r64_rm64)(i386_state *cpustate) // Opcode 0f 75
1986 MMXPROLOG(cpustate);
1987 UINT8 modrm = FETCH(cpustate);
1988 if( modrm >= 0xc0 ) {
1991 d=(modrm >> 3) & 0x7;
1992 MMX(d).w[0]=(MMX(d).w[0] == MMX(s).w[0]) ? 0xffff : 0;
1993 MMX(d).w[1]=(MMX(d).w[1] == MMX(s).w[1]) ? 0xffff : 0;
1994 MMX(d).w[2]=(MMX(d).w[2] == MMX(s).w[2]) ? 0xffff : 0;
1995 MMX(d).w[3]=(MMX(d).w[3] == MMX(s).w[3]) ? 0xffff : 0;
1998 int d=(modrm >> 3) & 0x7;
1999 UINT32 ea = GetEA(cpustate, modrm, 0);
2000 READMMX(cpustate, ea, s);
2001 MMX(d).w[0]=(MMX(d).w[0] == s.w[0]) ? 0xffff : 0;
2002 MMX(d).w[1]=(MMX(d).w[1] == s.w[1]) ? 0xffff : 0;
2003 MMX(d).w[2]=(MMX(d).w[2] == s.w[2]) ? 0xffff : 0;
2004 MMX(d).w[3]=(MMX(d).w[3] == s.w[3]) ? 0xffff : 0;
2006 CYCLES(cpustate,1); // TODO: correct cycle count
2009 static void MMXOP(pcmpeqd_r64_rm64)(i386_state *cpustate) // Opcode 0f 76
2011 MMXPROLOG(cpustate);
2012 UINT8 modrm = FETCH(cpustate);
2013 if( modrm >= 0xc0 ) {
2016 d=(modrm >> 3) & 0x7;
2017 MMX(d).d[0]=(MMX(d).d[0] == MMX(s).d[0]) ? 0xffffffff : 0;
2018 MMX(d).d[1]=(MMX(d).d[1] == MMX(s).d[1]) ? 0xffffffff : 0;
2021 int d=(modrm >> 3) & 0x7;
2022 UINT32 ea = GetEA(cpustate, modrm, 0);
2023 READMMX(cpustate, ea, s);
2024 MMX(d).d[0]=(MMX(d).d[0] == s.d[0]) ? 0xffffffff : 0;
2025 MMX(d).d[1]=(MMX(d).d[1] == s.d[1]) ? 0xffffffff : 0;
2027 CYCLES(cpustate,1); // TODO: correct cycle count
2030 static void MMXOP(pshufw_r64_rm64_i8)(i386_state *cpustate) // Opcode 0f 70
2032 MMXPROLOG(cpustate);
2033 UINT8 modrm = FETCH(cpustate);
2034 if( modrm >= 0xc0 ) {
2037 UINT8 imm8 = FETCH(cpustate);
2039 d=(modrm >> 3) & 0x7;
2041 MMX(d).w[0]=t.w[imm8 & 3];
2042 MMX(d).w[1]=t.w[(imm8 >> 2) & 3];
2043 MMX(d).w[2]=t.w[(imm8 >> 4) & 3];
2044 MMX(d).w[3]=t.w[(imm8 >> 6) & 3];
2047 int d=(modrm >> 3) & 0x7;
2048 UINT32 ea = GetEA(cpustate, modrm, 0);
2049 UINT8 imm8 = FETCH(cpustate);
2050 READMMX(cpustate, ea, s);
2051 MMX(d).w[0]=s.w[imm8 & 3];
2052 MMX(d).w[1]=s.w[(imm8 >> 2) & 3];
2053 MMX(d).w[2]=s.w[(imm8 >> 4) & 3];
2054 MMX(d).w[3]=s.w[(imm8 >> 6) & 3];
2056 CYCLES(cpustate,1); // TODO: correct cycle count
2059 static void MMXOP(punpcklbw_r64_r64m32)(i386_state *cpustate) // Opcode 0f 60
2061 MMXPROLOG(cpustate);
2062 UINT8 modrm = FETCH(cpustate);
2063 if( modrm >= 0xc0 ) {
2067 d=(modrm >> 3) & 0x7;
2069 MMX(d).b[0]=t & 0xff;
2070 MMX(d).b[1]=MMX(s).b[0];
2071 MMX(d).b[2]=(t >> 8) & 0xff;
2072 MMX(d).b[3]=MMX(s).b[1];
2073 MMX(d).b[4]=(t >> 16) & 0xff;
2074 MMX(d).b[5]=MMX(s).b[2];
2075 MMX(d).b[6]=(t >> 24) & 0xff;
2076 MMX(d).b[7]=MMX(s).b[3];
2079 int d=(modrm >> 3) & 0x7;
2080 UINT32 ea = GetEA(cpustate, modrm, 0);
2081 s = READ32(cpustate, ea);
2083 MMX(d).b[0]=t & 0xff;
2084 MMX(d).b[1]=s & 0xff;
2085 MMX(d).b[2]=(t >> 8) & 0xff;
2086 MMX(d).b[3]=(s >> 8) & 0xff;
2087 MMX(d).b[4]=(t >> 16) & 0xff;
2088 MMX(d).b[5]=(s >> 16) & 0xff;
2089 MMX(d).b[6]=(t >> 24) & 0xff;
2090 MMX(d).b[7]=(s >> 24) & 0xff;
2092 CYCLES(cpustate,1); // TODO: correct cycle count
2095 static void MMXOP(punpcklwd_r64_r64m32)(i386_state *cpustate) // Opcode 0f 61
2097 MMXPROLOG(cpustate);
2098 UINT8 modrm = FETCH(cpustate);
2099 if( modrm >= 0xc0 ) {
2103 d=(modrm >> 3) & 0x7;
2105 MMX(d).w[0]=MMX(d).w[0];
2106 MMX(d).w[1]=MMX(s).w[0];
2108 MMX(d).w[3]=MMX(s).w[1];
2112 int d=(modrm >> 3) & 0x7;
2113 UINT32 ea = GetEA(cpustate, modrm, 0);
2114 s = READ32(cpustate, ea);
2116 MMX(d).w[0]=MMX(d).w[0];
2117 MMX(d).w[1]=s & 0xffff;
2119 MMX(d).w[3]=(s >> 16) & 0xffff;
2121 CYCLES(cpustate,1); // TODO: correct cycle count
2124 static void MMXOP(punpckldq_r64_r64m32)(i386_state *cpustate) // Opcode 0f 62
2126 MMXPROLOG(cpustate);
2127 UINT8 modrm = FETCH(cpustate);
2128 if( modrm >= 0xc0 ) {
2131 d=(modrm >> 3) & 0x7;
2132 MMX(d).d[0]=MMX(d).d[0];
2133 MMX(d).d[1]=MMX(s).d[0];
2136 int d=(modrm >> 3) & 0x7;
2137 UINT32 ea = GetEA(cpustate, modrm, 0);
2138 s = READ32(cpustate, ea);
2139 MMX(d).d[0]=MMX(d).d[0];
2142 CYCLES(cpustate,1); // TODO: correct cycle count
2145 static void MMXOP(packsswb_r64_rm64)(i386_state *cpustate) // Opcode 0f 63
2147 MMXPROLOG(cpustate);
2148 UINT8 modrm = FETCH(cpustate);
2149 if( modrm >= 0xc0 ) {
2152 d=(modrm >> 3) & 0x7;
2153 MMX(d).c[0]=SaturatedSignedWordToSignedByte(MMX(d).s[0]);
2154 MMX(d).c[1]=SaturatedSignedWordToSignedByte(MMX(d).s[1]);
2155 MMX(d).c[2]=SaturatedSignedWordToSignedByte(MMX(d).s[2]);
2156 MMX(d).c[3]=SaturatedSignedWordToSignedByte(MMX(d).s[3]);
2157 MMX(d).c[4]=SaturatedSignedWordToSignedByte(MMX(s).s[0]);
2158 MMX(d).c[5]=SaturatedSignedWordToSignedByte(MMX(s).s[1]);
2159 MMX(d).c[6]=SaturatedSignedWordToSignedByte(MMX(s).s[2]);
2160 MMX(d).c[7]=SaturatedSignedWordToSignedByte(MMX(s).s[3]);
2163 int d=(modrm >> 3) & 0x7;
2164 UINT32 ea = GetEA(cpustate, modrm, 0);
2165 READMMX(cpustate, ea, s);
2166 MMX(d).c[0]=SaturatedSignedWordToSignedByte(MMX(d).s[0]);
2167 MMX(d).c[1]=SaturatedSignedWordToSignedByte(MMX(d).s[1]);
2168 MMX(d).c[2]=SaturatedSignedWordToSignedByte(MMX(d).s[2]);
2169 MMX(d).c[3]=SaturatedSignedWordToSignedByte(MMX(d).s[3]);
2170 MMX(d).c[4]=SaturatedSignedWordToSignedByte(s.s[0]);
2171 MMX(d).c[5]=SaturatedSignedWordToSignedByte(s.s[1]);
2172 MMX(d).c[6]=SaturatedSignedWordToSignedByte(s.s[2]);
2173 MMX(d).c[7]=SaturatedSignedWordToSignedByte(s.s[3]);
2175 CYCLES(cpustate,1); // TODO: correct cycle count
2178 static void MMXOP(pcmpgtb_r64_rm64)(i386_state *cpustate) // Opcode 0f 64
2181 MMXPROLOG(cpustate);
2182 UINT8 modrm = FETCH(cpustate);
2183 if( modrm >= 0xc0 ) {
2186 d=(modrm >> 3) & 0x7;
2187 for (c=0;c <= 7;c++)
2188 MMX(d).b[c]=(MMX(d).c[c] > MMX(s).c[c]) ? 0xff : 0;
2191 int d=(modrm >> 3) & 0x7;
2192 UINT32 ea = GetEA(cpustate, modrm, 0);
2193 READMMX(cpustate, ea, s);
2194 for (c=0;c <= 7;c++)
2195 MMX(d).b[c]=(MMX(d).c[c] > s.c[c]) ? 0xff : 0;
2197 CYCLES(cpustate,1); // TODO: correct cycle count
2200 static void MMXOP(pcmpgtw_r64_rm64)(i386_state *cpustate) // Opcode 0f 65
2203 MMXPROLOG(cpustate);
2204 UINT8 modrm = FETCH(cpustate);
2205 if( modrm >= 0xc0 ) {
2208 d=(modrm >> 3) & 0x7;
2209 for (c=0;c <= 3;c++)
2210 MMX(d).w[c]=(MMX(d).s[c] > MMX(s).s[c]) ? 0xffff : 0;
2213 int d=(modrm >> 3) & 0x7;
2214 UINT32 ea = GetEA(cpustate, modrm, 0);
2215 READMMX(cpustate, ea, s);
2216 for (c=0;c <= 3;c++)
2217 MMX(d).w[c]=(MMX(d).s[c] > s.s[c]) ? 0xffff : 0;
2219 CYCLES(cpustate,1); // TODO: correct cycle count
2222 static void MMXOP(pcmpgtd_r64_rm64)(i386_state *cpustate) // Opcode 0f 66
2225 MMXPROLOG(cpustate);
2226 UINT8 modrm = FETCH(cpustate);
2227 if( modrm >= 0xc0 ) {
2230 d=(modrm >> 3) & 0x7;
2231 for (c=0;c <= 1;c++)
2232 MMX(d).d[c]=(MMX(d).i[c] > MMX(s).i[c]) ? 0xffffffff : 0;
2235 int d=(modrm >> 3) & 0x7;
2236 UINT32 ea = GetEA(cpustate, modrm, 0);
2237 READMMX(cpustate, ea, s);
2238 for (c=0;c <= 1;c++)
2239 MMX(d).d[c]=(MMX(d).i[c] > s.i[c]) ? 0xffffffff : 0;
2241 CYCLES(cpustate,1); // TODO: correct cycle count
2244 static void MMXOP(packuswb_r64_rm64)(i386_state *cpustate) // Opcode 0f 67
2246 MMXPROLOG(cpustate);
2247 UINT8 modrm = FETCH(cpustate);
2248 if( modrm >= 0xc0 ) {
2251 d=(modrm >> 3) & 0x7;
2252 MMX(d).b[0]=SaturatedSignedWordToUnsignedByte(MMX(d).s[0]);
2253 MMX(d).b[1]=SaturatedSignedWordToUnsignedByte(MMX(d).s[1]);
2254 MMX(d).b[2]=SaturatedSignedWordToUnsignedByte(MMX(d).s[2]);
2255 MMX(d).b[3]=SaturatedSignedWordToUnsignedByte(MMX(d).s[3]);
2256 MMX(d).b[4]=SaturatedSignedWordToUnsignedByte(MMX(s).s[0]);
2257 MMX(d).b[5]=SaturatedSignedWordToUnsignedByte(MMX(s).s[1]);
2258 MMX(d).b[6]=SaturatedSignedWordToUnsignedByte(MMX(s).s[2]);
2259 MMX(d).b[7]=SaturatedSignedWordToUnsignedByte(MMX(s).s[3]);
2262 int d=(modrm >> 3) & 0x7;
2263 UINT32 ea = GetEA(cpustate, modrm, 0);
2264 READMMX(cpustate, ea, s);
2265 MMX(d).b[0]=SaturatedSignedWordToUnsignedByte(MMX(d).s[0]);
2266 MMX(d).b[1]=SaturatedSignedWordToUnsignedByte(MMX(d).s[1]);
2267 MMX(d).b[2]=SaturatedSignedWordToUnsignedByte(MMX(d).s[2]);
2268 MMX(d).b[3]=SaturatedSignedWordToUnsignedByte(MMX(d).s[3]);
2269 MMX(d).b[4]=SaturatedSignedWordToUnsignedByte(s.s[0]);
2270 MMX(d).b[5]=SaturatedSignedWordToUnsignedByte(s.s[1]);
2271 MMX(d).b[6]=SaturatedSignedWordToUnsignedByte(s.s[2]);
2272 MMX(d).b[7]=SaturatedSignedWordToUnsignedByte(s.s[3]);
2274 CYCLES(cpustate,1); // TODO: correct cycle count
2277 static void MMXOP(punpckhbw_r64_rm64)(i386_state *cpustate) // Opcode 0f 68
2279 MMXPROLOG(cpustate);
2280 UINT8 modrm = FETCH(cpustate);
2281 if( modrm >= 0xc0 ) {
2284 d=(modrm >> 3) & 0x7;
2285 MMX(d).b[0]=MMX(d).b[4];
2286 MMX(d).b[1]=MMX(s).b[4];
2287 MMX(d).b[2]=MMX(d).b[5];
2288 MMX(d).b[3]=MMX(s).b[5];
2289 MMX(d).b[4]=MMX(d).b[6];
2290 MMX(d).b[5]=MMX(s).b[6];
2291 MMX(d).b[6]=MMX(d).b[7];
2292 MMX(d).b[7]=MMX(s).b[7];
2295 int d=(modrm >> 3) & 0x7;
2296 UINT32 ea = GetEA(cpustate, modrm, 0);
2297 READMMX(cpustate, ea, s);
2298 MMX(d).b[0]=MMX(d).b[4];
2300 MMX(d).b[2]=MMX(d).b[5];
2302 MMX(d).b[4]=MMX(d).b[6];
2304 MMX(d).b[6]=MMX(d).b[7];
2307 CYCLES(cpustate,1); // TODO: correct cycle count
2310 static void MMXOP(punpckhwd_r64_rm64)(i386_state *cpustate) // Opcode 0f 69
2312 MMXPROLOG(cpustate);
2313 UINT8 modrm = FETCH(cpustate);
2314 if( modrm >= 0xc0 ) {
2317 d=(modrm >> 3) & 0x7;
2318 MMX(d).w[0]=MMX(d).w[2];
2319 MMX(d).w[1]=MMX(s).w[2];
2320 MMX(d).w[2]=MMX(d).w[3];
2321 MMX(d).w[3]=MMX(s).w[3];
2324 int d=(modrm >> 3) & 0x7;
2325 UINT32 ea = GetEA(cpustate, modrm, 0);
2326 READMMX(cpustate, ea, s);
2327 MMX(d).w[0]=MMX(d).w[2];
2329 MMX(d).w[2]=MMX(d).w[3];
2332 CYCLES(cpustate,1); // TODO: correct cycle count
2335 static void MMXOP(punpckhdq_r64_rm64)(i386_state *cpustate) // Opcode 0f 6a
2337 MMXPROLOG(cpustate);
2338 UINT8 modrm = FETCH(cpustate);
2339 if( modrm >= 0xc0 ) {
2342 d=(modrm >> 3) & 0x7;
2343 MMX(d).d[0]=MMX(d).d[1];
2344 MMX(d).d[1]=MMX(s).d[1];
2347 int d=(modrm >> 3) & 0x7;
2348 UINT32 ea = GetEA(cpustate, modrm, 0);
2349 READMMX(cpustate, ea, s);
2350 MMX(d).d[0]=MMX(d).d[1];
2353 CYCLES(cpustate,1); // TODO: correct cycle count
2356 static void MMXOP(packssdw_r64_rm64)(i386_state *cpustate) // Opcode 0f 6b
2358 MMXPROLOG(cpustate);
2359 UINT8 modrm = FETCH(cpustate);
2360 if( modrm >= 0xc0 ) {
2363 d=(modrm >> 3) & 0x7;
2364 MMX(d).s[0]=SaturatedSignedDwordToSignedWord(MMX(d).i[0]);
2365 MMX(d).s[1]=SaturatedSignedDwordToSignedWord(MMX(d).i[1]);
2366 MMX(d).s[2]=SaturatedSignedDwordToSignedWord(MMX(s).i[0]);
2367 MMX(d).s[3]=SaturatedSignedDwordToSignedWord(MMX(s).i[1]);
2370 int d=(modrm >> 3) & 0x7;
2371 UINT32 ea = GetEA(cpustate, modrm, 0);
2372 READMMX(cpustate, ea, s);
2373 MMX(d).s[0]=SaturatedSignedDwordToSignedWord(MMX(d).i[0]);
2374 MMX(d).s[1]=SaturatedSignedDwordToSignedWord(MMX(d).i[1]);
2375 MMX(d).s[2]=SaturatedSignedDwordToSignedWord(s.i[0]);
2376 MMX(d).s[3]=SaturatedSignedDwordToSignedWord(s.i[1]);
2378 CYCLES(cpustate,1); // TODO: correct cycle count
2381 static void SSEOP(sse_group0fae)(i386_state *cpustate) // Opcode 0f ae
2383 UINT8 modm = FETCH(cpustate);
2384 if( modm == 0xf8 ) {
2385 logerror("Unemulated SFENCE opcode called\n");
2386 CYCLES(cpustate,1); // sfence instruction
2387 } else if( modm == 0xf0 ) {
2388 CYCLES(cpustate,1); // mfence instruction
2389 } else if( modm == 0xe8 ) {
2390 CYCLES(cpustate,1); // lfence instruction
2391 } else if( modm < 0xc0 ) {
2393 switch ( (modm & 0x38) >> 3 )
2395 case 2: // ldmxcsr m32
2396 ea = GetEA(cpustate, modm, 0);
2397 cpustate->mxcsr = READ32(cpustate, ea);
2399 case 3: // stmxcsr m32
2400 ea = GetEA(cpustate, modm, 0);
2401 WRITE32(cpustate, ea, cpustate->mxcsr);
2403 case 7: // clflush m8
2404 GetNonTranslatedEA(cpustate, modm, NULL);
2407 report_invalid_modrm(cpustate, "sse_group0fae", modm);
2410 report_invalid_modrm(cpustate, "sse_group0fae", modm);
2414 static void SSEOP(cvttps2dq_r128_rm128)(i386_state *cpustate) // Opcode f3 0f 5b
2416 UINT8 modrm = FETCH(cpustate);
2417 if( modrm >= 0xc0 ) {
2418 XMM((modrm >> 3) & 0x7).i[0]=(INT32)XMM(modrm & 0x7).f[0];
2419 XMM((modrm >> 3) & 0x7).i[1]=(INT32)XMM(modrm & 0x7).f[1];
2420 XMM((modrm >> 3) & 0x7).i[2]=(INT32)XMM(modrm & 0x7).f[2];
2421 XMM((modrm >> 3) & 0x7).i[3]=(INT32)XMM(modrm & 0x7).f[3];
2424 UINT32 ea = GetEA(cpustate, modrm, 0);
2425 READXMM(cpustate, ea, src);
2426 XMM((modrm >> 3) & 0x7).i[0]=(INT32)src.f[0];
2427 XMM((modrm >> 3) & 0x7).i[1]=(INT32)src.f[1];
2428 XMM((modrm >> 3) & 0x7).i[2]=(INT32)src.f[2];
2429 XMM((modrm >> 3) & 0x7).i[3]=(INT32)src.f[3];
2431 CYCLES(cpustate,1); // TODO: correct cycle count
2434 static void SSEOP(cvtss2sd_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 5a
2436 UINT8 modrm = FETCH(cpustate);
2437 if( modrm >= 0xc0 ) {
2438 XMM((modrm >> 3) & 0x7).f64[0] = XMM(modrm & 0x7).f[0];
2441 UINT32 ea = GetEA(cpustate, modrm, 0);
2442 s.d[0] = READ32(cpustate, ea);
2443 XMM((modrm >> 3) & 0x7).f64[0] = s.f[0];
2445 CYCLES(cpustate,1); // TODO: correct cycle count
2448 static void SSEOP(cvttss2si_r32_r128m32)(i386_state *cpustate) // Opcode f3 0f 2c
2451 UINT8 modrm = FETCH(cpustate); // get mordm byte
2452 if( modrm >= 0xc0 ) { // if bits 7-6 are 11 the source is a xmm register (low doubleword)
2453 src = (INT32)XMM(modrm & 0x7).f[0^NATIVE_ENDIAN_VALUE_LE_BE(0,1)];
2454 } else { // otherwise is a memory address
2456 UINT32 ea = GetEA(cpustate, modrm, 0);
2457 t.d[0] = READ32(cpustate, ea);
2458 src = (INT32)t.f[0];
2460 STORE_REG32(modrm, (UINT32)src);
2461 CYCLES(cpustate,1); // TODO: correct cycle count
2464 static void SSEOP(cvtss2si_r32_r128m32)(i386_state *cpustate) // Opcode f3 0f 2d
2467 UINT8 modrm = FETCH(cpustate);
2468 if( modrm >= 0xc0 ) {
2469 src = (INT32)XMM(modrm & 0x7).f[0];
2472 UINT32 ea = GetEA(cpustate, modrm, 0);
2473 t.d[0] = READ32(cpustate, ea);
2474 src = (INT32)t.f[0];
2476 STORE_REG32(modrm, (UINT32)src);
2477 CYCLES(cpustate,1); // TODO: correct cycle count
2480 static void SSEOP(cvtsi2ss_r128_rm32)(i386_state *cpustate) // Opcode f3 0f 2a
2482 UINT8 modrm = FETCH(cpustate);
2483 if( modrm >= 0xc0 ) {
2484 XMM((modrm >> 3) & 0x7).f[0] = (INT32)LOAD_RM32(modrm);
2486 UINT32 ea = GetEA(cpustate, modrm, 0);
2487 XMM((modrm >> 3) & 0x7).f[0] = (INT32)READ32(cpustate, ea);
2489 CYCLES(cpustate,1); // TODO: correct cycle count
2492 static void SSEOP(cvtpi2ps_r128_rm64)(i386_state *cpustate) // Opcode 0f 2a
2494 UINT8 modrm = FETCH(cpustate);
2495 MMXPROLOG(cpustate);
2496 if( modrm >= 0xc0 ) {
2497 XMM((modrm >> 3) & 0x7).f[0] = MMX(modrm & 0x7).i[0];
2498 XMM((modrm >> 3) & 0x7).f[1] = MMX(modrm & 0x7).i[1];
2501 UINT32 ea = GetEA(cpustate, modrm, 0);
2502 READMMX(cpustate, ea, r);
2503 XMM((modrm >> 3) & 0x7).f[0] = r.i[0];
2504 XMM((modrm >> 3) & 0x7).f[1] = r.i[1];
2506 CYCLES(cpustate,1); // TODO: correct cycle count
2509 static void SSEOP(cvttps2pi_r64_r128m64)(i386_state *cpustate) // Opcode 0f 2c
2511 UINT8 modrm = FETCH(cpustate);
2512 MMXPROLOG(cpustate);
2513 if( modrm >= 0xc0 ) {
2514 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0];
2515 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1];
2518 UINT32 ea = GetEA(cpustate, modrm, 0);
2519 READXMM(cpustate, ea, r);
2520 XMM((modrm >> 3) & 0x7).i[0] = r.f[0];
2521 XMM((modrm >> 3) & 0x7).i[1] = r.f[1];
2523 CYCLES(cpustate,1); // TODO: correct cycle count
2526 static void SSEOP(cvtps2pi_r64_r128m64)(i386_state *cpustate) // Opcode 0f 2d
2528 UINT8 modrm = FETCH(cpustate);
2529 MMXPROLOG(cpustate);
2530 if( modrm >= 0xc0 ) {
2531 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0];
2532 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1];
2535 UINT32 ea = GetEA(cpustate, modrm, 0);
2536 READXMM(cpustate, ea, r);
2537 XMM((modrm >> 3) & 0x7).i[0] = r.f[0];
2538 XMM((modrm >> 3) & 0x7).i[1] = r.f[1];
2540 CYCLES(cpustate,1); // TODO: correct cycle count
2543 static void SSEOP(cvtps2pd_r128_r128m64)(i386_state *cpustate) // Opcode 0f 5a
2545 UINT8 modrm = FETCH(cpustate);
2546 if( modrm >= 0xc0 ) {
2547 XMM((modrm >> 3) & 0x7).f64[0] = (double)XMM(modrm & 0x7).f[0];
2548 XMM((modrm >> 3) & 0x7).f64[1] = (double)XMM(modrm & 0x7).f[1];
2551 UINT32 ea = GetEA(cpustate, modrm, 0);
2552 READMMX(cpustate, ea, r);
2553 XMM((modrm >> 3) & 0x7).f64[0] = (double)r.f[0];
2554 XMM((modrm >> 3) & 0x7).f64[1] = (double)r.f[1];
2556 CYCLES(cpustate,1); // TODO: correct cycle count
2559 static void SSEOP(cvtdq2ps_r128_rm128)(i386_state *cpustate) // Opcode 0f 5b
2561 UINT8 modrm = FETCH(cpustate);
2562 if( modrm >= 0xc0 ) {
2563 XMM((modrm >> 3) & 0x7).f[0] = (float)XMM(modrm & 0x7).i[0];
2564 XMM((modrm >> 3) & 0x7).f[1] = (float)XMM(modrm & 0x7).i[1];
2565 XMM((modrm >> 3) & 0x7).f[2] = (float)XMM(modrm & 0x7).i[2];
2566 XMM((modrm >> 3) & 0x7).f[3] = (float)XMM(modrm & 0x7).i[3];
2569 UINT32 ea = GetEA(cpustate, modrm, 0);
2570 READXMM(cpustate, ea, r);
2571 XMM((modrm >> 3) & 0x7).f[0] = (float)r.i[0];
2572 XMM((modrm >> 3) & 0x7).f[1] = (float)r.i[1];
2573 XMM((modrm >> 3) & 0x7).f[2] = (float)r.i[2];
2574 XMM((modrm >> 3) & 0x7).f[3] = (float)r.i[3];
2576 CYCLES(cpustate,1); // TODO: correct cycle count
2579 static void SSEOP(cvtdq2pd_r128_r128m64)(i386_state *cpustate) // Opcode f3 0f e6
2581 UINT8 modrm = FETCH(cpustate);
2582 if( modrm >= 0xc0 ) {
2583 XMM((modrm >> 3) & 0x7).f64[0] = (double)XMM(modrm & 0x7).i[0];
2584 XMM((modrm >> 3) & 0x7).f64[1] = (double)XMM(modrm & 0x7).i[1];
2587 UINT32 ea = GetEA(cpustate, modrm, 0);
2588 READMMX(cpustate, ea, s);
2589 XMM((modrm >> 3) & 0x7).f64[0] = (double)s.i[0];
2590 XMM((modrm >> 3) & 0x7).f64[1] = (double)s.i[1];
2592 CYCLES(cpustate,1); // TODO: correct cycle count
2595 static void SSEOP(movss_r128_rm128)(i386_state *cpustate) // Opcode f3 0f 10
2597 UINT8 modrm = FETCH(cpustate);
2598 if( modrm >= 0xc0 ) {
2599 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[0];
2601 UINT32 ea = GetEA(cpustate, modrm, 0);
2602 XMM((modrm >> 3) & 0x7).d[0] = READ32(cpustate, ea);
2604 CYCLES(cpustate,1); // TODO: correct cycle count
2607 static void SSEOP(movss_rm128_r128)(i386_state *cpustate) // Opcode f3 0f 11
2609 UINT8 modrm = FETCH(cpustate);
2610 if( modrm >= 0xc0 ) {
2611 XMM(modrm & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0];
2613 UINT32 ea = GetEA(cpustate, modrm, 0);
2614 WRITE32(cpustate, ea, XMM((modrm >> 3) & 0x7).d[0]);
2616 CYCLES(cpustate,1); // TODO: correct cycle count
2619 static void SSEOP(movsldup_r128_rm128)(i386_state *cpustate) // Opcode f3 0f 12
2621 UINT8 modrm = FETCH(cpustate);
2622 if( modrm >= 0xc0 ) {
2623 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[0];
2624 XMM((modrm >> 3) & 0x7).d[1] = XMM(modrm & 0x7).d[0];
2625 XMM((modrm >> 3) & 0x7).d[2] = XMM(modrm & 0x7).d[2];
2626 XMM((modrm >> 3) & 0x7).d[3] = XMM(modrm & 0x7).d[2];
2629 UINT32 ea = GetEA(cpustate, modrm, 0);
2630 READXMM(cpustate, ea, src);
2631 XMM((modrm >> 3) & 0x7).d[0] = src.d[0];
2632 XMM((modrm >> 3) & 0x7).d[1] = src.d[0];
2633 XMM((modrm >> 3) & 0x7).d[2] = src.d[2];
2634 XMM((modrm >> 3) & 0x7).d[3] = src.d[2];
2636 CYCLES(cpustate,1); // TODO: correct cycle count
2639 static void SSEOP(movshdup_r128_rm128)(i386_state *cpustate) // Opcode f3 0f 16
2641 UINT8 modrm = FETCH(cpustate);
2642 if( modrm >= 0xc0 ) {
2643 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[1];
2644 XMM((modrm >> 3) & 0x7).d[1] = XMM(modrm & 0x7).d[1];
2645 XMM((modrm >> 3) & 0x7).d[2] = XMM(modrm & 0x7).d[3];
2646 XMM((modrm >> 3) & 0x7).d[3] = XMM(modrm & 0x7).d[3];
2649 UINT32 ea = GetEA(cpustate, modrm, 0);
2650 READXMM(cpustate, ea, src);
2651 XMM((modrm >> 3) & 0x7).d[0] = src.d[1];
2652 XMM((modrm >> 3) & 0x7).d[1] = src.d[1];
2653 XMM((modrm >> 3) & 0x7).d[2] = src.d[3];
2654 XMM((modrm >> 3) & 0x7).d[3] = src.d[3];
2656 CYCLES(cpustate,1); // TODO: correct cycle count
2659 static void SSEOP(movaps_r128_rm128)(i386_state *cpustate) // Opcode 0f 28
2661 UINT8 modrm = FETCH(cpustate);
2662 if( modrm >= 0xc0 ) {
2663 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
2665 UINT32 ea = GetEA(cpustate, modrm, 0);
2666 READXMM(cpustate, ea, XMM((modrm >> 3) & 0x7));
2668 CYCLES(cpustate,1); // TODO: correct cycle count
2671 static void SSEOP(movaps_rm128_r128)(i386_state *cpustate) // Opcode 0f 29
2673 UINT8 modrm = FETCH(cpustate);
2674 if( modrm >= 0xc0 ) {
2675 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
2677 UINT32 ea = GetEA(cpustate, modrm, 0);
2678 WRITEXMM(cpustate, ea, XMM((modrm >> 3) & 0x7));
2680 CYCLES(cpustate,1); // TODO: correct cycle count
2683 static void SSEOP(movups_r128_rm128)(i386_state *cpustate) // Opcode 0f 10
2685 UINT8 modrm = FETCH(cpustate);
2686 if( modrm >= 0xc0 ) {
2687 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
2689 UINT32 ea = GetEA(cpustate, modrm, 0);
2690 READXMM(cpustate, ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
2692 CYCLES(cpustate,1); // TODO: correct cycle count
2695 static void SSEOP(movups_rm128_r128)(i386_state *cpustate) // Opcode 0f 11
2697 UINT8 modrm = FETCH(cpustate);
2698 if( modrm >= 0xc0 ) {
2699 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
2701 UINT32 ea = GetEA(cpustate, modrm, 0);
2702 WRITEXMM(cpustate, ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
2704 CYCLES(cpustate,1); // TODO: correct cycle count
2707 static void SSEOP(movlps_r128_m64)(i386_state *cpustate) // Opcode 0f 12
2709 UINT8 modrm = FETCH(cpustate);
2710 if( modrm >= 0xc0 ) {
2711 // unsupported by cpu
2712 CYCLES(cpustate,1); // TODO: correct cycle count
2714 UINT32 ea = GetEA(cpustate, modrm, 0);
2715 READXMM_LO64(cpustate, ea, XMM((modrm >> 3) & 0x7));
2716 CYCLES(cpustate,1); // TODO: correct cycle count
2720 static void SSEOP(movlps_m64_r128)(i386_state *cpustate) // Opcode 0f 13
2722 UINT8 modrm = FETCH(cpustate);
2723 if( modrm >= 0xc0 ) {
2724 // unsupported by cpu
2725 CYCLES(cpustate,1); // TODO: correct cycle count
2727 UINT32 ea = GetEA(cpustate, modrm, 0);
2728 WRITEXMM_LO64(cpustate, ea, XMM((modrm >> 3) & 0x7));
2729 CYCLES(cpustate,1); // TODO: correct cycle count
2733 static void SSEOP(movhps_r128_m64)(i386_state *cpustate) // Opcode 0f 16
2735 UINT8 modrm = FETCH(cpustate);
2736 if( modrm >= 0xc0 ) {
2737 // unsupported by cpu
2738 CYCLES(cpustate,1); // TODO: correct cycle count
2740 UINT32 ea = GetEA(cpustate, modrm, 0);
2741 READXMM_HI64(cpustate, ea, XMM((modrm >> 3) & 0x7));
2742 CYCLES(cpustate,1); // TODO: correct cycle count
2746 static void SSEOP(movhps_m64_r128)(i386_state *cpustate) // Opcode 0f 17
2748 UINT8 modrm = FETCH(cpustate);
2749 if( modrm >= 0xc0 ) {
2750 // unsupported by cpu
2751 CYCLES(cpustate,1); // TODO: correct cycle count
2753 UINT32 ea = GetEA(cpustate, modrm, 0);
2754 WRITEXMM_HI64(cpustate,ea, XMM((modrm >> 3) & 0x7));
2755 CYCLES(cpustate,1); // TODO: correct cycle count
2759 static void SSEOP(movntps_m128_r128)(i386_state *cpustate) // Opcode 0f 2b
2761 UINT8 modrm = FETCH(cpustate);
2762 if( modrm >= 0xc0 ) {
2763 // unsupported by cpu
2764 CYCLES(cpustate,1); // TODO: correct cycle count
2766 // since cache is not implemented
2767 UINT32 ea = GetEA(cpustate, modrm, 0);
2768 WRITEXMM(cpustate, ea, XMM((modrm >> 3) & 0x7));
2769 CYCLES(cpustate,1); // TODO: correct cycle count
2773 static void SSEOP(movmskps_r16_r128)(i386_state *cpustate) // Opcode 0f 50
2775 UINT8 modrm = FETCH(cpustate);
2776 if( modrm >= 0xc0 ) {
2778 b=(XMM(modrm & 0x7).d[0] >> 31) & 1;
2779 b=b | ((XMM(modrm & 0x7).d[1] >> 30) & 2);
2780 b=b | ((XMM(modrm & 0x7).d[2] >> 29) & 4);
2781 b=b | ((XMM(modrm & 0x7).d[3] >> 28) & 8);
2782 STORE_REG16(modrm, b);
2784 CYCLES(cpustate,1); // TODO: correct cycle count
2787 static void SSEOP(movmskps_r32_r128)(i386_state *cpustate) // Opcode 0f 50
2789 UINT8 modrm = FETCH(cpustate);
2790 if( modrm >= 0xc0 ) {
2792 b=(XMM(modrm & 0x7).d[0] >> 31) & 1;
2793 b=b | ((XMM(modrm & 0x7).d[1] >> 30) & 2);
2794 b=b | ((XMM(modrm & 0x7).d[2] >> 29) & 4);
2795 b=b | ((XMM(modrm & 0x7).d[3] >> 28) & 8);
2796 STORE_REG32(modrm, b);
2798 CYCLES(cpustate,1); // TODO: correct cycle count
2801 static void SSEOP(movq2dq_r128_r64)(i386_state *cpustate) // Opcode f3 0f d6
2803 MMXPROLOG(cpustate);
2804 UINT8 modrm = FETCH(cpustate);
2805 if( modrm >= 0xc0 ) {
2806 XMM((modrm >> 3) & 0x7).q[0] = MMX(modrm & 7).q;
2807 XMM((modrm >> 3) & 0x7).q[1] = 0;
2809 CYCLES(cpustate,1); // TODO: correct cycle count
2812 static void SSEOP(movdqu_r128_rm128)(i386_state *cpustate) // Opcode f3 0f 6f
2814 MMXPROLOG(cpustate);
2815 UINT8 modrm = FETCH(cpustate);
2816 if( modrm >= 0xc0 ) {
2817 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
2818 XMM((modrm >> 3) & 0x7).q[1] = XMM(modrm & 0x7).q[1];
2820 UINT32 ea = GetEA(cpustate, modrm, 0);
2821 READXMM(cpustate, ea, XMM((modrm >> 3) & 0x7));
2823 CYCLES(cpustate,1); // TODO: correct cycle count
2826 static void SSEOP(movdqu_rm128_r128)(i386_state *cpustate) // Opcode f3 0f 7f
2828 MMXPROLOG(cpustate);
2829 UINT8 modrm = FETCH(cpustate);
2830 if( modrm >= 0xc0 ) {
2831 XMM(modrm & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0];
2832 XMM(modrm & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1];
2834 UINT32 ea = GetEA(cpustate, modrm, 0);
2835 WRITEXMM(cpustate, ea, XMM((modrm >> 3) & 0x7));
2837 CYCLES(cpustate,1); // TODO: correct cycle count
2840 static void SSEOP(movq_r128_r128m64)(i386_state *cpustate) // Opcode f3 0f 7e
2842 MMXPROLOG(cpustate);
2843 UINT8 modrm = FETCH(cpustate);
2844 if( modrm >= 0xc0 ) {
2845 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
2846 XMM((modrm >> 3) & 0x7).q[1] = 0;
2848 UINT32 ea = GetEA(cpustate, modrm, 0);
2849 XMM((modrm >> 3) & 0x7).q[0] = READ64(cpustate,ea);
2850 XMM((modrm >> 3) & 0x7).q[1] = 0;
2852 CYCLES(cpustate,1); // TODO: correct cycle count
2855 static void SSEOP(pmovmskb_r16_r64)(i386_state *cpustate) // Opcode 0f d7
2857 //MMXPROLOG(cpustate);
2858 UINT8 modrm = FETCH(cpustate);
2859 if( modrm >= 0xc0 ) {
2861 b=(MMX(modrm & 0x7).b[0] >> 7) & 1;
2862 b=b | ((MMX(modrm & 0x7).b[1] >> 6) & 2);
2863 b=b | ((MMX(modrm & 0x7).b[2] >> 5) & 4);
2864 b=b | ((MMX(modrm & 0x7).b[3] >> 4) & 8);
2865 b=b | ((MMX(modrm & 0x7).b[4] >> 3) & 16);
2866 b=b | ((MMX(modrm & 0x7).b[5] >> 2) & 32);
2867 b=b | ((MMX(modrm & 0x7).b[6] >> 1) & 64);
2868 b=b | ((MMX(modrm & 0x7).b[7] >> 0) & 128);
2869 STORE_REG16(modrm, b);
2871 CYCLES(cpustate,1); // TODO: correct cycle count
2874 static void SSEOP(pmovmskb_r32_r64)(i386_state *cpustate) // Opcode 0f d7
2876 //MMXPROLOG(cpustate);
2877 UINT8 modrm = FETCH(cpustate);
2878 if( modrm >= 0xc0 ) {
2880 b=(MMX(modrm & 0x7).b[0] >> 7) & 1;
2881 b=b | ((MMX(modrm & 0x7).b[1] >> 6) & 2);
2882 b=b | ((MMX(modrm & 0x7).b[2] >> 5) & 4);
2883 b=b | ((MMX(modrm & 0x7).b[3] >> 4) & 8);
2884 b=b | ((MMX(modrm & 0x7).b[4] >> 3) & 16);
2885 b=b | ((MMX(modrm & 0x7).b[5] >> 2) & 32);
2886 b=b | ((MMX(modrm & 0x7).b[6] >> 1) & 64);
2887 b=b | ((MMX(modrm & 0x7).b[7] >> 0) & 128);
2888 STORE_REG32(modrm, b);
2890 CYCLES(cpustate,1); // TODO: correct cycle count
2893 static void SSEOP(xorps)(i386_state *cpustate) // Opcode 0f 57
2895 UINT8 modrm = FETCH(cpustate);
2896 if( modrm >= 0xc0 ) {
2897 XMM((modrm >> 3) & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0] ^ XMM(modrm & 0x7).d[0];
2898 XMM((modrm >> 3) & 0x7).d[1] = XMM((modrm >> 3) & 0x7).d[1] ^ XMM(modrm & 0x7).d[1];
2899 XMM((modrm >> 3) & 0x7).d[2] = XMM((modrm >> 3) & 0x7).d[2] ^ XMM(modrm & 0x7).d[2];
2900 XMM((modrm >> 3) & 0x7).d[3] = XMM((modrm >> 3) & 0x7).d[3] ^ XMM(modrm & 0x7).d[3];
2903 UINT32 ea = GetEA(cpustate, modrm, 0);
2904 READXMM(cpustate, ea, src);
2905 XMM((modrm >> 3) & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0] ^ src.d[0];
2906 XMM((modrm >> 3) & 0x7).d[1] = XMM((modrm >> 3) & 0x7).d[1] ^ src.d[1];
2907 XMM((modrm >> 3) & 0x7).d[2] = XMM((modrm >> 3) & 0x7).d[2] ^ src.d[2];
2908 XMM((modrm >> 3) & 0x7).d[3] = XMM((modrm >> 3) & 0x7).d[3] ^ src.d[3];
2910 CYCLES(cpustate,1); // TODO: correct cycle count
2913 static void SSEOP(addps)(i386_state *cpustate) // Opcode 0f 58
2915 UINT8 modrm = FETCH(cpustate);
2916 if( modrm >= 0xc0 ) {
2917 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + XMM(modrm & 0x7).f[0];
2918 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] + XMM(modrm & 0x7).f[1];
2919 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] + XMM(modrm & 0x7).f[2];
2920 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] + XMM(modrm & 0x7).f[3];
2923 UINT32 ea = GetEA(cpustate, modrm, 0);
2924 READXMM(cpustate, ea, src);
2925 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + src.f[0];
2926 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] + src.f[1];
2927 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] + src.f[2];
2928 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] + src.f[3];
2930 CYCLES(cpustate,1); // TODO: correct cycle count
2933 static void SSEOP(sqrtps_r128_rm128)(i386_state *cpustate) // Opcode 0f 51
2935 UINT8 modrm = FETCH(cpustate);
2936 if( modrm >= 0xc0 ) {
2937 XMM((modrm >> 3) & 0x7).f[0] = sqrt(XMM(modrm & 0x7).f[0]);
2938 XMM((modrm >> 3) & 0x7).f[1] = sqrt(XMM(modrm & 0x7).f[1]);
2939 XMM((modrm >> 3) & 0x7).f[2] = sqrt(XMM(modrm & 0x7).f[2]);
2940 XMM((modrm >> 3) & 0x7).f[3] = sqrt(XMM(modrm & 0x7).f[3]);
2943 UINT32 ea = GetEA(cpustate, modrm, 0);
2944 READXMM(cpustate, ea, src);
2945 XMM((modrm >> 3) & 0x7).f[0] = sqrt(src.f[0]);
2946 XMM((modrm >> 3) & 0x7).f[1] = sqrt(src.f[1]);
2947 XMM((modrm >> 3) & 0x7).f[2] = sqrt(src.f[2]);
2948 XMM((modrm >> 3) & 0x7).f[3] = sqrt(src.f[3]);
2950 CYCLES(cpustate,1); // TODO: correct cycle count
2953 static void SSEOP(rsqrtps_r128_rm128)(i386_state *cpustate) // Opcode 0f 52
2955 UINT8 modrm = FETCH(cpustate);
2956 if( modrm >= 0xc0 ) {
2957 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(XMM(modrm & 0x7).f[0]);
2958 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / sqrt(XMM(modrm & 0x7).f[1]);
2959 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / sqrt(XMM(modrm & 0x7).f[2]);
2960 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / sqrt(XMM(modrm & 0x7).f[3]);
2963 UINT32 ea = GetEA(cpustate, modrm, 0);
2964 READXMM(cpustate, ea, src);
2965 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(src.f[0]);
2966 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / sqrt(src.f[1]);
2967 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / sqrt(src.f[2]);
2968 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / sqrt(src.f[3]);
2970 CYCLES(cpustate,1); // TODO: correct cycle count
2973 static void SSEOP(rcpps_r128_rm128)(i386_state *cpustate) // Opcode 0f 53
2975 UINT8 modrm = FETCH(cpustate);
2976 if( modrm >= 0xc0 ) {
2977 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / XMM(modrm & 0x7).f[0];
2978 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / XMM(modrm & 0x7).f[1];
2979 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / XMM(modrm & 0x7).f[2];
2980 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / XMM(modrm & 0x7).f[3];
2983 UINT32 ea = GetEA(cpustate, modrm, 0);
2984 READXMM(cpustate, ea, src);
2985 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / src.f[0];
2986 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / src.f[1];
2987 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / src.f[2];
2988 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / src.f[3];
2990 CYCLES(cpustate,1); // TODO: correct cycle count
2993 static void SSEOP(andps_r128_rm128)(i386_state *cpustate) // Opcode 0f 54
2995 UINT8 modrm = FETCH(cpustate);
2996 if( modrm >= 0xc0 ) {
2997 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & XMM(modrm & 0x7).q[0];
2998 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & XMM(modrm & 0x7).q[1];
3001 UINT32 ea = GetEA(cpustate, modrm, 0);
3002 READXMM(cpustate, ea, src);
3003 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & src.q[0];
3004 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & src.q[1];
3006 CYCLES(cpustate,1); // TODO: correct cycle count
3009 static void SSEOP(andnps_r128_rm128)(i386_state *cpustate) // Opcode 0f 55
3011 UINT8 modrm = FETCH(cpustate);
3012 if( modrm >= 0xc0 ) {
3013 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & XMM(modrm & 0x7).q[0];
3014 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & XMM(modrm & 0x7).q[1];
3017 UINT32 ea = GetEA(cpustate, modrm, 0);
3018 READXMM(cpustate, ea, src);
3019 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & src.q[0];
3020 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & src.q[1];
3022 CYCLES(cpustate,1); // TODO: correct cycle count
3025 static void SSEOP(orps_r128_rm128)(i386_state *cpustate) // Opcode 0f 56
3027 UINT8 modrm = FETCH(cpustate);
3028 if( modrm >= 0xc0 ) {
3029 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | XMM(modrm & 0x7).q[0];
3030 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | XMM(modrm & 0x7).q[1];
3033 UINT32 ea = GetEA(cpustate, modrm, 0);
3034 READXMM(cpustate, ea, src);
3035 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | src.q[0];
3036 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | src.q[1];
3038 CYCLES(cpustate,1); // TODO: correct cycle count
3041 static void SSEOP(mulps)(i386_state *cpustate) // Opcode 0f 59 ????
3043 UINT8 modrm = FETCH(cpustate);
3044 if( modrm >= 0xc0 ) {
3045 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * XMM(modrm & 0x7).f[0];
3046 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] * XMM(modrm & 0x7).f[1];
3047 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] * XMM(modrm & 0x7).f[2];
3048 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] * XMM(modrm & 0x7).f[3];
3051 UINT32 ea = GetEA(cpustate, modrm, 0);
3052 READXMM(cpustate, ea, src);
3053 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * src.f[0];
3054 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] * src.f[1];
3055 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] * src.f[2];
3056 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] * src.f[3];
3058 CYCLES(cpustate,1); // TODO: correct cycle count
3061 static void SSEOP(subps)(i386_state *cpustate) // Opcode 0f 5c
3063 UINT8 modrm = FETCH(cpustate);
3064 if( modrm >= 0xc0 ) {
3065 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0];
3066 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] - XMM(modrm & 0x7).f[1];
3067 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] - XMM(modrm & 0x7).f[2];
3068 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] - XMM(modrm & 0x7).f[3];
3071 UINT32 ea = GetEA(cpustate, modrm, 0);
3072 READXMM(cpustate, ea, src);
3073 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - src.f[0];
3074 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] - src.f[1];
3075 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] - src.f[2];
3076 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] - src.f[3];
3078 CYCLES(cpustate,1); // TODO: correct cycle count
3081 INLINE float sse_min_single(float src1, float src2)
3083 /*if ((src1 == 0) && (src2 == 0))
3094 static void SSEOP(minps)(i386_state *cpustate) // Opcode 0f 5d
3096 UINT8 modrm = FETCH(cpustate);
3097 if( modrm >= 0xc0 ) {
3098 XMM((modrm >> 3) & 0x7).f[0] = sse_min_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
3099 XMM((modrm >> 3) & 0x7).f[1] = sse_min_single(XMM((modrm >> 3) & 0x7).f[1], XMM(modrm & 0x7).f[1]);
3100 XMM((modrm >> 3) & 0x7).f[2] = sse_min_single(XMM((modrm >> 3) & 0x7).f[2], XMM(modrm & 0x7).f[2]);
3101 XMM((modrm >> 3) & 0x7).f[3] = sse_min_single(XMM((modrm >> 3) & 0x7).f[3], XMM(modrm & 0x7).f[3]);
3104 UINT32 ea = GetEA(cpustate, modrm, 0);
3105 READXMM(cpustate, ea, src);
3106 XMM((modrm >> 3) & 0x7).f[0] = sse_min_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3107 XMM((modrm >> 3) & 0x7).f[1] = sse_min_single(XMM((modrm >> 3) & 0x7).f[1], src.f[1]);
3108 XMM((modrm >> 3) & 0x7).f[2] = sse_min_single(XMM((modrm >> 3) & 0x7).f[2], src.f[2]);
3109 XMM((modrm >> 3) & 0x7).f[3] = sse_min_single(XMM((modrm >> 3) & 0x7).f[3], src.f[3]);
3111 CYCLES(cpustate,1); // TODO: correct cycle count
3114 static void SSEOP(divps)(i386_state *cpustate) // Opcode 0f 5e
3116 UINT8 modrm = FETCH(cpustate);
3117 if( modrm >= 0xc0 ) {
3118 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / XMM(modrm & 0x7).f[0];
3119 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] / XMM(modrm & 0x7).f[1];
3120 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] / XMM(modrm & 0x7).f[2];
3121 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] / XMM(modrm & 0x7).f[3];
3124 UINT32 ea = GetEA(cpustate, modrm, 0);
3125 READXMM(cpustate, ea, src);
3126 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / src.f[0];
3127 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] / src.f[1];
3128 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] / src.f[2];
3129 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] / src.f[3];
3131 CYCLES(cpustate,1); // TODO: correct cycle count
3134 INLINE float sse_max_single(float src1, float src2)
3136 /*if ((src1 == 0) && (src2 == 0))
3147 static void SSEOP(maxps)(i386_state *cpustate) // Opcode 0f 5f
3149 UINT8 modrm = FETCH(cpustate);
3150 if( modrm >= 0xc0 ) {
3151 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
3152 XMM((modrm >> 3) & 0x7).f[1] = sse_max_single(XMM((modrm >> 3) & 0x7).f[1], XMM(modrm & 0x7).f[1]);
3153 XMM((modrm >> 3) & 0x7).f[2] = sse_max_single(XMM((modrm >> 3) & 0x7).f[2], XMM(modrm & 0x7).f[2]);
3154 XMM((modrm >> 3) & 0x7).f[3] = sse_max_single(XMM((modrm >> 3) & 0x7).f[3], XMM(modrm & 0x7).f[3]);
3157 UINT32 ea = GetEA(cpustate, modrm, 0);
3158 READXMM(cpustate, ea, src);
3159 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3160 XMM((modrm >> 3) & 0x7).f[1] = sse_max_single(XMM((modrm >> 3) & 0x7).f[1], src.f[1]);
3161 XMM((modrm >> 3) & 0x7).f[2] = sse_max_single(XMM((modrm >> 3) & 0x7).f[2], src.f[2]);
3162 XMM((modrm >> 3) & 0x7).f[3] = sse_max_single(XMM((modrm >> 3) & 0x7).f[3], src.f[3]);
3164 CYCLES(cpustate,1); // TODO: correct cycle count
3167 static void SSEOP(maxss_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 5f
3169 UINT8 modrm = FETCH(cpustate);
3170 if( modrm >= 0xc0 ) {
3171 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
3174 UINT32 ea = GetEA(cpustate, modrm, 0);
3175 src.d[0]=READ32(cpustate, ea);
3176 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3178 CYCLES(cpustate,1); // TODO: correct cycle count
3181 static void SSEOP(addss)(i386_state *cpustate) // Opcode f3 0f 58
3183 UINT8 modrm = FETCH(cpustate);
3184 if( modrm >= 0xc0 ) {
3185 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + XMM(modrm & 0x7).f[0];
3188 UINT32 ea = GetEA(cpustate, modrm, 0);
3189 READXMM(cpustate, ea, src);
3190 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + src.f[0];
3192 CYCLES(cpustate,1); // TODO: correct cycle count
3195 static void SSEOP(subss)(i386_state *cpustate) // Opcode f3 0f 5c
3197 UINT8 modrm = FETCH(cpustate);
3198 if( modrm >= 0xc0 ) {
3199 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0];
3202 UINT32 ea = GetEA(cpustate, modrm, 0);
3203 READXMM(cpustate, ea, src);
3204 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - src.f[0];
3206 CYCLES(cpustate,1); // TODO: correct cycle count
3209 static void SSEOP(mulss)(i386_state *cpustate) // Opcode f3 0f 5e
3211 UINT8 modrm = FETCH(cpustate);
3212 if( modrm >= 0xc0 ) {
3213 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * XMM(modrm & 0x7).f[0];
3216 UINT32 ea = GetEA(cpustate, modrm, 0);
3217 READXMM(cpustate, ea, src);
3218 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * src.f[0];
3220 CYCLES(cpustate,1); // TODO: correct cycle count
3223 static void SSEOP(divss)(i386_state *cpustate) // Opcode 0f 59
3225 UINT8 modrm = FETCH(cpustate);
3226 if( modrm >= 0xc0 ) {
3227 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / XMM(modrm & 0x7).f[0];
3230 UINT32 ea = GetEA(cpustate, modrm, 0);
3231 READXMM(cpustate, ea, src);
3232 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / src.f[0];
3234 CYCLES(cpustate,1); // TODO: correct cycle count
3237 static void SSEOP(rcpss_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 53
3239 UINT8 modrm = FETCH(cpustate);
3240 if( modrm >= 0xc0 ) {
3241 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / XMM(modrm & 0x7).f[0];
3244 UINT32 ea = GetEA(cpustate, modrm, 0);
3245 s.d[0]=READ32(cpustate, ea);
3246 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / s.f[0];
3248 CYCLES(cpustate,1); // TODO: correct cycle count
3251 static void SSEOP(sqrtss_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 51
3253 UINT8 modrm = FETCH(cpustate);
3254 if( modrm >= 0xc0 ) {
3255 XMM((modrm >> 3) & 0x7).f[0] = sqrt(XMM(modrm & 0x7).f[0]);
3258 UINT32 ea = GetEA(cpustate, modrm, 0);
3259 s.d[0]=READ32(cpustate, ea);
3260 XMM((modrm >> 3) & 0x7).f[0] = sqrt(s.f[0]);
3262 CYCLES(cpustate,1); // TODO: correct cycle count
3265 static void SSEOP(rsqrtss_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 52
3267 UINT8 modrm = FETCH(cpustate);
3268 if( modrm >= 0xc0 ) {
3269 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(XMM(modrm & 0x7).f[0]);
3272 UINT32 ea = GetEA(cpustate, modrm, 0);
3273 s.d[0]=READ32(cpustate, ea);
3274 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(s.f[0]);
3276 CYCLES(cpustate,1); // TODO: correct cycle count
3279 static void SSEOP(minss_r128_r128m32)(i386_state *cpustate) // Opcode f3 0f 5d
3281 UINT8 modrm = FETCH(cpustate);
3282 if( modrm >= 0xc0 ) {
3283 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] < XMM(modrm & 0x7).f[0] ? XMM((modrm >> 3) & 0x7).f[0] : XMM(modrm & 0x7).f[0];
3286 UINT32 ea = GetEA(cpustate, modrm, 0);
3287 s.d[0] = READ32(cpustate, ea);
3288 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] < s.f[0] ? XMM((modrm >> 3) & 0x7).f[0] : s.f[0];
3290 CYCLES(cpustate,1); // TODO: correct cycle count
3293 static void SSEOP(comiss_r128_r128m32)(i386_state *cpustate) // Opcode 0f 2f
3296 UINT8 modrm = FETCH(cpustate);
3297 if( modrm >= 0xc0 ) {
3298 a = XMM((modrm >> 3) & 0x7).d[0];
3299 b = XMM(modrm & 0x7).d[0];
3302 UINT32 ea = GetEA(cpustate, modrm, 0);
3303 READXMM(cpustate, ea, src);
3304 a = XMM((modrm >> 3) & 0x7).d[0];
3310 if (float32_is_nan(a) || float32_is_nan(b))
3321 if (float32_eq(a, b))
3323 if (float32_lt(a, b))
3326 // should generate exception when at least one of the operands is either QNaN or SNaN
3327 CYCLES(cpustate,1); // TODO: correct cycle count
3330 static void SSEOP(ucomiss_r128_r128m32)(i386_state *cpustate) // Opcode 0f 2e
3333 UINT8 modrm = FETCH(cpustate);
3334 if( modrm >= 0xc0 ) {
3335 a = XMM((modrm >> 3) & 0x7).d[0];
3336 b = XMM(modrm & 0x7).d[0];
3339 UINT32 ea = GetEA(cpustate, modrm, 0);
3340 READXMM(cpustate, ea, src);
3341 a = XMM((modrm >> 3) & 0x7).d[0];
3347 if (float32_is_nan(a) || float32_is_nan(b))
3358 if (float32_eq(a, b))
3360 if (float32_lt(a, b))
3363 // should generate exception when at least one of the operands is SNaN
3364 CYCLES(cpustate,1); // TODO: correct cycle count
3367 static void SSEOP(shufps)(i386_state *cpustate) // Opcode 0f 67
3369 UINT8 modrm = FETCH(cpustate);
3370 UINT8 sel = FETCH(cpustate);
3378 d=(modrm >> 3) & 0x7;
3379 if( modrm >= 0xc0 ) {
3382 XMM(d).d[1]=XMM(d).d[m2];
3384 XMM(d).d[2]=XMM(s).d[m3];
3385 XMM(d).d[3]=XMM(s).d[m4];
3389 UINT32 ea = GetEA(cpustate, modrm, 0);
3390 READXMM(cpustate, ea, src);
3392 XMM(d).d[1]=XMM(d).d[m2];
3394 XMM(d).d[2]=src.d[m3];
3395 XMM(d).d[3]=src.d[m4];
3397 CYCLES(cpustate,1); // TODO: correct cycle count
3400 static void SSEOP(unpcklps_r128_rm128)(i386_state *cpustate) // Opcode 0f 14
3402 UINT8 modrm = FETCH(cpustate);
3405 d=(modrm >> 3) & 0x7;
3406 if( modrm >= 0xc0 ) {
3407 XMM(d).d[3]=XMM(s).d[1];
3408 XMM(d).d[2]=XMM(d).d[1];
3409 XMM(d).d[1]=XMM(s).d[0];
3410 //XMM(d).d[0]=XMM(d).d[0];
3413 UINT32 ea = GetEA(cpustate, modrm, 0);
3414 READXMM(cpustate, ea, src);
3415 XMM(d).d[3]=src.d[1];
3416 XMM(d).d[2]=XMM(d).d[1];
3417 XMM(d).d[1]=src.d[0];
3419 CYCLES(cpustate,1); // TODO: correct cycle count
3422 static void SSEOP(unpckhps_r128_rm128)(i386_state *cpustate) // Opcode 0f 15
3424 UINT8 modrm = FETCH(cpustate);
3427 d=(modrm >> 3) & 0x7;
3428 if( modrm >= 0xc0 ) {
3429 XMM(d).d[0]=XMM(d).d[2];
3430 XMM(d).d[1]=XMM(s).d[2];
3431 XMM(d).d[2]=XMM(d).d[3];
3432 XMM(d).d[3]=XMM(s).d[3];
3435 UINT32 ea = GetEA(cpustate, modrm, 0);
3436 READXMM(cpustate, ea, src);
3437 XMM(d).d[0]=XMM(d).d[2];
3438 XMM(d).d[1]=src.d[2];
3439 XMM(d).d[2]=XMM(d).d[3];
3440 XMM(d).d[3]=src.d[3];
3442 CYCLES(cpustate,1); // TODO: correct cycle count
3445 INLINE bool sse_issingleordered(float op1, float op2)
3447 // TODO: true when at least one of the two source operands being compared is a NaN
3448 return (op1 != op1) || (op1 != op2);
3451 INLINE bool sse_issingleunordered(float op1, float op2)
3453 // TODO: true when neither source operand is a NaN
3454 return !((op1 != op1) || (op1 != op2));
3457 INLINE void sse_predicate_compare_single(UINT8 imm8, XMM_REG d, XMM_REG s)
3462 s.d[0]=s.f[0] == s.f[0] ? 0xffffffff : 0;
3463 d.d[1]=d.f[1] == s.f[1] ? 0xffffffff : 0;
3464 d.d[2]=d.f[2] == s.f[2] ? 0xffffffff : 0;
3465 d.d[3]=d.f[3] == s.f[3] ? 0xffffffff : 0;
3468 d.d[0]=d.f[0] < s.f[0] ? 0xffffffff : 0;
3469 d.d[1]=d.f[1] < s.f[1] ? 0xffffffff : 0;
3470 d.d[2]=d.f[2] < s.f[2] ? 0xffffffff : 0;
3471 d.d[3]=d.f[3] < s.f[3] ? 0xffffffff : 0;
3474 d.d[0]=d.f[0] <= s.f[0] ? 0xffffffff : 0;
3475 d.d[1]=d.f[1] <= s.f[1] ? 0xffffffff : 0;
3476 d.d[2]=d.f[2] <= s.f[2] ? 0xffffffff : 0;
3477 d.d[3]=d.f[3] <= s.f[3] ? 0xffffffff : 0;
3480 d.d[0]=sse_issingleunordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
3481 d.d[1]=sse_issingleunordered(d.f[1], s.f[1]) ? 0xffffffff : 0;
3482 d.d[2]=sse_issingleunordered(d.f[2], s.f[2]) ? 0xffffffff : 0;
3483 d.d[3]=sse_issingleunordered(d.f[3], s.f[3]) ? 0xffffffff : 0;
3486 d.d[0]=d.f[0] != s.f[0] ? 0xffffffff : 0;
3487 d.d[1]=d.f[1] != s.f[1] ? 0xffffffff : 0;
3488 d.d[2]=d.f[2] != s.f[2] ? 0xffffffff : 0;
3489 d.d[3]=d.f[3] != s.f[3] ? 0xffffffff : 0;
3492 d.d[0]=d.f[0] < s.f[0] ? 0 : 0xffffffff;
3493 d.d[1]=d.f[1] < s.f[1] ? 0 : 0xffffffff;
3494 d.d[2]=d.f[2] < s.f[2] ? 0 : 0xffffffff;
3495 d.d[3]=d.f[3] < s.f[3] ? 0 : 0xffffffff;
3498 d.d[0]=d.f[0] <= s.f[0] ? 0 : 0xffffffff;
3499 d.d[1]=d.f[1] <= s.f[1] ? 0 : 0xffffffff;
3500 d.d[2]=d.f[2] <= s.f[2] ? 0 : 0xffffffff;
3501 d.d[3]=d.f[3] <= s.f[3] ? 0 : 0xffffffff;
3504 d.d[0]=sse_issingleordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
3505 d.d[1]=sse_issingleordered(d.f[1], s.f[1]) ? 0xffffffff : 0;
3506 d.d[2]=sse_issingleordered(d.f[2], s.f[2]) ? 0xffffffff : 0;
3507 d.d[3]=sse_issingleordered(d.f[3], s.f[3]) ? 0xffffffff : 0;
3512 INLINE void sse_predicate_compare_single_scalar(UINT8 imm8, XMM_REG d, XMM_REG s)
3517 s.d[0]=s.f[0] == s.f[0] ? 0xffffffff : 0;
3520 d.d[0]=d.f[0] < s.f[0] ? 0xffffffff : 0;
3523 d.d[0]=d.f[0] <= s.f[0] ? 0xffffffff : 0;
3526 d.d[0]=sse_issingleunordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
3529 d.d[0]=d.f[0] != s.f[0] ? 0xffffffff : 0;
3532 d.d[0]=d.f[0] < s.f[0] ? 0 : 0xffffffff;
3535 d.d[0]=d.f[0] <= s.f[0] ? 0 : 0xffffffff;
3538 d.d[0]=sse_issingleordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
3543 static void SSEOP(cmpps_r128_rm128_i8)(i386_state *cpustate) // Opcode 0f c2
3545 UINT8 modrm = FETCH(cpustate);
3546 if( modrm >= 0xc0 ) {
3548 UINT8 imm8 = FETCH(cpustate);
3550 d=(modrm >> 3) & 0x7;
3551 sse_predicate_compare_single(imm8, XMM(d), XMM(s));
3555 UINT32 ea = GetEA(cpustate, modrm, 0);
3556 UINT8 imm8 = FETCH(cpustate);
3557 READXMM(cpustate, ea, s);
3558 d=(modrm >> 3) & 0x7;
3559 sse_predicate_compare_single(imm8, XMM(d), s);
3561 CYCLES(cpustate,1); // TODO: correct cycle count
3564 static void SSEOP(cmpss_r128_r128m32_i8)(i386_state *cpustate) // Opcode f3 0f c2
3566 UINT8 modrm = FETCH(cpustate);
3567 if( modrm >= 0xc0 ) {
3569 UINT8 imm8 = FETCH(cpustate);
3571 d=(modrm >> 3) & 0x7;
3572 sse_predicate_compare_single_scalar(imm8, XMM(d), XMM(s));
3576 UINT32 ea = GetEA(cpustate, modrm, 0);
3577 UINT8 imm8 = FETCH(cpustate);
3578 s.d[0]=READ32(cpustate, ea);
3579 d=(modrm >> 3) & 0x7;
3580 sse_predicate_compare_single_scalar(imm8, XMM(d), s);
3582 CYCLES(cpustate,1); // TODO: correct cycle count
3585 static void SSEOP(pinsrw_r64_r16m16_i8)(i386_state *cpustate) // Opcode 0f c4
3587 MMXPROLOG(cpustate);
3588 UINT8 modrm = FETCH(cpustate);
3589 if( modrm >= 0xc0 ) {
3590 UINT8 imm8 = FETCH(cpustate);
3591 UINT16 v = LOAD_RM16(modrm);
3592 if (cpustate->xmm_operand_size)
3593 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
3595 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
3597 UINT32 ea = GetEA(cpustate, modrm, 0);
3598 UINT8 imm8 = FETCH(cpustate);
3599 UINT16 v = READ16(cpustate, ea);
3600 if (cpustate->xmm_operand_size)
3601 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
3603 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
3605 CYCLES(cpustate,1); // TODO: correct cycle count
3608 static void SSEOP(pinsrw_r64_r32m16_i8)(i386_state *cpustate) // Opcode 0f c4
3610 MMXPROLOG(cpustate);
3611 UINT8 modrm = FETCH(cpustate);
3612 if( modrm >= 0xc0 ) {
3613 UINT8 imm8 = FETCH(cpustate);
3614 UINT16 v = (UINT16)LOAD_RM32(modrm);
3615 if (cpustate->xmm_operand_size)
3616 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
3618 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
3620 UINT32 ea = GetEA(cpustate, modrm, 0);
3621 UINT8 imm8 = FETCH(cpustate);
3622 UINT16 v = READ16(cpustate, ea);
3623 if (cpustate->xmm_operand_size)
3624 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
3626 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
3628 CYCLES(cpustate,1); // TODO: correct cycle count
3631 static void SSEOP(pextrw_r16_r64_i8)(i386_state *cpustate) // Opcode 0f c5
3633 //MMXPROLOG(cpustate);
3634 UINT8 modrm = FETCH(cpustate);
3635 if( modrm >= 0xc0 ) {
3636 UINT8 imm8 = FETCH(cpustate);
3637 if (cpustate->xmm_operand_size)
3638 STORE_REG16(modrm, XMM(modrm & 0x7).w[imm8 & 7]);
3640 STORE_REG16(modrm, MMX(modrm & 0x7).w[imm8 & 3]);
3642 //UINT8 imm8 = FETCH(cpustate);
3643 report_invalid_modrm(cpustate, "pextrw_r16_r64_i8", modrm);
3645 CYCLES(cpustate,1); // TODO: correct cycle count
3648 static void SSEOP(pextrw_r32_r64_i8)(i386_state *cpustate) // Opcode 0f c5
3650 //MMXPROLOG(cpustate);
3651 UINT8 modrm = FETCH(cpustate);
3652 if( modrm >= 0xc0 ) {
3653 UINT8 imm8 = FETCH(cpustate);
3654 if (cpustate->xmm_operand_size)
3655 STORE_REG32(modrm, XMM(modrm & 0x7).w[imm8 & 7]);
3657 STORE_REG32(modrm, MMX(modrm & 0x7).w[imm8 & 3]);
3659 //UINT8 imm8 = FETCH(cpustate);
3660 report_invalid_modrm(cpustate, "pextrw_r32_r64_i8", modrm);
3662 CYCLES(cpustate,1); // TODO: correct cycle count
3665 static void SSEOP(pminub_r64_rm64)(i386_state *cpustate) // Opcode 0f da
3668 MMXPROLOG(cpustate);
3669 UINT8 modrm = FETCH(cpustate);
3670 if( modrm >= 0xc0 ) {
3672 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] < MMX(modrm & 0x7).b[n] ? MMX((modrm >> 3) & 0x7).b[n] : MMX(modrm & 0x7).b[n];
3675 UINT32 ea = GetEA(cpustate, modrm, 0);
3676 READMMX(cpustate, ea, s);
3678 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] < s.b[n] ? MMX((modrm >> 3) & 0x7).b[n] : s.b[n];
3680 CYCLES(cpustate,1); // TODO: correct cycle count
3683 static void SSEOP(pmaxub_r64_rm64)(i386_state *cpustate) // Opcode 0f de
3686 MMXPROLOG(cpustate);
3687 UINT8 modrm = FETCH(cpustate);
3688 if( modrm >= 0xc0 ) {
3690 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] > MMX(modrm & 0x7).b[n] ? MMX((modrm >> 3) & 0x7).b[n] : MMX(modrm & 0x7).b[n];
3693 UINT32 ea = GetEA(cpustate, modrm, 0);
3694 READMMX(cpustate, ea, s);
3696 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] > s.b[n] ? MMX((modrm >> 3) & 0x7).b[n] : s.b[n];
3698 CYCLES(cpustate,1); // TODO: correct cycle count
3701 static void SSEOP(pavgb_r64_rm64)(i386_state *cpustate) // Opcode 0f e0
3704 MMXPROLOG(cpustate);
3705 UINT8 modrm = FETCH(cpustate);
3706 if( modrm >= 0xc0 ) {
3708 MMX((modrm >> 3) & 0x7).b[n] = ((UINT16)MMX((modrm >> 3) & 0x7).b[n] + (UINT16)MMX(modrm & 0x7).b[n] + 1) >> 1;
3711 UINT32 ea = GetEA(cpustate, modrm, 0);
3712 READMMX(cpustate, ea, s);
3714 MMX((modrm >> 3) & 0x7).b[n] = ((UINT16)MMX((modrm >> 3) & 0x7).b[n] + (UINT16)s.b[n] + 1) >> 1;
3716 CYCLES(cpustate,1); // TODO: correct cycle count
3719 static void SSEOP(pavgw_r64_rm64)(i386_state *cpustate) // Opcode 0f e3
3722 MMXPROLOG(cpustate);
3723 UINT8 modrm = FETCH(cpustate);
3724 if( modrm >= 0xc0 ) {
3726 MMX((modrm >> 3) & 0x7).w[n] = ((UINT32)MMX((modrm >> 3) & 0x7).w[n] + (UINT32)MMX(modrm & 0x7).w[n] + 1) >> 1;
3729 UINT32 ea = GetEA(cpustate, modrm, 0);
3730 READMMX(cpustate, ea, s);
3732 MMX((modrm >> 3) & 0x7).w[n] = ((UINT32)MMX((modrm >> 3) & 0x7).w[n] + (UINT32)s.w[n] + 1) >> 1;
3734 CYCLES(cpustate,1); // TODO: correct cycle count
3737 static void SSEOP(pmulhuw_r64_rm64)(i386_state *cpustate) // Opcode 0f e4
3739 MMXPROLOG(cpustate);
3740 UINT8 modrm = FETCH(cpustate);
3741 if( modrm >= 0xc0 ) {
3742 MMX((modrm >> 3) & 0x7).w[0]=((UINT32)MMX((modrm >> 3) & 0x7).w[0]*(UINT32)MMX(modrm & 7).w[0]) >> 16;
3743 MMX((modrm >> 3) & 0x7).w[1]=((UINT32)MMX((modrm >> 3) & 0x7).w[1]*(UINT32)MMX(modrm & 7).w[1]) >> 16;
3744 MMX((modrm >> 3) & 0x7).w[2]=((UINT32)MMX((modrm >> 3) & 0x7).w[2]*(UINT32)MMX(modrm & 7).w[2]) >> 16;
3745 MMX((modrm >> 3) & 0x7).w[3]=((UINT32)MMX((modrm >> 3) & 0x7).w[3]*(UINT32)MMX(modrm & 7).w[3]) >> 16;
3748 UINT32 ea = GetEA(cpustate, modrm, 0);
3749 READMMX(cpustate, ea, s);
3750 MMX((modrm >> 3) & 0x7).w[0]=((UINT32)MMX((modrm >> 3) & 0x7).w[0]*(UINT32)s.w[0]) >> 16;
3751 MMX((modrm >> 3) & 0x7).w[1]=((UINT32)MMX((modrm >> 3) & 0x7).w[1]*(UINT32)s.w[1]) >> 16;
3752 MMX((modrm >> 3) & 0x7).w[2]=((UINT32)MMX((modrm >> 3) & 0x7).w[2]*(UINT32)s.w[2]) >> 16;
3753 MMX((modrm >> 3) & 0x7).w[3]=((UINT32)MMX((modrm >> 3) & 0x7).w[3]*(UINT32)s.w[3]) >> 16;
3755 CYCLES(cpustate,1); // TODO: correct cycle count
3758 static void SSEOP(pminsw_r64_rm64)(i386_state *cpustate) // Opcode 0f ea
3761 MMXPROLOG(cpustate);
3762 UINT8 modrm = FETCH(cpustate);
3763 if( modrm >= 0xc0 ) {
3765 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] < MMX(modrm & 0x7).s[n] ? MMX((modrm >> 3) & 0x7).s[n] : MMX(modrm & 0x7).s[n];
3768 UINT32 ea = GetEA(cpustate, modrm, 0);
3769 READMMX(cpustate, ea, s);
3771 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] < s.s[n] ? MMX((modrm >> 3) & 0x7).s[n] : s.s[n];
3773 CYCLES(cpustate,1); // TODO: correct cycle count
3776 static void SSEOP(pmaxsw_r64_rm64)(i386_state *cpustate) // Opcode 0f ee
3779 MMXPROLOG(cpustate);
3780 UINT8 modrm = FETCH(cpustate);
3781 if( modrm >= 0xc0 ) {
3783 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] > MMX(modrm & 0x7).s[n] ? MMX((modrm >> 3) & 0x7).s[n] : MMX(modrm & 0x7).s[n];
3786 UINT32 ea = GetEA(cpustate, modrm, 0);
3787 READMMX(cpustate, ea, s);
3789 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] > s.s[n] ? MMX((modrm >> 3) & 0x7).s[n] : s.s[n];
3791 CYCLES(cpustate,1); // TODO: correct cycle count
3794 static void SSEOP(pmuludq_r64_rm64)(i386_state *cpustate) // Opcode 0f f4
3796 MMXPROLOG(cpustate);
3797 UINT8 modrm = FETCH(cpustate);
3798 if( modrm >= 0xc0 ) {
3799 MMX((modrm >> 3) & 0x7).q = (UINT64)MMX((modrm >> 3) & 0x7).d[0] * (UINT64)MMX(modrm & 0x7).d[0];
3802 UINT32 ea = GetEA(cpustate, modrm, 0);
3803 READMMX(cpustate, ea, s);
3804 MMX((modrm >> 3) & 0x7).q = (UINT64)MMX((modrm >> 3) & 0x7).d[0] * (UINT64)s.d[0];
3806 CYCLES(cpustate,1); // TODO: correct cycle count
3809 static void SSEOP(psadbw_r64_rm64)(i386_state *cpustate) // Opcode 0f f6
3813 MMXPROLOG(cpustate);
3814 UINT8 modrm = FETCH(cpustate);
3815 if( modrm >= 0xc0 ) {
3818 temp += abs((INT32)MMX((modrm >> 3) & 0x7).b[n] - (INT32)MMX(modrm & 0x7).b[n]);
3819 MMX((modrm >> 3) & 0x7).l=(UINT64)temp & 0xffff;
3822 UINT32 ea = GetEA(cpustate, modrm, 0);
3823 READMMX(cpustate, ea, s);
3826 temp += abs((INT32)MMX((modrm >> 3) & 0x7).b[n] - (INT32)s.b[n]);
3827 MMX((modrm >> 3) & 0x7).l=(UINT64)temp & 0xffff;
3829 CYCLES(cpustate,1); // TODO: correct cycle count
3832 static void SSEOP(psubq_r64_rm64)(i386_state *cpustate) // Opcode 0f fb
3834 MMXPROLOG(cpustate);
3835 UINT8 modrm = FETCH(cpustate);
3836 if( modrm >= 0xc0 ) {
3837 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q - MMX(modrm & 7).q;
3840 UINT32 ea = GetEA(cpustate, modrm, 0);
3841 READMMX(cpustate, ea, s);
3842 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q - s.q;
3844 CYCLES(cpustate,1); // TODO: correct cycle count
3847 static void SSEOP(pshufhw_r128_rm128_i8)(i386_state *cpustate) // Opcode f3 0f 70
3849 UINT8 modrm = FETCH(cpustate);
3850 if( modrm >= 0xc0 ) {
3853 UINT8 imm8 = FETCH(cpustate);
3855 d=(modrm >> 3) & 0x7;
3857 XMM(d).q[0]=XMM(s).q[0];
3858 XMM(d).w[4]=t.w[imm8 & 3];
3859 XMM(d).w[5]=t.w[(imm8 >> 2) & 3];
3860 XMM(d).w[6]=t.w[(imm8 >> 4) & 3];
3861 XMM(d).w[7]=t.w[(imm8 >> 6) & 3];
3864 int d=(modrm >> 3) & 0x7;
3865 UINT32 ea = GetEA(cpustate, modrm, 0);
3866 UINT8 imm8 = FETCH(cpustate);
3867 READXMM(cpustate, ea, s);
3869 XMM(d).w[4]=s.w[4 + (imm8 & 3)];
3870 XMM(d).w[5]=s.w[4 + ((imm8 >> 2) & 3)];
3871 XMM(d).w[6]=s.w[4 + ((imm8 >> 4) & 3)];
3872 XMM(d).w[7]=s.w[4 + ((imm8 >> 6) & 3)];
3874 CYCLES(cpustate,1); // TODO: correct cycle count