1 // license:BSD-3-Clause
2 // copyright-holders:Ville Linde, Barry Rodewald, Carl, Phil Bennett
3 // Pentium+ specific opcodes
4 #include "./i386_opdef.h"
// FAULT: set cpustate->ext to 1, raise `fault` with error code `error` through
// i386_trap_with_error (third argument 0) and return from the calling handler.
6 #define FAULT(fault,error) {cpustate->ext = 1; i386_trap_with_error(fault,0,0,error); return;}
// FAULT_EXP: same as FAULT, but passes trap_level+1 as the third argument;
// a `trap_level` variable must be in scope at the point of use.
7 #define FAULT_EXP(fault,error) {cpustate->ext = 1; i386_trap_with_error(fault,0,trap_level+1,error); return;}
9 extern flag float32_is_nan( float32 a ); // since it's not defined in softfloat.h
10 extern flag float64_is_nan( float64 a ); // since it's not defined in softfloat.h
// RDMSR: reads the model-specific register selected by ECX through MSR_READ and
// returns the 64-bit result split across EDX (high dword) and EAX (low dword).
// Raises #GP(0) when CPL != 0 or valid_msr is still 0 after MSR_READ.
// NOTE(review): EDX:EAX are written before the check below, so they are modified
// even when the fault is raised — confirm this is intended.
// NOTE(review): some lines of this function are not visible in this excerpt.
12 void I386_OPS_BASE::PENTIUMOP(rdmsr)() // Opcode 0x0f 32
17 data = MSR_READ(REG32(ECX),&valid_msr);
18 REG32(EDX) = data >> 32;
19 REG32(EAX) = data & 0xffffffff;
21 if(cpustate->CPL != 0 || valid_msr == 0) // if current privilege level isn't 0 or the register isn't recognized ...
22 FAULT(FAULT_GP,0) // ... throw a general exception fault
// WRMSR: builds a 64-bit value from EDX:EAX (EDX is the high dword) and passes it
// to MSR_WRITE for the register selected by ECX.
// Raises #GP(0) when CPL != 0 or valid_msr is still 0 after MSR_WRITE.
// NOTE(review): MSR_WRITE is invoked before the privilege check below — confirm
// that an unprivileged write cannot take effect before the fault.
// NOTE(review): some lines of this function are not visible in this excerpt.
27 void I386_OPS_BASE::PENTIUMOP(wrmsr)() // Opcode 0x0f 30
32 data = (UINT64)REG32(EAX);
33 data |= (UINT64)(REG32(EDX)) << 32;
35 MSR_WRITE(REG32(ECX),data,&valid_msr);
37 if(cpustate->CPL != 0 || valid_msr == 0) // if current privilege level isn't 0 or the register isn't recognized
38 FAULT(FAULT_GP,0) // ... throw a general exception fault
40 CYCLES(1); // TODO: correct cycle count (~30-45)
// RDTSC: returns the time-stamp value in EDX:EAX (EAX = low dword, EDX = high dword).
// The value is the stored tsc plus (base_cycles - cycles).
// NOTE(review): some lines of this function are not visible in this excerpt.
43 void I386_OPS_BASE::PENTIUMOP(rdtsc)() // Opcode 0x0f 31
45 UINT64 ts = cpustate->tsc + (cpustate->base_cycles - cpustate->cycles);
46 REG32(EAX) = (UINT32)(ts);
47 REG32(EDX) = (UINT32)(ts >> 32);
// UD2 handler.
// NOTE(review): only the signature is visible in this excerpt; the body is not shown.
52 void I386_OPS_BASE::PENTIUMOP(ud2)() // Opcode 0x0f 0b
// RSM: leaves System Management Mode by reloading the CPU state from the SMRAM
// save area located at smbase + 0xfe00.
// Restores descriptor caches, selectors, debug/control registers, general
// registers, EIP and EFLAGS, recomputes CPL from the SS access rights, refreshes
// the valid/d fields of every segment register, then clears the smm flag and
// unmasks NMI. Latched SMI/NMI conditions are checked at the end.
// Fixes relative to the previous revision:
//  - EFLAGS is read from the SMRAM_EFLAGS slot (it was read from SMRAM_EAX,
//    which duplicated the EAX restore and left the flags wrong);
//  - the segment fix-up loop now includes GS (i <= GS), matching the GS
//    descriptor and selector that are reloaded above.
// NOTE(review): some lines of this function (braces, the SMM-mode test, the
// bodies of the latched SMI/NMI branches) are not visible in this excerpt.
57 void I386_OPS_BASE::PENTIUMOP(rsm)()
59 UINT32 smram_state = cpustate->smbase + 0xfe00;
62 logerror("i386: Invalid RSM outside SMM at %08X\n", cpustate->pc - 1);
67 // load state, no sanity checks anywhere
68 cpustate->smbase = READ32(smram_state+SMRAM_SMBASE);
69 cpustate->cr[4] = READ32(smram_state+SMRAM_IP5_CR4);
70 cpustate->sreg[ES].limit = READ32(smram_state+SMRAM_IP5_ESLIM);
71 cpustate->sreg[ES].base = READ32(smram_state+SMRAM_IP5_ESBASE);
72 cpustate->sreg[ES].flags = READ32(smram_state+SMRAM_IP5_ESACC);
73 cpustate->sreg[CS].limit = READ32(smram_state+SMRAM_IP5_CSLIM);
74 cpustate->sreg[CS].base = READ32(smram_state+SMRAM_IP5_CSBASE);
75 cpustate->sreg[CS].flags = READ32(smram_state+SMRAM_IP5_CSACC);
76 cpustate->sreg[SS].limit = READ32(smram_state+SMRAM_IP5_SSLIM);
77 cpustate->sreg[SS].base = READ32(smram_state+SMRAM_IP5_SSBASE);
78 cpustate->sreg[SS].flags = READ32(smram_state+SMRAM_IP5_SSACC);
79 cpustate->sreg[DS].limit = READ32(smram_state+SMRAM_IP5_DSLIM);
80 cpustate->sreg[DS].base = READ32(smram_state+SMRAM_IP5_DSBASE);
81 cpustate->sreg[DS].flags = READ32(smram_state+SMRAM_IP5_DSACC);
82 cpustate->sreg[FS].limit = READ32(smram_state+SMRAM_IP5_FSLIM);
83 cpustate->sreg[FS].base = READ32(smram_state+SMRAM_IP5_FSBASE);
84 cpustate->sreg[FS].flags = READ32(smram_state+SMRAM_IP5_FSACC);
85 cpustate->sreg[GS].limit = READ32(smram_state+SMRAM_IP5_GSLIM);
86 cpustate->sreg[GS].base = READ32(smram_state+SMRAM_IP5_GSBASE);
87 cpustate->sreg[GS].flags = READ32(smram_state+SMRAM_IP5_GSACC);
88 cpustate->ldtr.flags = READ32(smram_state+SMRAM_IP5_LDTACC);
89 cpustate->ldtr.limit = READ32(smram_state+SMRAM_IP5_LDTLIM);
90 cpustate->ldtr.base = READ32(smram_state+SMRAM_IP5_LDTBASE);
91 cpustate->gdtr.limit = READ32(smram_state+SMRAM_IP5_GDTLIM);
92 cpustate->gdtr.base = READ32(smram_state+SMRAM_IP5_GDTBASE);
93 cpustate->idtr.limit = READ32(smram_state+SMRAM_IP5_IDTLIM);
94 cpustate->idtr.base = READ32(smram_state+SMRAM_IP5_IDTBASE);
95 cpustate->task.limit = READ32(smram_state+SMRAM_IP5_TRLIM);
96 cpustate->task.base = READ32(smram_state+SMRAM_IP5_TRBASE);
97 cpustate->task.flags = READ32(smram_state+SMRAM_IP5_TRACC);
99 cpustate->sreg[ES].selector = READ32(smram_state+SMRAM_ES);
100 cpustate->sreg[CS].selector = READ32(smram_state+SMRAM_CS);
101 cpustate->sreg[SS].selector = READ32(smram_state+SMRAM_SS);
102 cpustate->sreg[DS].selector = READ32(smram_state+SMRAM_DS);
103 cpustate->sreg[FS].selector = READ32(smram_state+SMRAM_FS);
104 cpustate->sreg[GS].selector = READ32(smram_state+SMRAM_GS);
105 cpustate->ldtr.segment = READ32(smram_state+SMRAM_LDTR);
106 cpustate->task.segment = READ32(smram_state+SMRAM_TR);
108 cpustate->dr[7] = READ32(smram_state+SMRAM_DR7);
109 cpustate->dr[6] = READ32(smram_state+SMRAM_DR6);
110 REG32(EAX) = READ32(smram_state+SMRAM_EAX);
111 REG32(ECX) = READ32(smram_state+SMRAM_ECX);
112 REG32(EDX) = READ32(smram_state+SMRAM_EDX);
113 REG32(EBX) = READ32(smram_state+SMRAM_EBX);
114 REG32(ESP) = READ32(smram_state+SMRAM_ESP);
115 REG32(EBP) = READ32(smram_state+SMRAM_EBP);
116 REG32(ESI) = READ32(smram_state+SMRAM_ESI);
117 REG32(EDI) = READ32(smram_state+SMRAM_EDI);
118 cpustate->eip = READ32(smram_state+SMRAM_EIP);
// EFLAGS has its own slot in the save area; do not reuse the EAX slot.
119 cpustate->eflags = READ32(smram_state+SMRAM_EFLAGS);
120 cpustate->cr[3] = READ32(smram_state+SMRAM_CR3);
121 cpustate->cr[0] = READ32(smram_state+SMRAM_CR0);
123 cpustate->CPL = (cpustate->sreg[SS].flags >> 13) & 3; // cpl == dpl of ss
// Walk every segment register up to and including GS.
125 for(int i = 0; i <= GS; i++)
127 if(PROTECTED_MODE && !V8086_MODE)
129 cpustate->sreg[i].valid = cpustate->sreg[i].selector ? true : false;
130 cpustate->sreg[i].d = (cpustate->sreg[i].flags & 0x4000) ? 1 : 0;
133 cpustate->sreg[i].valid = true;
136 // if(!cpustate->smiact.isnull())
137 // cpustate->smiact(false);
138 cpustate->smm = false;
140 CHANGE_PC(cpustate->eip);
141 cpustate->nmi_masked = false;
142 if(cpustate->smi_latched)
147 if(cpustate->nmi_latched)
149 cpustate->nmi_latched = false;
// PREFETCH m8: decodes the ModRM operand and computes its effective address.
// No memory access is visible here; the address only feeds the cycle count.
// NOTE(review): some lines of this function are not visible in this excerpt.
154 void I386_OPS_BASE::PENTIUMOP(prefetch_m8)() // Opcode 0x0f 18
156 UINT8 modrm = FETCH();
157 UINT32 ea = GetEA(modrm,0);
158 CYCLES(1+(ea & 1)); // TODO: correct cycle count
// CMOVO r16, r/m16: 16-bit conditional move, performed when OF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
161 void I386_OPS_BASE::PENTIUMOP(cmovo_r16_rm16)() // Opcode 0x0f 40
164 UINT8 modrm = FETCH();
168 if (cpustate->OF == 1)
170 src = LOAD_RM16(modrm);
171 STORE_REG16(modrm, src);
173 CYCLES(1); // TODO: correct cycle count
177 UINT32 ea = GetEA(modrm,0);
178 if (cpustate->OF == 1)
181 STORE_REG16(modrm, src);
183 CYCLES(1); // TODO: correct cycle count
// CMOVO r32, r/m32: 32-bit conditional move, performed when OF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
187 void I386_OPS_BASE::PENTIUMOP(cmovo_r32_rm32)() // Opcode 0x0f 40
190 UINT8 modrm = FETCH();
194 if (cpustate->OF == 1)
196 src = LOAD_RM32(modrm);
197 STORE_REG32(modrm, src);
199 CYCLES(1); // TODO: correct cycle count
203 UINT32 ea = GetEA(modrm,0);
204 if (cpustate->OF == 1)
207 STORE_REG32(modrm, src);
209 CYCLES(1); // TODO: correct cycle count
// CMOVNO r16, r/m16: 16-bit conditional move, performed when OF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
213 void I386_OPS_BASE::PENTIUMOP(cmovno_r16_rm16)() // Opcode 0x0f 41
216 UINT8 modrm = FETCH();
220 if (cpustate->OF == 0)
222 src = LOAD_RM16(modrm);
223 STORE_REG16(modrm, src);
225 CYCLES(1); // TODO: correct cycle count
229 UINT32 ea = GetEA(modrm,0);
230 if (cpustate->OF == 0)
233 STORE_REG16(modrm, src);
235 CYCLES(1); // TODO: correct cycle count
// CMOVNO r32, r/m32: 32-bit conditional move, performed when OF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
239 void I386_OPS_BASE::PENTIUMOP(cmovno_r32_rm32)() // Opcode 0x0f 41
242 UINT8 modrm = FETCH();
246 if (cpustate->OF == 0)
248 src = LOAD_RM32(modrm);
249 STORE_REG32(modrm, src);
251 CYCLES(1); // TODO: correct cycle count
255 UINT32 ea = GetEA(modrm,0);
256 if (cpustate->OF == 0)
259 STORE_REG32(modrm, src);
261 CYCLES(1); // TODO: correct cycle count
// CMOVB r16, r/m16: 16-bit conditional move, performed when CF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
265 void I386_OPS_BASE::PENTIUMOP(cmovb_r16_rm16)() // Opcode 0x0f 42
268 UINT8 modrm = FETCH();
272 if (cpustate->CF == 1)
274 src = LOAD_RM16(modrm);
275 STORE_REG16(modrm, src);
277 CYCLES(1); // TODO: correct cycle count
281 UINT32 ea = GetEA(modrm,0);
282 if (cpustate->CF == 1)
285 STORE_REG16(modrm, src);
287 CYCLES(1); // TODO: correct cycle count
// CMOVB r32, r/m32: 32-bit conditional move, performed when CF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
291 void I386_OPS_BASE::PENTIUMOP(cmovb_r32_rm32)() // Opcode 0x0f 42
294 UINT8 modrm = FETCH();
298 if (cpustate->CF == 1)
300 src = LOAD_RM32(modrm);
301 STORE_REG32(modrm, src);
303 CYCLES(1); // TODO: correct cycle count
307 UINT32 ea = GetEA(modrm,0);
308 if (cpustate->CF == 1)
311 STORE_REG32(modrm, src);
313 CYCLES(1); // TODO: correct cycle count
// CMOVAE r16, r/m16: 16-bit conditional move, performed when CF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
317 void I386_OPS_BASE::PENTIUMOP(cmovae_r16_rm16)() // Opcode 0x0f 43
320 UINT8 modrm = FETCH();
324 if (cpustate->CF == 0)
326 src = LOAD_RM16(modrm);
327 STORE_REG16(modrm, src);
329 CYCLES(1); // TODO: correct cycle count
333 UINT32 ea = GetEA(modrm,0);
334 if (cpustate->CF == 0)
337 STORE_REG16(modrm, src);
339 CYCLES(1); // TODO: correct cycle count
// CMOVAE r32, r/m32: 32-bit conditional move, performed when CF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
343 void I386_OPS_BASE::PENTIUMOP(cmovae_r32_rm32)() // Opcode 0x0f 43
346 UINT8 modrm = FETCH();
350 if (cpustate->CF == 0)
352 src = LOAD_RM32(modrm);
353 STORE_REG32(modrm, src);
355 CYCLES(1); // TODO: correct cycle count
359 UINT32 ea = GetEA(modrm,0);
360 if (cpustate->CF == 0)
363 STORE_REG32(modrm, src);
365 CYCLES(1); // TODO: correct cycle count
// CMOVE r16, r/m16: 16-bit conditional move, performed when ZF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
369 void I386_OPS_BASE::PENTIUMOP(cmove_r16_rm16)() // Opcode 0x0f 44
372 UINT8 modrm = FETCH();
376 if (cpustate->ZF == 1)
378 src = LOAD_RM16(modrm);
379 STORE_REG16(modrm, src);
381 CYCLES(1); // TODO: correct cycle count
385 UINT32 ea = GetEA(modrm,0);
386 if (cpustate->ZF == 1)
389 STORE_REG16(modrm, src);
391 CYCLES(1); // TODO: correct cycle count
// CMOVE r32, r/m32: 32-bit conditional move, performed when ZF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
395 void I386_OPS_BASE::PENTIUMOP(cmove_r32_rm32)() // Opcode 0x0f 44
398 UINT8 modrm = FETCH();
402 if (cpustate->ZF == 1)
404 src = LOAD_RM32(modrm);
405 STORE_REG32(modrm, src);
407 CYCLES(1); // TODO: correct cycle count
411 UINT32 ea = GetEA(modrm,0);
412 if (cpustate->ZF == 1)
415 STORE_REG32(modrm, src);
417 CYCLES(1); // TODO: correct cycle count
// CMOVNE r16, r/m16: 16-bit conditional move, performed when ZF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
421 void I386_OPS_BASE::PENTIUMOP(cmovne_r16_rm16)() // Opcode 0x0f 45
424 UINT8 modrm = FETCH();
428 if (cpustate->ZF == 0)
430 src = LOAD_RM16(modrm);
431 STORE_REG16(modrm, src);
433 CYCLES(1); // TODO: correct cycle count
437 UINT32 ea = GetEA(modrm,0);
438 if (cpustate->ZF == 0)
441 STORE_REG16(modrm, src);
443 CYCLES(1); // TODO: correct cycle count
// CMOVNE r32, r/m32: 32-bit conditional move, performed when ZF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
447 void I386_OPS_BASE::PENTIUMOP(cmovne_r32_rm32)() // Opcode 0x0f 45
450 UINT8 modrm = FETCH();
454 if (cpustate->ZF == 0)
456 src = LOAD_RM32(modrm);
457 STORE_REG32(modrm, src);
459 CYCLES(1); // TODO: correct cycle count
463 UINT32 ea = GetEA(modrm,0);
464 if (cpustate->ZF == 0)
467 STORE_REG32(modrm, src);
469 CYCLES(1); // TODO: correct cycle count
// CMOVBE r16, r/m16: 16-bit conditional move, performed when CF == 1 or ZF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
473 void I386_OPS_BASE::PENTIUMOP(cmovbe_r16_rm16)() // Opcode 0x0f 46
476 UINT8 modrm = FETCH();
480 if ((cpustate->CF == 1) || (cpustate->ZF == 1))
482 src = LOAD_RM16(modrm);
483 STORE_REG16(modrm, src);
485 CYCLES(1); // TODO: correct cycle count
489 UINT32 ea = GetEA(modrm,0);
490 if ((cpustate->CF == 1) || (cpustate->ZF == 1))
493 STORE_REG16(modrm, src);
495 CYCLES(1); // TODO: correct cycle count
// CMOVBE r32, r/m32: 32-bit conditional move, performed when CF == 1 or ZF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
499 void I386_OPS_BASE::PENTIUMOP(cmovbe_r32_rm32)() // Opcode 0x0f 46
502 UINT8 modrm = FETCH();
506 if ((cpustate->CF == 1) || (cpustate->ZF == 1))
508 src = LOAD_RM32(modrm);
509 STORE_REG32(modrm, src);
511 CYCLES(1); // TODO: correct cycle count
515 UINT32 ea = GetEA(modrm,0);
516 if ((cpustate->CF == 1) || (cpustate->ZF == 1))
519 STORE_REG32(modrm, src);
521 CYCLES(1); // TODO: correct cycle count
// CMOVA r16, r/m16: 16-bit conditional move, performed when CF == 0 and ZF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
525 void I386_OPS_BASE::PENTIUMOP(cmova_r16_rm16)() // Opcode 0x0f 47
528 UINT8 modrm = FETCH();
532 if ((cpustate->CF == 0) && (cpustate->ZF == 0))
534 src = LOAD_RM16(modrm);
535 STORE_REG16(modrm, src);
537 CYCLES(1); // TODO: correct cycle count
541 UINT32 ea = GetEA(modrm,0);
542 if ((cpustate->CF == 0) && (cpustate->ZF == 0))
545 STORE_REG16(modrm, src);
547 CYCLES(1); // TODO: correct cycle count
// CMOVA r32, r/m32: 32-bit conditional move, performed when CF == 0 and ZF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
551 void I386_OPS_BASE::PENTIUMOP(cmova_r32_rm32)() // Opcode 0x0f 47
554 UINT8 modrm = FETCH();
558 if ((cpustate->CF == 0) && (cpustate->ZF == 0))
560 src = LOAD_RM32(modrm);
561 STORE_REG32(modrm, src);
563 CYCLES(1); // TODO: correct cycle count
567 UINT32 ea = GetEA(modrm,0);
568 if ((cpustate->CF == 0) && (cpustate->ZF == 0))
571 STORE_REG32(modrm, src);
573 CYCLES(1); // TODO: correct cycle count
// CMOVS r16, r/m16: 16-bit conditional move, performed when SF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
577 void I386_OPS_BASE::PENTIUMOP(cmovs_r16_rm16)() // Opcode 0x0f 48
580 UINT8 modrm = FETCH();
584 if (cpustate->SF == 1)
586 src = LOAD_RM16(modrm);
587 STORE_REG16(modrm, src);
589 CYCLES(1); // TODO: correct cycle count
593 UINT32 ea = GetEA(modrm,0);
594 if (cpustate->SF == 1)
597 STORE_REG16(modrm, src);
599 CYCLES(1); // TODO: correct cycle count
// CMOVS r32, r/m32: 32-bit conditional move, performed when SF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
603 void I386_OPS_BASE::PENTIUMOP(cmovs_r32_rm32)() // Opcode 0x0f 48
606 UINT8 modrm = FETCH();
610 if (cpustate->SF == 1)
612 src = LOAD_RM32(modrm);
613 STORE_REG32(modrm, src);
615 CYCLES(1); // TODO: correct cycle count
619 UINT32 ea = GetEA(modrm,0);
620 if (cpustate->SF == 1)
623 STORE_REG32(modrm, src);
625 CYCLES(1); // TODO: correct cycle count
// CMOVNS r16, r/m16: 16-bit conditional move, performed when SF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
629 void I386_OPS_BASE::PENTIUMOP(cmovns_r16_rm16)() // Opcode 0x0f 49
632 UINT8 modrm = FETCH();
636 if (cpustate->SF == 0)
638 src = LOAD_RM16(modrm);
639 STORE_REG16(modrm, src);
641 CYCLES(1); // TODO: correct cycle count
645 UINT32 ea = GetEA(modrm,0);
646 if (cpustate->SF == 0)
649 STORE_REG16(modrm, src);
651 CYCLES(1); // TODO: correct cycle count
// CMOVNS r32, r/m32: 32-bit conditional move, performed when SF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
655 void I386_OPS_BASE::PENTIUMOP(cmovns_r32_rm32)() // Opcode 0x0f 49
658 UINT8 modrm = FETCH();
662 if (cpustate->SF == 0)
664 src = LOAD_RM32(modrm);
665 STORE_REG32(modrm, src);
667 CYCLES(1); // TODO: correct cycle count
671 UINT32 ea = GetEA(modrm,0);
672 if (cpustate->SF == 0)
675 STORE_REG32(modrm, src);
677 CYCLES(1); // TODO: correct cycle count
// CMOVP r16, r/m16: 16-bit conditional move, performed when PF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
681 void I386_OPS_BASE::PENTIUMOP(cmovp_r16_rm16)() // Opcode 0x0f 4a
684 UINT8 modrm = FETCH();
688 if (cpustate->PF == 1)
690 src = LOAD_RM16(modrm);
691 STORE_REG16(modrm, src);
693 CYCLES(1); // TODO: correct cycle count
697 UINT32 ea = GetEA(modrm,0);
698 if (cpustate->PF == 1)
701 STORE_REG16(modrm, src);
703 CYCLES(1); // TODO: correct cycle count
// CMOVP r32, r/m32: 32-bit conditional move, performed when PF == 1.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
707 void I386_OPS_BASE::PENTIUMOP(cmovp_r32_rm32)() // Opcode 0x0f 4a
710 UINT8 modrm = FETCH();
714 if (cpustate->PF == 1)
716 src = LOAD_RM32(modrm);
717 STORE_REG32(modrm, src);
719 CYCLES(1); // TODO: correct cycle count
723 UINT32 ea = GetEA(modrm,0);
724 if (cpustate->PF == 1)
727 STORE_REG32(modrm, src);
729 CYCLES(1); // TODO: correct cycle count
// CMOVNP r16, r/m16: 16-bit conditional move, performed when PF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
733 void I386_OPS_BASE::PENTIUMOP(cmovnp_r16_rm16)() // Opcode 0x0f 4b
736 UINT8 modrm = FETCH();
740 if (cpustate->PF == 0)
742 src = LOAD_RM16(modrm);
743 STORE_REG16(modrm, src);
745 CYCLES(1); // TODO: correct cycle count
749 UINT32 ea = GetEA(modrm,0);
750 if (cpustate->PF == 0)
753 STORE_REG16(modrm, src);
755 CYCLES(1); // TODO: correct cycle count
// CMOVNP r32, r/m32: 32-bit conditional move, performed when PF == 0.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
759 void I386_OPS_BASE::PENTIUMOP(cmovnp_r32_rm32)() // Opcode 0x0f 4b
762 UINT8 modrm = FETCH();
766 if (cpustate->PF == 0)
768 src = LOAD_RM32(modrm);
769 STORE_REG32(modrm, src);
771 CYCLES(1); // TODO: correct cycle count
775 UINT32 ea = GetEA(modrm,0);
776 if (cpustate->PF == 0)
779 STORE_REG32(modrm, src);
781 CYCLES(1); // TODO: correct cycle count
// CMOVL r16, r/m16: 16-bit conditional move, performed when SF != OF.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
785 void I386_OPS_BASE::PENTIUMOP(cmovl_r16_rm16)() // Opcode 0x0f 4c
788 UINT8 modrm = FETCH();
792 if (cpustate->SF != cpustate->OF)
794 src = LOAD_RM16(modrm);
795 STORE_REG16(modrm, src);
797 CYCLES(1); // TODO: correct cycle count
801 UINT32 ea = GetEA(modrm,0);
802 if (cpustate->SF != cpustate->OF)
805 STORE_REG16(modrm, src);
807 CYCLES(1); // TODO: correct cycle count
// CMOVL r32, r/m32: 32-bit conditional move, performed when SF != OF.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
811 void I386_OPS_BASE::PENTIUMOP(cmovl_r32_rm32)() // Opcode 0x0f 4c
814 UINT8 modrm = FETCH();
818 if (cpustate->SF != cpustate->OF)
820 src = LOAD_RM32(modrm);
821 STORE_REG32(modrm, src);
823 CYCLES(1); // TODO: correct cycle count
827 UINT32 ea = GetEA(modrm,0);
828 if (cpustate->SF != cpustate->OF)
831 STORE_REG32(modrm, src);
833 CYCLES(1); // TODO: correct cycle count
// CMOVGE r16, r/m16: 16-bit conditional move, performed when SF == OF.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
837 void I386_OPS_BASE::PENTIUMOP(cmovge_r16_rm16)() // Opcode 0x0f 4d
840 UINT8 modrm = FETCH();
844 if (cpustate->SF == cpustate->OF)
846 src = LOAD_RM16(modrm);
847 STORE_REG16(modrm, src);
849 CYCLES(1); // TODO: correct cycle count
853 UINT32 ea = GetEA(modrm,0);
854 if (cpustate->SF == cpustate->OF)
857 STORE_REG16(modrm, src);
859 CYCLES(1); // TODO: correct cycle count
// CMOVGE r32, r/m32: 32-bit conditional move, performed when SF == OF.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
863 void I386_OPS_BASE::PENTIUMOP(cmovge_r32_rm32)() // Opcode 0x0f 4d
866 UINT8 modrm = FETCH();
870 if (cpustate->SF == cpustate->OF)
872 src = LOAD_RM32(modrm);
873 STORE_REG32(modrm, src);
875 CYCLES(1); // TODO: correct cycle count
879 UINT32 ea = GetEA(modrm,0);
880 if (cpustate->SF == cpustate->OF)
883 STORE_REG32(modrm, src);
885 CYCLES(1); // TODO: correct cycle count
// CMOVLE r16, r/m16: 16-bit conditional move, performed when ZF == 1 or SF != OF.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
889 void I386_OPS_BASE::PENTIUMOP(cmovle_r16_rm16)() // Opcode 0x0f 4e
892 UINT8 modrm = FETCH();
896 if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
898 src = LOAD_RM16(modrm);
899 STORE_REG16(modrm, src);
901 CYCLES(1); // TODO: correct cycle count
905 UINT32 ea = GetEA(modrm,0);
906 if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
909 STORE_REG16(modrm, src);
911 CYCLES(1); // TODO: correct cycle count
// CMOVLE r32, r/m32: 32-bit conditional move, performed when ZF == 1 or SF != OF.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
915 void I386_OPS_BASE::PENTIUMOP(cmovle_r32_rm32)() // Opcode 0x0f 4e
918 UINT8 modrm = FETCH();
922 if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
924 src = LOAD_RM32(modrm);
925 STORE_REG32(modrm, src);
927 CYCLES(1); // TODO: correct cycle count
931 UINT32 ea = GetEA(modrm,0);
932 if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
935 STORE_REG32(modrm, src);
937 CYCLES(1); // TODO: correct cycle count
// CMOVG r16, r/m16: 16-bit conditional move, performed when ZF == 0 and SF == OF.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
941 void I386_OPS_BASE::PENTIUMOP(cmovg_r16_rm16)() // Opcode 0x0f 4f
944 UINT8 modrm = FETCH();
948 if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
950 src = LOAD_RM16(modrm);
951 STORE_REG16(modrm, src);
953 CYCLES(1); // TODO: correct cycle count
957 UINT32 ea = GetEA(modrm,0);
958 if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
961 STORE_REG16(modrm, src);
963 CYCLES(1); // TODO: correct cycle count
// CMOVG r32, r/m32: 32-bit conditional move, performed when ZF == 0 and SF == OF.
// NOTE(review): some lines (braces, mode test, the load after GetEA) are not visible in this excerpt.
967 void I386_OPS_BASE::PENTIUMOP(cmovg_r32_rm32)() // Opcode 0x0f 4f
970 UINT8 modrm = FETCH();
974 if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
976 src = LOAD_RM32(modrm);
977 STORE_REG32(modrm, src);
979 CYCLES(1); // TODO: correct cycle count
983 UINT32 ea = GetEA(modrm,0);
984 if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
987 STORE_REG32(modrm, src);
989 CYCLES(1); // TODO: correct cycle count
// MOVNTI m16, r16: the register form (modrm >= 0xc0) only consumes cycles; the
// memory form writes the 16-bit value obtained from LOAD_RM16 to the effective
// address with a plain WRITE16, since no cache is modelled.
// NOTE(review): some lines of this function (braces, else) are not visible in this excerpt.
993 void I386_OPS_BASE::PENTIUMOP(movnti_m16_r16)() // Opcode 0f c3
995 UINT8 modrm = FETCH();
996 if( modrm >= 0xc0 ) {
997 // unsupported by cpu
998 CYCLES(1); // TODO: correct cycle count
1000 // since cache is not implemented
1001 UINT32 ea = GetEA(modrm, 0);
1002 WRITE16(ea,LOAD_RM16(modrm));
1003 CYCLES(1); // TODO: correct cycle count
// MOVNTI m32, r32: the register form (modrm >= 0xc0) only consumes cycles; the
// memory form writes the 32-bit value obtained from LOAD_RM32 to the effective
// address with a plain WRITE32, since no cache is modelled.
// NOTE(review): some lines of this function (braces, else) are not visible in this excerpt.
1007 void I386_OPS_BASE::PENTIUMOP(movnti_m32_r32)() // Opcode 0f c3
1009 UINT8 modrm = FETCH();
1010 if( modrm >= 0xc0 ) {
1011 // unsupported by cpu
1012 CYCLES(1); // TODO: correct cycle count
1014 // since cache is not implemented
1015 UINT32 ea = GetEA(modrm, 0);
1016 WRITE32(ea,LOAD_RM32(modrm));
1017 CYCLES(1); // TODO: correct cycle count
// Handler for the Cyrix-specific opcodes 0x0f 0x3a-0x3d listed below.
// NOTE(review): the four lines that follow read as an opcode table from a comment
// whose delimiters are not visible in this excerpt; no executable statement of
// this function is shown.
1021 void I386_OPS_BASE::I386OP(cyrix_special)() // Opcode 0x0f 3a-3d
1024 0f 3a BB0_RESET (set BB0 pointer = base)
1025 0f 3b BB1_RESET (set BB1 pointer = base)
1026 0f 3c CPU_WRITE (write special CPU memory-mapped register, [ebx] = eax)
1027 0f 3d CPU_READ (read special CPU memory-mapped register, eax, = [ebx])
// Placeholder for the unemulated Cyrix opcode 0x0f 0x74: logs that it was called.
// NOTE(review): any remaining lines of this function are not visible in this excerpt.
1033 void I386_OPS_BASE::I386OP(cyrix_unknown)() // Opcode 0x0f 74
1035 logerror("Unemulated 0x0f 0x74 opcode called\n");
// CMPXCHG8B m64: compares the 64-bit memory operand with EDX:EAX.
// On a match, ECX:EBX is written to memory; otherwise the memory value is
// loaded into EDX:EAX. A register operand (modm >= 0xc0) is reported as invalid.
// NOTE(review): some lines of this function (braces, else, any flag updates) are
// not visible in this excerpt.
1040 void I386_OPS_BASE::PENTIUMOP(cmpxchg8b_m64)() // Opcode 0x0f c7
1042 UINT8 modm = FETCH();
1043 if( modm >= 0xc0 ) {
1044 report_invalid_modrm( "cmpxchg8b_m64", modm);
1046 UINT32 ea = GetEA(modm, 0);
1047 UINT64 value = READ64( ea);
// Build the 64-bit comparand and replacement from the register pairs.
1048 UINT64 edx_eax = (((UINT64) REG32(EDX)) << 32) | REG32(EAX);
1049 UINT64 ecx_ebx = (((UINT64) REG32(ECX)) << 32) | REG32(EBX);
1051 if( value == edx_eax ) {
1052 WRITE64( ea, ecx_ebx);
1054 CYCLES(CYCLES_CMPXCHG_REG_MEM_T);
1056 REG32(EDX) = (UINT32) (value >> 32);
1057 REG32(EAX) = (UINT32) (value >> 0);
1059 CYCLES(CYCLES_CMPXCHG_REG_MEM_F);
// MOVNTQ m64, mm: the register form (modrm >= 0xc0) only consumes a cycle; the
// memory form stores the MMX register selected by the ModRM reg field to the
// effective address with WRITEMMX, since no cache is modelled.
// NOTE(review): some lines of this function (braces, else) are not visible in this excerpt.
1064 void I386_OPS_BASE::PENTIUMOP(movntq_m64_r64)() // Opcode 0f e7
1066 //MMXPROLOG(); // TODO: check if needed
1067 UINT8 modrm = FETCH();
1068 if( modrm >= 0xc0 ) {
1069 CYCLES(1); // unsupported
1071 // since cache is not implemented
1072 UINT32 ea = GetEA(modrm, 0);
1073 WRITEMMX( ea, MMX((modrm >> 3) & 0x7));
1074 CYCLES(1); // TODO: correct cycle count
// MASKMOVQ mm1, mm2: byte-masked store of an MMX register to the address
// produced by GetEA(7, 0). For each of the 8 bytes, the byte from MMX(s) is
// written only when the corresponding mask byte in MMX(m) selects it.
// Fix: the selection is made by the most significant bit of each mask byte
// (0x80). The previous test `& 127` looked at the low seven bits, so a mask
// byte of 0x80 was ignored and a byte such as 0x01 triggered a write.
// NOTE(review): the lines that declare and assign `s`, `m` and `n` are not
// visible in this excerpt.
1078 void I386_OPS_BASE::PENTIUMOP(maskmovq_r64_r64)() // Opcode 0f f7
1081 UINT8 modm = FETCH();
1082 UINT32 ea = GetEA(7, 0); // ds:di/edi/rdi register
1086 for (n=0;n <= 7;n++)
1087 if (MMX(m).b[n] & 0x80)
1088 WRITE8(ea+n, MMX(s).b[n]);
// MASKMOVDQU xmm1, xmm2: byte-masked store of an XMM register to the address
// produced by GetEA(7, 0). For each of the 16 bytes, the byte from XMM(s) is
// written only when the corresponding mask byte in XMM(m) selects it.
// Fix: the selection is made by the most significant bit of each mask byte
// (0x80). The previous test `& 127` looked at the low seven bits, so a mask
// byte of 0x80 was ignored and a byte such as 0x01 triggered a write.
// NOTE(review): the lines that declare and assign `s`, `m` and `n` are not
// visible in this excerpt.
1091 void I386_OPS_BASE::SSEOP(maskmovdqu_r128_r128)() // Opcode 66 0f f7
1094 UINT8 modm = FETCH();
1095 UINT32 ea = GetEA(7, 0); // ds:di/edi/rdi register
1098 for (n=0;n < 16;n++)
1099 if (XMM(m).b[n] & 0x80)
1100 WRITE8(ea+n, XMM(s).b[n]);
// POPCNT r16, r/m16: accumulates the low bit of `src` over 16 iterations and
// stores the resulting count in the destination register.
// NOTE(review): the memory read, the initialisation of `count` and the per-iteration
// shift of `src` are presumably on lines not visible in this excerpt — confirm.
1103 void I386_OPS_BASE::PENTIUMOP(popcnt_r16_rm16)() // Opcode f3 0f b8
1106 UINT8 modrm = FETCH();
1109 if( modrm >= 0xc0 ) {
1110 src = LOAD_RM16(modrm);
1112 UINT32 ea = GetEA(modrm,0);
1116 for (n=0;n < 16;n++) {
1117 count=count+(src & 1);
1120 STORE_REG16(modrm, count);
1121 CYCLES(1); // TODO: correct cycle count
// POPCNT r32, r/m32: accumulates the low bit of `src` over 32 iterations and
// stores the resulting count in the destination register.
// NOTE(review): the memory read, the initialisation of `count` and the per-iteration
// shift of `src` are presumably on lines not visible in this excerpt — confirm.
1124 void I386_OPS_BASE::PENTIUMOP(popcnt_r32_rm32)() // Opcode f3 0f b8
1127 UINT8 modrm = FETCH();
1130 if( modrm >= 0xc0 ) {
1131 src = LOAD_RM32(modrm);
1133 UINT32 ea = GetEA(modrm,0);
1137 for (n=0;n < 32;n++) {
1138 count=count+(src & 1);
1141 STORE_REG32(modrm, count);
1142 CYCLES(1); // TODO: correct cycle count
// TZCNT r16, r/m16: not implemented as such; delegates to the 16-bit BSF handler.
1145 void I386_OPS_BASE::PENTIUMOP(tzcnt_r16_rm16)()
1147 // for CPUs that don't support TZCNT, fall back to BSF
1148 i386_bsf_r16_rm16();
1149 // TODO: actually implement TZCNT
// TZCNT r32, r/m32: not implemented as such; delegates to the 32-bit BSF handler.
1152 void I386_OPS_BASE::PENTIUMOP(tzcnt_r32_rm32)()
1154 // for CPUs that don't support TZCNT, fall back to BSF
1155 i386_bsf_r32_rm32();
1156 // TODO: actually implement TZCNT
// MMX group 0f 71: word shifts of an MMX register (modm & 7) by an immediate.
// The ModRM reg field selects between three visible operations: right shift of
// the unsigned words w[], right shift of the signed words s[], and left shift
// of the unsigned words w[]. Other selections are reported as invalid.
// NOTE(review): the case labels are not visible in this excerpt, so the mapping
// of reg-field values to these operations cannot be confirmed from here.
// NOTE(review): imm8 is used as the shift count without a visible range check;
// verify how counts of 16 or more are meant to behave.
1160 void I386_OPS_BASE::MMXOP(group_0f71)() // Opcode 0f 71
1162 UINT8 modm = FETCH();
1163 UINT8 imm8 = FETCH();
1165 if( modm >= 0xc0 ) {
1166 switch ( (modm & 0x38) >> 3 )
1169 MMX(modm & 7).w[0]=MMX(modm & 7).w[0] >> imm8;
1170 MMX(modm & 7).w[1]=MMX(modm & 7).w[1] >> imm8;
1171 MMX(modm & 7).w[2]=MMX(modm & 7).w[2] >> imm8;
1172 MMX(modm & 7).w[3]=MMX(modm & 7).w[3] >> imm8;
1175 MMX(modm & 7).s[0]=MMX(modm & 7).s[0] >> imm8;
1176 MMX(modm & 7).s[1]=MMX(modm & 7).s[1] >> imm8;
1177 MMX(modm & 7).s[2]=MMX(modm & 7).s[2] >> imm8;
1178 MMX(modm & 7).s[3]=MMX(modm & 7).s[3] >> imm8;
1181 MMX(modm & 7).w[0]=MMX(modm & 7).w[0] << imm8;
1182 MMX(modm & 7).w[1]=MMX(modm & 7).w[1] << imm8;
1183 MMX(modm & 7).w[2]=MMX(modm & 7).w[2] << imm8;
1184 MMX(modm & 7).w[3]=MMX(modm & 7).w[3] << imm8;
1187 report_invalid_modrm( "mmx_group0f71", modm);
// SSE group 66 0f 71: word shifts of an XMM register (modm & 7) by an immediate,
// applied to all 8 words: right shift of w[], right shift of signed s[], and
// left shift of w[]. Other selections are reported as invalid.
// NOTE(review): the case labels are not visible in this excerpt, and imm8 is
// used as the shift count without a visible range check.
// NOTE(review): the class qualifier appears twice in the signature; confirm it is intentional.
1192 void I386_OPS_BASE::I386_OPS_BASE::SSEOP(group_660f71)() // Opcode 66 0f 71
1194 UINT8 modm = FETCH();
1195 UINT8 imm8 = FETCH();
1197 switch ((modm & 0x38) >> 3)
1200 for (int n = 0; n < 8;n++)
1201 XMM(modm & 7).w[n] = XMM(modm & 7).w[n] >> imm8;
1204 for (int n = 0; n < 8;n++)
1205 XMM(modm & 7).s[n] = XMM(modm & 7).s[n] >> imm8;
1208 for (int n = 0; n < 8;n++)
1209 XMM(modm & 7).w[n] = XMM(modm & 7).w[n] << imm8;
1212 report_invalid_modrm( "mmx_group660f71", modm);
// MMX group 0f 72: dword shifts of an MMX register (modm & 7) by an immediate:
// right shift of the unsigned dwords d[], right shift of the signed dwords i[],
// and left shift of d[]. Other selections are reported as invalid.
// NOTE(review): the case labels are not visible in this excerpt, and imm8 is
// used as the shift count without a visible range check.
1217 void I386_OPS_BASE::MMXOP(group_0f72)() // Opcode 0f 72
1219 UINT8 modm = FETCH();
1220 UINT8 imm8 = FETCH();
1222 if( modm >= 0xc0 ) {
1223 switch ( (modm & 0x38) >> 3 )
1226 MMX(modm & 7).d[0]=MMX(modm & 7).d[0] >> imm8;
1227 MMX(modm & 7).d[1]=MMX(modm & 7).d[1] >> imm8;
1230 MMX(modm & 7).i[0]=MMX(modm & 7).i[0] >> imm8;
1231 MMX(modm & 7).i[1]=MMX(modm & 7).i[1] >> imm8;
1234 MMX(modm & 7).d[0]=MMX(modm & 7).d[0] << imm8;
1235 MMX(modm & 7).d[1]=MMX(modm & 7).d[1] << imm8;
1238 report_invalid_modrm( "mmx_group0f72", modm);
// SSE group 66 0f 72: dword shifts of an XMM register (modm & 7) by an immediate,
// applied to all 4 dwords: right shift of d[], right shift of signed i[], and
// left shift of d[]. Other selections are reported as invalid.
// NOTE(review): the case labels are not visible in this excerpt, and imm8 is
// used as the shift count without a visible range check.
1243 void I386_OPS_BASE::SSEOP(group_660f72)() // Opcode 66 0f 72
1245 UINT8 modm = FETCH();
1246 UINT8 imm8 = FETCH();
1248 switch ((modm & 0x38) >> 3)
1251 for (int n = 0; n < 4;n++)
1252 XMM(modm & 7).d[n] = XMM(modm & 7).d[n] >> imm8;
1255 for (int n = 0; n < 4;n++)
1256 XMM(modm & 7).i[n] = XMM(modm & 7).i[n] >> imm8;
1259 for (int n = 0; n < 4;n++)
1260 XMM(modm & 7).d[n] = XMM(modm & 7).d[n] << imm8;
1263 report_invalid_modrm( "mmx_group660f72", modm);
// MMX group 0f 73: 64-bit shifts of an MMX register (modm & 7) by an immediate.
// A count above 63 yields 0; otherwise the quadword is shifted right or left.
// Other selections are reported as invalid.
// NOTE(review): the case labels are not visible in this excerpt.
1268 void I386_OPS_BASE::MMXOP(group_0f73)() // Opcode 0f 73
1270 UINT8 modm = FETCH();
1271 UINT8 imm8 = FETCH();
1273 if( modm >= 0xc0 ) {
1274 switch ( (modm & 0x38) >> 3 )
1277 MMX(modm & 7).q = imm8 > 63 ? 0 : MMX(modm & 7).q >> imm8;
1280 MMX(modm & 7).q = imm8 > 63 ? 0 : MMX(modm & 7).q << imm8;
1283 report_invalid_modrm( "mmx_group0f73", modm);
// SSE group 66 0f 73: quadword shifts and whole-register byte shifts of an XMM
// register (modm & 7) by an immediate.
// Visible operations, in order: per-quadword right shift (count above 63 gives
// 0), a 128-bit right shift handled in three ranges (clear, shift across the
// qword boundary only, general case), per-quadword left shift, and the
// mirrored 128-bit left shift. Other selections are reported as invalid.
// Fix: in the general 128-bit right-shift case the high quadword must be the
// OLD HIGH quadword shifted right. The previous code assigned `t0 >> imm8`,
// where t0 holds the old LOW quadword, which copied low-half bits into the
// high half.
// NOTE(review): the case labels, the range tests and the statement that scales
// imm8 for the general cases are not visible in this excerpt.
1288 void I386_OPS_BASE::SSEOP(group_660f73)() // Opcode 66 0f 73
1291 UINT8 modm = FETCH();
1292 UINT8 imm8 = FETCH();
1294 switch ((modm & 0x38) >> 3)
1297 XMM(modm & 7).q[0] = imm8 > 63 ? 0 : XMM(modm & 7).q[0] >> imm8;
1298 XMM(modm & 7).q[1] = imm8 > 63 ? 0 : XMM(modm & 7).q[1] >> imm8;
1303 XMM(modm & 7).q[0] = 0;
1304 XMM(modm & 7).q[1] = 0;
1308 imm8 = (imm8 & 7) << 3;
1309 XMM(modm & 7).q[0] = XMM(modm & 7).q[1] >> imm8;
1310 XMM(modm & 7).q[1] = 0;
1314 t0 = XMM(modm & 7).q[0];
// Low half receives the bits shifted out of the high half; q[1] is still unmodified here.
1316 XMM(modm & 7).q[0] = (XMM(modm & 7).q[1] << (64 - imm8)) | (t0 >> imm8);
// High half is its own previous value shifted right.
1317 XMM(modm & 7).q[1] = XMM(modm & 7).q[1] >> imm8;
1321 XMM(modm & 7).q[0] = imm8 > 63 ? 0 : XMM(modm & 7).q[0] << imm8;
1322 XMM(modm & 7).q[1] = imm8 > 63 ? 0 : XMM(modm & 7).q[1] << imm8;
1327 XMM(modm & 7).q[0] = 0;
1328 XMM(modm & 7).q[1] = 0;
1332 imm8 = (imm8 & 7) << 3;
1333 XMM(modm & 7).q[1] = XMM(modm & 7).q[0] << imm8;
1334 XMM(modm & 7).q[0] = 0;
1339 XMM(modm & 7).q[1] = (XMM(modm & 7).q[0] >> (64 - imm8)) | (XMM(modm & 7).q[1] << imm8);
1340 XMM(modm & 7).q[0] = XMM(modm & 7).q[0] << imm8;
1344 report_invalid_modrm( "sse_group660f73", modm);
// PSRLW mm, mm/m64: right-shifts each of the four unsigned words of the
// destination MMX register (ModRM reg field) by a count taken from the source.
// The count is the source quadword converted to int.
// NOTE(review): no range check on `count` is visible; verify the behaviour for
// counts of 16 or more and for source values that do not fit in an int.
// NOTE(review): the memory read into `src` is not visible in this excerpt.
1349 void I386_OPS_BASE::MMXOP(psrlw_r64_rm64)() // Opcode 0f d1
1352 UINT8 modrm = FETCH();
1353 if( modrm >= 0xc0 ) {
1354 int count=(int)MMX(modrm & 7).q;
1355 MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] >> count;
1356 MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] >> count;
1357 MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] >> count;
1358 MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] >> count;
1361 UINT32 ea = GetEA(modrm, 0);
1363 int count=(int)src.q;
1364 MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] >> count;
1365 MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] >> count;
1366 MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] >> count;
1367 MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] >> count;
1369 CYCLES(1); // TODO: correct cycle count
// PSRLD mm, mm/m64: right-shifts both unsigned dwords of the destination MMX
// register (ModRM reg field) by a count taken from the source quadword.
// NOTE(review): no range check on `count` is visible; verify the behaviour for
// counts of 32 or more.
// NOTE(review): the memory read into `src` is not visible in this excerpt.
1372 void I386_OPS_BASE::MMXOP(psrld_r64_rm64)() // Opcode 0f d2
1375 UINT8 modrm = FETCH();
1376 if( modrm >= 0xc0 ) {
1377 int count=(int)MMX(modrm & 7).q;
1378 MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] >> count;
1379 MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] >> count;
1382 UINT32 ea = GetEA(modrm, 0);
1384 int count=(int)src.q;
1385 MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] >> count;
1386 MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] >> count;
1388 CYCLES(1); // TODO: correct cycle count
// PSRLQ mm, mm/m64: right-shifts the destination MMX quadword (ModRM reg field)
// by a count taken from the source quadword.
// NOTE(review): no range check on `count` is visible; verify the behaviour for
// counts of 64 or more.
// NOTE(review): the memory read into `src` is not visible in this excerpt.
1391 void I386_OPS_BASE::MMXOP(psrlq_r64_rm64)() // Opcode 0f d3
1394 UINT8 modrm = FETCH();
1395 if( modrm >= 0xc0 ) {
1396 int count=(int)MMX(modrm & 7).q;
1397 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q >> count;
1400 UINT32 ea = GetEA(modrm, 0);
1402 int count=(int)src.q;
1403 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q >> count;
1405 CYCLES(1); // TODO: correct cycle count
// PADDQ mm, mm/m64: adds the source quadword to the destination MMX register
// selected by the ModRM reg field.
// NOTE(review): the memory read into `src` is not visible in this excerpt.
1408 void I386_OPS_BASE::MMXOP(paddq_r64_rm64)() // Opcode 0f d4
1411 UINT8 modrm = FETCH();
1412 if( modrm >= 0xc0 ) {
1413 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q+MMX(modrm & 7).q;
1416 UINT32 ea = GetEA(modrm, 0);
1418 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q+src.q;
1420 CYCLES(1); // TODO: correct cycle count
// PMULLW mm, mm/m64: multiplies the four signed words of destination and source
// as 32-bit integers and keeps the low 16 bits of each product.
// NOTE(review): the memory read into `src` is not visible in this excerpt.
1423 void I386_OPS_BASE::MMXOP(pmullw_r64_rm64)() // Opcode 0f d5
1426 UINT8 modrm = FETCH();
1427 if( modrm >= 0xc0 ) {
1428 MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]) & 0xffff;
1429 MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1]) & 0xffff;
1430 MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]) & 0xffff;
1431 MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3]) & 0xffff;
1434 UINT32 ea = GetEA(modrm, 0);
1436 MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)src.s[0]) & 0xffff;
1437 MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)src.s[1]) & 0xffff;
1438 MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)src.s[2]) & 0xffff;
1439 MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)src.s[3]) & 0xffff;
1441 CYCLES(1); // TODO: correct cycle count
// PSUBUSB mm, mm/m64: unsigned saturating byte subtraction; a lane becomes 0
// when the destination byte is smaller than the source byte.
// NOTE(review): the loop headers over `n` and the memory read into `src` are
// not visible in this excerpt.
1444 void I386_OPS_BASE::MMXOP(psubusb_r64_rm64)() // Opcode 0f d8
1448 UINT8 modrm = FETCH();
1449 if( modrm >= 0xc0 ) {
1451 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] < MMX(modrm & 7).b[n] ? 0 : MMX((modrm >> 3) & 0x7).b[n]-MMX(modrm & 7).b[n];
1454 UINT32 ea = GetEA(modrm, 0);
1457 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] < src.b[n] ? 0 : MMX((modrm >> 3) & 0x7).b[n]-src.b[n];
1459 CYCLES(1); // TODO: correct cycle count
// PSUBUSW mm, mm/m64: unsigned saturating word subtraction; a lane becomes 0
// when the destination word is smaller than the source word.
// NOTE(review): the loop headers over `n` and the memory read into `src` are
// not visible in this excerpt.
1462 void I386_OPS_BASE::MMXOP(psubusw_r64_rm64)() // Opcode 0f d9
1466 UINT8 modrm = FETCH();
1467 if( modrm >= 0xc0 ) {
1469 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] < MMX(modrm & 7).w[n] ? 0 : MMX((modrm >> 3) & 0x7).w[n]-MMX(modrm & 7).w[n];
1472 UINT32 ea = GetEA(modrm, 0);
1475 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] < src.w[n] ? 0 : MMX((modrm >> 3) & 0x7).w[n]-src.w[n];
1477 CYCLES(1); // TODO: correct cycle count
// PAND mm, mm/m64: bitwise AND of the destination MMX quadword with the source.
// NOTE(review): the memory read into `src` is not visible in this excerpt.
1480 void I386_OPS_BASE::MMXOP(pand_r64_rm64)() // Opcode 0f db
1483 UINT8 modrm = FETCH();
1484 if( modrm >= 0xc0 ) {
1485 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q & MMX(modrm & 7).q;
1488 UINT32 ea = GetEA(modrm, 0);
1490 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q & src.q;
1492 CYCLES(1); // TODO: correct cycle count
// PADDUSB mm, mm/m64: unsigned saturating byte addition; a lane becomes 0xff
// when the destination byte exceeds 0xff minus the source byte.
// NOTE(review): the loop headers over `n` and the memory read into `src` are
// not visible in this excerpt.
1495 void I386_OPS_BASE::MMXOP(paddusb_r64_rm64)() // Opcode 0f dc
1499 UINT8 modrm = FETCH();
1500 if( modrm >= 0xc0 ) {
1502 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] > (0xff-MMX(modrm & 7).b[n]) ? 0xff : MMX((modrm >> 3) & 0x7).b[n]+MMX(modrm & 7).b[n];
1505 UINT32 ea = GetEA(modrm, 0);
1508 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] > (0xff-src.b[n]) ? 0xff : MMX((modrm >> 3) & 0x7).b[n]+src.b[n];
1510 CYCLES(1); // TODO: correct cycle count
// PADDUSW mm, mm/m64: unsigned saturating word addition; a lane becomes 0xffff
// when the destination word exceeds 0xffff minus the source word.
// NOTE(review): the loop headers over `n` and the memory read into `src` are
// not visible in this excerpt.
1513 void I386_OPS_BASE::MMXOP(paddusw_r64_rm64)() // Opcode 0f dd
1517 UINT8 modrm = FETCH();
1518 if( modrm >= 0xc0 ) {
1520 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] > (0xffff-MMX(modrm & 7).w[n]) ? 0xffff : MMX((modrm >> 3) & 0x7).w[n]+MMX(modrm & 7).w[n];
1523 UINT32 ea = GetEA(modrm, 0);
1526 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] > (0xffff-src.w[n]) ? 0xffff : MMX((modrm >> 3) & 0x7).w[n]+src.w[n];
1528 CYCLES(1); // TODO: correct cycle count
// PANDN mm, mm/m64: inverts the destination MMX quadword and ANDs it with the source.
// NOTE(review): the memory read into `src` is not visible in this excerpt.
1531 void I386_OPS_BASE::MMXOP(pandn_r64_rm64)() // Opcode 0f df
1534 UINT8 modrm = FETCH();
1535 if( modrm >= 0xc0 ) {
1536 MMX((modrm >> 3) & 0x7).q=(~MMX((modrm >> 3) & 0x7).q) & MMX(modrm & 7).q;
1539 UINT32 ea = GetEA(modrm, 0);
1541 MMX((modrm >> 3) & 0x7).q=(~MMX((modrm >> 3) & 0x7).q) & src.q;
1543 CYCLES(1); // TODO: correct cycle count
// PSRAW mm, mm/m64: right-shifts each of the four signed words s[] of the
// destination MMX register by a count taken from the source quadword.
// NOTE(review): no range check on `count` is visible; verify the behaviour for
// counts of 16 or more.
// NOTE(review): the memory read into `src` is not visible in this excerpt.
1546 void I386_OPS_BASE::MMXOP(psraw_r64_rm64)() // Opcode 0f e1
1549 UINT8 modrm = FETCH();
1550 if( modrm >= 0xc0 ) {
1551 int count=(int)MMX(modrm & 7).q;
1552 MMX((modrm >> 3) & 0x7).s[0]=MMX((modrm >> 3) & 0x7).s[0] >> count;
1553 MMX((modrm >> 3) & 0x7).s[1]=MMX((modrm >> 3) & 0x7).s[1] >> count;
1554 MMX((modrm >> 3) & 0x7).s[2]=MMX((modrm >> 3) & 0x7).s[2] >> count;
1555 MMX((modrm >> 3) & 0x7).s[3]=MMX((modrm >> 3) & 0x7).s[3] >> count;
1558 UINT32 ea = GetEA(modrm, 0);
1560 int count=(int)src.q;
1561 MMX((modrm >> 3) & 0x7).s[0]=MMX((modrm >> 3) & 0x7).s[0] >> count;
1562 MMX((modrm >> 3) & 0x7).s[1]=MMX((modrm >> 3) & 0x7).s[1] >> count;
1563 MMX((modrm >> 3) & 0x7).s[2]=MMX((modrm >> 3) & 0x7).s[2] >> count;
1564 MMX((modrm >> 3) & 0x7).s[3]=MMX((modrm >> 3) & 0x7).s[3] >> count;
1566 CYCLES(1); // TODO: correct cycle count
// PSRAD mm, mm/m64: right-shifts both signed dwords i[] of the destination MMX
// register by a count taken from the source quadword.
// NOTE(review): no range check on `count` is visible; verify the behaviour for
// counts of 32 or more.
// NOTE(review): the memory read into `src` is not visible in this excerpt.
1569 void I386_OPS_BASE::MMXOP(psrad_r64_rm64)() // Opcode 0f e2
1572 UINT8 modrm = FETCH();
1573 if( modrm >= 0xc0 ) {
1574 int count=(int)MMX(modrm & 7).q;
1575 MMX((modrm >> 3) & 0x7).i[0]=MMX((modrm >> 3) & 0x7).i[0] >> count;
1576 MMX((modrm >> 3) & 0x7).i[1]=MMX((modrm >> 3) & 0x7).i[1] >> count;
1579 UINT32 ea = GetEA(modrm, 0);
1581 int count=(int)src.q;
1582 MMX((modrm >> 3) & 0x7).i[0]=MMX((modrm >> 3) & 0x7).i[0] >> count;
1583 MMX((modrm >> 3) & 0x7).i[1]=MMX((modrm >> 3) & 0x7).i[1] >> count;
1585 CYCLES(1); // TODO: correct cycle count
// PMULHW mm, mm/m64: multiplies the signed 16-bit lanes s[0..3] of the
// destination (ModRM bits 5..3) by the matching source lanes and keeps the
// upper 16 bits of each 32-bit product in w[0..3].
1588 void I386_OPS_BASE::MMXOP(pmulhw_r64_rm64)() // Opcode 0f e5
1591 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1592 if( modrm >= 0xc0 ) {
// the product is formed in INT32, reinterpreted as UINT32 and shifted so that bits 31..16 land in the lane
1593 MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]) >> 16;
1594 MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1]) >> 16;
1595 MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]) >> 16;
1596 MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3]) >> 16;
// memory source addressed through ea (src presumably holds the quadword read from ea)
1599 UINT32 ea = GetEA(modrm, 0);
1601 MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)src.s[0]) >> 16;
1602 MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)src.s[1]) >> 16;
1603 MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)src.s[2]) >> 16;
1604 MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)src.s[3]) >> 16;
1606 CYCLES(1); // TODO: correct cycle count
// PSUBSB mm, mm/m64: subtracts the source's signed byte lanes c[n] from the
// destination's (ModRM bits 5..3). Each difference is formed in INT16 and then
// passed through SaturatedSignedWordToSignedByte (presumably a clamp to the
// signed byte range - confirm against the helper).
1609 void I386_OPS_BASE::MMXOP(psubsb_r64_rm64)() // Opcode 0f e8
1613 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1614 if( modrm >= 0xc0 ) {
1616 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] - (INT16)MMX(modrm & 7).c[n]);
// memory source addressed through ea (s presumably holds the quadword read from ea)
1619 UINT32 ea = GetEA(modrm, 0);
1622 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] - (INT16)s.c[n]);
1624 CYCLES(1); // TODO: correct cycle count
// PSUBSW mm, mm/m64: subtracts the source's signed 16-bit lanes s[n] from the
// destination's (ModRM bits 5..3). Each difference is formed in INT32 and then
// passed through SaturatedSignedDwordToSignedWord (presumably a clamp to the
// signed 16-bit range - confirm against the helper).
1627 void I386_OPS_BASE::MMXOP(psubsw_r64_rm64)() // Opcode 0f e9
1631 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1632 if( modrm >= 0xc0 ) {
1634 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] - (INT32)MMX(modrm & 7).s[n]);
// memory source addressed through ea (s presumably holds the quadword read from ea)
1637 UINT32 ea = GetEA(modrm, 0);
1640 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] - (INT32)s.s[n]);
1642 CYCLES(1); // TODO: correct cycle count
// POR mm, mm/m64: bitwise OR of the source into the 64-bit q field of the
// MMX register named by ModRM bits 5..3.
1645 void I386_OPS_BASE::MMXOP(por_r64_rm64)() // Opcode 0f eb
1648 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1649 if( modrm >= 0xc0 ) {
1650 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q | MMX(modrm & 7).q;
// memory source addressed through ea (s presumably holds the quadword read from ea)
1653 UINT32 ea = GetEA(modrm, 0);
1655 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q | s.q;
1657 CYCLES(1); // TODO: correct cycle count
// PADDSB mm, mm/m64: adds the source's signed byte lanes c[n] to the
// destination's (ModRM bits 5..3). Each sum is formed in INT16 and then passed
// through SaturatedSignedWordToSignedByte (presumably a clamp to the signed
// byte range - confirm against the helper).
1660 void I386_OPS_BASE::MMXOP(paddsb_r64_rm64)() // Opcode 0f ec
1664 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1665 if( modrm >= 0xc0 ) {
1667 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] + (INT16)MMX(modrm & 7).c[n]);
// memory source addressed through ea (s presumably holds the quadword read from ea)
1670 UINT32 ea = GetEA(modrm, 0);
1673 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] + (INT16)s.c[n]);
1675 CYCLES(1); // TODO: correct cycle count
// PADDSW mm, mm/m64: adds the source's signed 16-bit lanes s[n] to the
// destination's (ModRM bits 5..3). Each sum is formed in INT32 and then passed
// through SaturatedSignedDwordToSignedWord (presumably a clamp to the signed
// 16-bit range - confirm against the helper).
1678 void I386_OPS_BASE::MMXOP(paddsw_r64_rm64)() // Opcode 0f ed
1682 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1683 if( modrm >= 0xc0 ) {
1685 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] + (INT32)MMX(modrm & 7).s[n]);
// memory source addressed through ea (s presumably holds the quadword read from ea)
1688 UINT32 ea = GetEA(modrm, 0);
1691 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] + (INT32)s.s[n]);
1693 CYCLES(1); // TODO: correct cycle count
// PXOR mm, mm/m64: bitwise XOR of the source into the 64-bit q field of the
// MMX register named by ModRM bits 5..3.
1696 void I386_OPS_BASE::MMXOP(pxor_r64_rm64)() // Opcode 0f ef
1699 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1700 if( modrm >= 0xc0 ) {
1701 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q ^ MMX(modrm & 7).q;
// memory source addressed through ea (s presumably holds the quadword read from ea)
1704 UINT32 ea = GetEA(modrm, 0);
1706 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q ^ s.q;
1708 CYCLES(1); // TODO: correct cycle count
// PSLLW mm, mm/m64: logical left shift of the 16-bit lanes w[0..3] of the MMX
// register named by ModRM bits 5..3. The shift count is the source operand.
// A count above 15 must clear every lane, so each shift is guarded and an
// out-of-range count stores 0 instead of performing an oversized shift.
1711 void I386_OPS_BASE::MMXOP(psllw_r64_rm64)() // Opcode 0f f1
1714 UINT8 modrm = FETCH();
// modrm >= 0xc0: the count comes from the MMX register in ModRM bits 2..0
1715 if( modrm >= 0xc0 ) {
// saturate on the 64-bit value so a count with only high bits set is still seen as out of range
1716 int count=((UINT64)MMX(modrm & 7).q > 15) ? 16 : (int)MMX(modrm & 7).q;
1717 MMX((modrm >> 3) & 0x7).w[0]=(count < 0 || count > 15) ? 0 : (MMX((modrm >> 3) & 0x7).w[0] << count);
1718 MMX((modrm >> 3) & 0x7).w[1]=(count < 0 || count > 15) ? 0 : (MMX((modrm >> 3) & 0x7).w[1] << count);
1719 MMX((modrm >> 3) & 0x7).w[2]=(count < 0 || count > 15) ? 0 : (MMX((modrm >> 3) & 0x7).w[2] << count);
1720 MMX((modrm >> 3) & 0x7).w[3]=(count < 0 || count > 15) ? 0 : (MMX((modrm >> 3) & 0x7).w[3] << count);
// otherwise the count is taken from memory addressed through ea
// NOTE(review): the memory-path count is presumably narrowed to int where it is
// declared; a count whose low 32 bits are small but whose high bits are set
// would still slip past this guard - confirm and saturate it at the declaration.
1723 UINT32 ea = GetEA(modrm, 0);
1726 MMX((modrm >> 3) & 0x7).w[0]=(count < 0 || count > 15) ? 0 : (MMX((modrm >> 3) & 0x7).w[0] << count);
1727 MMX((modrm >> 3) & 0x7).w[1]=(count < 0 || count > 15) ? 0 : (MMX((modrm >> 3) & 0x7).w[1] << count);
1728 MMX((modrm >> 3) & 0x7).w[2]=(count < 0 || count > 15) ? 0 : (MMX((modrm >> 3) & 0x7).w[2] << count);
1729 MMX((modrm >> 3) & 0x7).w[3]=(count < 0 || count > 15) ? 0 : (MMX((modrm >> 3) & 0x7).w[3] << count);
1731 CYCLES(1); // TODO: correct cycle count
// PSLLD mm, mm/m64: logical left shift of the 32-bit lanes d[0..1] of the MMX
// register named by ModRM bits 5..3. The shift count is the source operand.
// A count above 31 must clear both lanes; shifting a 32-bit value by 32 or
// more is undefined in C++, so each shift is guarded and stores 0 instead.
1734 void I386_OPS_BASE::MMXOP(pslld_r64_rm64)() // Opcode 0f f2
1737 UINT8 modrm = FETCH();
// modrm >= 0xc0: the count comes from the MMX register in ModRM bits 2..0
1738 if( modrm >= 0xc0 ) {
// saturate on the 64-bit value so a count with only high bits set is still seen as out of range
1739 int count=((UINT64)MMX(modrm & 7).q > 31) ? 32 : (int)MMX(modrm & 7).q;
1740 MMX((modrm >> 3) & 0x7).d[0]=(count < 0 || count > 31) ? 0 : (MMX((modrm >> 3) & 0x7).d[0] << count);
1741 MMX((modrm >> 3) & 0x7).d[1]=(count < 0 || count > 31) ? 0 : (MMX((modrm >> 3) & 0x7).d[1] << count);
// otherwise the count is taken from memory addressed through ea
// NOTE(review): the memory-path count is presumably narrowed to int where it is
// declared; a count with only high bits set would still slip past this guard -
// confirm and saturate it at the declaration.
1744 UINT32 ea = GetEA(modrm, 0);
1747 MMX((modrm >> 3) & 0x7).d[0]=(count < 0 || count > 31) ? 0 : (MMX((modrm >> 3) & 0x7).d[0] << count);
1748 MMX((modrm >> 3) & 0x7).d[1]=(count < 0 || count > 31) ? 0 : (MMX((modrm >> 3) & 0x7).d[1] << count);
1750 CYCLES(1); // TODO: correct cycle count
// PSLLQ mm, mm/m64: logical left shift of the whole 64-bit q field of the MMX
// register named by ModRM bits 5..3. The shift count is the source operand.
// A count above 63 must clear the register; shifting a 64-bit value by 64 or
// more is undefined in C++, so the shift is guarded and stores 0 instead.
1753 void I386_OPS_BASE::MMXOP(psllq_r64_rm64)() // Opcode 0f f3
1756 UINT8 modrm = FETCH();
// modrm >= 0xc0: the count comes from the MMX register in ModRM bits 2..0
1757 if( modrm >= 0xc0 ) {
// saturate on the 64-bit value so a count with only high bits set is still seen as out of range
1758 int count=((UINT64)MMX(modrm & 7).q > 63) ? 64 : (int)MMX(modrm & 7).q;
1759 MMX((modrm >> 3) & 0x7).q=(count < 0 || count > 63) ? 0 : (MMX((modrm >> 3) & 0x7).q << count);
// otherwise the count is taken from memory addressed through ea
// NOTE(review): the memory-path count is presumably narrowed to int where it is
// declared; a count with only high bits set would still slip past this guard -
// confirm and saturate it at the declaration.
1762 UINT32 ea = GetEA(modrm, 0);
1765 MMX((modrm >> 3) & 0x7).q=(count < 0 || count > 63) ? 0 : (MMX((modrm >> 3) & 0x7).q << count);
1767 CYCLES(1); // TODO: correct cycle count
// PMADDWD mm, mm/m64: multiplies the signed 16-bit lanes s[0..3] of the
// destination (ModRM bits 5..3) by the matching source lanes and sums adjacent
// products: i[0] = s0*s0' + s1*s1', i[1] = s2*s2' + s3*s3'.
1770 void I386_OPS_BASE::MMXOP(pmaddwd_r64_rm64)() // Opcode 0f f5
1773 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1774 if( modrm >= 0xc0 ) {
// writing i[0] overwrites s[0]/s[1] only after both have been read; s[2]/s[3] are still intact for i[1]
// (assumes i[] and s[] are views of the same register storage - TODO confirm)
1775 MMX((modrm >> 3) & 0x7).i[0]=(INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]+
1776 (INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1];
1777 MMX((modrm >> 3) & 0x7).i[1]=(INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]+
1778 (INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3];
// memory source addressed through ea (s presumably holds the quadword read from ea)
1781 UINT32 ea = GetEA(modrm, 0);
1783 MMX((modrm >> 3) & 0x7).i[0]=(INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)s.s[0]+
1784 (INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)s.s[1];
1785 MMX((modrm >> 3) & 0x7).i[1]=(INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)s.s[2]+
1786 (INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)s.s[3];
1788 CYCLES(1); // TODO: correct cycle count
// PSUBB mm, mm/m64: subtracts the source's byte lanes b[n] from those of the
// MMX register named by ModRM bits 5..3; no saturation is applied here.
1791 void I386_OPS_BASE::MMXOP(psubb_r64_rm64)() // Opcode 0f f8
1795 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1796 if( modrm >= 0xc0 ) {
1798 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] - MMX(modrm & 7).b[n];
// memory source addressed through ea (s presumably holds the quadword read from ea)
1801 UINT32 ea = GetEA(modrm, 0);
1804 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] - s.b[n];
1806 CYCLES(1); // TODO: correct cycle count
// PSUBW mm, mm/m64: subtracts the source's 16-bit lanes w[n] from those of the
// MMX register named by ModRM bits 5..3; no saturation is applied here.
1809 void I386_OPS_BASE::MMXOP(psubw_r64_rm64)() // Opcode 0f f9
1813 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1814 if( modrm >= 0xc0 ) {
1816 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] - MMX(modrm & 7).w[n];
// memory source addressed through ea (s presumably holds the quadword read from ea)
1819 UINT32 ea = GetEA(modrm, 0);
1822 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] - s.w[n];
1824 CYCLES(1); // TODO: correct cycle count
// PSUBD mm, mm/m64: subtracts the source's 32-bit lanes d[n] from those of the
// MMX register named by ModRM bits 5..3; no saturation is applied here.
1827 void I386_OPS_BASE::MMXOP(psubd_r64_rm64)() // Opcode 0f fa
1831 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1832 if( modrm >= 0xc0 ) {
1834 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] - MMX(modrm & 7).d[n];
// memory source addressed through ea (s presumably holds the quadword read from ea)
1837 UINT32 ea = GetEA(modrm, 0);
1840 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] - s.d[n];
1842 CYCLES(1); // TODO: correct cycle count
// PADDB mm, mm/m64: adds the source's byte lanes b[n] to those of the MMX
// register named by ModRM bits 5..3; no saturation is applied here.
1845 void I386_OPS_BASE::MMXOP(paddb_r64_rm64)() // Opcode 0f fc
1849 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1850 if( modrm >= 0xc0 ) {
1852 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] + MMX(modrm & 7).b[n];
// memory source addressed through ea (s presumably holds the quadword read from ea)
1855 UINT32 ea = GetEA(modrm, 0);
1858 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] + s.b[n];
1860 CYCLES(1); // TODO: correct cycle count
// PADDW mm, mm/m64: adds the source's 16-bit lanes w[n] to those of the MMX
// register named by ModRM bits 5..3; no saturation is applied here.
1863 void I386_OPS_BASE::MMXOP(paddw_r64_rm64)() // Opcode 0f fd
1867 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1868 if( modrm >= 0xc0 ) {
1870 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] + MMX(modrm & 7).w[n];
// memory source addressed through ea (s presumably holds the quadword read from ea)
1873 UINT32 ea = GetEA(modrm, 0);
1876 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] + s.w[n];
1878 CYCLES(1); // TODO: correct cycle count
// PADDD mm, mm/m64: adds the source's 32-bit lanes d[n] to those of the MMX
// register named by ModRM bits 5..3; no saturation is applied here.
1881 void I386_OPS_BASE::MMXOP(paddd_r64_rm64)() // Opcode 0f fe
1885 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
1886 if( modrm >= 0xc0 ) {
1888 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] + MMX(modrm & 7).d[n];
// memory source addressed through ea (s presumably holds the quadword read from ea)
1891 UINT32 ea = GetEA(modrm, 0);
1894 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] + s.d[n];
1896 CYCLES(1); // TODO: correct cycle count
// EMMS: leaves MMX state by resetting the x87 tag word to 0xffff
// (presumably marking every x87 register as empty - confirm against the FPU code).
1899 void I386_OPS_BASE::MMXOP(emms)() // Opcode 0f 77
1901 cpustate->x87_tw = 0xffff; // tag word = 0xffff
1903 CYCLES(1); // TODO: correct cycle count
// Cyrix SVDC: saves a segment register's cached descriptor and selector to a
// 10-byte memory image. Layout written below: limit at +0 (16 bits), base at
// +2, flags at +5 (16 bits), top byte of base at +7, selector at +8.
// Only memory operands are accepted; the register form raises trap 6.
1906 void I386_OPS_BASE::I386OP(cyrix_svdc)() // Opcode 0f 78
1908 UINT8 modrm = FETCH();
1910 if( modrm < 0xc0 ) {
1911 UINT32 ea = GetEA(modrm,0);
// ModRM bits 5..3 choose the segment register (presumably remapped to an sreg index before use - confirm)
1912 int index = (modrm >> 3) & 7;
// trap 6 for a selection that is not accepted
1948 i386_trap( 6, 0, 0);
1952 limit = cpustate->sreg[index].limit;
// with the G bit set the cached limit is presumably converted back to page units before storing - confirm
1954 if (cpustate->sreg[index].flags & 0x8000) //G bit
1959 WRITE16(ea + 0, limit);
// the 32-bit base write covers +2..+5; the flags write that follows then overwrites +5..+6
1960 WRITE32(ea + 2, cpustate->sreg[index].base);
1961 WRITE16(ea + 5, cpustate->sreg[index].flags); //replace top 8 bits of base
1962 WRITE8(ea + 7, cpustate->sreg[index].base >> 24);
1963 WRITE16(ea + 8, cpustate->sreg[index].selector);
// register operand form: trap 6
1965 i386_trap( 6, 0, 0);
1967 CYCLES(1); // TODO: correct cycle count
// Cyrix RSDC: restores a segment register's cached descriptor and selector
// from a 10-byte memory image: limit at +0 (16 bits), low 24 bits of base at
// +2, flags at +5 (16 bits), top byte of base at +7, selector at +8.
// Only memory operands are accepted; the register form raises trap 6.
1970 void I386_OPS_BASE::I386OP(cyrix_rsdc)() // Opcode 0f 79
1972 UINT8 modrm = FETCH();
1974 if( modrm < 0xc0 ) {
1975 UINT32 ea = GetEA(modrm,0);
// ModRM bits 5..3 choose the segment register (presumably remapped to an sreg index before use - confirm)
1976 int index = (modrm >> 3) & 7;
// trap 6 for a selection that is not accepted
2014 i386_trap( 6, 0, 0);
// the dword at +2 overlaps the flags at +5, so only its low 24 bits are base; the top byte comes from +7
2018 base = (READ32(ea + 2) & 0x00ffffff) | (READ8(ea + 7) << 24);
2019 flags = READ16(ea + 5);
// the two low flag bits supply limit bits 17..16
2020 limit = READ16(ea + 0) | ((flags & 3) << 16);
2022 if (flags & 0x8000) //G bit
// page-granular limit: scale by 4 KiB and fill the low 12 bits
2024 limit = (limit << 12) | 0xfff;
2027 cpustate->sreg[index].selector = READ16(ea + 8);
2028 cpustate->sreg[index].flags = flags;
2029 cpustate->sreg[index].base = base;
2030 cpustate->sreg[index].limit = limit;
// register operand form: trap 6
2032 i386_trap( 6, 0, 0);
2034 CYCLES(1); // TODO: correct cycle count
// Cyrix SVLDT: saves the cached LDTR descriptor and selector to a 10-byte
// memory image: limit at +0 (16 bits), base at +2, flags at +5 (16 bits),
// top byte of base at +7, selector at +8.
// Runs only in protected mode outside V8086 mode; other cases raise trap 6.
2037 void I386_OPS_BASE::I386OP(cyrix_svldt)() // Opcode 0f 7a
2039 if ( PROTECTED_MODE && !V8086_MODE )
2041 UINT8 modrm = FETCH();
// accepted only when ModRM bits 7..3 are all zero
2043 if( !(modrm & 0xf8) ) {
2044 UINT32 ea = GetEA(modrm,0);
2045 UINT32 limit = cpustate->ldtr.limit;
// with the G bit set the cached limit is presumably converted back to page units before storing - confirm
2047 if (cpustate->ldtr.flags & 0x8000) //G bit
2052 WRITE16(ea + 0, limit);
// the 32-bit base write covers +2..+5; the flags write that follows then overwrites +5..+6
2053 WRITE32(ea + 2, cpustate->ldtr.base);
2054 WRITE16(ea + 5, cpustate->ldtr.flags); //replace top 8 bits of base
2055 WRITE8(ea + 7, cpustate->ldtr.base >> 24);
2056 WRITE16(ea + 8, cpustate->ldtr.segment);
// unsupported ModRM encoding: trap 6
2058 i386_trap( 6, 0, 0);
// wrong CPU mode: trap 6
2061 i386_trap( 6, 0, 0);
2063 CYCLES(1); // TODO: correct cycle count
// Cyrix RSLDT: restores the cached LDTR descriptor from a 10-byte memory
// image: limit at +0 (16 bits), low 24 bits of base at +2, flags at +5
// (16 bits), top byte of base at +7, selector at +8.
// Runs only in protected mode outside V8086 mode; other cases raise trap 6.
2066 void I386_OPS_BASE::I386OP(cyrix_rsldt)() // Opcode 0f 7b
2068 if ( PROTECTED_MODE && !V8086_MODE )
2073 UINT8 modrm = FETCH();
// accepted only when ModRM bits 7..3 are all zero
2075 if( !(modrm & 0xf8) ) {
2076 UINT32 ea = GetEA(modrm,0);
2077 UINT16 flags = READ16(ea + 5);
// The dword at +2 overlaps the flags at +5, so only its low 24 bits belong to
// the base and must be masked (AND), matching cyrix_rsdc. OR-ing with
// 0x00ffffff would force the low 24 bits of every restored base to ones.
2078 UINT32 base = (READ32(ea + 2) & 0x00ffffff) | (READ8(ea + 7) << 24);
// the two low flag bits supply limit bits 17..16
2079 UINT32 limit = READ16(ea + 0) | ((flags & 3) << 16);
2082 if (flags & 0x8000) //G bit
// page-granular limit: scale by 4 KiB and fill the low 12 bits
2084 limit = (limit << 12) | 0xfff;
// the saved selector is run through the protected-mode segment loader on a scratch descriptor
// NOTE(review): no visible line stores the selector into cpustate->ldtr - confirm that is intended
2087 memset(&seg, 0, sizeof(seg));
2088 seg.selector = READ16(ea + 8);
2089 i386_load_protected_mode_segment(&seg,NULL);
2090 cpustate->ldtr.limit = limit;
2091 cpustate->ldtr.base = base;
2092 cpustate->ldtr.flags = flags;
// unsupported ModRM encoding: trap 6
2094 i386_trap( 6, 0, 0);
// wrong CPU mode: trap 6
2097 i386_trap( 6, 0, 0);
2099 CYCLES(1); // TODO: correct cycle count
// Cyrix SVTS: saves the cached task register descriptor and selector to a
// 10-byte memory image: limit at +0 (16 bits), base at +2, flags at +5
// (16 bits), top byte of base at +7, selector at +8.
// Runs only in protected mode; otherwise trap 6 is raised.
// NOTE(review): unlike the sibling Cyrix handlers, no CYCLES(1) line appears
// in this handler - confirm whether cycle accounting was left out on purpose.
2102 void I386_OPS_BASE::I386OP(cyrix_svts)() // Opcode 0f 7c
2104 if ( PROTECTED_MODE )
2106 UINT8 modrm = FETCH();
// accepted only when ModRM bits 7..3 are all zero
2108 if( !(modrm & 0xf8) ) {
2109 UINT32 ea = GetEA(modrm,0);
2110 UINT32 limit = cpustate->task.limit;
// with the G bit set the cached limit is presumably converted back to page units before storing - confirm
2112 if (cpustate->task.flags & 0x8000) //G bit
2117 WRITE16(ea + 0, limit);
// the 32-bit base write covers +2..+5; the flags write that follows then overwrites +5..+6
2118 WRITE32(ea + 2, cpustate->task.base);
2119 WRITE16(ea + 5, cpustate->task.flags); //replace top 8 bits of base
2120 WRITE8(ea + 7, cpustate->task.base >> 24);
2121 WRITE16(ea + 8, cpustate->task.segment);
// unsupported ModRM encoding: trap 6
2123 i386_trap( 6, 0, 0);
// not in protected mode: trap 6
2126 i386_trap( 6, 0, 0);
// Cyrix RSTS: restores the cached task register descriptor and selector from
// a 10-byte memory image: limit at +0 (16 bits), low 24 bits of base at +2,
// flags at +5 (16 bits), top byte of base at +7, selector at +8.
// Runs only in protected mode; otherwise trap 6 is raised.
2130 void I386_OPS_BASE::I386OP(cyrix_rsts)() // Opcode 0f 7d
2132 if ( PROTECTED_MODE )
2137 UINT8 modrm = FETCH();
// accepted only when ModRM bits 7..3 are all zero
2139 if( !(modrm & 0xf8) ) {
2140 UINT32 ea = GetEA(modrm,0);
2141 UINT16 flags = READ16(ea + 5);
// The dword at +2 overlaps the flags at +5, so only its low 24 bits belong to
// the base and must be masked (AND), matching cyrix_rsdc. OR-ing with
// 0x00ffffff would force the low 24 bits of every restored base to ones.
2142 UINT32 base = (READ32(ea + 2) & 0x00ffffff) | (READ8(ea + 7) << 24);
// the two low flag bits supply limit bits 17..16
2143 UINT32 limit = READ16(ea + 0) | ((flags & 3) << 16);
2145 if (flags & 0x8000) //G bit
// page-granular limit: scale by 4 KiB and fill the low 12 bits
2147 limit = (limit << 12) | 0xfff;
2149 cpustate->task.segment = READ16(ea + 8);
2150 cpustate->task.limit = limit;
2151 cpustate->task.base = base;
2152 cpustate->task.flags = flags;
// unsupported ModRM encoding: trap 6
2154 i386_trap( 6, 0, 0);
// not in protected mode: trap 6
2157 i386_trap( 6, 0, 0);
2159 CYCLES(1); // TODO: correct cycle count
// MOVD mm, r/m32: loads a 32-bit value into the low dword d[0] of the MMX
// register named by ModRM bits 5..3 and clears the high dword d[1].
2162 void I386_OPS_BASE::MMXOP(movd_r64_rm32)() // Opcode 0f 6e
2165 UINT8 modrm = FETCH();
// modrm >= 0xc0: the source is the register selected by the ModRM byte
2166 if( modrm >= 0xc0 ) {
2167 MMX((modrm >> 3) & 0x7).d[0]=LOAD_RM32(modrm);
// otherwise the source is the dword at the effective address
2169 UINT32 ea = GetEA(modrm, 0);
2170 MMX((modrm >> 3) & 0x7).d[0]=READ32(ea);
// the upper half is zeroed on both paths
2172 MMX((modrm >> 3) & 0x7).d[1]=0;
2173 CYCLES(1); // TODO: correct cycle count
// MOVQ mm, mm/m64: copies a 64-bit value into the MMX register named by
// ModRM bits 5..3.
2176 void I386_OPS_BASE::MMXOP(movq_r64_rm64)() // Opcode 0f 6f
2179 UINT8 modrm = FETCH();
// modrm >= 0xc0: register-to-register copy through the .l field
2180 if( modrm >= 0xc0 ) {
2181 MMX((modrm >> 3) & 0x7).l=MMX(modrm & 0x7).l;
// otherwise READMMX fills the destination register from the effective address
2183 UINT32 ea = GetEA(modrm, 0);
2184 READMMX( ea, MMX((modrm >> 3) & 0x7));
2186 CYCLES(1); // TODO: correct cycle count
// MOVD r/m32, mm: stores the low dword d[0] of the MMX register named by
// ModRM bits 5..3 to a 32-bit register or memory destination.
2189 void I386_OPS_BASE::MMXOP(movd_rm32_r64)() // Opcode 0f 7e
2192 UINT8 modrm = FETCH();
// modrm >= 0xc0: the destination is the register selected by the ModRM byte
2193 if( modrm >= 0xc0 ) {
2194 STORE_RM32(modrm, MMX((modrm >> 3) & 0x7).d[0]);
// otherwise the destination is the dword at the effective address
2196 UINT32 ea = GetEA(modrm, 0);
2197 WRITE32(ea, MMX((modrm >> 3) & 0x7).d[0]);
2199 CYCLES(1); // TODO: correct cycle count
// MOVQ mm/m64, mm: stores the MMX register named by ModRM bits 5..3 to
// another MMX register or to memory.
2202 void I386_OPS_BASE::MMXOP(movq_rm64_r64)() // Opcode 0f 7f
2205 UINT8 modrm = FETCH();
// modrm >= 0xc0: whole-register copy into the MMX register in ModRM bits 2..0
2206 if( modrm >= 0xc0 ) {
2207 MMX(modrm & 0x7)=MMX((modrm >> 3) & 0x7);
// otherwise WRITEMMX stores the register at the effective address
2209 UINT32 ea = GetEA(modrm, 0);
2210 WRITEMMX( ea, MMX((modrm >> 3) & 0x7));
2212 CYCLES(1); // TODO: correct cycle count
// PCMPEQB mm, mm/m64: compares the eight byte lanes b[0..7] of destination
// and source for equality; each destination lane becomes 0xff when equal and
// 0 otherwise.
2215 void I386_OPS_BASE::MMXOP(pcmpeqb_r64_rm64)() // Opcode 0f 74
2219 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2220 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2223 d=(modrm >> 3) & 0x7;
2224 for (c=0;c <= 7;c++)
2225 MMX(d).b[c]=(MMX(d).b[c] == MMX(s).b[c]) ? 0xff : 0;
// memory source (s presumably holds the quadword read from ea)
2228 int d=(modrm >> 3) & 0x7;
2229 UINT32 ea = GetEA(modrm, 0);
2231 for (c=0;c <= 7;c++)
2232 MMX(d).b[c]=(MMX(d).b[c] == s.b[c]) ? 0xff : 0;
2234 CYCLES(1); // TODO: correct cycle count
// PCMPEQW mm, mm/m64: compares the four 16-bit lanes w[0..3] of destination
// and source for equality; each destination lane becomes 0xffff when equal
// and 0 otherwise.
2237 void I386_OPS_BASE::MMXOP(pcmpeqw_r64_rm64)() // Opcode 0f 75
2240 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2241 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2244 d=(modrm >> 3) & 0x7;
2245 MMX(d).w[0]=(MMX(d).w[0] == MMX(s).w[0]) ? 0xffff : 0;
2246 MMX(d).w[1]=(MMX(d).w[1] == MMX(s).w[1]) ? 0xffff : 0;
2247 MMX(d).w[2]=(MMX(d).w[2] == MMX(s).w[2]) ? 0xffff : 0;
2248 MMX(d).w[3]=(MMX(d).w[3] == MMX(s).w[3]) ? 0xffff : 0;
// memory source (s presumably holds the quadword read from ea)
2251 int d=(modrm >> 3) & 0x7;
2252 UINT32 ea = GetEA(modrm, 0);
2254 MMX(d).w[0]=(MMX(d).w[0] == s.w[0]) ? 0xffff : 0;
2255 MMX(d).w[1]=(MMX(d).w[1] == s.w[1]) ? 0xffff : 0;
2256 MMX(d).w[2]=(MMX(d).w[2] == s.w[2]) ? 0xffff : 0;
2257 MMX(d).w[3]=(MMX(d).w[3] == s.w[3]) ? 0xffff : 0;
2259 CYCLES(1); // TODO: correct cycle count
// PCMPEQD mm, mm/m64: compares the two 32-bit lanes d[0..1] of destination
// and source for equality; each destination lane becomes 0xffffffff when
// equal and 0 otherwise.
2262 void I386_OPS_BASE::MMXOP(pcmpeqd_r64_rm64)() // Opcode 0f 76
2265 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2266 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2269 d=(modrm >> 3) & 0x7;
2270 MMX(d).d[0]=(MMX(d).d[0] == MMX(s).d[0]) ? 0xffffffff : 0;
2271 MMX(d).d[1]=(MMX(d).d[1] == MMX(s).d[1]) ? 0xffffffff : 0;
// memory source (s presumably holds the quadword read from ea)
2274 int d=(modrm >> 3) & 0x7;
2275 UINT32 ea = GetEA(modrm, 0);
2277 MMX(d).d[0]=(MMX(d).d[0] == s.d[0]) ? 0xffffffff : 0;
2278 MMX(d).d[1]=(MMX(d).d[1] == s.d[1]) ? 0xffffffff : 0;
2280 CYCLES(1); // TODO: correct cycle count
// PSHUFW mm, mm/m64, imm8: builds the destination's four 16-bit lanes by
// picking source lanes with the immediate. Bits 1..0 of imm8 select the lane
// for w[0], bits 3..2 for w[1], bits 5..4 for w[2] and bits 7..6 for w[3].
2283 void I386_OPS_BASE::MMXOP(pshufw_r64_rm64_i8)() // Opcode 0f 70
2286 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source; lanes are picked from t (presumably a copy of the source register - confirm)
2287 if( modrm >= 0xc0 ) {
2290 UINT8 imm8 = FETCH();
// d = destination register index (ModRM bits 5..3)
2292 d=(modrm >> 3) & 0x7;
2294 MMX(d).w[0]=t.w[imm8 & 3];
2295 MMX(d).w[1]=t.w[(imm8 >> 2) & 3];
2296 MMX(d).w[2]=t.w[(imm8 >> 4) & 3];
2297 MMX(d).w[3]=t.w[(imm8 >> 6) & 3];
// memory source: the immediate is fetched after the effective address has been decoded
// (s presumably holds the quadword read from ea)
2300 int d=(modrm >> 3) & 0x7;
2301 UINT32 ea = GetEA(modrm, 0);
2302 UINT8 imm8 = FETCH();
2304 MMX(d).w[0]=s.w[imm8 & 3];
2305 MMX(d).w[1]=s.w[(imm8 >> 2) & 3];
2306 MMX(d).w[2]=s.w[(imm8 >> 4) & 3];
2307 MMX(d).w[3]=s.w[(imm8 >> 6) & 3];
2309 CYCLES(1); // TODO: correct cycle count
// PUNPCKLBW xmm, xmm/m128: interleaves the low eight bytes of destination
// and source. Result byte 2n comes from destination byte n and result byte
// 2n+1 from source byte n, for n = 0..7.
2312 void I386_OPS_BASE::SSEOP(punpcklbw_r128_rm128)() // Opcode 66 0f 60
2314 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2315 if (modrm >= 0xc0) {
// d = destination register index (ModRM bits 5..3)
2319 d = (modrm >> 3) & 0x7;
// snapshot the low halves first: the destination is overwritten while it is still being read
2320 xd.l[0] = XMM(d).l[0];
2321 xs.l[0] = XMM(s).l[0];
2322 XMM(d).b[0] = xd.b[0];
2323 XMM(d).b[1] = xs.b[0];
2324 XMM(d).b[2] = xd.b[1];
2325 XMM(d).b[3] = xs.b[1];
2326 XMM(d).b[4] = xd.b[2];
2327 XMM(d).b[5] = xs.b[2];
2328 XMM(d).b[6] = xd.b[3];
2329 XMM(d).b[7] = xs.b[3];
2330 XMM(d).b[8] = xd.b[4];
2331 XMM(d).b[9] = xs.b[4];
2332 XMM(d).b[10] = xd.b[5];
2333 XMM(d).b[11] = xs.b[5];
2334 XMM(d).b[12] = xd.b[6];
2335 XMM(d).b[13] = xs.b[6];
2336 XMM(d).b[14] = xd.b[7];
2337 XMM(d).b[15] = xs.b[7];
// memory source: only the low quadword at ea is read
2341 int d = (modrm >> 3) & 0x7;
2342 UINT32 ea = GetEA(modrm, 0);
2343 xd.l[0] = XMM(d).l[0];
2344 xs.q[0] = READ64( ea);
2345 for (int n = 0; n < 8; n++) {
2346 XMM(d).b[n << 1] = xd.b[n];
2347 XMM(d).b[(n << 1) | 1] = xs.b[n];
2350 CYCLES(1); // TODO: correct cycle count
// PUNPCKLWD xmm, xmm/m128: interleaves the low four 16-bit words of
// destination and source. Result word 2n comes from destination word n and
// result word 2n+1 from source word n, for n = 0..3.
2353 void I386_OPS_BASE::SSEOP(punpcklwd_r128_rm128)()
2355 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2356 if (modrm >= 0xc0) {
// d = destination register index (ModRM bits 5..3)
2360 d = (modrm >> 3) & 0x7;
// snapshot the low halves first: the destination is overwritten while it is still being read
2361 xd.l[0] = XMM(d).l[0];
2362 xs.l[0] = XMM(s).l[0];
2363 for (int n = 0; n < 4; n++) {
2364 XMM(d).w[n << 1] = xd.w[n];
2365 XMM(d).w[(n << 1) | 1] = xs.w[n];
// memory source: only the low quadword at ea is read
2370 int d = (modrm >> 3) & 0x7;
2371 UINT32 ea = GetEA(modrm, 0);
2372 xd.l[0] = XMM(d).l[0];
2373 xs.q[0] = READ64( ea);
2374 for (int n = 0; n < 4; n++) {
2375 XMM(d).w[n << 1] = xd.w[n];
2376 XMM(d).w[(n << 1) | 1] = xs.w[n];
2379 CYCLES(1); // TODO: correct cycle count
// PUNPCKLDQ xmm, xmm/m128: interleaves the low two dwords of destination and
// source. Result dword 2n comes from destination dword n and result dword
// 2n+1 from source dword n, for n = 0..1.
2382 void I386_OPS_BASE::SSEOP(punpckldq_r128_rm128)()
2384 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2385 if (modrm >= 0xc0) {
// d = destination register index (ModRM bits 5..3)
2389 d = (modrm >> 3) & 0x7;
// snapshot the low halves first: the destination is overwritten while it is still being read
2390 xd.l[0] = XMM(d).l[0];
2391 xs.l[0] = XMM(s).l[0];
2392 for (int n = 0; n < 2; n++) {
2393 XMM(d).d[n << 1] = xd.d[n];
2394 XMM(d).d[(n << 1) | 1] = xs.d[n];
// memory source: only the low quadword at ea is read
2399 int d = (modrm >> 3) & 0x7;
2400 UINT32 ea = GetEA(modrm, 0);
2401 xd.l[0] = XMM(d).l[0];
2402 xs.q[0] = READ64( ea);
2403 for (int n = 0; n < 2; n++) {
2404 XMM(d).d[n << 1] = xd.d[n];
2405 XMM(d).d[(n << 1) | 1] = xs.d[n];
2408 CYCLES(1); // TODO: correct cycle count
// PUNPCKLQDQ xmm, xmm/m128: keeps the destination's low quadword in q[0] and
// places the source's low quadword in q[1].
2411 void I386_OPS_BASE::SSEOP(punpcklqdq_r128_rm128)()
2413 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2414 if (modrm >= 0xc0) {
// d = destination register index (ModRM bits 5..3)
2418 d = (modrm >> 3) & 0x7;
// both low quadwords are copied to temporaries before the destination is written
2419 xd.l[0] = XMM(d).l[0];
2420 xs.l[0] = XMM(s).l[0];
2421 XMM(d).q[0] = xd.q[0];
2422 XMM(d).q[1] = xs.q[0];
// memory source: only the low quadword at ea is read
2426 int d = (modrm >> 3) & 0x7;
2427 UINT32 ea = GetEA(modrm, 0);
2428 xd.l[0] = XMM(d).l[0];
2429 xs.q[0] = READ64( ea);
2430 XMM(d).q[0] = xd.q[0];
2431 XMM(d).q[1] = xs.q[0];
2433 CYCLES(1); // TODO: correct cycle count
// PUNPCKLBW mm, mm/m32: interleaves the low four bytes of destination and
// source into the eight bytes of the destination. Even result bytes come from
// t and odd result bytes from the source.
// t is presumably the destination's original low dword, captured before the
// writes below - confirm.
2436 void I386_OPS_BASE::MMXOP(punpcklbw_r64_r64m32)() // Opcode 0f 60
2439 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2440 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2444 d=(modrm >> 3) & 0x7;
2446 MMX(d).b[0]=t & 0xff;
2447 MMX(d).b[1]=MMX(s).b[0];
2448 MMX(d).b[2]=(t >> 8) & 0xff;
2449 MMX(d).b[3]=MMX(s).b[1];
2450 MMX(d).b[4]=(t >> 16) & 0xff;
2451 MMX(d).b[5]=MMX(s).b[2];
2452 MMX(d).b[6]=(t >> 24) & 0xff;
2453 MMX(d).b[7]=MMX(s).b[3];
// memory source: here s is a 32-bit value (presumably the dword read from ea) split byte by byte
2456 int d=(modrm >> 3) & 0x7;
2457 UINT32 ea = GetEA(modrm, 0);
2460 MMX(d).b[0]=t & 0xff;
2461 MMX(d).b[1]=s & 0xff;
2462 MMX(d).b[2]=(t >> 8) & 0xff;
2463 MMX(d).b[3]=(s >> 8) & 0xff;
2464 MMX(d).b[4]=(t >> 16) & 0xff;
2465 MMX(d).b[5]=(s >> 16) & 0xff;
2466 MMX(d).b[6]=(t >> 24) & 0xff;
2467 MMX(d).b[7]=(s >> 24) & 0xff;
2469 CYCLES(1); // TODO: correct cycle count
// PUNPCKLWD mm, mm/m32: interleaves the low two 16-bit words of destination
// and source. The lines shown keep w[0] and write the source words into w[1]
// and w[3]; w[2] is presumably filled from a saved copy of the destination's
// original w[1] - confirm.
2472 void I386_OPS_BASE::MMXOP(punpcklwd_r64_r64m32)() // Opcode 0f 61
2475 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2476 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2480 d=(modrm >> 3) & 0x7;
// self-assignment: w[0] keeps its value
2482 MMX(d).w[0]=MMX(d).w[0];
2483 MMX(d).w[1]=MMX(s).w[0];
// NOTE(review): if source and destination are the same register, w[1] has
// already been overwritten above by the time it is read here - confirm.
2485 MMX(d).w[3]=MMX(s).w[1];
// memory source: here s is a 32-bit value (presumably the dword read from ea)
2489 int d=(modrm >> 3) & 0x7;
2490 UINT32 ea = GetEA(modrm, 0);
2493 MMX(d).w[0]=MMX(d).w[0];
2494 MMX(d).w[1]=s & 0xffff;
2496 MMX(d).w[3]=(s >> 16) & 0xffff;
2498 CYCLES(1); // TODO: correct cycle count
// PUNPCKLDQ mm, mm/m32: keeps the destination's low dword d[0] and places the
// source's low dword in d[1].
2501 void I386_OPS_BASE::MMXOP(punpckldq_r64_r64m32)() // Opcode 0f 62
2504 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2505 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2508 d=(modrm >> 3) & 0x7;
// self-assignment: d[0] keeps its value
2509 MMX(d).d[0]=MMX(d).d[0];
2510 MMX(d).d[1]=MMX(s).d[0];
// memory source (the upper dword is presumably filled from the dword at ea - confirm)
2513 int d=(modrm >> 3) & 0x7;
2514 UINT32 ea = GetEA(modrm, 0);
2516 MMX(d).d[0]=MMX(d).d[0];
2519 CYCLES(1); // TODO: correct cycle count
// PACKSSWB mm, mm/m64: narrows eight signed 16-bit words to signed bytes.
// The destination's four words go to c[0..3] and the source's four words to
// c[4..7], each through SaturatedSignedWordToSignedByte (presumably a clamp
// to the signed byte range - confirm against the helper).
2522 void I386_OPS_BASE::MMXOP(packsswb_r64_rm64)() // Opcode 0f 63
2525 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2526 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2529 d=(modrm >> 3) & 0x7;
// results are written in place while later words are still to be read; assuming c[] and s[] share
// storage, byte k lands at or below the word it was read from, so unread destination words stay intact
2530 MMX(d).c[0]=SaturatedSignedWordToSignedByte(MMX(d).s[0]);
2531 MMX(d).c[1]=SaturatedSignedWordToSignedByte(MMX(d).s[1]);
2532 MMX(d).c[2]=SaturatedSignedWordToSignedByte(MMX(d).s[2]);
2533 MMX(d).c[3]=SaturatedSignedWordToSignedByte(MMX(d).s[3]);
// NOTE(review): if source and destination are the same register, the words read
// below have already been partly overwritten by the four stores above - confirm.
2534 MMX(d).c[4]=SaturatedSignedWordToSignedByte(MMX(s).s[0]);
2535 MMX(d).c[5]=SaturatedSignedWordToSignedByte(MMX(s).s[1]);
2536 MMX(d).c[6]=SaturatedSignedWordToSignedByte(MMX(s).s[2]);
2537 MMX(d).c[7]=SaturatedSignedWordToSignedByte(MMX(s).s[3]);
// memory source (s presumably holds the quadword read from ea)
2540 int d=(modrm >> 3) & 0x7;
2541 UINT32 ea = GetEA(modrm, 0);
2543 MMX(d).c[0]=SaturatedSignedWordToSignedByte(MMX(d).s[0]);
2544 MMX(d).c[1]=SaturatedSignedWordToSignedByte(MMX(d).s[1]);
2545 MMX(d).c[2]=SaturatedSignedWordToSignedByte(MMX(d).s[2]);
2546 MMX(d).c[3]=SaturatedSignedWordToSignedByte(MMX(d).s[3]);
2547 MMX(d).c[4]=SaturatedSignedWordToSignedByte(s.s[0]);
2548 MMX(d).c[5]=SaturatedSignedWordToSignedByte(s.s[1]);
2549 MMX(d).c[6]=SaturatedSignedWordToSignedByte(s.s[2]);
2550 MMX(d).c[7]=SaturatedSignedWordToSignedByte(s.s[3]);
2552 CYCLES(1); // TODO: correct cycle count
// PCMPGTB mm, mm/m64: signed greater-than compare of the eight byte lanes.
// Lanes are compared through the signed view c[]; the result is stored through
// b[] as 0xff where destination > source and 0 otherwise.
2555 void I386_OPS_BASE::MMXOP(pcmpgtb_r64_rm64)() // Opcode 0f 64
2559 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2560 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2563 d=(modrm >> 3) & 0x7;
2564 for (c=0;c <= 7;c++)
2565 MMX(d).b[c]=(MMX(d).c[c] > MMX(s).c[c]) ? 0xff : 0;
// memory source (s presumably holds the quadword read from ea)
2568 int d=(modrm >> 3) & 0x7;
2569 UINT32 ea = GetEA(modrm, 0);
2571 for (c=0;c <= 7;c++)
2572 MMX(d).b[c]=(MMX(d).c[c] > s.c[c]) ? 0xff : 0;
2574 CYCLES(1); // TODO: correct cycle count
// PCMPGTW mm, mm/m64: signed greater-than compare of the four 16-bit lanes.
// Lanes are compared through the signed view s[]; the result is stored through
// w[] as 0xffff where destination > source and 0 otherwise.
2577 void I386_OPS_BASE::MMXOP(pcmpgtw_r64_rm64)() // Opcode 0f 65
2581 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2582 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2585 d=(modrm >> 3) & 0x7;
2586 for (c=0;c <= 3;c++)
2587 MMX(d).w[c]=(MMX(d).s[c] > MMX(s).s[c]) ? 0xffff : 0;
// memory source (s presumably holds the quadword read from ea)
2590 int d=(modrm >> 3) & 0x7;
2591 UINT32 ea = GetEA(modrm, 0);
2593 for (c=0;c <= 3;c++)
2594 MMX(d).w[c]=(MMX(d).s[c] > s.s[c]) ? 0xffff : 0;
2596 CYCLES(1); // TODO: correct cycle count
// PCMPGTD mm, mm/m64: signed greater-than compare of the two 32-bit lanes.
// Lanes are compared through the signed view i[]; the result is stored through
// d[] as 0xffffffff where destination > source and 0 otherwise.
2599 void I386_OPS_BASE::MMXOP(pcmpgtd_r64_rm64)() // Opcode 0f 66
2603 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2604 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2607 d=(modrm >> 3) & 0x7;
2608 for (c=0;c <= 1;c++)
2609 MMX(d).d[c]=(MMX(d).i[c] > MMX(s).i[c]) ? 0xffffffff : 0;
// memory source (s presumably holds the quadword read from ea)
2612 int d=(modrm >> 3) & 0x7;
2613 UINT32 ea = GetEA(modrm, 0);
2615 for (c=0;c <= 1;c++)
2616 MMX(d).d[c]=(MMX(d).i[c] > s.i[c]) ? 0xffffffff : 0;
2618 CYCLES(1); // TODO: correct cycle count
// PACKUSWB mm, mm/m64: narrows eight signed 16-bit words to unsigned bytes
// through SaturatedSignedWordToUnsignedByte (presumably a clamp to 0..255 -
// confirm against the helper). Bytes b[0..3] come from the destination's
// words and b[4..7] from the source's words.
2621 void I386_OPS_BASE::MMXOP(packuswb_r64_rm64)() // Opcode 0f 67
2624 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source; ds and sd are presumably copies of the
// destination and source registers taken before any byte is written - confirm
2625 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2629 d=(modrm >> 3) & 0x7;
2632 MMX(d).b[0]=SaturatedSignedWordToUnsignedByte(ds.s[0]);
2633 MMX(d).b[1]=SaturatedSignedWordToUnsignedByte(ds.s[1]);
2634 MMX(d).b[2]=SaturatedSignedWordToUnsignedByte(ds.s[2]);
2635 MMX(d).b[3]=SaturatedSignedWordToUnsignedByte(ds.s[3]);
2636 MMX(d).b[4]=SaturatedSignedWordToUnsignedByte(sd.s[0]);
2637 MMX(d).b[5]=SaturatedSignedWordToUnsignedByte(sd.s[1]);
2638 MMX(d).b[6]=SaturatedSignedWordToUnsignedByte(sd.s[2]);
2639 MMX(d).b[7]=SaturatedSignedWordToUnsignedByte(sd.s[3]);
// memory source: t is presumably a copy of the destination and s the quadword read from ea - confirm
2642 int d=(modrm >> 3) & 0x7;
2643 UINT32 ea = GetEA(modrm, 0);
2646 MMX(d).b[0]=SaturatedSignedWordToUnsignedByte(t.s[0]);
2647 MMX(d).b[1]=SaturatedSignedWordToUnsignedByte(t.s[1]);
2648 MMX(d).b[2]=SaturatedSignedWordToUnsignedByte(t.s[2]);
2649 MMX(d).b[3]=SaturatedSignedWordToUnsignedByte(t.s[3]);
2650 MMX(d).b[4]=SaturatedSignedWordToUnsignedByte(s.s[0]);
2651 MMX(d).b[5]=SaturatedSignedWordToUnsignedByte(s.s[1]);
2652 MMX(d).b[6]=SaturatedSignedWordToUnsignedByte(s.s[2]);
2653 MMX(d).b[7]=SaturatedSignedWordToUnsignedByte(s.s[3]);
2655 CYCLES(1); // TODO: correct cycle count
// PUNPCKHBW mm, mm/m64: interleaves the high four bytes (b[4..7]) of
// destination and source. Even result bytes come from the destination and odd
// result bytes from the source.
2658 void I386_OPS_BASE::MMXOP(punpckhbw_r64_rm64)() // Opcode 0f 68
2661 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2662 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2665 d=(modrm >> 3) & 0x7;
// each high byte is read before the store that overwrites it, so the in-place order below is safe
2666 MMX(d).b[0]=MMX(d).b[4];
2667 MMX(d).b[1]=MMX(s).b[4];
2668 MMX(d).b[2]=MMX(d).b[5];
2669 MMX(d).b[3]=MMX(s).b[5];
2670 MMX(d).b[4]=MMX(d).b[6];
2671 MMX(d).b[5]=MMX(s).b[6];
2672 MMX(d).b[6]=MMX(d).b[7];
2673 MMX(d).b[7]=MMX(s).b[7];
// memory source: the odd bytes are presumably filled from the quadword read at ea - confirm
2676 int d=(modrm >> 3) & 0x7;
2677 UINT32 ea = GetEA(modrm, 0);
2679 MMX(d).b[0]=MMX(d).b[4];
2681 MMX(d).b[2]=MMX(d).b[5];
2683 MMX(d).b[4]=MMX(d).b[6];
2685 MMX(d).b[6]=MMX(d).b[7];
2688 CYCLES(1); // TODO: correct cycle count
// PUNPCKHWD mm, mm/m64: interleaves the high two 16-bit words (w[2..3]) of
// destination and source. Even result words come from the destination and odd
// result words from the source.
2691 void I386_OPS_BASE::MMXOP(punpckhwd_r64_rm64)() // Opcode 0f 69
2694 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2695 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2698 d=(modrm >> 3) & 0x7;
// each high word is read before the store that overwrites it, so the in-place order below is safe
2699 MMX(d).w[0]=MMX(d).w[2];
2700 MMX(d).w[1]=MMX(s).w[2];
2701 MMX(d).w[2]=MMX(d).w[3];
2702 MMX(d).w[3]=MMX(s).w[3];
// memory source: the odd words are presumably filled from the quadword read at ea - confirm
2705 int d=(modrm >> 3) & 0x7;
2706 UINT32 ea = GetEA(modrm, 0);
2708 MMX(d).w[0]=MMX(d).w[2];
2710 MMX(d).w[2]=MMX(d).w[3];
2713 CYCLES(1); // TODO: correct cycle count
// PUNPCKHDQ mm, mm/m64: moves the destination's high dword d[1] down to d[0]
// and places the source's high dword in d[1].
2716 void I386_OPS_BASE::MMXOP(punpckhdq_r64_rm64)() // Opcode 0f 6a
2719 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (s is presumably the register index from ModRM bits 2..0)
2720 if( modrm >= 0xc0 ) {
// d = destination register index (ModRM bits 5..3)
2723 d=(modrm >> 3) & 0x7;
2724 MMX(d).d[0]=MMX(d).d[1];
2725 MMX(d).d[1]=MMX(s).d[1];
// memory source: d[1] is presumably filled from the quadword read at ea - confirm
2728 int d=(modrm >> 3) & 0x7;
2729 UINT32 ea = GetEA(modrm, 0);
2731 MMX(d).d[0]=MMX(d).d[1];
2734 CYCLES(1); // TODO: correct cycle count
// PACKSSDW mm, mm/m64: narrows four signed dwords to signed 16-bit words
// through SaturatedSignedDwordToSignedWord (presumably a clamp to the signed
// 16-bit range - confirm against the helper). The memory path below shows the
// source dwords landing in s[2..3]; t1/t2 presumably hold the destination's
// original dwords and, on the register path, t3/t4 the source's - confirm.
2737 void I386_OPS_BASE::MMXOP(packssdw_r64_rm64)() // Opcode 0f 6b
2740 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source
2741 if( modrm >= 0xc0 ) {
// temporaries let all four inputs be captured before the destination is overwritten
2743 INT32 t1, t2, t3, t4;
// d = destination register index (ModRM bits 5..3)
2745 d=(modrm >> 3) & 0x7;
2750 MMX(d).s[0] = SaturatedSignedDwordToSignedWord(t1);
2751 MMX(d).s[1] = SaturatedSignedDwordToSignedWord(t2);
2752 MMX(d).s[2] = SaturatedSignedDwordToSignedWord(t3);
2753 MMX(d).s[3] = SaturatedSignedDwordToSignedWord(t4);
// memory source (s presumably holds the quadword read from ea)
2758 int d=(modrm >> 3) & 0x7;
2759 UINT32 ea = GetEA(modrm, 0);
2763 MMX(d).s[0] = SaturatedSignedDwordToSignedWord(t1);
2764 MMX(d).s[1] = SaturatedSignedDwordToSignedWord(t2);
2765 MMX(d).s[2] = SaturatedSignedDwordToSignedWord(s.i[0]);
2766 MMX(d).s[3] = SaturatedSignedDwordToSignedWord(s.i[1]);
2768 CYCLES(1); // TODO: correct cycle count
// Opcode group 0f ae: dispatches on the byte that follows the opcode.
// 0xf8, 0xf0 and 0xe8 are the fence instructions and only consume one cycle
// here. Memory forms (byte < 0xc0) are selected by bits 5..3 of that byte:
// 2 loads MXCSR, 3 stores MXCSR, 7 is CLFLUSH. Everything else is reported
// through report_invalid_modrm.
2771 void I386_OPS_BASE::SSEOP(group_0fae)() // Opcode 0f ae
2773 UINT8 modm = FETCH();
2774 if( modm == 0xf8 ) {
// only the SFENCE path logs a message
2775 logerror("Unemulated SFENCE opcode called\n");
2776 CYCLES(1); // sfence instruction
2777 } else if( modm == 0xf0 ) {
2778 CYCLES(1); // mfence instruction
2779 } else if( modm == 0xe8 ) {
2780 CYCLES(1); // lfence instruction
2781 } else if( modm < 0xc0 ) {
// sub-opcode = bits 5..3 of the ModRM byte
2783 switch ( (modm & 0x38) >> 3 )
2785 case 2: // ldmxcsr m32
2786 ea = GetEA(modm, 0);
2787 cpustate->mxcsr = READ32(ea);
2789 case 3: // stmxcsr m32
2790 ea = GetEA(modm, 0);
2791 WRITE32(ea, cpustate->mxcsr);
2793 case 7: // clflush m8
// the operand is decoded and its address discarded; no other effect is visible here
2794 GetNonTranslatedEA(modm, NULL);
// unhandled memory sub-opcode
2797 report_invalid_modrm( "sse_group_0fae", modm);
// unhandled register form
2800 report_invalid_modrm( "sse_group_0fae", modm);
// CVTTPS2DQ xmm, xmm/m128: converts the four single-precision lanes f[0..3]
// of the source to signed 32-bit integers i[0..3] in the XMM register named
// by ModRM bits 5..3. The C++ (INT32) cast truncates toward zero.
2804 void I386_OPS_BASE::SSEOP(cvttps2dq_r128_rm128)() // Opcode f3 0f 5b
2806 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
2807 if( modrm >= 0xc0 ) {
2808 XMM((modrm >> 3) & 0x7).i[0]=(INT32)XMM(modrm & 0x7).f[0];
2809 XMM((modrm >> 3) & 0x7).i[1]=(INT32)XMM(modrm & 0x7).f[1];
2810 XMM((modrm >> 3) & 0x7).i[2]=(INT32)XMM(modrm & 0x7).f[2];
2811 XMM((modrm >> 3) & 0x7).i[3]=(INT32)XMM(modrm & 0x7).f[3];
// memory source (src presumably holds the 128-bit value read from ea)
2814 UINT32 ea = GetEA(modrm, 0);
2816 XMM((modrm >> 3) & 0x7).i[0]=(INT32)src.f[0];
2817 XMM((modrm >> 3) & 0x7).i[1]=(INT32)src.f[1];
2818 XMM((modrm >> 3) & 0x7).i[2]=(INT32)src.f[2];
2819 XMM((modrm >> 3) & 0x7).i[3]=(INT32)src.f[3];
2821 CYCLES(1); // TODO: correct cycle count
// CVTSS2SD xmm, xmm/m32: widens the low single-precision value of the source
// to double precision and stores it in f64[0] of the XMM register named by
// ModRM bits 5..3.
2824 void I386_OPS_BASE::SSEOP(cvtss2sd_r128_r128m32)() // Opcode f3 0f 5a
2826 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
2827 if( modrm >= 0xc0 ) {
2828 XMM((modrm >> 3) & 0x7).f64[0] = XMM(modrm & 0x7).f[0];
// memory source: the dword at ea is loaded as raw bits and then read back through the float view
2831 UINT32 ea = GetEA(modrm, 0);
2832 s.d[0] = READ32(ea);
2833 XMM((modrm >> 3) & 0x7).f64[0] = s.f[0];
2835 CYCLES(1); // TODO: correct cycle count
// CVTTSS2SI r32, xmm/m32: converts the low single-precision value of the
// source to a signed 32-bit integer (the C++ cast truncates toward zero) and
// stores it in the general register selected by the ModRM byte.
2838 void I386_OPS_BASE::SSEOP(cvttss2si_r32_r128m32)() // Opcode f3 0f 2c
2841 UINT8 modrm = FETCH(); // get modrm byte
2842 if( modrm >= 0xc0 ) { // if bits 7-6 are 11 the source is a xmm register (low doubleword)
// NOTE(review): this is the only handler nearby that adjusts the lane index
// with NATIVE_ENDIAN_VALUE_LE_BE; its siblings index f[0] directly - confirm which is intended.
2843 src = (INT32)XMM(modrm & 0x7).f[0^NATIVE_ENDIAN_VALUE_LE_BE(0,1)];
2844 } else { // otherwise is a memory address
// the dword at ea is loaded as raw bits and then read back through the float view
2846 UINT32 ea = GetEA(modrm, 0);
2847 t.d[0] = READ32(ea);
2848 src = (INT32)t.f[0];
2850 STORE_REG32(modrm, (UINT32)src);
2851 CYCLES(1); // TODO: correct cycle count
// CVTSS2SI r32, xmm/m32: converts the low single-precision value of the
// source to a signed 32-bit integer and stores it in the general register
// selected by the ModRM byte.
// NOTE(review): the conversion is a plain C++ cast, which truncates toward
// zero, the same as the truncating variant of this opcode; the MXCSR rounding
// mode is not consulted here - confirm whether that is acceptable.
2854 void I386_OPS_BASE::SSEOP(cvtss2si_r32_r128m32)() // Opcode f3 0f 2d
2857 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
2858 if( modrm >= 0xc0 ) {
2859 src = (INT32)XMM(modrm & 0x7).f[0];
// memory source: the dword at ea is loaded as raw bits and then read back through the float view
2862 UINT32 ea = GetEA(modrm, 0);
2863 t.d[0] = READ32(ea);
2864 src = (INT32)t.f[0];
2866 STORE_REG32(modrm, (UINT32)src);
2867 CYCLES(1); // TODO: correct cycle count
// CVTSI2SS xmm, r/m32: converts a signed 32-bit integer to single precision
// and stores it in f[0] of the XMM register named by ModRM bits 5..3.
2870 void I386_OPS_BASE::SSEOP(cvtsi2ss_r128_rm32)() // Opcode f3 0f 2a
2872 UINT8 modrm = FETCH();
// modrm >= 0xc0: the source is the general register selected by the ModRM byte
2873 if( modrm >= 0xc0 ) {
// the (INT32) cast makes the value signed before the implicit conversion to float
2874 XMM((modrm >> 3) & 0x7).f[0] = (INT32)LOAD_RM32(modrm);
// otherwise the source is the dword at the effective address
2876 UINT32 ea = GetEA(modrm, 0);
2877 XMM((modrm >> 3) & 0x7).f[0] = (INT32)READ32(ea);
2879 CYCLES(1); // TODO: correct cycle count
// CVTPI2PS xmm, mm/m64: converts two signed 32-bit integers i[0..1] to single
// precision and stores them in f[0..1] of the XMM register named by ModRM
// bits 5..3.
2882 void I386_OPS_BASE::SSEOP(cvtpi2ps_r128_rm64)() // Opcode 0f 2a
2884 UINT8 modrm = FETCH();
// modrm >= 0xc0: the source is the MMX register in ModRM bits 2..0
2886 if( modrm >= 0xc0 ) {
2887 XMM((modrm >> 3) & 0x7).f[0] = (float)MMX(modrm & 0x7).i[0];
2888 XMM((modrm >> 3) & 0x7).f[1] = (float)MMX(modrm & 0x7).i[1];
// memory source (r presumably holds the quadword read from ea)
2891 UINT32 ea = GetEA(modrm, 0);
2893 XMM((modrm >> 3) & 0x7).f[0] = (float)r.i[0];
2894 XMM((modrm >> 3) & 0x7).f[1] = (float)r.i[1];
2896 CYCLES(1); // TODO: correct cycle count
// CVTTPS2PI mm, xmm/m64: converts the two low single-precision values of the
// source to signed 32-bit integers (the implicit float-to-int conversion
// truncates toward zero) and stores them in i[0..1] of the MMX register named
// by ModRM bits 5..3.
2899 void I386_OPS_BASE::SSEOP(cvttps2pi_r64_r128m64)() // Opcode 0f 2c
2901 UINT8 modrm = FETCH();
// modrm >= 0xc0: the source is the XMM register in ModRM bits 2..0
2903 if( modrm >= 0xc0 ) {
2904 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0];
2905 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1];
// memory source (r presumably holds the value read from ea)
2908 UINT32 ea = GetEA(modrm, 0);
// The destination is an MMX register on both paths. The memory path used to
// write an XMM register instead, leaving the MMX destination untouched and
// corrupting an unrelated XMM register.
2910 MMX((modrm >> 3) & 0x7).i[0] = r.f[0];
2911 MMX((modrm >> 3) & 0x7).i[1] = r.f[1];
2913 CYCLES(1); // TODO: correct cycle count
// CVTPS2PI mm, xmm/m64: converts the two low single-precision values of the
// source to signed 32-bit integers and stores them in i[0..1] of the MMX
// register named by ModRM bits 5..3.
// NOTE(review): the implicit float-to-int conversion truncates toward zero;
// the MXCSR rounding mode is not consulted here - confirm whether that is acceptable.
2916 void I386_OPS_BASE::SSEOP(cvtps2pi_r64_r128m64)() // Opcode 0f 2d
2918 UINT8 modrm = FETCH();
// modrm >= 0xc0: the source is the XMM register in ModRM bits 2..0
2920 if( modrm >= 0xc0 ) {
2921 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0];
2922 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1];
// memory source (r presumably holds the value read from ea)
2925 UINT32 ea = GetEA(modrm, 0);
// The destination is an MMX register on both paths. The memory path used to
// write an XMM register instead, leaving the MMX destination untouched and
// corrupting an unrelated XMM register.
2927 MMX((modrm >> 3) & 0x7).i[0] = r.f[0];
2928 MMX((modrm >> 3) & 0x7).i[1] = r.f[1];
2930 CYCLES(1); // TODO: correct cycle count
// CVTPS2PD xmm, xmm/m64: widens the two low single-precision values f[0..1]
// of the source to double precision in f64[0..1] of the XMM register named by
// ModRM bits 5..3.
2933 void I386_OPS_BASE::SSEOP(cvtps2pd_r128_r128m64)() // Opcode 0f 5a
2935 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
2936 if( modrm >= 0xc0 ) {
// NOTE(review): when source and destination are the same register, writing f64[0] may
// overwrite f[1] before it is read (if f[] and f64[] share storage) - confirm.
2937 XMM((modrm >> 3) & 0x7).f64[0] = (double)XMM(modrm & 0x7).f[0];
2938 XMM((modrm >> 3) & 0x7).f64[1] = (double)XMM(modrm & 0x7).f[1];
// memory source (r presumably holds the value read from ea)
2941 UINT32 ea = GetEA(modrm, 0);
2943 XMM((modrm >> 3) & 0x7).f64[0] = (double)r.f[0];
2944 XMM((modrm >> 3) & 0x7).f64[1] = (double)r.f[1];
2946 CYCLES(1); // TODO: correct cycle count
// CVTDQ2PS xmm, xmm/m128: converts the four signed 32-bit lanes i[0..3] of
// the source to single precision in f[0..3] of the XMM register named by
// ModRM bits 5..3.
2949 void I386_OPS_BASE::SSEOP(cvtdq2ps_r128_rm128)() // Opcode 0f 5b
2951 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
2952 if( modrm >= 0xc0 ) {
2953 XMM((modrm >> 3) & 0x7).f[0] = (float)XMM(modrm & 0x7).i[0];
2954 XMM((modrm >> 3) & 0x7).f[1] = (float)XMM(modrm & 0x7).i[1];
2955 XMM((modrm >> 3) & 0x7).f[2] = (float)XMM(modrm & 0x7).i[2];
2956 XMM((modrm >> 3) & 0x7).f[3] = (float)XMM(modrm & 0x7).i[3];
// memory source (r presumably holds the 128-bit value read from ea)
2959 UINT32 ea = GetEA(modrm, 0);
2961 XMM((modrm >> 3) & 0x7).f[0] = (float)r.i[0];
2962 XMM((modrm >> 3) & 0x7).f[1] = (float)r.i[1];
2963 XMM((modrm >> 3) & 0x7).f[2] = (float)r.i[2];
2964 XMM((modrm >> 3) & 0x7).f[3] = (float)r.i[3];
2966 CYCLES(1); // TODO: correct cycle count
// CVTDQ2PD xmm, xmm/m64: converts the two low signed 32-bit lanes i[0..1] of
// the source to double precision in f64[0..1] of the XMM register named by
// ModRM bits 5..3.
2969 void I386_OPS_BASE::SSEOP(cvtdq2pd_r128_r128m64)() // Opcode f3 0f e6
2971 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
2972 if( modrm >= 0xc0 ) {
// NOTE(review): when source and destination are the same register, writing f64[0] may
// overwrite i[1] before it is read (if i[] and f64[] share storage) - confirm.
2973 XMM((modrm >> 3) & 0x7).f64[0] = (double)XMM(modrm & 0x7).i[0];
2974 XMM((modrm >> 3) & 0x7).f64[1] = (double)XMM(modrm & 0x7).i[1];
// memory source (s presumably holds the value read from ea)
2977 UINT32 ea = GetEA(modrm, 0);
2979 XMM((modrm >> 3) & 0x7).f64[0] = (double)s.i[0];
2980 XMM((modrm >> 3) & 0x7).f64[1] = (double)s.i[1];
2982 CYCLES(1); // TODO: correct cycle count
// MOVSS xmm, xmm/m32: copies a 32-bit value into the low dword d[0] of the
// XMM register named by ModRM bits 5..3. The lines shown write only d[0].
2985 void I386_OPS_BASE::SSEOP(movss_r128_rm128)() // Opcode f3 0f 10
2987 UINT8 modrm = FETCH();
// modrm >= 0xc0: register source (ModRM bits 2..0)
2988 if( modrm >= 0xc0 ) {
2989 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[0];
// otherwise the source is the dword at the effective address
2991 UINT32 ea = GetEA(modrm, 0);
2992 XMM((modrm >> 3) & 0x7).d[0] = READ32(ea);
2994 CYCLES(1); // TODO: correct cycle count
// MOVSS xmm/m32, xmm: stores dword 0 of the XMM register named by the modrm
// reg field. The register form replaces only dword 0 of the target register;
// the memory form writes 32 bits at the effective address.
2997 void I386_OPS_BASE::SSEOP(movss_rm128_r128)() // Opcode f3 0f 11
2999 UINT8 modrm = FETCH();
// register target
3000 if( modrm >= 0xc0 ) {
3001 XMM(modrm & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0];
// memory target
3003 UINT32 ea = GetEA(modrm, 0);
3004 WRITE32(ea, XMM((modrm >> 3) & 0x7).d[0]);
3006 CYCLES(1); // TODO: correct cycle count
// MOVSLDUP: duplicates the even dwords of the source, so the destination
// becomes { src[0], src[0], src[2], src[2] }.
3009 void I386_OPS_BASE::SSEOP(movsldup_r128_rm128)() // Opcode f3 0f 12
3011 UINT8 modrm = FETCH();
// register source
3012 if( modrm >= 0xc0 ) {
3013 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[0];
3014 XMM((modrm >> 3) & 0x7).d[1] = XMM(modrm & 0x7).d[0];
3015 XMM((modrm >> 3) & 0x7).d[2] = XMM(modrm & 0x7).d[2];
3016 XMM((modrm >> 3) & 0x7).d[3] = XMM(modrm & 0x7).d[2];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3019 UINT32 ea = GetEA(modrm, 0);
3021 XMM((modrm >> 3) & 0x7).d[0] = src.d[0];
3022 XMM((modrm >> 3) & 0x7).d[1] = src.d[0];
3023 XMM((modrm >> 3) & 0x7).d[2] = src.d[2];
3024 XMM((modrm >> 3) & 0x7).d[3] = src.d[2];
3026 CYCLES(1); // TODO: correct cycle count
// MOVSHDUP: duplicates the odd dwords of the source, so the destination
// becomes { src[1], src[1], src[3], src[3] }.
3029 void I386_OPS_BASE::SSEOP(movshdup_r128_rm128)() // Opcode f3 0f 16
3031 UINT8 modrm = FETCH();
// register source
3032 if( modrm >= 0xc0 ) {
3033 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[1];
3034 XMM((modrm >> 3) & 0x7).d[1] = XMM(modrm & 0x7).d[1];
3035 XMM((modrm >> 3) & 0x7).d[2] = XMM(modrm & 0x7).d[3];
3036 XMM((modrm >> 3) & 0x7).d[3] = XMM(modrm & 0x7).d[3];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3039 UINT32 ea = GetEA(modrm, 0);
3041 XMM((modrm >> 3) & 0x7).d[0] = src.d[1];
3042 XMM((modrm >> 3) & 0x7).d[1] = src.d[1];
3043 XMM((modrm >> 3) & 0x7).d[2] = src.d[3];
3044 XMM((modrm >> 3) & 0x7).d[3] = src.d[3];
3046 CYCLES(1); // TODO: correct cycle count
// MOVAPS xmm, xmm/m128: loads the destination XMM register (modrm reg field)
// with a whole 128-bit value, either copied from another XMM register or read
// from memory through READXMM. No alignment check is performed here.
3049 void I386_OPS_BASE::SSEOP(movaps_r128_rm128)() // Opcode 0f 28
3051 UINT8 modrm = FETCH();
// register source: whole-register copy
3052 if( modrm >= 0xc0 ) {
3053 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
// memory source
3055 UINT32 ea = GetEA(modrm, 0);
3056 READXMM( ea, XMM((modrm >> 3) & 0x7));
3058 CYCLES(1); // TODO: correct cycle count
// MOVAPS xmm/m128, xmm: stores the whole XMM register named by the modrm reg
// field into another XMM register or to memory through WRITEXMM.
// No alignment check is performed here.
3061 void I386_OPS_BASE::SSEOP(movaps_rm128_r128)() // Opcode 0f 29
3063 UINT8 modrm = FETCH();
// register target: whole-register copy
3064 if( modrm >= 0xc0 ) {
3065 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
// memory target
3067 UINT32 ea = GetEA(modrm, 0);
3068 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
3070 CYCLES(1); // TODO: correct cycle count
// MOVUPS xmm, xmm/m128: loads the destination XMM register (modrm reg field)
// with a whole 128-bit value from another XMM register or from memory.
3073 void I386_OPS_BASE::SSEOP(movups_r128_rm128)() // Opcode 0f 10
3075 UINT8 modrm = FETCH();
// register source: whole-register copy
3076 if( modrm >= 0xc0 ) {
3077 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
// memory source
3079 UINT32 ea = GetEA(modrm, 0);
3080 READXMM( ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3082 CYCLES(1); // TODO: correct cycle count
// MOVUPD xmm, xmm/m128: loads the destination XMM register (modrm reg field)
// with a whole 128-bit value from another XMM register or from memory.
// The data movement is identical to the movups load handler.
3085 void I386_OPS_BASE::SSEOP(movupd_r128_rm128)() // Opcode 66 0f 10
3087 UINT8 modrm = FETCH();
// register source: whole-register copy
3088 if( modrm >= 0xc0 ) {
3089 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
// memory source
3091 UINT32 ea = GetEA(modrm, 0);
3092 READXMM( ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3094 CYCLES(1); // TODO: correct cycle count
// MOVUPS xmm/m128, xmm: stores the whole XMM register named by the modrm reg
// field into another XMM register or to memory.
3097 void I386_OPS_BASE::SSEOP(movups_rm128_r128)() // Opcode 0f 11
3099 UINT8 modrm = FETCH();
// register target: whole-register copy
3100 if( modrm >= 0xc0 ) {
3101 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
// memory target
3103 UINT32 ea = GetEA(modrm, 0);
3104 WRITEXMM( ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3106 CYCLES(1); // TODO: correct cycle count
// MOVUPD xmm/m128, xmm: stores the whole XMM register named by the modrm reg
// field into another XMM register or to memory.
// The data movement is identical to the movups store handler.
3109 void I386_OPS_BASE::SSEOP(movupd_rm128_r128)() // Opcode 66 0f 11
3111 UINT8 modrm = FETCH();
// register target: whole-register copy
3112 if( modrm >= 0xc0 ) {
3113 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
// memory target
3115 UINT32 ea = GetEA(modrm, 0);
3116 WRITEXMM( ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3118 CYCLES(1); // TODO: correct cycle count
// Opcode 0f 12 covers two instructions, told apart by the modrm byte:
//  - register form (MOVHLPS): the high quadword of the source register is
//    copied to the low quadword of the destination;
//  - memory form (MOVLPS): the low quadword of the destination is loaded from
//    memory via READXMM_LO64.
// The other half of the destination is not written in either case.
3121 void I386_OPS_BASE::SSEOP(movlps_r128_m64)() // Opcode 0f 12
3123 UINT8 modrm = FETCH();
3124 if( modrm >= 0xc0 ) {
3126 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[1];
3127 CYCLES(1); // TODO: correct cycle count
3130 UINT32 ea = GetEA(modrm, 0);
3131 READXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
3132 CYCLES(1); // TODO: correct cycle count
// MOVLPD xmm, m64: loads the low quadword of the destination XMM register
// (modrm reg field) from memory via READXMM_LO64.
// With a register operand (modrm >= 0xc0) no data is moved; only the cycle
// count is charged.
3136 void I386_OPS_BASE::SSEOP(movlpd_r128_m64)() // Opcode 66 0f 12
3138 UINT8 modrm = FETCH();
3139 if( modrm >= 0xc0 ) {
3140 CYCLES(1); // TODO: correct cycle count
3143 UINT32 ea = GetEA(modrm, 0);
3144 READXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
3145 CYCLES(1); // TODO: correct cycle count
// MOVLPS m64, xmm: stores the low quadword of the XMM register named by the
// modrm reg field to memory via WRITEXMM_LO64.
// With a register operand (modrm >= 0xc0) no data is moved; only the cycle
// count is charged.
3149 void I386_OPS_BASE::SSEOP(movlps_m64_r128)() // Opcode 0f 13
3151 UINT8 modrm = FETCH();
3152 if( modrm >= 0xc0 ) {
3153 // unsupported by cpu
3154 CYCLES(1); // TODO: correct cycle count
3156 UINT32 ea = GetEA(modrm, 0);
3157 WRITEXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
3158 CYCLES(1); // TODO: correct cycle count
// MOVLPD m64, xmm: stores the low quadword of the XMM register named by the
// modrm reg field to memory via WRITEXMM_LO64.
// With a register operand (modrm >= 0xc0) no data is moved; only the cycle
// count is charged.
3162 void I386_OPS_BASE::SSEOP(movlpd_m64_r128)() // Opcode 66 0f 13
3164 UINT8 modrm = FETCH();
3165 if( modrm >= 0xc0 ) {
3166 // unsupported by cpu
3167 CYCLES(1); // TODO: correct cycle count
3169 UINT32 ea = GetEA(modrm, 0);
3170 WRITEXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
3171 CYCLES(1); // TODO: correct cycle count
// Opcode 0f 16 covers two instructions, told apart by the modrm byte:
//  - register form (MOVLHPS): the low quadword of the source register is
//    copied to the high quadword of the destination;
//  - memory form (MOVHPS): the high quadword of the destination is loaded
//    from memory via READXMM_HI64.
// The other half of the destination is not written in either case.
3175 void I386_OPS_BASE::SSEOP(movhps_r128_m64)() // Opcode 0f 16
3177 UINT8 modrm = FETCH();
3178 if( modrm >= 0xc0 ) {
3180 XMM((modrm >> 3) & 0x7).q[1] = XMM(modrm & 0x7).q[0];
3181 CYCLES(1); // TODO: correct cycle count
3184 UINT32 ea = GetEA(modrm, 0);
3185 READXMM_HI64( ea, XMM((modrm >> 3) & 0x7));
3186 CYCLES(1); // TODO: correct cycle count
// MOVHPD xmm, m64: loads the high quadword of the destination XMM register
// (modrm reg field) from memory via READXMM_HI64.
// With a register operand (modrm >= 0xc0) no data is moved; only the cycle
// count is charged.
3190 void I386_OPS_BASE::SSEOP(movhpd_r128_m64)() // Opcode 66 0f 16
3192 UINT8 modrm = FETCH();
3193 if( modrm >= 0xc0 ) {
3194 // unsupported by cpu
3195 CYCLES(1); // TODO: correct cycle count
3198 UINT32 ea = GetEA(modrm, 0);
3199 READXMM_HI64( ea, XMM((modrm >> 3) & 0x7));
3200 CYCLES(1); // TODO: correct cycle count
// MOVHPS m64, xmm: stores the high quadword of the XMM register named by the
// modrm reg field to memory via WRITEXMM_HI64.
// With a register operand (modrm >= 0xc0) no data is moved; only the cycle
// count is charged.
3204 void I386_OPS_BASE::SSEOP(movhps_m64_r128)() // Opcode 0f 17
3206 UINT8 modrm = FETCH();
3207 if( modrm >= 0xc0 ) {
3208 // unsupported by cpu
3209 CYCLES(1); // TODO: correct cycle count
3211 UINT32 ea = GetEA(modrm, 0);
3212 WRITEXMM_HI64( ea, XMM((modrm >> 3) & 0x7));
3213 CYCLES(1); // TODO: correct cycle count
// MOVHPD m64, xmm: stores the high quadword of the XMM register named by the
// modrm reg field to memory via WRITEXMM_HI64.
// With a register operand (modrm >= 0xc0) no data is moved; only the cycle
// count is charged.
3217 void I386_OPS_BASE::SSEOP(movhpd_m64_r128)() // Opcode 66 0f 17
3219 UINT8 modrm = FETCH();
3220 if( modrm >= 0xc0 ) {
3221 // unsupported by cpu
3222 CYCLES(1); // TODO: correct cycle count
3224 UINT32 ea = GetEA(modrm, 0);
3225 WRITEXMM_HI64( ea, XMM((modrm >> 3) & 0x7));
3226 CYCLES(1); // TODO: correct cycle count
// MOVNTPS m128, xmm: stores the whole XMM register named by the modrm reg
// field to memory via WRITEXMM. Because no cache is emulated, the store is
// performed exactly like an ordinary 128-bit write.
// With a register operand (modrm >= 0xc0) no data is moved; only the cycle
// count is charged.
3230 void I386_OPS_BASE::SSEOP(movntps_m128_r128)() // Opcode 0f 2b
3232 UINT8 modrm = FETCH();
3233 if( modrm >= 0xc0 ) {
3234 // unsupported by cpu
3235 CYCLES(1); // TODO: correct cycle count
3237 // since cache is not implemented
3238 UINT32 ea = GetEA(modrm, 0);
3239 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
3240 CYCLES(1); // TODO: correct cycle count
// MOVMSKPS r16, xmm: gathers the top bit (bit 31) of each of the four dwords
// of the source XMM register into bits 0..3 of `b` and stores the mask into
// the 16-bit register selected by modrm. Only the register form does any work.
3244 void I386_OPS_BASE::SSEOP(movmskps_r16_r128)() // Opcode 0f 50
3246 UINT8 modrm = FETCH();
3247 if( modrm >= 0xc0 ) {
// each shift drops bit 31 of lane k onto bit k; the AND isolates that bit
3249 b=(XMM(modrm & 0x7).d[0] >> 31) & 1;
3250 b=b | ((XMM(modrm & 0x7).d[1] >> 30) & 2);
3251 b=b | ((XMM(modrm & 0x7).d[2] >> 29) & 4);
3252 b=b | ((XMM(modrm & 0x7).d[3] >> 28) & 8);
3253 STORE_REG16(modrm, b);
3255 CYCLES(1); // TODO: correct cycle count
// MOVMSKPS r32, xmm: gathers the top bit (bit 31) of each of the four dwords
// of the source XMM register into bits 0..3 of `b` and stores the mask into
// the 32-bit register selected by modrm. Only the register form does any work.
3258 void I386_OPS_BASE::SSEOP(movmskps_r32_r128)() // Opcode 0f 50
3260 UINT8 modrm = FETCH();
3261 if( modrm >= 0xc0 ) {
// each shift drops bit 31 of lane k onto bit k; the AND isolates that bit
3263 b=(XMM(modrm & 0x7).d[0] >> 31) & 1;
3264 b=b | ((XMM(modrm & 0x7).d[1] >> 30) & 2);
3265 b=b | ((XMM(modrm & 0x7).d[2] >> 29) & 4);
3266 b=b | ((XMM(modrm & 0x7).d[3] >> 28) & 8);
3267 STORE_REG32(modrm, b);
3269 CYCLES(1); // TODO: correct cycle count
// MOVMSKPD r32, xmm: gathers the top bit (bit 63) of each of the two
// quadwords of the source XMM register into bits 0..1 of `b` and stores the
// mask into the 32-bit register selected by modrm. Only the register form
// does any work.
3272 void I386_OPS_BASE::SSEOP(movmskpd_r32_r128)() // Opcode 66 0f 50
3274 UINT8 modrm = FETCH();
3275 if( modrm >= 0xc0 ) {
3277 b=(XMM(modrm & 0x7).q[0] >> 63) & 1;
3278 b=b | ((XMM(modrm & 0x7).q[1] >> 62) & 2);
3279 STORE_REG32(modrm, b);
3281 CYCLES(1); // TODO: correct cycle count
// MOVQ2DQ xmm, mm: copies the 64-bit MMX register named by the low modrm bits
// into the low quadword of the destination XMM register and clears the high
// quadword. Only the register form does any work.
3284 void I386_OPS_BASE::SSEOP(movq2dq_r128_r64)() // Opcode f3 0f d6
3287 UINT8 modrm = FETCH();
3288 if( modrm >= 0xc0 ) {
3289 XMM((modrm >> 3) & 0x7).q[0] = MMX(modrm & 7).q;
3290 XMM((modrm >> 3) & 0x7).q[1] = 0;
3292 CYCLES(1); // TODO: correct cycle count
// MOVDQU xmm, xmm/m128: loads the destination XMM register (modrm reg field)
// with 128 bits, copied quadword by quadword from another XMM register or
// read from memory via READXMM.
3295 void I386_OPS_BASE::SSEOP(movdqu_r128_rm128)() // Opcode f3 0f 6f
3298 UINT8 modrm = FETCH();
// register source
3299 if( modrm >= 0xc0 ) {
3300 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
3301 XMM((modrm >> 3) & 0x7).q[1] = XMM(modrm & 0x7).q[1];
// memory source
3303 UINT32 ea = GetEA(modrm, 0);
3304 READXMM( ea, XMM((modrm >> 3) & 0x7));
3306 CYCLES(1); // TODO: correct cycle count
// MOVDQU xmm/m128, xmm: stores the 128 bits of the XMM register named by the
// modrm reg field, either quadword by quadword into another XMM register or
// to memory via WRITEXMM.
3309 void I386_OPS_BASE::SSEOP(movdqu_rm128_r128)() // Opcode f3 0f 7f
3312 UINT8 modrm = FETCH();
// register target
3313 if( modrm >= 0xc0 ) {
3314 XMM(modrm & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0];
3315 XMM(modrm & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1];
// memory target
3317 UINT32 ea = GetEA(modrm, 0);
3318 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
3320 CYCLES(1); // TODO: correct cycle count
// MOVD xmm, r/m32: loads dword 0 of the destination XMM register (modrm reg
// field) from a 32-bit general register or from memory, then clears the rest
// of the register (dword 1 and the whole high quadword).
3323 void I386_OPS_BASE::SSEOP(movd_m128_rm32)() // Opcode 66 0f 6e
3325 UINT8 modrm = FETCH();
// register source
3326 if (modrm >= 0xc0) {
3327 XMM((modrm >> 3) & 0x7).d[0] = LOAD_RM32(modrm);
// memory source
3330 UINT32 ea = GetEA(modrm, 0);
3331 XMM((modrm >> 3) & 0x7).d[0] = READ32(ea);
// zero-extend to 128 bits in both forms
3333 XMM((modrm >> 3) & 0x7).d[1] = 0;
3334 XMM((modrm >> 3) & 0x7).q[1] = 0;
3335 CYCLES(1); // TODO: correct cycle count
// MOVDQA xmm, xmm/m128: loads the destination XMM register (modrm reg field)
// with 128 bits, copied quadword by quadword from another XMM register or
// read from memory via READXMM. No alignment check is performed here.
3338 void I386_OPS_BASE::SSEOP(movdqa_m128_rm128)() // Opcode 66 0f 6f
3340 UINT8 modrm = FETCH();
// register source
3341 if (modrm >= 0xc0) {
3342 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
3343 XMM((modrm >> 3) & 0x7).q[1] = XMM(modrm & 0x7).q[1];
// memory source
3346 UINT32 ea = GetEA(modrm, 0);
3347 READXMM( ea, XMM((modrm >> 3) & 0x7));
3349 CYCLES(1); // TODO: correct cycle count
// MOVQ xmm, xmm/m64: loads the low quadword of the destination XMM register
// (modrm reg field) from another XMM register or from memory (READ64) and
// clears the high quadword in both forms.
3352 void I386_OPS_BASE::SSEOP(movq_r128_r128m64)() // Opcode f3 0f 7e
3355 UINT8 modrm = FETCH();
// register source
3356 if( modrm >= 0xc0 ) {
3357 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
3358 XMM((modrm >> 3) & 0x7).q[1] = 0;
// memory source
3360 UINT32 ea = GetEA(modrm, 0);
3361 XMM((modrm >> 3) & 0x7).q[0] = READ64( ea);
3362 XMM((modrm >> 3) & 0x7).q[1] = 0;
3364 CYCLES(1); // TODO: correct cycle count
// MOVD r/m32, xmm: stores dword 0 of the XMM register named by the modrm reg
// field into a 32-bit general register (STORE_RM32) or to memory (WRITE32).
3367 void I386_OPS_BASE::SSEOP(movd_rm32_r128)() // Opcode 66 0f 7e
3369 UINT8 modrm = FETCH();
// register target
3370 if (modrm >= 0xc0) {
3371 STORE_RM32(modrm, XMM((modrm >> 3) & 0x7).d[0]);
// memory target
3374 UINT32 ea = GetEA(modrm, 0);
3375 WRITE32(ea, XMM((modrm >> 3) & 0x7).d[0]);
3377 CYCLES(1); // TODO: correct cycle count
// MOVDQA xmm/m128, xmm: stores the 128 bits of the XMM register named by the
// modrm reg field, either quadword by quadword into another XMM register or
// to memory via WRITEXMM. No alignment check is performed here.
3380 void I386_OPS_BASE::SSEOP(movdqa_rm128_r128)() // Opcode 66 0f 7f
3382 UINT8 modrm = FETCH();
// register target
3383 if (modrm >= 0xc0) {
3384 XMM(modrm & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0];
3385 XMM(modrm & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1];
// memory target
3388 UINT32 ea = GetEA(modrm, 0);
3389 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
3391 CYCLES(1); // TODO: correct cycle count
// PMOVMSKB r16, mm: gathers the top bit (bit 7) of each of the eight bytes of
// the source MMX register into bits 0..7 of `b` and stores the mask into the
// 16-bit register selected by modrm. Only the register form does any work.
3394 void I386_OPS_BASE::SSEOP(pmovmskb_r16_r64)() // Opcode 0f d7
3397 UINT8 modrm = FETCH();
3398 if( modrm >= 0xc0 ) {
// byte k is shifted right by (7 - k) so that its bit 7 lands on bit k
3400 b=(MMX(modrm & 0x7).b[0] >> 7) & 1;
3401 b=b | ((MMX(modrm & 0x7).b[1] >> 6) & 2);
3402 b=b | ((MMX(modrm & 0x7).b[2] >> 5) & 4);
3403 b=b | ((MMX(modrm & 0x7).b[3] >> 4) & 8);
3404 b=b | ((MMX(modrm & 0x7).b[4] >> 3) & 16);
3405 b=b | ((MMX(modrm & 0x7).b[5] >> 2) & 32);
3406 b=b | ((MMX(modrm & 0x7).b[6] >> 1) & 64);
3407 b=b | ((MMX(modrm & 0x7).b[7] >> 0) & 128);
3408 STORE_REG16(modrm, b);
3410 CYCLES(1); // TODO: correct cycle count
// PMOVMSKB r32, mm: gathers the top bit (bit 7) of each of the eight bytes of
// the source MMX register into bits 0..7 of `b` and stores the mask into the
// 32-bit register selected by modrm. Only the register form does any work.
3413 void I386_OPS_BASE::SSEOP(pmovmskb_r32_r64)() // Opcode 0f d7
3416 UINT8 modrm = FETCH();
3417 if( modrm >= 0xc0 ) {
// byte k is shifted right by (7 - k) so that its bit 7 lands on bit k
3419 b=(MMX(modrm & 0x7).b[0] >> 7) & 1;
3420 b=b | ((MMX(modrm & 0x7).b[1] >> 6) & 2);
3421 b=b | ((MMX(modrm & 0x7).b[2] >> 5) & 4);
3422 b=b | ((MMX(modrm & 0x7).b[3] >> 4) & 8);
3423 b=b | ((MMX(modrm & 0x7).b[4] >> 3) & 16);
3424 b=b | ((MMX(modrm & 0x7).b[5] >> 2) & 32);
3425 b=b | ((MMX(modrm & 0x7).b[6] >> 1) & 64);
3426 b=b | ((MMX(modrm & 0x7).b[7] >> 0) & 128);
3427 STORE_REG32(modrm, b);
3429 CYCLES(1); // TODO: correct cycle count
// PMOVMSKB r32, xmm: gathers the top bit (bit 7) of each of the sixteen bytes
// of the source XMM register into bits 0..15 of `b` and stores the mask into
// the 32-bit register selected by modrm. Only the register form does any work.
3432 void I386_OPS_BASE::SSEOP(pmovmskb_r32_r128)() // Opcode 66 0f d7
3434 UINT8 modrm = FETCH();
3435 if( modrm >= 0xc0 ) {
// bytes 0..7: shift right by (7 - k) so that bit 7 lands on bit k
3437 b=(XMM(modrm & 0x7).b[0] >> 7) & 1;
3438 b=b | ((XMM(modrm & 0x7).b[1] >> 6) & 2);
3439 b=b | ((XMM(modrm & 0x7).b[2] >> 5) & 4);
3440 b=b | ((XMM(modrm & 0x7).b[3] >> 4) & 8);
3441 b=b | ((XMM(modrm & 0x7).b[4] >> 3) & 16);
3442 b=b | ((XMM(modrm & 0x7).b[5] >> 2) & 32);
3443 b=b | ((XMM(modrm & 0x7).b[6] >> 1) & 64);
3444 b=b | ((XMM(modrm & 0x7).b[7] >> 0) & 128);
// bytes 8..15: shift left by (k - 7) so that bit 7 lands on bit k
3445 b=b | ((XMM(modrm & 0x7).b[8] << 1) & 256);
3446 b=b | ((XMM(modrm & 0x7).b[9] << 2) & 512);
3447 b=b | ((XMM(modrm & 0x7).b[10] << 3) & 1024);
3448 b=b | ((XMM(modrm & 0x7).b[11] << 4) & 2048);
3449 b=b | ((XMM(modrm & 0x7).b[12] << 5) & 4096);
3450 b=b | ((XMM(modrm & 0x7).b[13] << 6) & 8192);
3451 b=b | ((XMM(modrm & 0x7).b[14] << 7) & 16384);
3452 b=b | ((XMM(modrm & 0x7).b[15] << 8) & 32768);
3453 STORE_REG32(modrm, b);
3455 CYCLES(1); // TODO: correct cycle count
// XORPS: bitwise XOR of the destination XMM register (modrm reg field) with
// the source operand, done on the four dword lanes; the result replaces the
// destination.
3458 void I386_OPS_BASE::SSEOP(xorps)() // Opcode 0f 57
3460 UINT8 modrm = FETCH();
// register source
3461 if( modrm >= 0xc0 ) {
3462 XMM((modrm >> 3) & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0] ^ XMM(modrm & 0x7).d[0];
3463 XMM((modrm >> 3) & 0x7).d[1] = XMM((modrm >> 3) & 0x7).d[1] ^ XMM(modrm & 0x7).d[1];
3464 XMM((modrm >> 3) & 0x7).d[2] = XMM((modrm >> 3) & 0x7).d[2] ^ XMM(modrm & 0x7).d[2];
3465 XMM((modrm >> 3) & 0x7).d[3] = XMM((modrm >> 3) & 0x7).d[3] ^ XMM(modrm & 0x7).d[3];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3468 UINT32 ea = GetEA(modrm, 0);
3470 XMM((modrm >> 3) & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0] ^ src.d[0];
3471 XMM((modrm >> 3) & 0x7).d[1] = XMM((modrm >> 3) & 0x7).d[1] ^ src.d[1];
3472 XMM((modrm >> 3) & 0x7).d[2] = XMM((modrm >> 3) & 0x7).d[2] ^ src.d[2];
3473 XMM((modrm >> 3) & 0x7).d[3] = XMM((modrm >> 3) & 0x7).d[3] ^ src.d[3];
3475 CYCLES(1); // TODO: correct cycle count
// XORPD: bitwise XOR of the destination XMM register (modrm reg field) with
// the source operand, done on the two quadword lanes; the result replaces
// the destination.
3478 void I386_OPS_BASE::SSEOP(xorpd_r128_rm128)() // Opcode 66 0f 57
3480 UINT8 modrm = FETCH();
// register source
3481 if( modrm >= 0xc0 ) {
3482 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] ^ XMM(modrm & 0x7).q[0];
3483 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] ^ XMM(modrm & 0x7).q[1];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3486 UINT32 ea = GetEA(modrm, 0);
3488 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] ^ src.q[0];
3489 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] ^ src.q[1];
3491 CYCLES(1); // TODO: correct cycle count
// ADDPS: adds the four single-precision lanes of the source operand to the
// matching lanes of the destination XMM register (modrm reg field) using host
// float arithmetic.
3494 void I386_OPS_BASE::SSEOP(addps)() // Opcode 0f 58
3496 UINT8 modrm = FETCH();
// register source
3497 if( modrm >= 0xc0 ) {
3498 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + XMM(modrm & 0x7).f[0];
3499 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] + XMM(modrm & 0x7).f[1];
3500 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] + XMM(modrm & 0x7).f[2];
3501 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] + XMM(modrm & 0x7).f[3];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3504 UINT32 ea = GetEA(modrm, 0);
3506 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + src.f[0];
3507 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] + src.f[1];
3508 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] + src.f[2];
3509 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] + src.f[3];
3511 CYCLES(1); // TODO: correct cycle count
// SQRTPS: stores the square root of each of the four single-precision lanes
// of the source operand into the destination XMM register (modrm reg field),
// computed with the host sqrt().
3514 void I386_OPS_BASE::SSEOP(sqrtps_r128_rm128)() // Opcode 0f 51
3516 UINT8 modrm = FETCH();
// register source
3517 if( modrm >= 0xc0 ) {
3518 XMM((modrm >> 3) & 0x7).f[0] = sqrt(XMM(modrm & 0x7).f[0]);
3519 XMM((modrm >> 3) & 0x7).f[1] = sqrt(XMM(modrm & 0x7).f[1]);
3520 XMM((modrm >> 3) & 0x7).f[2] = sqrt(XMM(modrm & 0x7).f[2]);
3521 XMM((modrm >> 3) & 0x7).f[3] = sqrt(XMM(modrm & 0x7).f[3]);
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3524 UINT32 ea = GetEA(modrm, 0);
3526 XMM((modrm >> 3) & 0x7).f[0] = sqrt(src.f[0]);
3527 XMM((modrm >> 3) & 0x7).f[1] = sqrt(src.f[1]);
3528 XMM((modrm >> 3) & 0x7).f[2] = sqrt(src.f[2]);
3529 XMM((modrm >> 3) & 0x7).f[3] = sqrt(src.f[3]);
3531 CYCLES(1); // TODO: correct cycle count
// RSQRTPS: stores the reciprocal square root of each of the four
// single-precision lanes of the source operand into the destination XMM
// register (modrm reg field). It is evaluated as 1.0 / sqrt(x) on the host
// rather than as a reduced-precision estimate.
3534 void I386_OPS_BASE::SSEOP(rsqrtps_r128_rm128)() // Opcode 0f 52
3536 UINT8 modrm = FETCH();
// register source
3537 if( modrm >= 0xc0 ) {
3538 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(XMM(modrm & 0x7).f[0]);
3539 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / sqrt(XMM(modrm & 0x7).f[1]);
3540 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / sqrt(XMM(modrm & 0x7).f[2]);
3541 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / sqrt(XMM(modrm & 0x7).f[3]);
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3544 UINT32 ea = GetEA(modrm, 0);
3546 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(src.f[0]);
3547 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / sqrt(src.f[1]);
3548 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / sqrt(src.f[2]);
3549 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / sqrt(src.f[3]);
3551 CYCLES(1); // TODO: correct cycle count
// RCPPS: stores the reciprocal of each of the four single-precision lanes of
// the source operand into the destination XMM register (modrm reg field).
// It is evaluated as 1.0 / x on the host rather than as a reduced-precision
// estimate.
3554 void I386_OPS_BASE::SSEOP(rcpps_r128_rm128)() // Opcode 0f 53
3556 UINT8 modrm = FETCH();
// register source
3557 if( modrm >= 0xc0 ) {
3558 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / XMM(modrm & 0x7).f[0];
3559 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / XMM(modrm & 0x7).f[1];
3560 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / XMM(modrm & 0x7).f[2];
3561 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / XMM(modrm & 0x7).f[3];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3564 UINT32 ea = GetEA(modrm, 0);
3566 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / src.f[0];
3567 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / src.f[1];
3568 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / src.f[2];
3569 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / src.f[3];
3571 CYCLES(1); // TODO: correct cycle count
// ANDPS: bitwise AND of the destination XMM register (modrm reg field) with
// the source operand, done on the two quadword halves; the result replaces
// the destination.
3574 void I386_OPS_BASE::SSEOP(andps_r128_rm128)() // Opcode 0f 54
3576 UINT8 modrm = FETCH();
// register source
3577 if( modrm >= 0xc0 ) {
3578 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & XMM(modrm & 0x7).q[0];
3579 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & XMM(modrm & 0x7).q[1];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3582 UINT32 ea = GetEA(modrm, 0);
3584 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & src.q[0];
3585 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & src.q[1];
3587 CYCLES(1); // TODO: correct cycle count
// ANDPD: bitwise AND of the destination XMM register (modrm reg field) with
// the source operand, done on the two quadword halves; the result replaces
// the destination. The data operation is identical to the andps handler.
3590 void I386_OPS_BASE::SSEOP(andpd_r128_rm128)() // Opcode 66 0f 54
3592 UINT8 modrm = FETCH();
// register source
3593 if( modrm >= 0xc0 ) {
3594 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & XMM(modrm & 0x7).q[0];
3595 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & XMM(modrm & 0x7).q[1];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3598 UINT32 ea = GetEA(modrm, 0);
3600 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & src.q[0];
3601 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & src.q[1];
3603 CYCLES(1); // TODO: correct cycle count
// ANDNPS: the destination XMM register (modrm reg field) is complemented and
// then ANDed with the source operand, i.e. dest = ~dest & src, on the two
// quadword halves.
3606 void I386_OPS_BASE::SSEOP(andnps_r128_rm128)() // Opcode 0f 55
3608 UINT8 modrm = FETCH();
// register source
3609 if( modrm >= 0xc0 ) {
3610 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & XMM(modrm & 0x7).q[0];
3611 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & XMM(modrm & 0x7).q[1];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3614 UINT32 ea = GetEA(modrm, 0);
3616 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & src.q[0];
3617 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & src.q[1];
3619 CYCLES(1); // TODO: correct cycle count
// ANDNPD: the destination XMM register (modrm reg field) is complemented and
// then ANDed with the source operand, i.e. dest = ~dest & src, on the two
// quadword halves. The data operation is identical to the andnps handler.
3622 void I386_OPS_BASE::SSEOP(andnpd_r128_rm128)() // Opcode 66 0f 55
3624 UINT8 modrm = FETCH();
// register source
3625 if( modrm >= 0xc0 ) {
3626 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & XMM(modrm & 0x7).q[0];
3627 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & XMM(modrm & 0x7).q[1];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3630 UINT32 ea = GetEA(modrm, 0);
3632 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & src.q[0];
3633 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & src.q[1];
3635 CYCLES(1); // TODO: correct cycle count
// ORPS: bitwise OR of the destination XMM register (modrm reg field) with the
// source operand, done on the two quadword halves; the result replaces the
// destination.
3638 void I386_OPS_BASE::SSEOP(orps_r128_rm128)() // Opcode 0f 56
3640 UINT8 modrm = FETCH();
// register source
3641 if( modrm >= 0xc0 ) {
3642 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | XMM(modrm & 0x7).q[0];
3643 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | XMM(modrm & 0x7).q[1];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3646 UINT32 ea = GetEA(modrm, 0);
3648 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | src.q[0];
3649 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | src.q[1];
3651 CYCLES(1); // TODO: correct cycle count
// ORPD: bitwise OR of the destination XMM register (modrm reg field) with the
// source operand, done on the two quadword halves; the result replaces the
// destination. The data operation is identical to the orps handler.
3654 void I386_OPS_BASE::SSEOP(orpd_r128_rm128)() // Opcode 66 0f 56
3656 UINT8 modrm = FETCH();
// register source
3657 if( modrm >= 0xc0 ) {
3658 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | XMM(modrm & 0x7).q[0];
3659 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | XMM(modrm & 0x7).q[1];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3662 UINT32 ea = GetEA(modrm, 0);
3664 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | src.q[0];
3665 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | src.q[1];
3667 CYCLES(1); // TODO: correct cycle count
// MULPS: multiplies the four single-precision lanes of the destination XMM
// register (modrm reg field) by the matching lanes of the source operand
// using host float arithmetic.
3670 void I386_OPS_BASE::SSEOP(mulps)() // Opcode 0f 59
3672 UINT8 modrm = FETCH();
// register source
3673 if( modrm >= 0xc0 ) {
3674 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * XMM(modrm & 0x7).f[0];
3675 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] * XMM(modrm & 0x7).f[1];
3676 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] * XMM(modrm & 0x7).f[2];
3677 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] * XMM(modrm & 0x7).f[3];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3680 UINT32 ea = GetEA(modrm, 0);
3682 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * src.f[0];
3683 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] * src.f[1];
3684 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] * src.f[2];
3685 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] * src.f[3];
3687 CYCLES(1); // TODO: correct cycle count
// SUBPS: subtracts the four single-precision lanes of the source operand from
// the matching lanes of the destination XMM register (modrm reg field) using
// host float arithmetic.
3690 void I386_OPS_BASE::SSEOP(subps)() // Opcode 0f 5c
3692 UINT8 modrm = FETCH();
// register source
3693 if( modrm >= 0xc0 ) {
3694 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0];
3695 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] - XMM(modrm & 0x7).f[1];
3696 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] - XMM(modrm & 0x7).f[2];
3697 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] - XMM(modrm & 0x7).f[3];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3700 UINT32 ea = GetEA(modrm, 0);
3702 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - src.f[0];
3703 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] - src.f[1];
3704 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] - src.f[2];
3705 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] - src.f[3];
3707 CYCLES(1); // TODO: correct cycle count
// Per-lane helper called by the minps handler with (destination lane, source
// lane). Presumably returns the smaller of the two values -- TODO confirm
// which operand wins for NaN or equal-zero inputs.
3710 INLINE float sse_min_single(float src1, float src2)
3712 /*if ((src1 == 0) && (src2 == 0))
// Double-precision counterpart of sse_min_single. Presumably returns the
// smaller of the two values -- TODO confirm which operand wins for NaN or
// equal-zero inputs.
3723 INLINE double sse_min_double(double src1, double src2)
3725 /*if ((src1 == 0) && (src2 == 0))
// MINPS: replaces each of the four single-precision lanes of the destination
// XMM register (modrm reg field) with sse_min_single(destination lane,
// source lane).
3736 void I386_OPS_BASE::SSEOP(minps)() // Opcode 0f 5d
3738 UINT8 modrm = FETCH();
// register source
3739 if( modrm >= 0xc0 ) {
3740 XMM((modrm >> 3) & 0x7).f[0] = sse_min_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
3741 XMM((modrm >> 3) & 0x7).f[1] = sse_min_single(XMM((modrm >> 3) & 0x7).f[1], XMM(modrm & 0x7).f[1]);
3742 XMM((modrm >> 3) & 0x7).f[2] = sse_min_single(XMM((modrm >> 3) & 0x7).f[2], XMM(modrm & 0x7).f[2]);
3743 XMM((modrm >> 3) & 0x7).f[3] = sse_min_single(XMM((modrm >> 3) & 0x7).f[3], XMM(modrm & 0x7).f[3]);
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3746 UINT32 ea = GetEA(modrm, 0);
3748 XMM((modrm >> 3) & 0x7).f[0] = sse_min_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3749 XMM((modrm >> 3) & 0x7).f[1] = sse_min_single(XMM((modrm >> 3) & 0x7).f[1], src.f[1]);
3750 XMM((modrm >> 3) & 0x7).f[2] = sse_min_single(XMM((modrm >> 3) & 0x7).f[2], src.f[2]);
3751 XMM((modrm >> 3) & 0x7).f[3] = sse_min_single(XMM((modrm >> 3) & 0x7).f[3], src.f[3]);
3753 CYCLES(1); // TODO: correct cycle count
// DIVPS: divides the four single-precision lanes of the destination XMM
// register (modrm reg field) by the matching lanes of the source operand
// using host float arithmetic. No divide-by-zero handling is done here.
3756 void I386_OPS_BASE::SSEOP(divps)() // Opcode 0f 5e
3758 UINT8 modrm = FETCH();
// register source
3759 if( modrm >= 0xc0 ) {
3760 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / XMM(modrm & 0x7).f[0];
3761 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] / XMM(modrm & 0x7).f[1];
3762 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] / XMM(modrm & 0x7).f[2];
3763 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] / XMM(modrm & 0x7).f[3];
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3766 UINT32 ea = GetEA(modrm, 0);
3768 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / src.f[0];
3769 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] / src.f[1];
3770 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] / src.f[2];
3771 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] / src.f[3];
3773 CYCLES(1); // TODO: correct cycle count
// Per-lane helper called by the maxps and maxss handlers with (destination
// lane, source lane). Presumably returns the larger of the two values --
// TODO confirm which operand wins for NaN or equal-zero inputs.
3776 INLINE float sse_max_single(float src1, float src2)
3778 /*if ((src1 == 0) && (src2 == 0))
// Double-precision counterpart of sse_max_single. Presumably returns the
// larger of the two values -- TODO confirm which operand wins for NaN or
// equal-zero inputs.
3789 INLINE double sse_max_double(double src1, double src2)
3791 /*if ((src1 == 0) && (src2 == 0))
// MAXPS: replaces each of the four single-precision lanes of the destination
// XMM register (modrm reg field) with sse_max_single(destination lane,
// source lane).
3802 void I386_OPS_BASE::SSEOP(maxps)() // Opcode 0f 5f
3804 UINT8 modrm = FETCH();
// register source
3805 if( modrm >= 0xc0 ) {
3806 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
3807 XMM((modrm >> 3) & 0x7).f[1] = sse_max_single(XMM((modrm >> 3) & 0x7).f[1], XMM(modrm & 0x7).f[1]);
3808 XMM((modrm >> 3) & 0x7).f[2] = sse_max_single(XMM((modrm >> 3) & 0x7).f[2], XMM(modrm & 0x7).f[2]);
3809 XMM((modrm >> 3) & 0x7).f[3] = sse_max_single(XMM((modrm >> 3) & 0x7).f[3], XMM(modrm & 0x7).f[3]);
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
3812 UINT32 ea = GetEA(modrm, 0);
3814 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3815 XMM((modrm >> 3) & 0x7).f[1] = sse_max_single(XMM((modrm >> 3) & 0x7).f[1], src.f[1]);
3816 XMM((modrm >> 3) & 0x7).f[2] = sse_max_single(XMM((modrm >> 3) & 0x7).f[2], src.f[2]);
3817 XMM((modrm >> 3) & 0x7).f[3] = sse_max_single(XMM((modrm >> 3) & 0x7).f[3], src.f[3]);
3819 CYCLES(1); // TODO: correct cycle count
// MAXSS: replaces lane 0 of the destination XMM register (modrm reg field)
// with sse_max_single(destination lane 0, source lane 0). The other lanes of
// the destination are not written.
3822 void I386_OPS_BASE::SSEOP(maxss_r128_r128m32)() // Opcode f3 0f 5f
3824 UINT8 modrm = FETCH();
// register source
3825 if( modrm >= 0xc0 ) {
3826 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
// memory source: only 32 bits are read, into the scratch value `src`
3829 UINT32 ea = GetEA(modrm, 0);
3830 src.d[0]=READ32(ea);
3831 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3833 CYCLES(1); // TODO: correct cycle count
// ADDSS: adds lane 0 of the source operand to lane 0 of the destination XMM
// register (modrm reg field). The other lanes of the destination are not
// written.
3836 void I386_OPS_BASE::SSEOP(addss)() // Opcode f3 0f 58
3838 UINT8 modrm = FETCH();
// register source
3839 if( modrm >= 0xc0 ) {
3840 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + XMM(modrm & 0x7).f[0];
// memory source; `src` presumably holds the operand read at `ea` -- TODO confirm how many bytes are read
3843 UINT32 ea = GetEA(modrm, 0);
3845 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + src.f[0];
3847 CYCLES(1); // TODO: correct cycle count
// SUBSS: subtracts lane 0 of the source operand from lane 0 of the
// destination XMM register (modrm reg field). The other lanes of the
// destination are not written.
3850 void I386_OPS_BASE::SSEOP(subss)() // Opcode f3 0f 5c
3852 UINT8 modrm = FETCH();
// register source
3853 if( modrm >= 0xc0 ) {
3854 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0];
// memory source; `src` presumably holds the operand read at `ea` -- TODO confirm how many bytes are read
3857 UINT32 ea = GetEA(modrm, 0);
3859 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - src.f[0];
3861 CYCLES(1); // TODO: correct cycle count
// MULSS: multiplies lane 0 of the destination XMM register (modrm reg field)
// by lane 0 of the source operand. The other lanes of the destination are
// not written.
3864 void I386_OPS_BASE::SSEOP(mulss)() // Opcode f3 0f 59
3866 UINT8 modrm = FETCH();
// register source
3867 if( modrm >= 0xc0 ) {
3868 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * XMM(modrm & 0x7).f[0];
// memory source; `src` presumably holds the operand read at `ea` -- TODO confirm how many bytes are read
3871 UINT32 ea = GetEA(modrm, 0);
3873 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * src.f[0];
3875 CYCLES(1); // TODO: correct cycle count
// DIVSS: divides lane 0 of the destination XMM register (modrm reg field) by
// lane 0 of the source operand. The other lanes of the destination are not
// written, and no divide-by-zero handling is done here.
3878 void I386_OPS_BASE::SSEOP(divss)() // Opcode f3 0f 5e
3880 UINT8 modrm = FETCH();
// register source
3881 if( modrm >= 0xc0 ) {
3882 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / XMM(modrm & 0x7).f[0];
// memory source; `src` presumably holds the operand read at `ea` -- TODO confirm how many bytes are read
3885 UINT32 ea = GetEA(modrm, 0);
3887 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / src.f[0];
3889 CYCLES(1); // TODO: correct cycle count
// RCPSS: stores the reciprocal of lane 0 of the source operand into lane 0 of
// the destination XMM register (modrm reg field), evaluated as 1.0 / x on the
// host. The other lanes of the destination are not written.
3892 void I386_OPS_BASE::SSEOP(rcpss_r128_r128m32)() // Opcode f3 0f 53
3894 UINT8 modrm = FETCH();
// register source
3895 if( modrm >= 0xc0 ) {
3896 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / XMM(modrm & 0x7).f[0];
// memory source; `s` presumably holds the operand read at `ea` -- TODO confirm
3899 UINT32 ea = GetEA(modrm, 0);
3901 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / s.f[0];
3903 CYCLES(1); // TODO: correct cycle count
// SQRTSS: stores the square root of lane 0 of the source operand into lane 0
// of the destination XMM register (modrm reg field), computed with the host
// sqrt(). The other lanes of the destination are not written.
3906 void I386_OPS_BASE::SSEOP(sqrtss_r128_r128m32)() // Opcode f3 0f 51
3908 UINT8 modrm = FETCH();
// register source
3909 if( modrm >= 0xc0 ) {
3910 XMM((modrm >> 3) & 0x7).f[0] = sqrt(XMM(modrm & 0x7).f[0]);
// memory source; `s` presumably holds the operand read at `ea` -- TODO confirm
3913 UINT32 ea = GetEA(modrm, 0);
3915 XMM((modrm >> 3) & 0x7).f[0] = sqrt(s.f[0]);
3917 CYCLES(1); // TODO: correct cycle count
// RSQRTSS: stores the reciprocal square root of lane 0 of the source operand
// into lane 0 of the destination XMM register (modrm reg field), evaluated as
// 1.0 / sqrt(x) on the host. The other lanes of the destination are not
// written.
3920 void I386_OPS_BASE::SSEOP(rsqrtss_r128_r128m32)() // Opcode f3 0f 52
3922 UINT8 modrm = FETCH();
// register source
3923 if( modrm >= 0xc0 ) {
3924 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(XMM(modrm & 0x7).f[0]);
// memory source; `s` presumably holds the operand read at `ea` -- TODO confirm
3927 UINT32 ea = GetEA(modrm, 0);
3929 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(s.f[0]);
3931 CYCLES(1); // TODO: correct cycle count
// MINSS: lane 0 of the destination XMM register (modrm reg field) keeps its
// value when it compares strictly less than lane 0 of the source; otherwise
// (including when the comparison is false because of a NaN) it takes the
// source value. The comparison is written inline rather than through
// sse_min_single. The other lanes of the destination are not written.
3934 void I386_OPS_BASE::SSEOP(minss_r128_r128m32)() // Opcode f3 0f 5d
3936 UINT8 modrm = FETCH();
// register source
3937 if( modrm >= 0xc0 ) {
3938 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] < XMM(modrm & 0x7).f[0] ? XMM((modrm >> 3) & 0x7).f[0] : XMM(modrm & 0x7).f[0];
// memory source: only 32 bits are read, into the scratch value `s`
3941 UINT32 ea = GetEA(modrm, 0);
3942 s.d[0] = READ32(ea);
3943 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] < s.f[0] ? XMM((modrm >> 3) & 0x7).f[0] : s.f[0];
3945 CYCLES(1); // TODO: correct cycle count
// COMISS: compares dword 0 of the destination XMM register (`a`, taken as a
// raw 32-bit pattern) with dword 0 of the source operand (`b`) using the
// softfloat predicates: a NaN test first, then equality, then less-than.
// NOTE(review): the action taken for each outcome is presumably an EFLAGS
// update (ZF/PF/CF) -- confirm.
3948 void I386_OPS_BASE::SSEOP(comiss_r128_r128m32)() // Opcode 0f 2f
3951 UINT8 modrm = FETCH();
// register source
3952 if( modrm >= 0xc0 ) {
3953 a = XMM((modrm >> 3) & 0x7).d[0];
3954 b = XMM(modrm & 0x7).d[0];
// memory source; `b` is presumably read from `ea` -- TODO confirm
3957 UINT32 ea = GetEA(modrm, 0);
3959 a = XMM((modrm >> 3) & 0x7).d[0];
3965 if (float32_is_nan(a) || float32_is_nan(b))
3976 if (float32_eq(a, b))
3978 if (float32_lt(a, b))
3981 // should generate exception when at least one of the operands is either QNaN or SNaN
3982 CYCLES(1); // TODO: correct cycle count
// COMISD: compares quadword 0 of the destination XMM register (`a`, taken as
// a raw 64-bit pattern) with quadword 0 of the source operand (`b`) using the
// softfloat predicates: a NaN test first, then equality, then less-than.
// NOTE(review): the action taken for each outcome is presumably an EFLAGS
// update (ZF/PF/CF) -- confirm.
3985 void I386_OPS_BASE::SSEOP(comisd_r128_r128m64)() // Opcode 66 0f 2f
3988 UINT8 modrm = FETCH();
// register source
3989 if( modrm >= 0xc0 ) {
3990 a = XMM((modrm >> 3) & 0x7).q[0];
3991 b = XMM(modrm & 0x7).q[0];
// memory source; `b` is presumably read from `ea` -- TODO confirm
3994 UINT32 ea = GetEA(modrm, 0);
3996 a = XMM((modrm >> 3) & 0x7).q[0];
4002 if (float64_is_nan(a) || float64_is_nan(b))
4013 if (float64_eq(a, b))
4015 if (float64_lt(a, b))
4018 // should generate exception when at least one of the operands is either QNaN or SNaN
4019 CYCLES(1); // TODO: correct cycle count
// UCOMISS: compares dword 0 of the destination XMM register (`a`, taken as a
// raw 32-bit pattern) with dword 0 of the source operand (`b`) using the
// softfloat predicates: a NaN test first, then equality, then less-than.
// The visible comparison sequence is the same as in the comiss handler.
// NOTE(review): the action taken for each outcome is presumably an EFLAGS
// update (ZF/PF/CF) -- confirm.
4022 void I386_OPS_BASE::SSEOP(ucomiss_r128_r128m32)() // Opcode 0f 2e
4025 UINT8 modrm = FETCH();
// register source
4026 if( modrm >= 0xc0 ) {
4027 a = XMM((modrm >> 3) & 0x7).d[0];
4028 b = XMM(modrm & 0x7).d[0];
// memory source; `b` is presumably read from `ea` -- TODO confirm
4031 UINT32 ea = GetEA(modrm, 0);
4033 a = XMM((modrm >> 3) & 0x7).d[0];
4039 if (float32_is_nan(a) || float32_is_nan(b))
4050 if (float32_eq(a, b))
4052 if (float32_lt(a, b))
4055 // should generate exception when at least one of the operands is SNaN
4056 CYCLES(1); // TODO: correct cycle count
// UCOMISD: compares quadword 0 of the destination XMM register (`a`, taken as
// a raw 64-bit pattern) with quadword 0 of the source operand (`b`) using the
// softfloat predicates: a NaN test first, then equality, then less-than.
// The visible comparison sequence is the same as in the comisd handler.
// NOTE(review): the action taken for each outcome is presumably an EFLAGS
// update (ZF/PF/CF) -- confirm.
4059 void I386_OPS_BASE::SSEOP(ucomisd_r128_r128m64)() // Opcode 66 0f 2e
4062 UINT8 modrm = FETCH();
// register source
4063 if( modrm >= 0xc0 ) {
4064 a = XMM((modrm >> 3) & 0x7).q[0];
4065 b = XMM(modrm & 0x7).q[0];
// memory source; `b` is presumably read from `ea` -- TODO confirm
4068 UINT32 ea = GetEA(modrm, 0);
4070 a = XMM((modrm >> 3) & 0x7).q[0];
4076 if (float64_is_nan(a) || float64_is_nan(b))
4087 if (float64_eq(a, b))
4089 if (float64_lt(a, b))
4092 // should generate exception when at least one of the operands is SNaN
4093 CYCLES(1); // TODO: correct cycle count
// SHUFPS: shuffles dwords into the destination XMM register `d` (modrm reg
// field) under control of the immediate byte `sel`. In the memory form the
// upper two result dwords come from the source operand, picked by m3 and m4
// (presumably two-bit fields of `sel` -- TODO confirm).
// NOTE(review): `sel` is fetched immediately after modrm, before GetEA() runs.
// If GetEA() consumes SIB/displacement bytes from the instruction stream,
// memory forms would read the wrong byte as the immediate -- confirm.
4096 void I386_OPS_BASE::SSEOP(shufps)() // Opcode 0f c6
4098 UINT8 modrm = FETCH();
4099 UINT8 sel = FETCH();
4107 d=(modrm >> 3) & 0x7;
4108 if( modrm >= 0xc0 ) {
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
4121 UINT32 ea = GetEA(modrm, 0);
4127 XMM(d).d[2]=src.d[m3];
4128 XMM(d).d[3]=src.d[m4];
4130 CYCLES(1); // TODO: correct cycle count
// SHUFPD: shuffles quadwords into the destination XMM register `d` (modrm reg
// field) under control of the immediate byte `sel`. In the memory form the
// high result quadword comes from the source operand, picked by m2
// (presumably a one-bit field of `sel` -- TODO confirm).
// NOTE(review): `sel` is fetched immediately after modrm, before GetEA() runs.
// If GetEA() consumes SIB/displacement bytes from the instruction stream,
// memory forms would read the wrong byte as the immediate -- confirm.
4133 void I386_OPS_BASE::SSEOP(shufpd_r128_rm128_i8)() // Opcode 66 0f c6
4135 UINT8 modrm = FETCH();
4136 UINT8 sel = FETCH();
4142 d=(modrm >> 3) & 0x7;
4143 if( modrm >= 0xc0 ) {
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
4152 UINT32 ea = GetEA(modrm, 0);
4156 XMM(d).q[1]=src.q[m2];
4158 CYCLES(1); // TODO: correct cycle count
// UNPCKLPS: interleaves the low dwords of the destination XMM register `d`
// (modrm reg field) with the low dwords of the source operand. In the memory
// form the odd result dwords 1 and 3 are taken from source dwords 0 and 1;
// dword 3 is written before dword 1.
4161 void I386_OPS_BASE::SSEOP(unpcklps_r128_rm128)() // Opcode 0f 14
4163 UINT8 modrm = FETCH();
4165 UINT32 t1, t2, t3, t4;
4167 d=(modrm >> 3) & 0x7;
4168 if( modrm >= 0xc0 ) {
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
4179 UINT32 ea = GetEA(modrm, 0);
4182 XMM(d).d[3]=src.d[1];
4184 XMM(d).d[1]=src.d[0];
4186 CYCLES(1); // TODO: correct cycle count
// UNPCKLPD: the destination XMM register `d` (modrm reg field) keeps its low
// quadword and receives the low quadword of the source operand in its high
// quadword.
4189 void I386_OPS_BASE::SSEOP(unpcklpd_r128_rm128)() // Opcode 66 0f 14
4191 UINT8 modrm = FETCH();
4194 d=(modrm >> 3) & 0x7;
// register source (`s` presumably is modrm & 7 -- TODO confirm)
4195 if( modrm >= 0xc0 ) {
4196 XMM(d).q[1]=XMM(s).q[0];
// self-assignment: has no effect, the low quadword is simply kept
4197 XMM(d).q[0]=XMM(d).q[0];
// memory source; `src` presumably holds the operand read at `ea` -- TODO confirm
4200 UINT32 ea = GetEA(modrm, 0);
4202 XMM(d).q[1]=src.q[0];
// self-assignment: has no effect, the low quadword is simply kept
4203 XMM(d).q[0]=XMM(d).q[0];
4205 CYCLES(1); // TODO: correct cycle count
// UNPCKHPS: interleaves the high dwords of the destination XMM register `d`
// (modrm reg field) with the high dwords of the source operand. In the memory
// form the odd result dwords 1 and 3 are taken from source dwords 2 and 3.
4208 void I386_OPS_BASE::SSEOP(unpckhps_r128_rm128)() // Opcode 0f 15
4210 UINT8 modrm = FETCH();
4212 UINT32 t1, t2, t3, t4;
4214 d=(modrm >> 3) & 0x7;
4215 if( modrm >= 0xc0 ) {
// memory source; `src` presumably holds the 128-bit operand read at `ea` -- TODO confirm
4226 UINT32 ea = GetEA(modrm, 0);
4231 XMM(d).d[1]=src.d[2];
4233 XMM(d).d[3]=src.d[3];
4235 CYCLES(1); // TODO: correct cycle count
// UNPCKHPD: the destination's high qword is moved to its low qword, then its high
// qword is replaced by the source's high qword (register s or src).
4238 void I386_OPS_BASE::SSEOP(unpckhpd_r128_rm128)() // Opcode 66 0f 15
4240 UINT8 modrm = FETCH();
4243 d=(modrm >> 3) & 0x7;
4244 if( modrm >= 0xc0 ) {
// move the old high qword down before it is overwritten on the next line
4245 XMM(d).q[0]=XMM(d).q[1];
4246 XMM(d).q[1]=XMM(s).q[1];
4249 UINT32 ea = GetEA(modrm, 0);
// move the old high qword down before it is overwritten on the next line
4251 XMM(d).q[0]=XMM(d).q[1];
4252 XMM(d).q[1]=src.q[1];
4254 CYCLES(1); // TODO: correct cycle count
// Returns true when the two single-precision operands are ordered, i.e. neither
// op1 nor op2 is a NaN. Used for the "ordered" compare predicate.
INLINE bool sse_issingleordered(float op1, float op2)
{
	// A NaN is the only value that compares unequal to itself, so x != x detects it.
	return !((op1 != op1) || (op2 != op2));
}
// Returns true when the two single-precision operands are unordered, i.e. at least
// one of op1 and op2 is a NaN. Used for the "unordered" compare predicate.
INLINE bool sse_issingleunordered(float op1, float op2)
{
	// A NaN is the only value that compares unequal to itself, so x != x detects it.
	return (op1 != op1) || (op2 != op2);
}
// Returns true when the two double-precision operands are ordered, i.e. neither
// op1 nor op2 is a NaN. Used for the "ordered" compare predicate.
INLINE bool sse_isdoubleordered(double op1, double op2)
{
	// A NaN is the only value that compares unequal to itself, so x != x detects it.
	return !((op1 != op1) || (op2 != op2));
}
// Returns true when the two double-precision operands are unordered, i.e. at least
// one of op1 and op2 is a NaN. Used for the "unordered" compare predicate.
INLINE bool sse_isdoubleunordered(double op1, double op2)
{
	// A NaN is the only value that compares unequal to itself, so x != x detects it.
	return (op1 != op1) || (op2 != op2);
}
// Packed single-precision predicate compare: for each of the four float lanes, writes
// an all-ones (0xffffffff) or all-zero dword mask into d according to the comparison
// of d.f[n] with s.f[n]. Eight groups of four statements follow, one per comparison.
// NOTE(review): d is received by value, so the assignments below modify this
// function's local copy only. Confirm whether the caller's register is expected to be
// updated; that would require passing d by reference (declaration is outside this view).
4281 void I386_OPS_BASE::SSEOP(predicate_compare_single)(UINT8 imm8, XMM_REG d, XMM_REG s)
// d == s
4286 d.d[0]=d.f[0] == s.f[0] ? 0xffffffff : 0;
4287 d.d[1]=d.f[1] == s.f[1] ? 0xffffffff : 0;
4288 d.d[2]=d.f[2] == s.f[2] ? 0xffffffff : 0;
4289 d.d[3]=d.f[3] == s.f[3] ? 0xffffffff : 0;
// d < s
4292 d.d[0]=d.f[0] < s.f[0] ? 0xffffffff : 0;
4293 d.d[1]=d.f[1] < s.f[1] ? 0xffffffff : 0;
4294 d.d[2]=d.f[2] < s.f[2] ? 0xffffffff : 0;
4295 d.d[3]=d.f[3] < s.f[3] ? 0xffffffff : 0;
// d <= s
4298 d.d[0]=d.f[0] <= s.f[0] ? 0xffffffff : 0;
4299 d.d[1]=d.f[1] <= s.f[1] ? 0xffffffff : 0;
4300 d.d[2]=d.f[2] <= s.f[2] ? 0xffffffff : 0;
4301 d.d[3]=d.f[3] <= s.f[3] ? 0xffffffff : 0;
// result of sse_issingleunordered(d, s)
4304 d.d[0]=sse_issingleunordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
4305 d.d[1]=sse_issingleunordered(d.f[1], s.f[1]) ? 0xffffffff : 0;
4306 d.d[2]=sse_issingleunordered(d.f[2], s.f[2]) ? 0xffffffff : 0;
4307 d.d[3]=sse_issingleunordered(d.f[3], s.f[3]) ? 0xffffffff : 0;
// d != s
4310 d.d[0]=d.f[0] != s.f[0] ? 0xffffffff : 0;
4311 d.d[1]=d.f[1] != s.f[1] ? 0xffffffff : 0;
4312 d.d[2]=d.f[2] != s.f[2] ? 0xffffffff : 0;
4313 d.d[3]=d.f[3] != s.f[3] ? 0xffffffff : 0;
// not (d < s): mask is set whenever the < test fails
4316 d.d[0]=d.f[0] < s.f[0] ? 0 : 0xffffffff;
4317 d.d[1]=d.f[1] < s.f[1] ? 0 : 0xffffffff;
4318 d.d[2]=d.f[2] < s.f[2] ? 0 : 0xffffffff;
4319 d.d[3]=d.f[3] < s.f[3] ? 0 : 0xffffffff;
// not (d <= s): mask is set whenever the <= test fails
4322 d.d[0]=d.f[0] <= s.f[0] ? 0 : 0xffffffff;
4323 d.d[1]=d.f[1] <= s.f[1] ? 0 : 0xffffffff;
4324 d.d[2]=d.f[2] <= s.f[2] ? 0 : 0xffffffff;
4325 d.d[3]=d.f[3] <= s.f[3] ? 0 : 0xffffffff;
// result of sse_issingleordered(d, s)
4328 d.d[0]=sse_issingleordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
4329 d.d[1]=sse_issingleordered(d.f[1], s.f[1]) ? 0xffffffff : 0;
4330 d.d[2]=sse_issingleordered(d.f[2], s.f[2]) ? 0xffffffff : 0;
4331 d.d[3]=sse_issingleordered(d.f[3], s.f[3]) ? 0xffffffff : 0;
// Packed double-precision predicate compare: for each of the two f64 lanes, writes an
// all-ones or all-zero qword mask into d according to the comparison of d.f64[n] with
// s.f64[n]. Eight pairs of statements follow, one pair per comparison.
// NOTE(review): d is received by value, so the assignments below modify this
// function's local copy only. Confirm whether the caller's register is expected to be
// updated; that would require passing d by reference (declaration is outside this view).
4336 void I386_OPS_BASE::SSEOP(predicate_compare_double)(UINT8 imm8, XMM_REG d, XMM_REG s)
// d == s
4341 d.q[0]=d.f64[0] == s.f64[0] ? 0xffffffffffffffff : 0;
4342 d.q[1]=d.f64[1] == s.f64[1] ? 0xffffffffffffffff : 0;
// d < s
4345 d.q[0]=d.f64[0] < s.f64[0] ? 0xffffffffffffffff : 0;
4346 d.q[1]=d.f64[1] < s.f64[1] ? 0xffffffffffffffff : 0;
// d <= s
4349 d.q[0]=d.f64[0] <= s.f64[0] ? 0xffffffffffffffff : 0;
4350 d.q[1]=d.f64[1] <= s.f64[1] ? 0xffffffffffffffff : 0;
// result of sse_isdoubleunordered(d, s)
4353 d.q[0]=sse_isdoubleunordered(d.f64[0], s.f64[0]) ? 0xffffffffffffffff : 0;
4354 d.q[1]=sse_isdoubleunordered(d.f64[1], s.f64[1]) ? 0xffffffffffffffff : 0;
// d != s
4357 d.q[0]=d.f64[0] != s.f64[0] ? 0xffffffffffffffff : 0;
4358 d.q[1]=d.f64[1] != s.f64[1] ? 0xffffffffffffffff : 0;
// not (d < s)
4361 d.q[0]=d.f64[0] < s.f64[0] ? 0 : 0xffffffffffffffff;
4362 d.q[1]=d.f64[1] < s.f64[1] ? 0 : 0xffffffffffffffff;
// not (d <= s)
4365 d.q[0]=d.f64[0] <= s.f64[0] ? 0 : 0xffffffffffffffff;
4366 d.q[1]=d.f64[1] <= s.f64[1] ? 0 : 0xffffffffffffffff;
// result of sse_isdoubleordered(d, s)
4369 d.q[0]=sse_isdoubleordered(d.f64[0], s.f64[0]) ? 0xffffffffffffffff : 0;
4370 d.q[1]=sse_isdoubleordered(d.f64[1], s.f64[1]) ? 0xffffffffffffffff : 0;
// Scalar single-precision predicate compare: writes an all-ones or all-zero dword
// mask into d.d[0] according to the comparison of d.f[0] with s.f[0]; one statement
// per comparison (==, <, <=, unordered, !=, not <, not <=, ordered).
// NOTE(review): d is received by value, so the assignments below modify this
// function's local copy only. Confirm whether the caller's register is expected to be
// updated; that would require passing d by reference (declaration is outside this view).
4375 void I386_OPS_BASE::SSEOP(predicate_compare_single_scalar)(UINT8 imm8, XMM_REG d, XMM_REG s)
4380 d.d[0]=d.f[0] == s.f[0] ? 0xffffffff : 0;
4383 d.d[0]=d.f[0] < s.f[0] ? 0xffffffff : 0;
4386 d.d[0]=d.f[0] <= s.f[0] ? 0xffffffff : 0;
4389 d.d[0]=sse_issingleunordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
4392 d.d[0]=d.f[0] != s.f[0] ? 0xffffffff : 0;
4395 d.d[0]=d.f[0] < s.f[0] ? 0 : 0xffffffff;
4398 d.d[0]=d.f[0] <= s.f[0] ? 0 : 0xffffffff;
4401 d.d[0]=sse_issingleordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
// Scalar double-precision predicate compare: writes an all-ones or all-zero qword
// mask into d.q[0] according to the comparison of d.f64[0] with s.f64[0]; one
// statement per comparison (==, <, <=, unordered, !=, not <, not <=, ordered).
// NOTE(review): d is received by value, so the assignments below modify this
// function's local copy only. Confirm whether the caller's register is expected to be
// updated; that would require passing d by reference (declaration is outside this view).
4406 void I386_OPS_BASE::SSEOP(predicate_compare_double_scalar)(UINT8 imm8, XMM_REG d, XMM_REG s)
4411 d.q[0]=d.f64[0] == s.f64[0] ? 0xffffffffffffffff : 0;
4414 d.q[0]=d.f64[0] < s.f64[0] ? 0xffffffffffffffff : 0;
4417 d.q[0]=d.f64[0] <= s.f64[0] ? 0xffffffffffffffff : 0;
4420 d.q[0]=sse_isdoubleunordered(d.f64[0], s.f64[0]) ? 0xffffffffffffffff : 0;
4423 d.q[0]=d.f64[0] != s.f64[0] ? 0xffffffffffffffff : 0;
4426 d.q[0]=d.f64[0] < s.f64[0] ? 0 : 0xffffffffffffffff;
4429 d.q[0]=d.f64[0] <= s.f64[0] ? 0 : 0xffffffffffffffff;
4432 d.q[0]=sse_isdoubleordered(d.f64[0], s.f64[0]) ? 0xffffffffffffffff : 0;
// CMPPS: fetches ModRM and an immediate byte, then hands the immediate and both
// operands to sse_predicate_compare_single. d is the XMM index in modrm bits 5:3.
// In the memory form the immediate is fetched after GetEA has run.
4437 void I386_OPS_BASE::SSEOP(cmpps_r128_rm128_i8)() // Opcode 0f c2
4439 UINT8 modrm = FETCH();
4440 if( modrm >= 0xc0 ) {
4442 UINT8 imm8 = FETCH();
4444 d=(modrm >> 3) & 0x7;
4445 sse_predicate_compare_single(imm8, XMM(d), XMM(s));
4449 UINT32 ea = GetEA(modrm, 0);
4450 UINT8 imm8 = FETCH();
4452 d=(modrm >> 3) & 0x7;
4453 sse_predicate_compare_single(imm8, XMM(d), s);
4455 CYCLES(1); // TODO: correct cycle count
// CMPPD: fetches ModRM and an immediate byte, then hands the immediate and both
// operands to sse_predicate_compare_double. d is the XMM index in modrm bits 5:3.
// In the memory form the immediate is fetched after GetEA has run.
4458 void I386_OPS_BASE::SSEOP(cmppd_r128_rm128_i8)() // Opcode 66 0f c2
4460 UINT8 modrm = FETCH();
4461 if( modrm >= 0xc0 ) {
4463 UINT8 imm8 = FETCH();
4465 d=(modrm >> 3) & 0x7;
4466 sse_predicate_compare_double(imm8, XMM(d), XMM(s));
4470 UINT32 ea = GetEA(modrm, 0);
4471 UINT8 imm8 = FETCH();
4473 d=(modrm >> 3) & 0x7;
4474 sse_predicate_compare_double(imm8, XMM(d), s);
4476 CYCLES(1); // TODO: correct cycle count
// CMPSS: fetches ModRM and an immediate byte, then hands the immediate and both
// operands to sse_predicate_compare_single_scalar. d is the XMM index in modrm
// bits 5:3. In the memory form the immediate is fetched after GetEA has run.
4479 void I386_OPS_BASE::SSEOP(cmpss_r128_r128m32_i8)() // Opcode f3 0f c2
4481 UINT8 modrm = FETCH();
4482 if( modrm >= 0xc0 ) {
4484 UINT8 imm8 = FETCH();
4486 d=(modrm >> 3) & 0x7;
4487 sse_predicate_compare_single_scalar(imm8, XMM(d), XMM(s));
4491 UINT32 ea = GetEA(modrm, 0);
4492 UINT8 imm8 = FETCH();
4494 d=(modrm >> 3) & 0x7;
4495 sse_predicate_compare_single_scalar(imm8, XMM(d), s);
4497 CYCLES(1); // TODO: correct cycle count
// PINSRW (16-bit operand form): inserts a 16-bit value v into one word lane of the
// register selected by modrm bits 5:3. v comes from LOAD_RM16 in the register form
// and from READ16(ea) in the memory form. cpustate->xmm_operand_size selects between
// an XMM word lane (index imm8 & 7) and an MMX word lane (index imm8 & 3).
4500 void I386_OPS_BASE::SSEOP(pinsrw_r64_r16m16_i8)() // Opcode 0f c4, 16bit register
4503 UINT8 modrm = FETCH();
4504 if( modrm >= 0xc0 ) {
4505 UINT8 imm8 = FETCH();
4506 UINT16 v = LOAD_RM16(modrm);
4507 if (cpustate->xmm_operand_size)
4508 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4510 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4512 UINT32 ea = GetEA(modrm, 0);
4513 UINT8 imm8 = FETCH();
4514 UINT16 v = READ16(ea);
4515 if (cpustate->xmm_operand_size)
4516 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4518 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4520 CYCLES(1); // TODO: correct cycle count
// PINSRW (32-bit operand form): inserts a 16-bit value v into MMX word lane imm8 & 3
// of the register selected by modrm bits 5:3. v is the low 16 bits of LOAD_RM32 in
// the register form and READ16(ea) in the memory form.
4523 void I386_OPS_BASE::SSEOP(pinsrw_r64_r32m16_i8)() // Opcode 0f c4, 32bit register
4526 UINT8 modrm = FETCH();
4527 if( modrm >= 0xc0 ) {
4528 UINT8 imm8 = FETCH();
4529 UINT16 v = (UINT16)LOAD_RM32(modrm);
4530 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4532 UINT32 ea = GetEA(modrm, 0);
4533 UINT8 imm8 = FETCH();
4534 UINT16 v = READ16(ea);
4535 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4537 CYCLES(1); // TODO: correct cycle count
// PINSRW (XMM form): inserts a 16-bit value v into XMM word lane imm8 & 7 of the
// register selected by modrm bits 5:3. v is the low 16 bits of LOAD_RM32 in the
// register form and READ16(ea) in the memory form.
4540 void I386_OPS_BASE::SSEOP(pinsrw_r128_r32m16_i8)() // Opcode 66 0f c4
4542 UINT8 modrm = FETCH();
4543 if (modrm >= 0xc0) {
4544 UINT8 imm8 = FETCH();
4545 UINT16 v = (UINT16)LOAD_RM32(modrm);
4546 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4549 UINT32 ea = GetEA(modrm, 0);
4550 UINT8 imm8 = FETCH();
4551 UINT16 v = READ16(ea);
4552 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4554 CYCLES(1); // TODO: correct cycle count
// PEXTRW (16-bit destination): in the register form, stores one word lane of the
// source register (modrm bits 2:0) with STORE_REG16. cpustate->xmm_operand_size
// selects between an XMM lane (index imm8 & 7) and an MMX lane (index imm8 & 3).
// Any other ModRM value is passed to report_invalid_modrm; the immediate byte is
// not consumed on that path (its FETCH is commented out).
4557 void I386_OPS_BASE::SSEOP(pextrw_r16_r64_i8)() // Opcode 0f c5
4560 UINT8 modrm = FETCH();
4561 if( modrm >= 0xc0 ) {
4562 UINT8 imm8 = FETCH();
4563 if (cpustate->xmm_operand_size)
4564 STORE_REG16(modrm, XMM(modrm & 0x7).w[imm8 & 7]);
4566 STORE_REG16(modrm, MMX(modrm & 0x7).w[imm8 & 3]);
4568 //UINT8 imm8 = FETCH();
4569 report_invalid_modrm( "pextrw_r16_r64_i8", modrm);
4571 CYCLES(1); // TODO: correct cycle count
// PEXTRW (32-bit destination, MMX source): in the register form, stores MMX word
// lane imm8 & 3 of the source register (modrm bits 2:0) with STORE_REG32.
// Any other ModRM value is passed to report_invalid_modrm; the immediate byte is
// not consumed on that path (its FETCH is commented out).
4574 void I386_OPS_BASE::SSEOP(pextrw_r32_r64_i8)() // Opcode 0f c5
4577 UINT8 modrm = FETCH();
4578 if( modrm >= 0xc0 ) {
4579 UINT8 imm8 = FETCH();
4580 STORE_REG32(modrm, MMX(modrm & 0x7).w[imm8 & 3]);
4582 //UINT8 imm8 = FETCH();
4583 report_invalid_modrm( "pextrw_r32_r64_i8", modrm);
4585 CYCLES(1); // TODO: correct cycle count
// PEXTRW (XMM source): in the register form, stores XMM word lane imm8 & 7 of the
// source register (modrm bits 2:0) with STORE_REG32.
// Any other ModRM value is passed to report_invalid_modrm; the immediate byte is
// not consumed on that path (its FETCH is commented out).
4588 void I386_OPS_BASE::SSEOP(pextrw_reg_r128_i8)() // Opcode 66 0f c5
4590 UINT8 modrm = FETCH();
4591 if (modrm >= 0xc0) {
4592 UINT8 imm8 = FETCH();
4593 STORE_REG32(modrm, XMM(modrm & 0x7).w[imm8 & 7]);
4596 //UINT8 imm8 = FETCH();
4597 report_invalid_modrm( "sse_pextrw_reg_r128_i8", modrm);
4599 CYCLES(1); // TODO: correct cycle count
// PMINUB (MMX): for byte lane n, the destination (modrm bits 5:3) keeps the smaller
// of its own .b[n] and the source's .b[n]. The source is the MMX register in modrm
// bits 2:0 when modrm >= 0xc0, otherwise the operand s.
4602 void I386_OPS_BASE::SSEOP(pminub_r64_rm64)() // Opcode 0f da
4606 UINT8 modrm = FETCH();
4607 if( modrm >= 0xc0 ) {
4609 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] < MMX(modrm & 0x7).b[n] ? MMX((modrm >> 3) & 0x7).b[n] : MMX(modrm & 0x7).b[n];
4612 UINT32 ea = GetEA(modrm, 0);
4615 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] < s.b[n] ? MMX((modrm >> 3) & 0x7).b[n] : s.b[n];
4617 CYCLES(1); // TODO: correct cycle count
// PMINUB (XMM): for each of the 16 byte lanes, the destination (modrm bits 5:3)
// keeps the smaller of its own .b[n] and the source's .b[n]. The source is the XMM
// register in modrm bits 2:0 when modrm >= 0xc0, otherwise the operand s.
4620 void I386_OPS_BASE::SSEOP(pminub_r128_rm128)() // Opcode 66 0f da
4622 UINT8 modrm = FETCH();
4623 if( modrm >= 0xc0 ) {
4624 for (int n=0;n < 16;n++)
4625 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] < XMM(modrm & 0x7).b[n] ? XMM((modrm >> 3) & 0x7).b[n] : XMM(modrm & 0x7).b[n];
4628 UINT32 ea = GetEA(modrm, 0);
4630 for (int n=0;n < 16;n++)
4631 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] < s.b[n] ? XMM((modrm >> 3) & 0x7).b[n] : s.b[n];
4633 CYCLES(1); // TODO: correct cycle count
// PMAXUB (MMX): for byte lane n, the destination (modrm bits 5:3) keeps the larger
// of its own .b[n] and the source's .b[n]. The source is the MMX register in modrm
// bits 2:0 when modrm >= 0xc0, otherwise the operand s.
4636 void I386_OPS_BASE::SSEOP(pmaxub_r64_rm64)() // Opcode 0f de
4640 UINT8 modrm = FETCH();
4641 if( modrm >= 0xc0 ) {
4643 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] > MMX(modrm & 0x7).b[n] ? MMX((modrm >> 3) & 0x7).b[n] : MMX(modrm & 0x7).b[n];
4646 UINT32 ea = GetEA(modrm, 0);
4649 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] > s.b[n] ? MMX((modrm >> 3) & 0x7).b[n] : s.b[n];
4651 CYCLES(1); // TODO: correct cycle count
// PAVGB (MMX): for byte lane n, the destination becomes (dest + source + 1) >> 1.
// Both bytes are widened to UINT16 first so the sum cannot wrap at 8 bits.
4654 void I386_OPS_BASE::SSEOP(pavgb_r64_rm64)() // Opcode 0f e0
4658 UINT8 modrm = FETCH();
4659 if( modrm >= 0xc0 ) {
4661 MMX((modrm >> 3) & 0x7).b[n] = ((UINT16)MMX((modrm >> 3) & 0x7).b[n] + (UINT16)MMX(modrm & 0x7).b[n] + 1) >> 1;
4664 UINT32 ea = GetEA(modrm, 0);
4667 MMX((modrm >> 3) & 0x7).b[n] = ((UINT16)MMX((modrm >> 3) & 0x7).b[n] + (UINT16)s.b[n] + 1) >> 1;
4669 CYCLES(1); // TODO: correct cycle count
// PAVGW (MMX): for word lane n, the destination becomes (dest + source + 1) >> 1.
// Both words are widened to UINT32 first so the sum cannot wrap at 16 bits.
4672 void I386_OPS_BASE::SSEOP(pavgw_r64_rm64)() // Opcode 0f e3
4676 UINT8 modrm = FETCH();
4677 if( modrm >= 0xc0 ) {
4679 MMX((modrm >> 3) & 0x7).w[n] = ((UINT32)MMX((modrm >> 3) & 0x7).w[n] + (UINT32)MMX(modrm & 0x7).w[n] + 1) >> 1;
4682 UINT32 ea = GetEA(modrm, 0);
4685 MMX((modrm >> 3) & 0x7).w[n] = ((UINT32)MMX((modrm >> 3) & 0x7).w[n] + (UINT32)s.w[n] + 1) >> 1;
4687 CYCLES(1); // TODO: correct cycle count
// PMULHUW (MMX): for each of the four word lanes, multiplies destination and source
// words as UINT32 values and keeps bits 31:16 of the product in the destination.
4690 void I386_OPS_BASE::SSEOP(pmulhuw_r64_rm64)() // Opcode 0f e4
4693 UINT8 modrm = FETCH();
4694 if( modrm >= 0xc0 ) {
4695 MMX((modrm >> 3) & 0x7).w[0]=((UINT32)MMX((modrm >> 3) & 0x7).w[0]*(UINT32)MMX(modrm & 7).w[0]) >> 16;
4696 MMX((modrm >> 3) & 0x7).w[1]=((UINT32)MMX((modrm >> 3) & 0x7).w[1]*(UINT32)MMX(modrm & 7).w[1]) >> 16;
4697 MMX((modrm >> 3) & 0x7).w[2]=((UINT32)MMX((modrm >> 3) & 0x7).w[2]*(UINT32)MMX(modrm & 7).w[2]) >> 16;
4698 MMX((modrm >> 3) & 0x7).w[3]=((UINT32)MMX((modrm >> 3) & 0x7).w[3]*(UINT32)MMX(modrm & 7).w[3]) >> 16;
4701 UINT32 ea = GetEA(modrm, 0);
4703 MMX((modrm >> 3) & 0x7).w[0]=((UINT32)MMX((modrm >> 3) & 0x7).w[0]*(UINT32)s.w[0]) >> 16;
4704 MMX((modrm >> 3) & 0x7).w[1]=((UINT32)MMX((modrm >> 3) & 0x7).w[1]*(UINT32)s.w[1]) >> 16;
4705 MMX((modrm >> 3) & 0x7).w[2]=((UINT32)MMX((modrm >> 3) & 0x7).w[2]*(UINT32)s.w[2]) >> 16;
4706 MMX((modrm >> 3) & 0x7).w[3]=((UINT32)MMX((modrm >> 3) & 0x7).w[3]*(UINT32)s.w[3]) >> 16;
4708 CYCLES(1); // TODO: correct cycle count
// PMINSW (MMX): for word lane n, the destination (modrm bits 5:3) keeps the smaller
// of its own .s[n] and the source's .s[n]. The source is the MMX register in modrm
// bits 2:0 when modrm >= 0xc0, otherwise the operand s.
4711 void I386_OPS_BASE::SSEOP(pminsw_r64_rm64)() // Opcode 0f ea
4715 UINT8 modrm = FETCH();
4716 if( modrm >= 0xc0 ) {
4718 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] < MMX(modrm & 0x7).s[n] ? MMX((modrm >> 3) & 0x7).s[n] : MMX(modrm & 0x7).s[n];
4721 UINT32 ea = GetEA(modrm, 0);
4724 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] < s.s[n] ? MMX((modrm >> 3) & 0x7).s[n] : s.s[n];
4726 CYCLES(1); // TODO: correct cycle count
// PMAXSW (MMX): for word lane n, the destination (modrm bits 5:3) keeps the larger
// of its own .s[n] and the source's .s[n]. The source is the MMX register in modrm
// bits 2:0 when modrm >= 0xc0, otherwise the operand s.
4729 void I386_OPS_BASE::SSEOP(pmaxsw_r64_rm64)() // Opcode 0f ee
4733 UINT8 modrm = FETCH();
4734 if( modrm >= 0xc0 ) {
4736 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] > MMX(modrm & 0x7).s[n] ? MMX((modrm >> 3) & 0x7).s[n] : MMX(modrm & 0x7).s[n];
4739 UINT32 ea = GetEA(modrm, 0);
4742 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] > s.s[n] ? MMX((modrm >> 3) & 0x7).s[n] : s.s[n];
4744 CYCLES(1); // TODO: correct cycle count
// PMULUDQ (MMX): multiplies dword 0 of the destination by dword 0 of the source,
// both widened to UINT64, and stores the 64-bit product in the destination's .q.
4747 void I386_OPS_BASE::SSEOP(pmuludq_r64_rm64)() // Opcode 0f f4
4750 UINT8 modrm = FETCH();
4751 if( modrm >= 0xc0 ) {
4752 MMX((modrm >> 3) & 0x7).q = (UINT64)MMX((modrm >> 3) & 0x7).d[0] * (UINT64)MMX(modrm & 0x7).d[0];
4755 UINT32 ea = GetEA(modrm, 0);
4757 MMX((modrm >> 3) & 0x7).q = (UINT64)MMX((modrm >> 3) & 0x7).d[0] * (UINT64)s.d[0];
4759 CYCLES(1); // TODO: correct cycle count
// PMULUDQ (XMM): multiplies dwords 0 and 2 of the destination by dwords 0 and 2 of
// the source, widened to UINT64, and stores the products in q[0] and q[1].
// q[0] is written before d[2] is read; d[2] belongs to q[1], so it is still intact.
4762 void I386_OPS_BASE::SSEOP(pmuludq_r128_rm128)() // Opcode 66 0f f4
4764 UINT8 modrm = FETCH();
4765 if( modrm >= 0xc0 ) {
4766 XMM((modrm >> 3) & 0x7).q[0] = (UINT64)XMM((modrm >> 3) & 0x7).d[0] * (UINT64)XMM(modrm & 0x7).d[0];
4767 XMM((modrm >> 3) & 0x7).q[1] = (UINT64)XMM((modrm >> 3) & 0x7).d[2] * (UINT64)XMM(modrm & 0x7).d[2];
4770 UINT32 ea = GetEA(modrm, 0);
4772 XMM((modrm >> 3) & 0x7).q[0] = (UINT64)XMM((modrm >> 3) & 0x7).d[0] * (UINT64)s.d[0];
4773 XMM((modrm >> 3) & 0x7).q[1] = (UINT64)XMM((modrm >> 3) & 0x7).d[2] * (UINT64)s.d[2];
4775 CYCLES(1); // TODO: correct cycle count
// PSADBW (MMX): accumulates into temp the absolute difference of destination and
// source byte lanes (each widened to INT32), then stores the low 16 bits of temp
// in the destination's .l field.
4778 void I386_OPS_BASE::SSEOP(psadbw_r64_rm64)() // Opcode 0f f6
4783 UINT8 modrm = FETCH();
4784 if( modrm >= 0xc0 ) {
4787 temp += abs((INT32)MMX((modrm >> 3) & 0x7).b[n] - (INT32)MMX(modrm & 0x7).b[n]);
4788 MMX((modrm >> 3) & 0x7).l=(UINT64)temp & 0xffff;
4791 UINT32 ea = GetEA(modrm, 0);
4795 temp += abs((INT32)MMX((modrm >> 3) & 0x7).b[n] - (INT32)s.b[n]);
4796 MMX((modrm >> 3) & 0x7).l=(UINT64)temp & 0xffff;
4798 CYCLES(1); // TODO: correct cycle count
// PSUBQ (MMX): subtracts the source qword from the destination qword (modrm bits
// 5:3). The source is the MMX register in modrm bits 2:0 when modrm >= 0xc0,
// otherwise the operand s.
4801 void I386_OPS_BASE::SSEOP(psubq_r64_rm64)() // Opcode 0f fb
4804 UINT8 modrm = FETCH();
4805 if( modrm >= 0xc0 ) {
4806 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q - MMX(modrm & 7).q;
4809 UINT32 ea = GetEA(modrm, 0);
4811 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q - s.q;
4813 CYCLES(1); // TODO: correct cycle count
// PSUBQ (XMM): subtracts each of the two source qwords from the matching
// destination qword (modrm bits 5:3). The source is the XMM register in modrm
// bits 2:0 when modrm >= 0xc0, otherwise the operand s.
4816 void I386_OPS_BASE::SSEOP(psubq_r128_rm128)() // Opcode 66 0f fb
4818 UINT8 modrm = FETCH();
4819 if( modrm >= 0xc0 ) {
4820 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] - XMM(modrm & 7).q[0];
4821 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] - XMM(modrm & 7).q[1];
4824 UINT32 ea = GetEA(modrm, 0);
4826 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] - s.q[0];
4827 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] - s.q[1];
4829 CYCLES(1); // TODO: correct cycle count
// PSHUFD: each destination dword n is chosen from the source by the 2-bit field of
// imm8 at bits (2n+1):2n. The register form reads from t and the memory form from s;
// in the memory form imm8 is fetched after GetEA has run.
4832 void I386_OPS_BASE::SSEOP(pshufd_r128_rm128_i8)() // Opcode 66 0f 70
4834 UINT8 modrm = FETCH();
4835 if( modrm >= 0xc0 ) {
4838 UINT8 imm8 = FETCH();
4840 d=(modrm >> 3) & 0x7;
4843 XMM(d).d[0]=t.d[imm8 & 3];
4844 XMM(d).d[1]=t.d[(imm8 >> 2) & 3];
4845 XMM(d).d[2]=t.d[(imm8 >> 4) & 3];
4846 XMM(d).d[3]=t.d[(imm8 >> 6) & 3];
4849 int d=(modrm >> 3) & 0x7;
4850 UINT32 ea = GetEA(modrm, 0);
4851 UINT8 imm8 = FETCH();
4853 XMM(d).d[0]=s.d[(imm8 & 3)];
4854 XMM(d).d[1]=s.d[((imm8 >> 2) & 3)];
4855 XMM(d).d[2]=s.d[((imm8 >> 4) & 3)];
4856 XMM(d).d[3]=s.d[((imm8 >> 6) & 3)];
4858 CYCLES(1); // TODO: correct cycle count
// PSHUFLW: destination words 0..3 are each chosen by a 2-bit field of imm8 (bits
// 1:0, 3:2, 5:4, 7:6). The register form copies the source's high qword to the
// destination and selects words from t; the memory form selects from s.w[0..3].
4861 void I386_OPS_BASE::SSEOP(pshuflw_r128_rm128_i8)() // Opcode f2 0f 70
4863 UINT8 modrm = FETCH();
4864 if( modrm >= 0xc0 ) {
4867 UINT8 imm8 = FETCH();
4869 d=(modrm >> 3) & 0x7;
4871 XMM(d).q[1]=XMM(s).q[1];
4872 XMM(d).w[0]=t.w[imm8 & 3];
4873 XMM(d).w[1]=t.w[(imm8 >> 2) & 3];
4874 XMM(d).w[2]=t.w[(imm8 >> 4) & 3];
4875 XMM(d).w[3]=t.w[(imm8 >> 6) & 3];
4878 int d=(modrm >> 3) & 0x7;
4879 UINT32 ea = GetEA(modrm, 0);
4880 UINT8 imm8 = FETCH();
4883 XMM(d).w[0]=s.w[imm8 & 3];
4884 XMM(d).w[1]=s.w[(imm8 >> 2) & 3];
4885 XMM(d).w[2]=s.w[(imm8 >> 4) & 3];
4886 XMM(d).w[3]=s.w[(imm8 >> 6) & 3];
4888 CYCLES(1); // TODO: correct cycle count
// PSHUFHW: destination words 4..7 are each chosen by a 2-bit field of imm8 (bits
// 1:0, 3:2, 5:4, 7:6). The register form copies the source's low qword to the
// destination and selects from t.w[0..3]; the memory form selects from s.w[4..7]
// (index 4 + field).
4891 void I386_OPS_BASE::SSEOP(pshufhw_r128_rm128_i8)() // Opcode f3 0f 70
4893 UINT8 modrm = FETCH();
4894 if( modrm >= 0xc0 ) {
4897 UINT8 imm8 = FETCH();
4899 d=(modrm >> 3) & 0x7;
4901 XMM(d).q[0]=XMM(s).q[0];
4902 XMM(d).w[4]=t.w[imm8 & 3];
4903 XMM(d).w[5]=t.w[(imm8 >> 2) & 3];
4904 XMM(d).w[6]=t.w[(imm8 >> 4) & 3];
4905 XMM(d).w[7]=t.w[(imm8 >> 6) & 3];
4908 int d=(modrm >> 3) & 0x7;
4909 UINT32 ea = GetEA(modrm, 0);
4910 UINT8 imm8 = FETCH();
4913 XMM(d).w[4]=s.w[4 + (imm8 & 3)];
4914 XMM(d).w[5]=s.w[4 + ((imm8 >> 2) & 3)];
4915 XMM(d).w[6]=s.w[4 + ((imm8 >> 4) & 3)];
4916 XMM(d).w[7]=s.w[4 + ((imm8 >> 6) & 3)];
4918 CYCLES(1); // TODO: correct cycle count
// PACKSSWB (XMM): packs 16 words into 16 bytes via SaturatedSignedWordToSignedByte.
// Destination bytes 0..7 come from the destination's own eight words and bytes 8..15
// from the source's eight words.
4921 void I386_OPS_BASE::SSEOP(packsswb_r128_rm128)() // Opcode 66 0f 63
4923 UINT8 modrm = FETCH();
4924 if (modrm >= 0xc0) {
4928 d = (modrm >> 3) & 0x7;
// copy the source register into t before any destination byte is written
4929 t.q[0] = XMM(s).q[0];
4930 t.q[1] = XMM(s).q[1];
4931 for (int n = 0; n < 8; n++)
4932 XMM(d).c[n] = SaturatedSignedWordToSignedByte(XMM(d).s[n]);
4933 for (int n = 0; n < 8; n++)
4934 XMM(d).c[n+8] = SaturatedSignedWordToSignedByte(t.s[n]);
4938 int d = (modrm >> 3) & 0x7;
4939 UINT32 ea = GetEA(modrm, 0);
4941 for (int n = 0; n < 8; n++)
4942 XMM(d).c[n] = SaturatedSignedWordToSignedByte(XMM(d).s[n]);
4943 for (int n = 0; n < 8; n++)
4944 XMM(d).c[n + 8] = SaturatedSignedWordToSignedByte(s.s[n]);
4946 CYCLES(1); // TODO: correct cycle count
// PACKSSDW (XMM): packs 8 dwords into 8 words via SaturatedSignedDwordToSignedWord.
// Destination words 0..3 come from the destination's own four dwords and words 4..7
// from the source's four dwords.
4949 void I386_OPS_BASE::SSEOP(packssdw_r128_rm128)() // Opcode 66 0f 6b
4951 UINT8 modrm = FETCH();
4952 if (modrm >= 0xc0) {
4956 d = (modrm >> 3) & 0x7;
// copy the source register into t before any destination word is written
4957 t.q[0] = XMM(s).q[0];
4958 t.q[1] = XMM(s).q[1];
4959 XMM(d).s[0] = SaturatedSignedDwordToSignedWord(XMM(d).i[0]);
4960 XMM(d).s[1] = SaturatedSignedDwordToSignedWord(XMM(d).i[1]);
4961 XMM(d).s[2] = SaturatedSignedDwordToSignedWord(XMM(d).i[2]);
4962 XMM(d).s[3] = SaturatedSignedDwordToSignedWord(XMM(d).i[3]);
4963 XMM(d).s[4] = SaturatedSignedDwordToSignedWord(t.i[0]);
4964 XMM(d).s[5] = SaturatedSignedDwordToSignedWord(t.i[1]);
4965 XMM(d).s[6] = SaturatedSignedDwordToSignedWord(t.i[2]);
4966 XMM(d).s[7] = SaturatedSignedDwordToSignedWord(t.i[3]);
4970 int d = (modrm >> 3) & 0x7;
4971 UINT32 ea = GetEA(modrm, 0);
4973 XMM(d).s[0] = SaturatedSignedDwordToSignedWord(XMM(d).i[0]);
4974 XMM(d).s[1] = SaturatedSignedDwordToSignedWord(XMM(d).i[1]);
4975 XMM(d).s[2] = SaturatedSignedDwordToSignedWord(XMM(d).i[2]);
4976 XMM(d).s[3] = SaturatedSignedDwordToSignedWord(XMM(d).i[3]);
4977 XMM(d).s[4] = SaturatedSignedDwordToSignedWord(s.i[0]);
4978 XMM(d).s[5] = SaturatedSignedDwordToSignedWord(s.i[1]);
4979 XMM(d).s[6] = SaturatedSignedDwordToSignedWord(s.i[2]);
4980 XMM(d).s[7] = SaturatedSignedDwordToSignedWord(s.i[3]);
4982 CYCLES(1); // TODO: correct cycle count
// PCMPGTB (XMM): for each of the 16 byte lanes, writes 0xff to the destination byte
// when the destination's .c[n] is greater than the source's .c[n], otherwise 0.
4985 void I386_OPS_BASE::SSEOP(pcmpgtb_r128_rm128)() // Opcode 66 0f 64
4987 UINT8 modrm = FETCH();
4988 if( modrm >= 0xc0 ) {
4991 d=(modrm >> 3) & 0x7;
4992 for (int c=0;c <= 15;c++)
4993 XMM(d).b[c]=(XMM(d).c[c] > XMM(s).c[c]) ? 0xff : 0;
4996 int d=(modrm >> 3) & 0x7;
4997 UINT32 ea = GetEA(modrm, 0);
4999 for (int c=0;c <= 15;c++)
5000 XMM(d).b[c]=(XMM(d).c[c] > s.c[c]) ? 0xff : 0;
5002 CYCLES(1); // TODO: correct cycle count
// PCMPGTW (XMM): for each of the 8 word lanes, writes 0xffff to the destination word
// when the destination's .s[n] is greater than the source's .s[n], otherwise 0.
5005 void I386_OPS_BASE::SSEOP(pcmpgtw_r128_rm128)() // Opcode 66 0f 65
5007 UINT8 modrm = FETCH();
5008 if( modrm >= 0xc0 ) {
5011 d=(modrm >> 3) & 0x7;
5012 for (int c=0;c <= 7;c++)
5013 XMM(d).w[c]=(XMM(d).s[c] > XMM(s).s[c]) ? 0xffff : 0;
5016 int d=(modrm >> 3) & 0x7;
5017 UINT32 ea = GetEA(modrm, 0);
5019 for (int c=0;c <= 7;c++)
5020 XMM(d).w[c]=(XMM(d).s[c] > s.s[c]) ? 0xffff : 0;
5022 CYCLES(1); // TODO: correct cycle count
// PCMPGTD (XMM): for each of the 4 dword lanes, writes 0xffffffff to the destination
// dword when the destination's .i[n] is greater than the source's .i[n], otherwise 0.
5025 void I386_OPS_BASE::SSEOP(pcmpgtd_r128_rm128)() // Opcode 66 0f 66
5027 UINT8 modrm = FETCH();
5028 if( modrm >= 0xc0 ) {
5031 d=(modrm >> 3) & 0x7;
5032 for (int c=0;c <= 3;c++)
5033 XMM(d).d[c]=(XMM(d).i[c] > XMM(s).i[c]) ? 0xffffffff : 0;
5036 int d=(modrm >> 3) & 0x7;
5037 UINT32 ea = GetEA(modrm, 0);
5039 for (int c=0;c <= 3;c++)
5040 XMM(d).d[c]=(XMM(d).i[c] > s.i[c]) ? 0xffffffff : 0;
5042 CYCLES(1); // TODO: correct cycle count
// PACKUSWB (XMM): packs 16 words into 16 bytes via SaturatedSignedWordToUnsignedByte.
// Destination bytes 0..7 come from the destination's own eight words and bytes 8..15
// from the source's eight words.
5045 void I386_OPS_BASE::SSEOP(packuswb_r128_rm128)() // Opcode 66 0f 67
5047 UINT8 modrm = FETCH();
5048 if( modrm >= 0xc0 ) {
5052 d=(modrm >> 3) & 0x7;
// copy the source register into t before any destination byte is written
5053 t.q[0] = XMM(s).q[0];
5054 t.q[1] = XMM(s).q[1];
5055 for (int n = 0; n < 8;n++)
5056 XMM(d).b[n]=SaturatedSignedWordToUnsignedByte(XMM(d).s[n]);
5057 for (int n = 0; n < 8;n++)
5058 XMM(d).b[n+8]=SaturatedSignedWordToUnsignedByte(t.s[n]);
5061 int d=(modrm >> 3) & 0x7;
5062 UINT32 ea = GetEA(modrm, 0);
5064 for (int n = 0; n < 8;n++)
5065 XMM(d).b[n]=SaturatedSignedWordToUnsignedByte(XMM(d).s[n]);
5066 for (int n = 0; n < 8;n++)
5067 XMM(d).b[n+8]=SaturatedSignedWordToUnsignedByte(s.s[n]);
5069 CYCLES(1); // TODO: correct cycle count
// PUNPCKHBW (XMM): interleaves the upper eight bytes of the destination with the
// upper eight bytes of the source. Even destination bytes come from the destination's
// bytes 8..15, odd destination bytes from the source's bytes 8..15.
// The in-place loop is safe: byte 8+(n>>1) is always read before it is overwritten.
5072 void I386_OPS_BASE::SSEOP(punpckhbw_r128_rm128)() // Opcode 66 0f 68
5074 UINT8 modrm = FETCH();
5075 if( modrm >= 0xc0 ) {
5079 d=(modrm >> 3) & 0x7;
// copy the source's high qword into t before the destination is written
5080 t.q[1] = XMM(s).q[1];
5081 for (int n = 0; n < 16; n += 2) {
5082 XMM(d).b[n]=XMM(d).b[8+(n >> 1)];
5083 XMM(d).b[n+1]=t.b[8+(n >> 1)];
5087 int d=(modrm >> 3) & 0x7;
5088 UINT32 ea = GetEA(modrm, 0);
5090 for (int n = 0; n < 16; n += 2) {
5091 XMM(d).b[n]=XMM(d).b[8+(n >> 1)];
5092 XMM(d).b[n+1]=s.b[8+(n >> 1)];
5095 CYCLES(1); // TODO: correct cycle count
// PUNPCKHWD (XMM): interleaves the upper four words of the destination with the
// upper four words of the source. Even destination words come from the destination's
// words 4..7, odd destination words from the source's words 4..7.
// The in-place loop is safe: word 4+(n>>1) is always read before it is overwritten.
5098 void I386_OPS_BASE::SSEOP(punpckhwd_r128_rm128)() // Opcode 66 0f 69
5100 UINT8 modrm = FETCH();
5101 if( modrm >= 0xc0 ) {
5105 d=(modrm >> 3) & 0x7;
// copy the source's high qword into t before the destination is written
5106 t.q[1] = XMM(s).q[1];
5107 for (int n = 0; n < 8; n += 2) {
5108 XMM(d).w[n]=XMM(d).w[4+(n >> 1)];
5109 XMM(d).w[n+1]=t.w[4+(n >> 1)];
5113 int d=(modrm >> 3) & 0x7;
5114 UINT32 ea = GetEA(modrm, 0);
5116 for (int n = 0; n < 8; n += 2) {
5117 XMM(d).w[n]=XMM(d).w[4+(n >> 1)];
5118 XMM(d).w[n+1]=s.w[4+(n >> 1)];
5121 CYCLES(1); // TODO: correct cycle count
// Opcode 66 0f 6a: moves destination dword 2 to dword 0 and then destination dword 3
// to dword 2. The register form first copies the source's high qword into t.
5124 void I386_OPS_BASE::SSEOP(unpckhdq_r128_rm128)() // Opcode 66 0f 6a
5126 UINT8 modrm = FETCH();
5127 if( modrm >= 0xc0 ) {
5131 d=(modrm >> 3) & 0x7;
5132 t.q[1] = XMM(s).q[1];
5133 XMM(d).d[0]=XMM(d).d[2];
5135 XMM(d).d[2]=XMM(d).d[3];
5139 int d=(modrm >> 3) & 0x7;
5140 UINT32 ea = GetEA(modrm, 0);
5142 XMM(d).d[0]=XMM(d).d[2];
5144 XMM(d).d[2]=XMM(d).d[3];
5147 CYCLES(1); // TODO: correct cycle count
// PUNPCKHQDQ (XMM): moves the destination's high qword into its low qword. The
// register form first copies the source's high qword into t.
5150 void I386_OPS_BASE::SSEOP(punpckhqdq_r128_rm128)() // Opcode 66 0f 6d
5152 UINT8 modrm = FETCH();
5153 if( modrm >= 0xc0 ) {
5157 d=(modrm >> 3) & 0x7;
5158 t.q[1] = XMM(s).q[1];
5159 XMM(d).q[0]=XMM(d).q[1];
5163 int d=(modrm >> 3) & 0x7;
5164 UINT32 ea = GetEA(modrm, 0);
5166 XMM(d).q[0]=XMM(d).q[1];
5169 CYCLES(1); // TODO: correct cycle count
// PCMPEQB (XMM): for each of the 16 byte lanes, writes 0xff to the destination byte
// when the destination's .c[n] equals the source's .c[n], otherwise 0.
5172 void I386_OPS_BASE::SSEOP(pcmpeqb_r128_rm128)() // Opcode 66 0f 74
5174 UINT8 modrm = FETCH();
5175 if( modrm >= 0xc0 ) {
5178 d=(modrm >> 3) & 0x7;
5179 for (int c=0;c <= 15;c++)
5180 XMM(d).b[c]=(XMM(d).c[c] == XMM(s).c[c]) ? 0xff : 0;
5183 int d=(modrm >> 3) & 0x7;
5184 UINT32 ea = GetEA(modrm, 0);
5186 for (int c=0;c <= 15;c++)
5187 XMM(d).b[c]=(XMM(d).c[c] == s.c[c]) ? 0xff : 0;
5189 CYCLES(1); // TODO: correct cycle count
// PCMPEQW (XMM): for each of the 8 word lanes, writes 0xffff to the destination word
// when the destination's .s[n] equals the source's .s[n], otherwise 0.
5192 void I386_OPS_BASE::SSEOP(pcmpeqw_r128_rm128)() // Opcode 66 0f 75
5194 UINT8 modrm = FETCH();
5195 if( modrm >= 0xc0 ) {
5198 d=(modrm >> 3) & 0x7;
5199 for (int c=0;c <= 7;c++)
5200 XMM(d).w[c]=(XMM(d).s[c] == XMM(s).s[c]) ? 0xffff : 0;
5203 int d=(modrm >> 3) & 0x7;
5204 UINT32 ea = GetEA(modrm, 0);
5206 for (int c=0;c <= 7;c++)
5207 XMM(d).w[c]=(XMM(d).s[c] == s.s[c]) ? 0xffff : 0;
5209 CYCLES(1); // TODO: correct cycle count
// PCMPEQD (XMM): for each of the 4 dword lanes, writes 0xffffffff to the destination
// dword when the destination's .i[n] equals the source's .i[n], otherwise 0.
5212 void I386_OPS_BASE::SSEOP(pcmpeqd_r128_rm128)() // Opcode 66 0f 76
5214 UINT8 modrm = FETCH();
5215 if( modrm >= 0xc0 ) {
5218 d=(modrm >> 3) & 0x7;
5219 for (int c=0;c <= 3;c++)
5220 XMM(d).d[c]=(XMM(d).i[c] == XMM(s).i[c]) ? 0xffffffff : 0;
5223 int d=(modrm >> 3) & 0x7;
5224 UINT32 ea = GetEA(modrm, 0);
5226 for (int c=0;c <= 3;c++)
5227 XMM(d).d[c]=(XMM(d).i[c] == s.i[c]) ? 0xffffffff : 0;
5229 CYCLES(1); // TODO: correct cycle count
// PADDQ (XMM): adds each of the two source qwords to the matching destination
// qword. The source is XMM register s in the register form and src in the memory form.
5232 void I386_OPS_BASE::SSEOP(paddq_r128_rm128)() // Opcode 66 0f d4
5234 UINT8 modrm = FETCH();
5235 if( modrm >= 0xc0 ) {
5238 d=(modrm >> 3) & 0x7;
5239 XMM(d).q[0]=XMM(d).q[0]+XMM(s).q[0];
5240 XMM(d).q[1]=XMM(d).q[1]+XMM(s).q[1];
5243 int d=(modrm >> 3) & 0x7;
5244 UINT32 ea = GetEA(modrm, 0);
5246 XMM(d).q[0]=XMM(d).q[0]+src.q[0];
5247 XMM(d).q[1]=XMM(d).q[1]+src.q[1];
5249 CYCLES(1); // TODO: correct cycle count
// PMULLW (XMM): for each of the 8 word lanes, multiplies the destination's and the
// source's .s[n] as INT32 values and keeps the low 16 bits of the product.
5252 void I386_OPS_BASE::SSEOP(pmullw_r128_rm128)() // Opcode 66 0f d5
5254 UINT8 modrm = FETCH();
5255 if( modrm >= 0xc0 ) {
5258 d=(modrm >> 3) & 0x7;
5259 for (int n = 0; n < 8;n++)
5260 XMM(d).w[n]=(UINT32)((INT32)XMM(d).s[n]*(INT32)XMM(s).s[n]) & 0xffff;
5264 UINT32 ea = GetEA(modrm, 0);
5266 d=(modrm >> 3) & 0x7;
5267 for (int n = 0; n < 8;n++)
5268 XMM(d).w[n]=(UINT32)((INT32)XMM(d).s[n]*(INT32)src.s[n]) & 0xffff;
5270 CYCLES(1); // TODO: correct cycle count
// PADDB (XMM): adds the source's .b[n] to the destination's .b[n] for each of the
// 16 byte lanes; the sum is stored back into the byte lane without saturation logic.
5273 void I386_OPS_BASE::SSEOP(paddb_r128_rm128)() // Opcode 66 0f fc
5275 UINT8 modrm = FETCH();
5276 if( modrm >= 0xc0 ) {
5277 for (int n=0;n < 16;n++)
5278 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] + XMM(modrm & 7).b[n];
5281 UINT32 ea = GetEA(modrm, 0);
5283 for (int n=0;n < 16;n++)
5284 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] + s.b[n];
5286 CYCLES(1); // TODO: correct cycle count
// PADDW (XMM): adds the source's .w[n] to the destination's .w[n] for each of the
// 8 word lanes; the sum is stored back into the word lane without saturation logic.
5289 void I386_OPS_BASE::SSEOP(paddw_r128_rm128)() // Opcode 66 0f fd
5291 UINT8 modrm = FETCH();
5292 if( modrm >= 0xc0 ) {
5293 for (int n=0;n < 8;n++)
5294 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] + XMM(modrm & 7).w[n];
5297 UINT32 ea = GetEA(modrm, 0);
5299 for (int n=0;n < 8;n++)
5300 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] + s.w[n];
5302 CYCLES(1); // TODO: correct cycle count
// PADDD (XMM): adds the source's .d[n] to the destination's .d[n] for each of the
// 4 dword lanes; the sum is stored back into the dword lane without saturation logic.
5305 void I386_OPS_BASE::SSEOP(paddd_r128_rm128)() // Opcode 66 0f fe
5307 UINT8 modrm = FETCH();
5308 if( modrm >= 0xc0 ) {
5309 for (int n=0;n < 4;n++)
5310 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] + XMM(modrm & 7).d[n];
5313 UINT32 ea = GetEA(modrm, 0);
5315 for (int n=0;n < 4;n++)
5316 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] + s.d[n];
5318 CYCLES(1); // TODO: correct cycle count
// PSUBUSB (XMM): for each of the 16 byte lanes, subtracts the source's .b[n] from
// the destination's .b[n]; the result is clamped to 0 when the destination byte is
// smaller than the source byte.
5321 void I386_OPS_BASE::SSEOP(psubusb_r128_rm128)() // Opcode 66 0f d8
5323 UINT8 modrm = FETCH();
5324 if( modrm >= 0xc0 ) {
5325 for (int n=0;n < 16;n++)
5326 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] < XMM(modrm & 7).b[n] ? 0 : XMM((modrm >> 3) & 0x7).b[n]-XMM(modrm & 7).b[n];
5329 UINT32 ea = GetEA(modrm, 0);
5331 for (int n=0;n < 16;n++)
5332 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] < src.b[n] ? 0 : XMM((modrm >> 3) & 0x7).b[n]-src.b[n];
5334 CYCLES(1); // TODO: correct cycle count
// PSUBUSW (XMM): for each of the 8 word lanes, subtracts the source's .w[n] from
// the destination's .w[n]; the result is clamped to 0 when the destination word is
// smaller than the source word.
5337 void I386_OPS_BASE::SSEOP(psubusw_r128_rm128)() // Opcode 66 0f d9
5339 UINT8 modrm = FETCH();
5340 if( modrm >= 0xc0 ) {
5341 for (int n=0;n < 8;n++)
5342 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] < XMM(modrm & 7).w[n] ? 0 : XMM((modrm >> 3) & 0x7).w[n]-XMM(modrm & 7).w[n];
5345 UINT32 ea = GetEA(modrm, 0);
5347 for (int n=0;n < 8;n++)
5348 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] < src.w[n] ? 0 : XMM((modrm >> 3) & 0x7).w[n]-src.w[n];
5350 CYCLES(1); // TODO: correct cycle count
// PAND (XMM): bitwise AND of the destination with the source, done as two qwords.
// The source is the XMM register in modrm bits 2:0 in the register form and src in
// the memory form.
5353 void I386_OPS_BASE::SSEOP(pand_r128_rm128)() // Opcode 66 0f db
5355 UINT8 modrm = FETCH();
5356 if( modrm >= 0xc0 ) {
5357 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] & XMM(modrm & 7).q[0];
5358 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] & XMM(modrm & 7).q[1];
5361 UINT32 ea = GetEA(modrm, 0);
5363 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] & src.q[0];
5364 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] & src.q[1];
5366 CYCLES(1); // TODO: correct cycle count
// PANDN (XMM): stores (~destination) & source in the destination, done as two
// qwords. Note that it is the destination operand that is complemented.
5369 void I386_OPS_BASE::SSEOP(pandn_r128_rm128)() // Opcode 66 0f df
5371 UINT8 modrm = FETCH();
5372 if( modrm >= 0xc0 ) {
5373 XMM((modrm >> 3) & 0x7).q[0]=(~XMM((modrm >> 3) & 0x7).q[0]) & XMM(modrm & 7).q[0];
5374 XMM((modrm >> 3) & 0x7).q[1]=(~XMM((modrm >> 3) & 0x7).q[1]) & XMM(modrm & 7).q[1];
5377 UINT32 ea = GetEA(modrm, 0);
5379 XMM((modrm >> 3) & 0x7).q[0]=(~XMM((modrm >> 3) & 0x7).q[0]) & src.q[0];
5380 XMM((modrm >> 3) & 0x7).q[1]=(~XMM((modrm >> 3) & 0x7).q[1]) & src.q[1];
5382 CYCLES(1); // TODO: correct cycle count
// PADDUSB (XMM): for each of the 16 byte lanes, adds the source's .b[n] to the
// destination's .b[n], saturating at 0xff. Overflow is detected up front by
// comparing the destination byte with (0xff - source byte).
5385 void I386_OPS_BASE::SSEOP(paddusb_r128_rm128)() // Opcode 66 0f dc
5387 UINT8 modrm = FETCH();
5388 if( modrm >= 0xc0 ) {
5389 for (int n=0;n < 16;n++)
5390 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] > (0xff-XMM(modrm & 7).b[n]) ? 0xff : XMM((modrm >> 3) & 0x7).b[n]+XMM(modrm & 7).b[n];
5393 UINT32 ea = GetEA(modrm, 0);
5395 for (int n=0;n < 16;n++)
5396 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] > (0xff-src.b[n]) ? 0xff : XMM((modrm >> 3) & 0x7).b[n]+src.b[n];
5398 CYCLES(1); // TODO: correct cycle count
// PADDUSW (XMM): for each of the 8 word lanes, adds the source's .w[n] to the
// destination's .w[n], saturating at 0xffff. Overflow is detected up front by
// comparing the destination word with (0xffff - source word).
5401 void I386_OPS_BASE::SSEOP(paddusw_r128_rm128)() // Opcode 66 0f dd
5403 UINT8 modrm = FETCH();
5404 if( modrm >= 0xc0 ) {
5405 for (int n=0;n < 8;n++)
5406 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] > (0xffff-XMM(modrm & 7).w[n]) ? 0xffff : XMM((modrm >> 3) & 0x7).w[n]+XMM(modrm & 7).w[n];
5409 UINT32 ea = GetEA(modrm, 0);
5411 for (int n=0;n < 8;n++)
5412 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] > (0xffff-src.w[n]) ? 0xffff : XMM((modrm >> 3) & 0x7).w[n]+src.w[n];
5414 CYCLES(1); // TODO: correct cycle count
// PMAXUB (XMM): for each of the 16 byte lanes, the destination (modrm bits 5:3)
// keeps the larger of its own .b[n] and the source's .b[n]. The source is the XMM
// register in modrm bits 2:0 when modrm >= 0xc0, otherwise the operand s.
5417 void I386_OPS_BASE::SSEOP(pmaxub_r128_rm128)() // Opcode 66 0f de
5419 UINT8 modrm = FETCH();
5420 if( modrm >= 0xc0 ) {
5421 for (int n=0;n < 16;n++)
5422 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] > XMM(modrm & 0x7).b[n] ? XMM((modrm >> 3) & 0x7).b[n] : XMM(modrm & 0x7).b[n];
5425 UINT32 ea = GetEA(modrm, 0);
5427 for (int n=0;n < 16;n++)
5428 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] > s.b[n] ? XMM((modrm >> 3) & 0x7).b[n] : s.b[n];
5430 CYCLES(1); // TODO: correct cycle count
// PMULHUW (XMM): for each of the 8 word lanes, multiplies destination and source
// words as UINT32 values and keeps bits 31:16 of the product in the destination.
5433 void I386_OPS_BASE::SSEOP(pmulhuw_r128_rm128)() // Opcode 66 0f e4
5435 UINT8 modrm = FETCH();
5436 if( modrm >= 0xc0 ) {
5437 for (int n=0;n < 8;n++)
5438 XMM((modrm >> 3) & 0x7).w[n]=((UINT32)XMM((modrm >> 3) & 0x7).w[n]*(UINT32)XMM(modrm & 7).w[n]) >> 16;
5441 UINT32 ea = GetEA(modrm, 0);
5443 for (int n=0;n < 8;n++)
5444 XMM((modrm >> 3) & 0x7).w[n]=((UINT32)XMM((modrm >> 3) & 0x7).w[n]*(UINT32)s.w[n]) >> 16;
5446 CYCLES(1); // TODO: correct cycle count
// PMULHW (XMM): for each of the 8 word lanes, multiplies the destination's and the
// source's .s[n] as INT32 values; the product is cast to UINT32 and shifted right
// by 16, and the result is stored in the destination word.
5449 void I386_OPS_BASE::SSEOP(pmulhw_r128_rm128)() // Opcode 66 0f e5
5451 UINT8 modrm = FETCH();
5452 if( modrm >= 0xc0 ) {
5453 for (int n=0;n < 8;n++)
5454 XMM((modrm >> 3) & 0x7).w[n]=(UINT32)((INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)XMM(modrm & 7).s[n]) >> 16;
5457 UINT32 ea = GetEA(modrm, 0);
5459 for (int n=0;n < 8;n++)
5460 XMM((modrm >> 3) & 0x7).w[n]=(UINT32)((INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)src.s[n]) >> 16;
5462 CYCLES(1); // TODO: correct cycle count
// PSUBSB (XMM): for each of the 16 byte lanes, subtracts the source's .c[n] from
// the destination's .c[n] after widening both to INT16, then narrows the difference
// with SaturatedSignedWordToSignedByte.
5465 void I386_OPS_BASE::SSEOP(psubsb_r128_rm128)() // Opcode 66 0f e8
5467 UINT8 modrm = FETCH();
5468 if( modrm >= 0xc0 ) {
5469 for (int n=0;n < 16;n++)
5470 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)XMM((modrm >> 3) & 0x7).c[n] - (INT16)XMM(modrm & 7).c[n]);
5473 UINT32 ea = GetEA(modrm, 0);
5475 for (int n=0;n < 16;n++)
5476 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)XMM((modrm >> 3) & 0x7).c[n] - (INT16)s.c[n]);
5478 CYCLES(1); // TODO: correct cycle count
// PSUBSW (XMM): for each of the 8 word lanes, subtracts the source's .s[n] from
// the destination's .s[n] after widening both to INT32, then narrows the difference
// with SaturatedSignedDwordToSignedWord.
5481 void I386_OPS_BASE::SSEOP(psubsw_r128_rm128)() // Opcode 66 0f e9
5483 UINT8 modrm = FETCH();
5484 if( modrm >= 0xc0 ) {
5485 for (int n=0;n < 8;n++)
5486 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)XMM((modrm >> 3) & 0x7).s[n] - (INT32)XMM(modrm & 7).s[n]);
5489 UINT32 ea = GetEA(modrm, 0);
5491 for (int n=0;n < 8;n++)
5492 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)XMM((modrm >> 3) & 0x7).s[n] - (INT32)s.s[n]);
5494 CYCLES(1); // TODO: correct cycle count
5497 void I386_OPS_BASE::SSEOP(pminsw_r128_rm128)() // Opcode 66 0f ea
5499 UINT8 modrm = FETCH();
5500 if( modrm >= 0xc0 ) {
5501 for (int n=0;n < 8;n++)
5502 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] < XMM(modrm & 0x7).s[n] ? XMM((modrm >> 3) & 0x7).s[n] : XMM(modrm & 0x7).s[n];
5505 UINT32 ea = GetEA(modrm, 0);
5507 for (int n=0;n < 8;n++)
5508 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] < s.s[n] ? XMM((modrm >> 3) & 0x7).s[n] : s.s[n];
5510 CYCLES(1); // TODO: correct cycle count
5513 void I386_OPS_BASE::SSEOP(pmaxsw_r128_rm128)() // Opcode 66 0f ee
5515 UINT8 modrm = FETCH();
5516 if( modrm >= 0xc0 ) {
5517 for (int n=0;n < 8;n++)
5518 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] > XMM(modrm & 0x7).s[n] ? XMM((modrm >> 3) & 0x7).s[n] : XMM(modrm & 0x7).s[n];
5521 UINT32 ea = GetEA(modrm, 0);
5523 for (int n=0;n < 8;n++)
5524 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] > s.s[n] ? XMM((modrm >> 3) & 0x7).s[n] : s.s[n];
5526 CYCLES(1); // TODO: correct cycle count
5529 void I386_OPS_BASE::SSEOP(paddsb_r128_rm128)() // Opcode 66 0f ec
5531 UINT8 modrm = FETCH();
5532 if( modrm >= 0xc0 ) {
5533 for (int n=0;n < 16;n++)
5534 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)XMM((modrm >> 3) & 0x7).c[n] + (INT16)XMM(modrm & 7).c[n]);
5537 UINT32 ea = GetEA(modrm, 0);
5539 for (int n=0;n < 16;n++)
5540 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)XMM((modrm >> 3) & 0x7).c[n] + (INT16)s.c[n]);
5542 CYCLES(1); // TODO: correct cycle count
5545 void I386_OPS_BASE::SSEOP(paddsw_r128_rm128)() // Opcode 66 0f ed
5547 UINT8 modrm = FETCH();
5548 if( modrm >= 0xc0 ) {
5549 for (int n=0;n < 8;n++)
5550 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)XMM((modrm >> 3) & 0x7).s[n] + (INT32)XMM(modrm & 7).s[n]);
5553 UINT32 ea = GetEA(modrm, 0);
5555 for (int n=0;n < 8;n++)
5556 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)XMM((modrm >> 3) & 0x7).s[n] + (INT32)s.s[n]);
5558 CYCLES(1); // TODO: correct cycle count
5561 void I386_OPS_BASE::SSEOP(por_r128_rm128)() // Opcode 66 0f eb
5563 UINT8 modrm = FETCH();
5564 if( modrm >= 0xc0 ) {
5565 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] | XMM(modrm & 7).q[0];
5566 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] | XMM(modrm & 7).q[1];
5569 UINT32 ea = GetEA(modrm, 0);
5571 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] | s.q[0];
5572 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] | s.q[1];
5574 CYCLES(1); // TODO: correct cycle count
5577 void I386_OPS_BASE::SSEOP(pxor_r128_rm128)() // Opcode 66 0f ef
5579 UINT8 modrm = FETCH();
5580 if( modrm >= 0xc0 ) {
5581 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] ^ XMM(modrm & 7).q[0];
5582 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] ^ XMM(modrm & 7).q[1];
5585 UINT32 ea = GetEA(modrm, 0);
5587 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] ^ s.q[0];
5588 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] ^ s.q[1];
5590 CYCLES(1); // TODO: correct cycle count
5593 void I386_OPS_BASE::SSEOP(pmaddwd_r128_rm128)() // Opcode 66 0f f5
5595 UINT8 modrm = FETCH();
5596 if( modrm >= 0xc0 ) {
5597 for (int n=0;n < 4;n++)
5598 XMM((modrm >> 3) & 0x7).i[n]=(INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)XMM(modrm & 7).s[n]+
5599 (INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)XMM(modrm & 7).s[n];
5602 UINT32 ea = GetEA(modrm, 0);
5604 for (int n=0;n < 4;n++)
5605 XMM((modrm >> 3) & 0x7).i[n]=(INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)s.s[n]+
5606 (INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)s.s[n];
5608 CYCLES(1); // TODO: correct cycle count
5611 void I386_OPS_BASE::SSEOP(psubb_r128_rm128)() // Opcode 66 0f f8
5613 UINT8 modrm = FETCH();
5614 if( modrm >= 0xc0 ) {
5615 for (int n=0;n < 16;n++)
5616 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] - XMM(modrm & 7).b[n];
5619 UINT32 ea = GetEA(modrm, 0);
5621 for (int n=0;n < 16;n++)
5622 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] - s.b[n];
5624 CYCLES(1); // TODO: correct cycle count
5627 void I386_OPS_BASE::SSEOP(psubw_r128_rm128)() // Opcode 66 0f f9
5629 UINT8 modrm = FETCH();
5630 if( modrm >= 0xc0 ) {
5631 for (int n=0;n < 8;n++)
5632 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] - XMM(modrm & 7).w[n];
5635 UINT32 ea = GetEA(modrm, 0);
5637 for (int n=0;n < 8;n++)
5638 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] - s.w[n];
5640 CYCLES(1); // TODO: correct cycle count
5643 void I386_OPS_BASE::SSEOP(psubd_r128_rm128)() // Opcode 66 0f fa
5645 UINT8 modrm = FETCH();
5646 if( modrm >= 0xc0 ) {
5647 for (int n=0;n < 4;n++)
5648 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] - XMM(modrm & 7).d[n];
5651 UINT32 ea = GetEA(modrm, 0);
5653 for (int n=0;n < 4;n++)
5654 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] - s.d[n];
5656 CYCLES(1); // TODO: correct cycle count
5659 void I386_OPS_BASE::SSEOP(psadbw_r128_rm128)() // Opcode 66 0f f6
5662 UINT8 modrm = FETCH();
5663 if( modrm >= 0xc0 ) {
5665 for (int n=0;n < 8;n++)
5666 temp += abs((INT32)XMM((modrm >> 3) & 0x7).b[n] - (INT32)XMM(modrm & 0x7).b[n]);
5667 XMM((modrm >> 3) & 0x7).l[0]=(UINT64)temp & 0xffff;
5669 for (int n=8;n < 16;n++)
5670 temp += abs((INT32)XMM((modrm >> 3) & 0x7).b[n] - (INT32)XMM(modrm & 0x7).b[n]);
5671 XMM((modrm >> 3) & 0x7).l[1]=(UINT64)temp & 0xffff;
5674 UINT32 ea = GetEA(modrm, 0);
5677 for (int n=0;n < 8;n++)
5678 temp += abs((INT32)XMM((modrm >> 3) & 0x7).b[n] - (INT32)s.b[n]);
5679 XMM((modrm >> 3) & 0x7).l[0]=(UINT64)temp & 0xffff;
5681 for (int n=8;n < 16;n++)
5682 temp += abs((INT32)XMM((modrm >> 3) & 0x7).b[n] - (INT32)s.b[n]);
5683 XMM((modrm >> 3) & 0x7).l[1]=(UINT64)temp & 0xffff;
5685 CYCLES(1); // TODO: correct cycle count
5688 void I386_OPS_BASE::SSEOP(pavgb_r128_rm128)() // Opcode 66 0f e0
5690 UINT8 modrm = FETCH();
5691 if( modrm >= 0xc0 ) {
5692 for (int n=0;n < 16;n++)
5693 XMM((modrm >> 3) & 0x7).b[n] = ((UINT16)XMM((modrm >> 3) & 0x7).b[n] + (UINT16)XMM(modrm & 0x7).b[n] + 1) >> 1;
5696 UINT32 ea = GetEA(modrm, 0);
5698 for (int n=0;n < 16;n++)
5699 XMM((modrm >> 3) & 0x7).b[n] = ((UINT16)XMM((modrm >> 3) & 0x7).b[n] + (UINT16)s.b[n] + 1) >> 1;
5701 CYCLES(1); // TODO: correct cycle count
5704 void I386_OPS_BASE::SSEOP(pavgw_r128_rm128)() // Opcode 66 0f e3
5706 UINT8 modrm = FETCH();
5707 if( modrm >= 0xc0 ) {
5708 for (int n=0;n < 8;n++)
5709 XMM((modrm >> 3) & 0x7).w[n] = ((UINT32)XMM((modrm >> 3) & 0x7).w[n] + (UINT32)XMM(modrm & 0x7).w[n] + 1) >> 1;
5712 UINT32 ea = GetEA(modrm, 0);
5714 for (int n=0;n < 8;n++)
5715 XMM((modrm >> 3) & 0x7).w[n] = ((UINT32)XMM((modrm >> 3) & 0x7).w[n] + (UINT32)s.w[n] + 1) >> 1;
5717 CYCLES(1); // TODO: correct cycle count
5720 void I386_OPS_BASE::SSEOP(psrlw_r128_rm128)() // Opcode 66 0f d1
5722 UINT8 modrm = FETCH();
5723 if( modrm >= 0xc0 ) {
5724 int count=(int)XMM(modrm & 7).q[0];
5725 for (int n=0; n < 8;n++)
5726 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] >> count;
5729 UINT32 ea = GetEA(modrm, 0);
5731 int count=(int)src.q[0];
5732 for (int n=0; n < 8;n++)
5733 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] >> count;
5735 CYCLES(1); // TODO: correct cycle count
5738 void I386_OPS_BASE::SSEOP(psrld_r128_rm128)() // Opcode 66 0f d2
5740 UINT8 modrm = FETCH();
5741 if( modrm >= 0xc0 ) {
5742 int count=(int)XMM(modrm & 7).q[0];
5743 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] >> count;
5744 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] >> count;
5745 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] >> count;
5746 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] >> count;
5749 UINT32 ea = GetEA(modrm, 0);
5751 int count=(int)src.q[0];
5752 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] >> count;
5753 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] >> count;
5754 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] >> count;
5755 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] >> count;
5757 CYCLES(1); // TODO: correct cycle count
5760 void I386_OPS_BASE::SSEOP(psrlq_r128_rm128)() // Opcode 66 0f d3
5762 UINT8 modrm = FETCH();
5763 if( modrm >= 0xc0 ) {
5764 int count=(int)XMM(modrm & 7).q[0];
5765 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] >> count;
5766 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] >> count;
5769 UINT32 ea = GetEA(modrm, 0);
5771 int count=(int)src.q[0];
5772 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] >> count;
5773 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] >> count;
5775 CYCLES(1); // TODO: correct cycle count
5778 void I386_OPS_BASE::SSEOP(psllw_r128_rm128)() // Opcode 66 0f f1
5780 UINT8 modrm = FETCH();
5781 if( modrm >= 0xc0 ) {
5782 int count=(int)XMM(modrm & 7).q[0];
5783 for (int n=0; n < 8;n++)
5784 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] << count;
5787 UINT32 ea = GetEA(modrm, 0);
5789 int count=(int)s.q[0];
5790 for (int n=0; n < 8;n++)
5791 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] << count;
5793 CYCLES(1); // TODO: correct cycle count
5796 void I386_OPS_BASE::SSEOP(pslld_r128_rm128)() // Opcode 66 0f f2
5798 UINT8 modrm = FETCH();
5799 if( modrm >= 0xc0 ) {
5800 int count=(int)XMM(modrm & 7).q[0];
5801 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] << count;
5802 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] << count;
5803 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] << count;
5804 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] << count;
5807 UINT32 ea = GetEA(modrm, 0);
5809 int count=(int)s.q[0];
5810 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] << count;
5811 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] << count;
5812 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] << count;
5813 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] << count;
5815 CYCLES(1); // TODO: correct cycle count
5818 void I386_OPS_BASE::SSEOP(psllq_r128_rm128)() // Opcode 66 0f f3
5820 UINT8 modrm = FETCH();
5821 if( modrm >= 0xc0 ) {
5822 int count=(int)XMM(modrm & 7).q[0];
5823 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] << count;
5824 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] << count;
5827 UINT32 ea = GetEA(modrm, 0);
5829 int count=(int)s.q[0];
5830 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] << count;
5831 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] << count;
5833 CYCLES(1); // TODO: correct cycle count
5836 void I386_OPS_BASE::SSEOP(psraw_r128_rm128)() // Opcode 66 0f e1
5838 UINT8 modrm = FETCH();
5839 if( modrm >= 0xc0 ) {
5840 int count=(int)XMM(modrm & 7).q[0];
5841 for (int n=0; n < 8;n++)
5842 XMM((modrm >> 3) & 0x7).s[n]=XMM((modrm >> 3) & 0x7).s[n] >> count;
5845 UINT32 ea = GetEA(modrm, 0);
5847 int count=(int)src.q[0];
5848 for (int n=0; n < 8;n++)
5849 XMM((modrm >> 3) & 0x7).s[n]=XMM((modrm >> 3) & 0x7).s[n] >> count;
5851 CYCLES(1); // TODO: correct cycle count
5854 void I386_OPS_BASE::SSEOP(psrad_r128_rm128)() // Opcode 66 0f e2
5856 UINT8 modrm = FETCH();
5857 if( modrm >= 0xc0 ) {
5858 int count=(int)XMM(modrm & 7).q[0];
5859 XMM((modrm >> 3) & 0x7).i[0]=XMM((modrm >> 3) & 0x7).i[0] >> count;
5860 XMM((modrm >> 3) & 0x7).i[1]=XMM((modrm >> 3) & 0x7).i[1] >> count;
5861 XMM((modrm >> 3) & 0x7).i[2]=XMM((modrm >> 3) & 0x7).i[2] >> count;
5862 XMM((modrm >> 3) & 0x7).i[3]=XMM((modrm >> 3) & 0x7).i[3] >> count;
5865 UINT32 ea = GetEA(modrm, 0);
5867 int count=(int)src.q[0];
5868 XMM((modrm >> 3) & 0x7).i[0]=XMM((modrm >> 3) & 0x7).i[0] >> count;
5869 XMM((modrm >> 3) & 0x7).i[1]=XMM((modrm >> 3) & 0x7).i[1] >> count;
5870 XMM((modrm >> 3) & 0x7).i[2]=XMM((modrm >> 3) & 0x7).i[2] >> count;
5871 XMM((modrm >> 3) & 0x7).i[3]=XMM((modrm >> 3) & 0x7).i[3] >> count;
5873 CYCLES(1); // TODO: correct cycle count
5876 void I386_OPS_BASE::SSEOP(movntdq_m128_r128)() // Opcode 66 0f e7
5878 UINT8 modrm = FETCH();
5879 if( modrm >= 0xc0 ) {
5880 CYCLES(1); // unsupported
5882 // since cache is not implemented
5883 UINT32 ea = GetEA(modrm, 0);
5884 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
5885 CYCLES(1); // TODO: correct cycle count
5889 void I386_OPS_BASE::SSEOP(cvttpd2dq_r128_rm128)() // Opcode 66 0f e6
5891 UINT8 modrm = FETCH();
5892 if( modrm >= 0xc0 ) {
5893 XMM((modrm >> 3) & 0x7).i[0]=(INT32)XMM((modrm >> 3) & 0x7).f64[0];
5894 XMM((modrm >> 3) & 0x7).i[1]=(INT32)XMM((modrm >> 3) & 0x7).f64[1];
5895 XMM((modrm >> 3) & 0x7).q[1] = 0;
5898 UINT32 ea = GetEA(modrm, 0);
5900 XMM((modrm >> 3) & 0x7).i[0]=(INT32)src.f64[0];
5901 XMM((modrm >> 3) & 0x7).i[1]=(INT32)src.f64[1];
5902 XMM((modrm >> 3) & 0x7).q[1] = 0;
5904 CYCLES(1); // TODO: correct cycle count
5907 void I386_OPS_BASE::SSEOP(movq_r128m64_r128)() // Opcode 66 0f d6
5909 UINT8 modrm = FETCH();
5910 if( modrm >= 0xc0 ) {
5911 XMM(modrm & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0];
5912 XMM(modrm & 0x7).q[1] = 0;
5914 UINT32 ea = GetEA(modrm, 0);
5915 WRITE64( ea, XMM((modrm >> 3) & 0x7).q[0]);
5917 CYCLES(1); // TODO: correct cycle count
5920 void I386_OPS_BASE::SSEOP(addsubpd_r128_rm128)() // Opcode 66 0f d0
5922 UINT8 modrm = FETCH();
5923 if( modrm >= 0xc0 ) {
5926 d=(modrm >> 3) & 0x7;
5927 XMM(d).f64[0]=XMM(d).f64[0]-XMM(s).f64[0];
5928 XMM(d).f64[1]=XMM(d).f64[1]+XMM(s).f64[1];
5932 UINT32 ea = GetEA(modrm, 0);
5933 d=(modrm >> 3) & 0x7;
5935 XMM(d).f64[0]=XMM(d).f64[0]-src.f64[0];
5936 XMM(d).f64[1]=XMM(d).f64[1]+src.f64[1];
5938 CYCLES(1); // TODO: correct cycle count
5941 void I386_OPS_BASE::SSEOP(haddpd_r128_rm128)() // Opcode 66 0f 7c
5943 UINT8 modrm = FETCH();
5944 if( modrm >= 0xc0 ) {
5947 d=(modrm >> 3) & 0x7;
5948 XMM(d).f64[0]=XMM(d).f64[0]+XMM(d).f64[1];
5949 XMM(d).f64[1]=XMM(s).f64[0]+XMM(s).f64[1];
5953 UINT32 ea = GetEA(modrm, 0);
5954 d=(modrm >> 3) & 0x7;
5956 XMM(d).f64[0]=XMM(d).f64[0]+XMM(d).f64[1];
5957 XMM(d).f64[1]=src.f64[0]+src.f64[1];
5959 CYCLES(1); // TODO: correct cycle count
5962 void I386_OPS_BASE::SSEOP(hsubpd_r128_rm128)() // Opcode 66 0f 7d
5964 UINT8 modrm = FETCH();
5965 if( modrm >= 0xc0 ) {
5968 d=(modrm >> 3) & 0x7;
5969 XMM(d).f64[0]=XMM(d).f64[0]-XMM(d).f64[1];
5970 XMM(d).f64[1]=XMM(s).f64[0]-XMM(s).f64[1];
5974 UINT32 ea = GetEA(modrm, 0);
5975 d=(modrm >> 3) & 0x7;
5977 XMM(d).f64[0]=XMM(d).f64[0]-XMM(d).f64[1];
5978 XMM(d).f64[1]=src.f64[0]-src.f64[1];
5980 CYCLES(1); // TODO: correct cycle count
5983 void I386_OPS_BASE::SSEOP(sqrtpd_r128_rm128)() // Opcode 66 0f 51
5985 UINT8 modrm = FETCH();
5986 if( modrm >= 0xc0 ) {
5989 d=(modrm >> 3) & 0x7;
5990 XMM(d).f64[0]=sqrt(XMM(s).f64[0]);
5991 XMM(d).f64[1]=sqrt(XMM(s).f64[1]);
5995 UINT32 ea = GetEA(modrm, 0);
5996 d=(modrm >> 3) & 0x7;
5998 XMM(d).f64[0]=sqrt(src.f64[0]);
5999 XMM(d).f64[1]=sqrt(src.f64[1]);
6001 CYCLES(1); // TODO: correct cycle count
6004 void I386_OPS_BASE::SSEOP(cvtpi2pd_r128_rm64)() // Opcode 66 0f 2a
6006 UINT8 modrm = FETCH();
6007 if( modrm >= 0xc0 ) {
6009 XMM((modrm >> 3) & 0x7).f64[0] = (double)MMX(modrm & 0x7).i[0];
6010 XMM((modrm >> 3) & 0x7).f64[1] = (double)MMX(modrm & 0x7).i[1];
6013 UINT32 ea = GetEA(modrm, 0);
6015 XMM((modrm >> 3) & 0x7).f64[0] = (double)r.i[0];
6016 XMM((modrm >> 3) & 0x7).f64[1] = (double)r.i[1];
6018 CYCLES(1); // TODO: correct cycle count
6021 void I386_OPS_BASE::SSEOP(cvttpd2pi_r64_rm128)() // Opcode 66 0f 2c
6023 UINT8 modrm = FETCH();
6025 if( modrm >= 0xc0 ) {
6026 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f64[0];
6027 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f64[1];
6030 UINT32 ea = GetEA(modrm, 0);
6032 MMX((modrm >> 3) & 0x7).i[0] = r.f64[0];
6033 MMX((modrm >> 3) & 0x7).i[1] = r.f64[1];
6035 CYCLES(1); // TODO: correct cycle count
6038 void I386_OPS_BASE::SSEOP(cvtpd2pi_r64_rm128)() // Opcode 66 0f 2d
6040 UINT8 modrm = FETCH();
6042 if( modrm >= 0xc0 ) {
6043 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f64[0];
6044 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f64[1];
6047 UINT32 ea = GetEA(modrm, 0);
6049 MMX((modrm >> 3) & 0x7).i[0] = r.f64[0];
6050 MMX((modrm >> 3) & 0x7).i[1] = r.f64[1];
6052 CYCLES(1); // TODO: correct cycle count
6055 void I386_OPS_BASE::SSEOP(cvtpd2ps_r128_rm128)() // Opcode 66 0f 5a
6057 UINT8 modrm = FETCH();
6058 if( modrm >= 0xc0 ) {
6059 XMM((modrm >> 3) & 0x7).f[0] = (float)XMM(modrm & 0x7).f64[0];
6060 XMM((modrm >> 3) & 0x7).f[1] = (float)XMM(modrm & 0x7).f64[1];
6061 XMM((modrm >> 3) & 0x7).q[1] = 0;
6064 UINT32 ea = GetEA(modrm, 0);
6066 XMM((modrm >> 3) & 0x7).f[0] = (float)r.f64[0];
6067 XMM((modrm >> 3) & 0x7).f[1] = (float)r.f64[1];
6068 XMM((modrm >> 3) & 0x7).q[1] = 0;
6070 CYCLES(1); // TODO: correct cycle count
6073 void I386_OPS_BASE::SSEOP(cvtps2dq_r128_rm128)() // Opcode 66 0f 5b
6075 UINT8 modrm = FETCH();
6076 if( modrm >= 0xc0 ) {
6077 XMM((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0];
6078 XMM((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1];
6079 XMM((modrm >> 3) & 0x7).i[2] = XMM(modrm & 0x7).f[2];
6080 XMM((modrm >> 3) & 0x7).i[3] = XMM(modrm & 0x7).f[3];
6083 UINT32 ea = GetEA(modrm, 0);
6085 XMM((modrm >> 3) & 0x7).i[0] = r.f[0];
6086 XMM((modrm >> 3) & 0x7).i[1] = r.f[1];
6087 XMM((modrm >> 3) & 0x7).i[2] = r.f[2];
6088 XMM((modrm >> 3) & 0x7).i[3] = r.f[3];
6090 CYCLES(1); // TODO: correct cycle count
6093 void I386_OPS_BASE::SSEOP(addpd_r128_rm128)() // Opcode 66 0f 58
6095 UINT8 modrm = FETCH();
6096 if( modrm >= 0xc0 ) {
6097 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + XMM(modrm & 0x7).f64[0];
6098 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] + XMM(modrm & 0x7).f64[1];
6101 UINT32 ea = GetEA(modrm, 0);
6103 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + src.f64[0];
6104 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] + src.f64[1];
6106 CYCLES(1); // TODO: correct cycle count
6109 void I386_OPS_BASE::SSEOP(mulpd_r128_rm128)() // Opcode 66 0f 59
6111 UINT8 modrm = FETCH();
6112 if( modrm >= 0xc0 ) {
6113 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * XMM(modrm & 0x7).f64[0];
6114 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] * XMM(modrm & 0x7).f64[1];
6117 UINT32 ea = GetEA(modrm, 0);
6119 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * src.f64[0];
6120 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] * src.f64[1];
6122 CYCLES(1); // TODO: correct cycle count
6125 void I386_OPS_BASE::SSEOP(subpd_r128_rm128)() // Opcode 66 0f 5c
6127 UINT8 modrm = FETCH();
6128 if( modrm >= 0xc0 ) {
6129 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - XMM(modrm & 0x7).f64[0];
6130 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] - XMM(modrm & 0x7).f64[1];
6133 UINT32 ea = GetEA(modrm, 0);
6135 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - src.f64[0];
6136 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] - src.f64[1];
6138 CYCLES(1); // TODO: correct cycle count
6141 void I386_OPS_BASE::SSEOP(minpd_r128_rm128)() // Opcode 66 0f 5d
6143 UINT8 modrm = FETCH();
6144 if( modrm >= 0xc0 ) {
6145 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6146 XMM((modrm >> 3) & 0x7).f64[1] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[1], XMM(modrm & 0x7).f64[1]);
6149 UINT32 ea = GetEA(modrm, 0);
6151 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6152 XMM((modrm >> 3) & 0x7).f64[1] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[1], src.f64[1]);
6154 CYCLES(1); // TODO: correct cycle count
6157 void I386_OPS_BASE::SSEOP(divpd_r128_rm128)() // Opcode 66 0f 5e
6159 UINT8 modrm = FETCH();
6160 if( modrm >= 0xc0 ) {
6161 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / XMM(modrm & 0x7).f64[0];
6162 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] / XMM(modrm & 0x7).f64[1];
6165 UINT32 ea = GetEA(modrm, 0);
6167 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / src.f64[0];
6168 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] / src.f64[1];
6170 CYCLES(1); // TODO: correct cycle count
6173 void I386_OPS_BASE::SSEOP(maxpd_r128_rm128)() // Opcode 66 0f 5f
6175 UINT8 modrm = FETCH();
6176 if( modrm >= 0xc0 ) {
6177 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6178 XMM((modrm >> 3) & 0x7).f64[1] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[1], XMM(modrm & 0x7).f64[1]);
6181 UINT32 ea = GetEA(modrm, 0);
6183 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6184 XMM((modrm >> 3) & 0x7).f64[1] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[1], src.f64[1]);
6186 CYCLES(1); // TODO: correct cycle count
6189 void I386_OPS_BASE::SSEOP(movntpd_m128_r128)() // Opcode 66 0f 2b
6191 UINT8 modrm = FETCH();
6192 if( modrm >= 0xc0 ) {
6193 // unsupported by cpu
6194 CYCLES(1); // TODO: correct cycle count
6196 // since cache is not implemented
6197 UINT32 ea = GetEA(modrm, 0);
6198 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
6199 CYCLES(1); // TODO: correct cycle count
6203 void I386_OPS_BASE::SSEOP(movapd_r128_rm128)() // Opcode 66 0f 28
6205 UINT8 modrm = FETCH();
6206 if( modrm >= 0xc0 ) {
6207 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
6209 UINT32 ea = GetEA(modrm, 0);
6210 READXMM( ea, XMM((modrm >> 3) & 0x7));
6212 CYCLES(1); // TODO: correct cycle count
6215 void I386_OPS_BASE::SSEOP(movapd_rm128_r128)() // Opcode 66 0f 29
6217 UINT8 modrm = FETCH();
6218 if( modrm >= 0xc0 ) {
6219 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
6221 UINT32 ea = GetEA(modrm, 0);
6222 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
6224 CYCLES(1); // TODO: correct cycle count
6227 void I386_OPS_BASE::SSEOP(movsd_r128_r128m64)() // Opcode f2 0f 10
6229 UINT8 modrm = FETCH();
6230 if( modrm >= 0xc0 ) {
6231 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
6233 UINT32 ea = GetEA(modrm, 0);
6234 READXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
6235 XMM((modrm >> 3) & 0x7).q[1] = 0;
6237 CYCLES(1); // TODO: correct cycle count
6240 void I386_OPS_BASE::SSEOP(movsd_r128m64_r128)() // Opcode f2 0f 11
6242 UINT8 modrm = FETCH();
6243 if( modrm >= 0xc0 ) {
6244 XMM(modrm & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0];
6246 UINT32 ea = GetEA(modrm, 0);
6247 WRITEXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
6249 CYCLES(1); // TODO: correct cycle count
6252 void I386_OPS_BASE::SSEOP(movddup_r128_r128m64)() // Opcode f2 0f 12
6254 UINT8 modrm = FETCH();
6255 if( modrm >= 0xc0 ) {
6256 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
6257 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[0];
6259 UINT32 ea = GetEA(modrm, 0);
6260 READXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
6261 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[0];
6263 CYCLES(1); // TODO: correct cycle count
6266 void I386_OPS_BASE::SSEOP(cvtsi2sd_r128_rm32)() // Opcode f2 0f 2a
6268 UINT8 modrm = FETCH();
6269 if( modrm >= 0xc0 ) {
6270 XMM((modrm >> 3) & 0x7).f64[0] = (INT32)LOAD_RM32(modrm);
6272 UINT32 ea = GetEA(modrm, 0);
6273 XMM((modrm >> 3) & 0x7).f64[0] = (INT32)READ32(ea);
6275 CYCLES(1); // TODO: correct cycle count
6278 void I386_OPS_BASE::SSEOP(cvttsd2si_r32_r128m64)() // Opcode f2 0f 2c
6281 UINT8 modrm = FETCH();
6282 if( modrm >= 0xc0 ) {
6283 src = (INT32)XMM(modrm & 0x7).f64[0];
6284 } else { // otherwise is a memory address
6286 UINT32 ea = GetEA(modrm, 0);
6287 READXMM_LO64( ea, t);
6288 src = (INT32)t.f64[0];
6290 STORE_REG32(modrm, (UINT32)src);
6291 CYCLES(1); // TODO: correct cycle count
6294 void I386_OPS_BASE::SSEOP(cvtsd2si_r32_r128m64)() // Opcode f2 0f 2d
6297 UINT8 modrm = FETCH();
6298 if( modrm >= 0xc0 ) {
6299 src = (INT32)XMM(modrm & 0x7).f64[0];
6300 } else { // otherwise is a memory address
6302 UINT32 ea = GetEA(modrm, 0);
6303 READXMM_LO64( ea, t);
6304 src = (INT32)t.f64[0];
6306 STORE_REG32(modrm, (UINT32)src);
6307 CYCLES(1); // TODO: correct cycle count
6310 void I386_OPS_BASE::SSEOP(sqrtsd_r128_r128m64)() // Opcode f2 0f 51
6312 UINT8 modrm = FETCH();
6313 if( modrm >= 0xc0 ) {
6316 d=(modrm >> 3) & 0x7;
6317 XMM(d).f64[0]=sqrt(XMM(s).f64[0]);
6321 UINT32 ea = GetEA(modrm, 0);
6322 d=(modrm >> 3) & 0x7;
6324 XMM(d).f64[0]=sqrt(src.f64[0]);
6326 CYCLES(1); // TODO: correct cycle count
6329 void I386_OPS_BASE::SSEOP(addsd_r128_r128m64)() // Opcode f2 0f 58
6331 UINT8 modrm = FETCH();
6332 if( modrm >= 0xc0 ) {
6333 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + XMM(modrm & 0x7).f64[0];
6336 UINT32 ea = GetEA(modrm, 0);
6338 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + src.f64[0];
6340 CYCLES(1); // TODO: correct cycle count
6343 void I386_OPS_BASE::SSEOP(mulsd_r128_r128m64)() // Opcode f2 0f 59
6345 UINT8 modrm = FETCH();
6346 if( modrm >= 0xc0 ) {
6347 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * XMM(modrm & 0x7).f64[0];
6350 UINT32 ea = GetEA(modrm, 0);
6352 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * src.f64[0];
6354 CYCLES(1); // TODO: correct cycle count
6357 void I386_OPS_BASE::SSEOP(cvtsd2ss_r128_r128m64)() // Opcode f2 0f 5a
6359 UINT8 modrm = FETCH();
6360 if( modrm >= 0xc0 ) {
6361 XMM((modrm >> 3) & 0x7).f[0] = XMM(modrm & 0x7).f64[0];
6364 UINT32 ea = GetEA(modrm, 0);
6365 READXMM_LO64( ea, s);
6366 XMM((modrm >> 3) & 0x7).f[0] = s.f64[0];
6368 CYCLES(1); // TODO: correct cycle count
6371 void I386_OPS_BASE::SSEOP(subsd_r128_r128m64)() // Opcode f2 0f 5c
6373 UINT8 modrm = FETCH();
6374 if( modrm >= 0xc0 ) {
6375 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - XMM(modrm & 0x7).f64[0];
6378 UINT32 ea = GetEA(modrm, 0);
6380 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - src.f64[0];
6382 CYCLES(1); // TODO: correct cycle count
6385 void I386_OPS_BASE::SSEOP(minsd_r128_r128m64)() // Opcode f2 0f 5d
6387 UINT8 modrm = FETCH();
6388 if( modrm >= 0xc0 ) {
6389 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6392 UINT32 ea = GetEA(modrm, 0);
6394 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6396 CYCLES(1); // TODO: correct cycle count
6399 void I386_OPS_BASE::SSEOP(divsd_r128_r128m64)() // Opcode f2 0f 5e
6401 UINT8 modrm = FETCH();
6402 if( modrm >= 0xc0 ) {
6403 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / XMM(modrm & 0x7).f64[0];
6406 UINT32 ea = GetEA(modrm, 0);
6408 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / src.f64[0];
6410 CYCLES(1); // TODO: correct cycle count
6413 void I386_OPS_BASE::SSEOP(maxsd_r128_r128m64)() // Opcode f2 0f 5f
6415 UINT8 modrm = FETCH();
6416 if( modrm >= 0xc0 ) {
6417 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6420 UINT32 ea = GetEA(modrm, 0);
6422 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6424 CYCLES(1); // TODO: correct cycle count
6427 void I386_OPS_BASE::SSEOP(haddps_r128_rm128)() // Opcode f2 0f 7c
6429 UINT8 modrm = FETCH();
6430 if( modrm >= 0xc0 ) {
6432 float f1, f2, f3, f4;
6434 d=(modrm >> 3) & 0x7;
6435 f1=XMM(d).f[0]+XMM(d).f[1];
6436 f2=XMM(d).f[2]+XMM(d).f[3];
6437 f3=XMM(s).f[0]+XMM(s).f[1];
6438 f4=XMM(s).f[2]+XMM(s).f[3];
6447 UINT32 ea = GetEA(modrm, 0);
6448 d=(modrm >> 3) & 0x7;
6450 f1=XMM(d).f[0]+XMM(d).f[1];
6451 f2=XMM(d).f[2]+XMM(d).f[3];
6454 XMM(d).f[2]=src.f[0]+src.f[1];
6455 XMM(d).f[3]=src.f[2]+src.f[3];
6457 CYCLES(1); // TODO: correct cycle count
6460 void I386_OPS_BASE::SSEOP(hsubps_r128_rm128)() // Opcode f2 0f 7d
6462 UINT8 modrm = FETCH();
6463 if( modrm >= 0xc0 ) {
6465 float f1, f2, f3, f4;
6467 d=(modrm >> 3) & 0x7;
6468 f1=XMM(d).f[0]-XMM(d).f[1];
6469 f2=XMM(d).f[2]-XMM(d).f[3];
6470 f3=XMM(s).f[0]-XMM(s).f[1];
6471 f4=XMM(s).f[2]-XMM(s).f[3];
6480 UINT32 ea = GetEA(modrm, 0);
6481 d=(modrm >> 3) & 0x7;
6483 f1=XMM(d).f[0]-XMM(d).f[1];
6484 f2=XMM(d).f[2]-XMM(d).f[3];
6487 XMM(d).f[2]=src.f[0]-src.f[1];
6488 XMM(d).f[3]=src.f[2]-src.f[3];
6490 CYCLES(1); // TODO: correct cycle count
6493 void I386_OPS_BASE::SSEOP(cmpsd_r128_r128m64_i8)() // Opcode f2 0f c2
6495 UINT8 modrm = FETCH();
6496 if( modrm >= 0xc0 ) {
6498 UINT8 imm8 = FETCH();
6500 d=(modrm >> 3) & 0x7;
6501 sse_predicate_compare_double_scalar(imm8, XMM(d), XMM(s));
6505 UINT32 ea = GetEA(modrm, 0);
6506 UINT8 imm8 = FETCH();
6507 READXMM_LO64( ea, s);
6508 d=(modrm >> 3) & 0x7;
6509 sse_predicate_compare_double_scalar(imm8, XMM(d), s);
6511 CYCLES(1); // TODO: correct cycle count
6514 void I386_OPS_BASE::SSEOP(addsubps_r128_rm128)() // Opcode f2 0f d0
6516 UINT8 modrm = FETCH();
6517 if( modrm >= 0xc0 ) {
6518 XMM((modrm >> 3) & 0x7).f[0]=XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0];
6519 XMM((modrm >> 3) & 0x7).f[1]=XMM((modrm >> 3) & 0x7).f[1] + XMM(modrm & 0x7).f[1];
6520 XMM((modrm >> 3) & 0x7).f[2]=XMM((modrm >> 3) & 0x7).f[2] - XMM(modrm & 0x7).f[2];
6521 XMM((modrm >> 3) & 0x7).f[3]=XMM((modrm >> 3) & 0x7).f[3] + XMM(modrm & 0x7).f[3];
6524 UINT32 ea = GetEA(modrm, 0);
6526 XMM((modrm >> 3) & 0x7).f[0]=XMM((modrm >> 3) & 0x7).f[0] - src.f[0];
6527 XMM((modrm >> 3) & 0x7).f[1]=XMM((modrm >> 3) & 0x7).f[1] + src.f[1];
6528 XMM((modrm >> 3) & 0x7).f[2]=XMM((modrm >> 3) & 0x7).f[2] - src.f[2];
6529 XMM((modrm >> 3) & 0x7).f[3]=XMM((modrm >> 3) & 0x7).f[3] + src.f[3];
6531 CYCLES(1); // TODO: correct cycle count
6534 void I386_OPS_BASE::SSEOP(movdq2q_r64_r128)() // Opcode f2 0f d6
6536 UINT8 modrm = FETCH();
6538 if( modrm >= 0xc0 ) {
6539 MMX((modrm >> 3) & 0x7).q = XMM(modrm & 0x7).q[0];
6540 CYCLES(1); // TODO: correct cycle count
6542 // unsupported by cpu
6543 CYCLES(1); // TODO: correct cycle count
6547 void I386_OPS_BASE::SSEOP(cvtpd2dq_r128_rm128)() // Opcode f2 0f e6
6549 UINT8 modrm = FETCH();
6550 if( modrm >= 0xc0 ) {
6551 XMM((modrm >> 3) & 0x7).i[0]=(INT32)XMM((modrm >> 3) & 0x7).f64[0];
6552 XMM((modrm >> 3) & 0x7).i[1]=(INT32)XMM((modrm >> 3) & 0x7).f64[1];
6553 XMM((modrm >> 3) & 0x7).q[1] = 0;
6556 UINT32 ea = GetEA(modrm, 0);
6558 XMM((modrm >> 3) & 0x7).i[0]=(INT32)src.f64[0];
6559 XMM((modrm >> 3) & 0x7).i[1]=(INT32)src.f64[1];
6560 XMM((modrm >> 3) & 0x7).q[1] = 0;
6562 CYCLES(1); // TODO: correct cycle count
6565 void I386_OPS_BASE::SSEOP(lddqu_r128_m128)() // Opcode f2 0f f0
6567 UINT8 modrm = FETCH();
6568 if( modrm >= 0xc0 ) {
6569 // unsupported by cpu
6570 CYCLES(1); // TODO: correct cycle count
6572 UINT32 ea = GetEA(modrm, 0);
6573 READXMM(ea, XMM((modrm >> 3) & 0x7));