1 // license:BSD-3-Clause
2 // copyright-holders:Ville Linde, Barry Rodewald, Carl, Philip Bennett
3 // Pentium+ specific opcodes
4 #include "./i386_opdef.h"
// FAULT: sets cpustate->ext, raises `fault` with `error` through i386_trap_with_error
// and returns from the enclosing function (the macro body ends in a bare `return;`).
6 #define FAULT(fault,error) {cpustate->ext = 1; i386_trap_with_error(fault,0,0,error); return;}
// FAULT_EXP: same as FAULT but passes trap_level+1 as the third trap argument;
// a `trap_level` identifier must be in scope wherever this macro is used.
7 #define FAULT_EXP(fault,error) {cpustate->ext = 1; i386_trap_with_error(fault,0,trap_level+1,error); return;}
// NaN predicates declared here by hand because softfloat.h does not declare them.
9 extern flag float32_is_nan( float32 a ); // since it is not declared in softfloat.h
10 extern flag float64_is_nan( float64 a ); // since it is not declared in softfloat.h
// RDMSR: reads the model-specific register selected by ECX via MSR_READ and
// returns the 64-bit value in EDX:EAX. Faults with #GP(0) when CPL is not 0
// or MSR_READ reports the register as not valid.
12 void I386_OPS_BASE::PENTIUMOP(rdmsr)() // Opcode 0x0f 32
17 data = MSR_READ(REG32(ECX),&valid_msr);
19 if(cpustate->CPL != 0 || valid_msr == 0) // if current privilege level isn't 0 or the register isn't recognized ...
20 FAULT(FAULT_GP,0) // ... raise a general protection fault
// upper 32 bits go to EDX, lower 32 bits to EAX
23 REG32(EDX) = data >> 32;
24 REG32(EAX) = data & 0xffffffff;
30 void I386_OPS_BASE::PENTIUMOP(wrmsr)() // Opcode 0x0f 30
35 data = (UINT64)REG32(EAX);
36 data |= (UINT64)(REG32(EDX)) << 32;
38 MSR_WRITE(REG32(ECX),data,&valid_msr);
40 if(cpustate->CPL != 0 || valid_msr == 0) // if current privilege level isn't 0 or the register isn't recognized
41 FAULT(FAULT_GP,0) // ... throw a general exception fault
43 CYCLES(1); // TODO: correct cycle count (~30-45)
// RDTSC: returns the time stamp counter in EDX:EAX. The value is cpustate->tsc
// plus (base_cycles - cycles) as tracked in cpustate.
46 void I386_OPS_BASE::PENTIUMOP(rdtsc)() // Opcode 0x0f 31
48 UINT64 ts = cpustate->tsc + (cpustate->base_cycles - cpustate->cycles);
// low 32 bits to EAX, high 32 bits to EDX
49 REG32(EAX) = (UINT32)(ts);
50 REG32(EDX) = (UINT32)(ts >> 32);
// UD2: handler for the two-byte opcode 0x0f 0x0b.
55 void I386_OPS_BASE::PENTIUMOP(ud2)() // Opcode 0x0f 0b
// RSM: restores the CPU state from the SMRAM save area and leaves system
// management mode. The save area starts at smbase + 0xfe00; every field is read
// with READ32 at a fixed SMRAM_* offset. Nothing read back is validated.
60 void I386_OPS_BASE::PENTIUMOP(rsm)()
62 UINT32 smram_state = cpustate->smbase + 0xfe00;
// message logged when RSM is executed while not in SMM
65 logerror("i386: Invalid RSM outside SMM at %08X\n", cpustate->pc - 1);
70 // load state, no sanity checks anywhere
71 cpustate->smbase = READ32(smram_state+SMRAM_SMBASE);
72 cpustate->cr[4] = READ32(smram_state+SMRAM_IP5_CR4);
// segment descriptor caches: limit, base and access flags for ES, CS, SS, DS, FS, GS
73 cpustate->sreg[ES].limit = READ32(smram_state+SMRAM_IP5_ESLIM);
74 cpustate->sreg[ES].base = READ32(smram_state+SMRAM_IP5_ESBASE);
75 cpustate->sreg[ES].flags = READ32(smram_state+SMRAM_IP5_ESACC);
76 cpustate->sreg[CS].limit = READ32(smram_state+SMRAM_IP5_CSLIM);
77 cpustate->sreg[CS].base = READ32(smram_state+SMRAM_IP5_CSBASE);
78 cpustate->sreg[CS].flags = READ32(smram_state+SMRAM_IP5_CSACC);
79 cpustate->sreg[SS].limit = READ32(smram_state+SMRAM_IP5_SSLIM);
80 cpustate->sreg[SS].base = READ32(smram_state+SMRAM_IP5_SSBASE);
81 cpustate->sreg[SS].flags = READ32(smram_state+SMRAM_IP5_SSACC);
82 cpustate->sreg[DS].limit = READ32(smram_state+SMRAM_IP5_DSLIM);
83 cpustate->sreg[DS].base = READ32(smram_state+SMRAM_IP5_DSBASE);
84 cpustate->sreg[DS].flags = READ32(smram_state+SMRAM_IP5_DSACC);
85 cpustate->sreg[FS].limit = READ32(smram_state+SMRAM_IP5_FSLIM);
86 cpustate->sreg[FS].base = READ32(smram_state+SMRAM_IP5_FSBASE);
87 cpustate->sreg[FS].flags = READ32(smram_state+SMRAM_IP5_FSACC);
88 cpustate->sreg[GS].limit = READ32(smram_state+SMRAM_IP5_GSLIM);
89 cpustate->sreg[GS].base = READ32(smram_state+SMRAM_IP5_GSBASE);
90 cpustate->sreg[GS].flags = READ32(smram_state+SMRAM_IP5_GSACC);
// descriptor-table and task registers: LDTR, GDTR, IDTR, TR
91 cpustate->ldtr.flags = READ32(smram_state+SMRAM_IP5_LDTACC);
92 cpustate->ldtr.limit = READ32(smram_state+SMRAM_IP5_LDTLIM);
93 cpustate->ldtr.base = READ32(smram_state+SMRAM_IP5_LDTBASE);
94 cpustate->gdtr.limit = READ32(smram_state+SMRAM_IP5_GDTLIM);
95 cpustate->gdtr.base = READ32(smram_state+SMRAM_IP5_GDTBASE);
96 cpustate->idtr.limit = READ32(smram_state+SMRAM_IP5_IDTLIM);
97 cpustate->idtr.base = READ32(smram_state+SMRAM_IP5_IDTBASE);
98 cpustate->task.limit = READ32(smram_state+SMRAM_IP5_TRLIM);
99 cpustate->task.base = READ32(smram_state+SMRAM_IP5_TRBASE);
100 cpustate->task.flags = READ32(smram_state+SMRAM_IP5_TRACC);
// selector values for the segment registers, LDTR and TR
102 cpustate->sreg[ES].selector = READ32(smram_state+SMRAM_ES);
103 cpustate->sreg[CS].selector = READ32(smram_state+SMRAM_CS);
104 cpustate->sreg[SS].selector = READ32(smram_state+SMRAM_SS);
105 cpustate->sreg[DS].selector = READ32(smram_state+SMRAM_DS);
106 cpustate->sreg[FS].selector = READ32(smram_state+SMRAM_FS);
107 cpustate->sreg[GS].selector = READ32(smram_state+SMRAM_GS);
108 cpustate->ldtr.segment = READ32(smram_state+SMRAM_LDTR);
109 cpustate->task.segment = READ32(smram_state+SMRAM_TR);
// debug registers, general-purpose registers, EIP, EFLAGS and control registers
111 cpustate->dr[7] = READ32(smram_state+SMRAM_DR7);
112 cpustate->dr[6] = READ32(smram_state+SMRAM_DR6);
113 REG32(EAX) = READ32(smram_state+SMRAM_EAX);
114 REG32(ECX) = READ32(smram_state+SMRAM_ECX);
115 REG32(EDX) = READ32(smram_state+SMRAM_EDX);
116 REG32(EBX) = READ32(smram_state+SMRAM_EBX);
117 REG32(ESP) = READ32(smram_state+SMRAM_ESP);
118 REG32(EBP) = READ32(smram_state+SMRAM_EBP);
119 REG32(ESI) = READ32(smram_state+SMRAM_ESI);
120 REG32(EDI) = READ32(smram_state+SMRAM_EDI);
121 cpustate->eip = READ32(smram_state+SMRAM_EIP);
122 cpustate->eflags = READ32(smram_state+SMRAM_EFLAGS);
123 cpustate->cr[3] = READ32(smram_state+SMRAM_CR3);
124 cpustate->cr[0] = READ32(smram_state+SMRAM_CR0);
// privilege level is taken from bits 13-14 of the restored SS access flags
126 cpustate->CPL = (cpustate->sreg[SS].flags >> 13) & 3; // cpl == dpl of ss
// recompute the per-segment `valid` and `d` fields for indices 0..GS
128 for(int i = 0; i <= GS; i++)
130 if(PROTECTED_MODE && !V8086_MODE)
// a zero selector marks the segment invalid; d mirrors bit 14 (0x4000) of the flags
132 cpustate->sreg[i].valid = cpustate->sreg[i].selector ? true : false;
133 cpustate->sreg[i].d = (cpustate->sreg[i].flags & 0x4000) ? 1 : 0;
136 cpustate->sreg[i].valid = true;
139 // if(!cpustate->smiact.isnull())
140 // cpustate->smiact(false);
// leave SMM, resume at the restored EIP and unmask NMI
141 cpustate->smm = false;
143 CHANGE_PC(cpustate->eip);
144 cpustate->nmi_masked = false;
// SMI / NMI that were latched are checked here
145 if(cpustate->smi_latched)
150 if(cpustate->nmi_latched)
152 cpustate->nmi_latched = false;
// PREFETCH m8: decodes the modrm byte and its effective address; the visible
// code only charges cycles (1 plus the low bit of the address).
157 void I386_OPS_BASE::PENTIUMOP(prefetch_m8)() // Opcode 0x0f 18
159 UINT8 modrm = FETCH();
160 UINT32 ea = GetEA(modrm, 0, 1);
161 CYCLES(1+(ea & 1)); // TODO: correct cycle count
// CMOVO r16, r/m16: stores the 16-bit source into the modrm-selected register only when OF == 1.
164 void I386_OPS_BASE::PENTIUMOP(cmovo_r16_rm16)() // Opcode 0x0f 40
167 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
171 if (cpustate->OF == 1)
173 src = LOAD_RM16(modrm);
174 STORE_REG16(modrm, src);
176 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
180 UINT32 ea = GetEA(modrm, 0, 2);
181 if (cpustate->OF == 1)
184 STORE_REG16(modrm, src);
186 CYCLES(1); // TODO: correct cycle count
// CMOVO r32, r/m32: stores the 32-bit source into the modrm-selected register only when OF == 1.
190 void I386_OPS_BASE::PENTIUMOP(cmovo_r32_rm32)() // Opcode 0x0f 40
193 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
197 if (cpustate->OF == 1)
199 src = LOAD_RM32(modrm);
200 STORE_REG32(modrm, src);
202 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
206 UINT32 ea = GetEA(modrm, 0, 4);
207 if (cpustate->OF == 1)
210 STORE_REG32(modrm, src);
212 CYCLES(1); // TODO: correct cycle count
// CMOVNO r16, r/m16: stores the 16-bit source into the modrm-selected register only when OF == 0.
216 void I386_OPS_BASE::PENTIUMOP(cmovno_r16_rm16)() // Opcode 0x0f 41
219 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
223 if (cpustate->OF == 0)
225 src = LOAD_RM16(modrm);
226 STORE_REG16(modrm, src);
228 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
232 UINT32 ea = GetEA(modrm, 0, 2);
233 if (cpustate->OF == 0)
236 STORE_REG16(modrm, src);
238 CYCLES(1); // TODO: correct cycle count
// CMOVNO r32, r/m32: stores the 32-bit source into the modrm-selected register only when OF == 0.
242 void I386_OPS_BASE::PENTIUMOP(cmovno_r32_rm32)() // Opcode 0x0f 41
245 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
249 if (cpustate->OF == 0)
251 src = LOAD_RM32(modrm);
252 STORE_REG32(modrm, src);
254 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
258 UINT32 ea = GetEA(modrm, 0, 4);
259 if (cpustate->OF == 0)
262 STORE_REG32(modrm, src);
264 CYCLES(1); // TODO: correct cycle count
// CMOVB r16, r/m16: stores the 16-bit source into the modrm-selected register only when CF == 1.
268 void I386_OPS_BASE::PENTIUMOP(cmovb_r16_rm16)() // Opcode 0x0f 42
271 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
275 if (cpustate->CF == 1)
277 src = LOAD_RM16(modrm);
278 STORE_REG16(modrm, src);
280 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
284 UINT32 ea = GetEA(modrm, 0, 2);
285 if (cpustate->CF == 1)
288 STORE_REG16(modrm, src);
290 CYCLES(1); // TODO: correct cycle count
// CMOVB r32, r/m32: stores the 32-bit source into the modrm-selected register only when CF == 1.
294 void I386_OPS_BASE::PENTIUMOP(cmovb_r32_rm32)() // Opcode 0x0f 42
297 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
301 if (cpustate->CF == 1)
303 src = LOAD_RM32(modrm);
304 STORE_REG32(modrm, src);
306 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
310 UINT32 ea = GetEA(modrm, 0, 4);
311 if (cpustate->CF == 1)
314 STORE_REG32(modrm, src);
316 CYCLES(1); // TODO: correct cycle count
// CMOVAE r16, r/m16: stores the 16-bit source into the modrm-selected register only when CF == 0.
320 void I386_OPS_BASE::PENTIUMOP(cmovae_r16_rm16)() // Opcode 0x0f 43
323 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
327 if (cpustate->CF == 0)
329 src = LOAD_RM16(modrm);
330 STORE_REG16(modrm, src);
332 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
336 UINT32 ea = GetEA(modrm, 0, 2);
337 if (cpustate->CF == 0)
340 STORE_REG16(modrm, src);
342 CYCLES(1); // TODO: correct cycle count
// CMOVAE r32, r/m32: stores the 32-bit source into the modrm-selected register only when CF == 0.
346 void I386_OPS_BASE::PENTIUMOP(cmovae_r32_rm32)() // Opcode 0x0f 43
349 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
353 if (cpustate->CF == 0)
355 src = LOAD_RM32(modrm);
356 STORE_REG32(modrm, src);
358 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
362 UINT32 ea = GetEA(modrm, 0, 4);
363 if (cpustate->CF == 0)
366 STORE_REG32(modrm, src);
368 CYCLES(1); // TODO: correct cycle count
// CMOVE r16, r/m16: stores the 16-bit source into the modrm-selected register only when ZF == 1.
372 void I386_OPS_BASE::PENTIUMOP(cmove_r16_rm16)() // Opcode 0x0f 44
375 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
379 if (cpustate->ZF == 1)
381 src = LOAD_RM16(modrm);
382 STORE_REG16(modrm, src);
384 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
388 UINT32 ea = GetEA(modrm, 0, 2);
389 if (cpustate->ZF == 1)
392 STORE_REG16(modrm, src);
394 CYCLES(1); // TODO: correct cycle count
// CMOVE r32, r/m32: stores the 32-bit source into the modrm-selected register only when ZF == 1.
398 void I386_OPS_BASE::PENTIUMOP(cmove_r32_rm32)() // Opcode 0x0f 44
401 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
405 if (cpustate->ZF == 1)
407 src = LOAD_RM32(modrm);
408 STORE_REG32(modrm, src);
410 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
414 UINT32 ea = GetEA(modrm, 0, 4);
415 if (cpustate->ZF == 1)
418 STORE_REG32(modrm, src);
420 CYCLES(1); // TODO: correct cycle count
// CMOVNE r16, r/m16: stores the 16-bit source into the modrm-selected register only when ZF == 0.
424 void I386_OPS_BASE::PENTIUMOP(cmovne_r16_rm16)() // Opcode 0x0f 45
427 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
431 if (cpustate->ZF == 0)
433 src = LOAD_RM16(modrm);
434 STORE_REG16(modrm, src);
436 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
440 UINT32 ea = GetEA(modrm, 0, 2);
441 if (cpustate->ZF == 0)
444 STORE_REG16(modrm, src);
446 CYCLES(1); // TODO: correct cycle count
// CMOVNE r32, r/m32: stores the 32-bit source into the modrm-selected register only when ZF == 0.
450 void I386_OPS_BASE::PENTIUMOP(cmovne_r32_rm32)() // Opcode 0x0f 45
453 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
457 if (cpustate->ZF == 0)
459 src = LOAD_RM32(modrm);
460 STORE_REG32(modrm, src);
462 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
466 UINT32 ea = GetEA(modrm, 0, 4);
467 if (cpustate->ZF == 0)
470 STORE_REG32(modrm, src);
472 CYCLES(1); // TODO: correct cycle count
// CMOVBE r16, r/m16: stores the 16-bit source into the modrm-selected register only when CF == 1 or ZF == 1.
476 void I386_OPS_BASE::PENTIUMOP(cmovbe_r16_rm16)() // Opcode 0x0f 46
479 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
483 if ((cpustate->CF == 1) || (cpustate->ZF == 1))
485 src = LOAD_RM16(modrm);
486 STORE_REG16(modrm, src);
488 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
492 UINT32 ea = GetEA(modrm, 0, 2);
493 if ((cpustate->CF == 1) || (cpustate->ZF == 1))
496 STORE_REG16(modrm, src);
498 CYCLES(1); // TODO: correct cycle count
// CMOVBE r32, r/m32: stores the 32-bit source into the modrm-selected register only when CF == 1 or ZF == 1.
502 void I386_OPS_BASE::PENTIUMOP(cmovbe_r32_rm32)() // Opcode 0x0f 46
505 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
509 if ((cpustate->CF == 1) || (cpustate->ZF == 1))
511 src = LOAD_RM32(modrm);
512 STORE_REG32(modrm, src);
514 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
518 UINT32 ea = GetEA(modrm, 0, 4);
519 if ((cpustate->CF == 1) || (cpustate->ZF == 1))
522 STORE_REG32(modrm, src);
524 CYCLES(1); // TODO: correct cycle count
// CMOVA r16, r/m16: stores the 16-bit source into the modrm-selected register only when CF == 0 and ZF == 0.
528 void I386_OPS_BASE::PENTIUMOP(cmova_r16_rm16)() // Opcode 0x0f 47
531 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
535 if ((cpustate->CF == 0) && (cpustate->ZF == 0))
537 src = LOAD_RM16(modrm);
538 STORE_REG16(modrm, src);
540 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
544 UINT32 ea = GetEA(modrm, 0, 2);
545 if ((cpustate->CF == 0) && (cpustate->ZF == 0))
548 STORE_REG16(modrm, src);
550 CYCLES(1); // TODO: correct cycle count
// CMOVA r32, r/m32: stores the 32-bit source into the modrm-selected register only when CF == 0 and ZF == 0.
554 void I386_OPS_BASE::PENTIUMOP(cmova_r32_rm32)() // Opcode 0x0f 47
557 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
561 if ((cpustate->CF == 0) && (cpustate->ZF == 0))
563 src = LOAD_RM32(modrm);
564 STORE_REG32(modrm, src);
566 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
570 UINT32 ea = GetEA(modrm, 0, 4);
571 if ((cpustate->CF == 0) && (cpustate->ZF == 0))
574 STORE_REG32(modrm, src);
576 CYCLES(1); // TODO: correct cycle count
// CMOVS r16, r/m16: stores the 16-bit source into the modrm-selected register only when SF == 1.
580 void I386_OPS_BASE::PENTIUMOP(cmovs_r16_rm16)() // Opcode 0x0f 48
583 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
587 if (cpustate->SF == 1)
589 src = LOAD_RM16(modrm);
590 STORE_REG16(modrm, src);
592 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
596 UINT32 ea = GetEA(modrm, 0, 2);
597 if (cpustate->SF == 1)
600 STORE_REG16(modrm, src);
602 CYCLES(1); // TODO: correct cycle count
// CMOVS r32, r/m32: stores the 32-bit source into the modrm-selected register only when SF == 1.
606 void I386_OPS_BASE::PENTIUMOP(cmovs_r32_rm32)() // Opcode 0x0f 48
609 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
613 if (cpustate->SF == 1)
615 src = LOAD_RM32(modrm);
616 STORE_REG32(modrm, src);
618 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
622 UINT32 ea = GetEA(modrm, 0, 4);
623 if (cpustate->SF == 1)
626 STORE_REG32(modrm, src);
628 CYCLES(1); // TODO: correct cycle count
// CMOVNS r16, r/m16: stores the 16-bit source into the modrm-selected register only when SF == 0.
632 void I386_OPS_BASE::PENTIUMOP(cmovns_r16_rm16)() // Opcode 0x0f 49
635 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
639 if (cpustate->SF == 0)
641 src = LOAD_RM16(modrm);
642 STORE_REG16(modrm, src);
644 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
648 UINT32 ea = GetEA(modrm, 0, 2);
649 if (cpustate->SF == 0)
652 STORE_REG16(modrm, src);
654 CYCLES(1); // TODO: correct cycle count
// CMOVNS r32, r/m32: stores the 32-bit source into the modrm-selected register only when SF == 0.
658 void I386_OPS_BASE::PENTIUMOP(cmovns_r32_rm32)() // Opcode 0x0f 49
661 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
665 if (cpustate->SF == 0)
667 src = LOAD_RM32(modrm);
668 STORE_REG32(modrm, src);
670 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
674 UINT32 ea = GetEA(modrm, 0, 4);
675 if (cpustate->SF == 0)
678 STORE_REG32(modrm, src);
680 CYCLES(1); // TODO: correct cycle count
// CMOVP r16, r/m16: stores the 16-bit source into the modrm-selected register only when PF == 1.
684 void I386_OPS_BASE::PENTIUMOP(cmovp_r16_rm16)() // Opcode 0x0f 4a
687 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
691 if (cpustate->PF == 1)
693 src = LOAD_RM16(modrm);
694 STORE_REG16(modrm, src);
696 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
700 UINT32 ea = GetEA(modrm, 0, 2);
701 if (cpustate->PF == 1)
704 STORE_REG16(modrm, src);
706 CYCLES(1); // TODO: correct cycle count
// CMOVP r32, r/m32: stores the 32-bit source into the modrm-selected register only when PF == 1.
710 void I386_OPS_BASE::PENTIUMOP(cmovp_r32_rm32)() // Opcode 0x0f 4a
713 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
717 if (cpustate->PF == 1)
719 src = LOAD_RM32(modrm);
720 STORE_REG32(modrm, src);
722 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
726 UINT32 ea = GetEA(modrm, 0, 4);
727 if (cpustate->PF == 1)
730 STORE_REG32(modrm, src);
732 CYCLES(1); // TODO: correct cycle count
// CMOVNP r16, r/m16: stores the 16-bit source into the modrm-selected register only when PF == 0.
736 void I386_OPS_BASE::PENTIUMOP(cmovnp_r16_rm16)() // Opcode 0x0f 4b
739 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
743 if (cpustate->PF == 0)
745 src = LOAD_RM16(modrm);
746 STORE_REG16(modrm, src);
748 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
752 UINT32 ea = GetEA(modrm, 0, 2);
753 if (cpustate->PF == 0)
756 STORE_REG16(modrm, src);
758 CYCLES(1); // TODO: correct cycle count
// CMOVNP r32, r/m32: stores the 32-bit source into the modrm-selected register only when PF == 0.
762 void I386_OPS_BASE::PENTIUMOP(cmovnp_r32_rm32)() // Opcode 0x0f 4b
765 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
769 if (cpustate->PF == 0)
771 src = LOAD_RM32(modrm);
772 STORE_REG32(modrm, src);
774 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
778 UINT32 ea = GetEA(modrm, 0, 4);
779 if (cpustate->PF == 0)
782 STORE_REG32(modrm, src);
784 CYCLES(1); // TODO: correct cycle count
// CMOVL r16, r/m16: stores the 16-bit source into the modrm-selected register only when SF != OF.
788 void I386_OPS_BASE::PENTIUMOP(cmovl_r16_rm16)() // Opcode 0x0f 4c
791 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
795 if (cpustate->SF != cpustate->OF)
797 src = LOAD_RM16(modrm);
798 STORE_REG16(modrm, src);
800 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
804 UINT32 ea = GetEA(modrm, 0, 2);
805 if (cpustate->SF != cpustate->OF)
808 STORE_REG16(modrm, src);
810 CYCLES(1); // TODO: correct cycle count
// CMOVL r32, r/m32: stores the 32-bit source into the modrm-selected register only when SF != OF.
814 void I386_OPS_BASE::PENTIUMOP(cmovl_r32_rm32)() // Opcode 0x0f 4c
817 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
821 if (cpustate->SF != cpustate->OF)
823 src = LOAD_RM32(modrm);
824 STORE_REG32(modrm, src);
826 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
830 UINT32 ea = GetEA(modrm, 0, 4);
831 if (cpustate->SF != cpustate->OF)
834 STORE_REG32(modrm, src);
836 CYCLES(1); // TODO: correct cycle count
// CMOVGE r16, r/m16: stores the 16-bit source into the modrm-selected register only when SF == OF.
840 void I386_OPS_BASE::PENTIUMOP(cmovge_r16_rm16)() // Opcode 0x0f 4d
843 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
847 if (cpustate->SF == cpustate->OF)
849 src = LOAD_RM16(modrm);
850 STORE_REG16(modrm, src);
852 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
856 UINT32 ea = GetEA(modrm, 0, 2);
857 if (cpustate->SF == cpustate->OF)
860 STORE_REG16(modrm, src);
862 CYCLES(1); // TODO: correct cycle count
// CMOVGE r32, r/m32: stores the 32-bit source into the modrm-selected register only when SF == OF.
866 void I386_OPS_BASE::PENTIUMOP(cmovge_r32_rm32)() // Opcode 0x0f 4d
869 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
873 if (cpustate->SF == cpustate->OF)
875 src = LOAD_RM32(modrm);
876 STORE_REG32(modrm, src);
878 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
882 UINT32 ea = GetEA(modrm, 0, 4);
883 if (cpustate->SF == cpustate->OF)
886 STORE_REG32(modrm, src);
888 CYCLES(1); // TODO: correct cycle count
// CMOVLE r16, r/m16: stores the 16-bit source into the modrm-selected register only when ZF == 1 or SF != OF.
892 void I386_OPS_BASE::PENTIUMOP(cmovle_r16_rm16)() // Opcode 0x0f 4e
895 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
899 if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
901 src = LOAD_RM16(modrm);
902 STORE_REG16(modrm, src);
904 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
908 UINT32 ea = GetEA(modrm, 0, 2);
909 if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
912 STORE_REG16(modrm, src);
914 CYCLES(1); // TODO: correct cycle count
// CMOVLE r32, r/m32: stores the 32-bit source into the modrm-selected register only when ZF == 1 or SF != OF.
918 void I386_OPS_BASE::PENTIUMOP(cmovle_r32_rm32)() // Opcode 0x0f 4e
921 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
925 if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
927 src = LOAD_RM32(modrm);
928 STORE_REG32(modrm, src);
930 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
934 UINT32 ea = GetEA(modrm, 0, 4);
935 if ((cpustate->ZF == 1) || (cpustate->SF != cpustate->OF))
938 STORE_REG32(modrm, src);
940 CYCLES(1); // TODO: correct cycle count
// CMOVG r16, r/m16: stores the 16-bit source into the modrm-selected register only when ZF == 0 and SF == OF.
944 void I386_OPS_BASE::PENTIUMOP(cmovg_r16_rm16)() // Opcode 0x0f 4f
947 UINT8 modrm = FETCH();
// source taken through LOAD_RM16
951 if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
953 src = LOAD_RM16(modrm);
954 STORE_REG16(modrm, src);
956 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
960 UINT32 ea = GetEA(modrm, 0, 2);
961 if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
964 STORE_REG16(modrm, src);
966 CYCLES(1); // TODO: correct cycle count
// CMOVG r32, r/m32: stores the 32-bit source into the modrm-selected register only when ZF == 0 and SF == OF.
970 void I386_OPS_BASE::PENTIUMOP(cmovg_r32_rm32)() // Opcode 0x0f 4f
973 UINT8 modrm = FETCH();
// source taken through LOAD_RM32
977 if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
979 src = LOAD_RM32(modrm);
980 STORE_REG32(modrm, src);
982 CYCLES(1); // TODO: correct cycle count
// source addressed through an effective address
986 UINT32 ea = GetEA(modrm, 0, 4);
987 if ((cpustate->ZF == 0) && (cpustate->SF == cpustate->OF))
990 STORE_REG32(modrm, src);
992 CYCLES(1); // TODO: correct cycle count
// MOVNTI m16, r16: stores the 16-bit register named by the modrm reg field to
// memory. Handled as an ordinary write because no cache is modelled.
// A register destination (modrm >= 0xc0) is not a valid encoding and only
// consumes cycles.
996 void I386_OPS_BASE::PENTIUMOP(movnti_m16_r16)() // Opcode 0f c3
998 UINT8 modrm = FETCH();
999 if( modrm >= 0xc0 ) {
1000 // unsupported by cpu
1001 CYCLES(1); // TODO: correct cycle count
1003 // since cache is not implemented
1004 UINT32 ea = GetEA(modrm, 0, 2);
// the value comes from the reg field; the r/m field only encodes the address
1005 WRITE16(ea,LOAD_REG16(modrm));
1006 CYCLES(1); // TODO: correct cycle count
// MOVNTI m32, r32: stores the 32-bit register named by the modrm reg field to
// memory. Handled as an ordinary write because no cache is modelled.
// A register destination (modrm >= 0xc0) is not a valid encoding and only
// consumes cycles.
1010 void I386_OPS_BASE::PENTIUMOP(movnti_m32_r32)() // Opcode 0f c3
1012 UINT8 modrm = FETCH();
1013 if( modrm >= 0xc0 ) {
1014 // unsupported by cpu
1015 CYCLES(1); // TODO: correct cycle count
1017 // since cache is not implemented
1018 UINT32 ea = GetEA(modrm, 0, 4);
// the value comes from the reg field; the r/m field only encodes the address
1019 WRITE32(ea,LOAD_REG32(modrm));
1020 CYCLES(1); // TODO: correct cycle count
// Handler for the Cyrix-specific opcodes 0x0f 0x3a..0x3d; the list below names
// the operation each of the four opcodes stands for.
1024 void I386_OPS_BASE::I386OP(cyrix_special)() // Opcode 0x0f 3a-3d
1027 0f 3a BB0_RESET (set BB0 pointer = base)
1028 0f 3b BB1_RESET (set BB1 pointer = base)
1029 0f 3c CPU_WRITE (write special CPU memory-mapped register, [ebx] = eax)
1030 0f 3d CPU_READ (read special CPU memory-mapped register, eax, = [ebx])
// Handler for opcode 0x0f 0x74: not emulated, it only logs that it was called.
1036 void I386_OPS_BASE::I386OP(cyrix_unknown)() // Opcode 0x0f 74
1038 logerror("Unemulated 0x0f 0x74 opcode called\n");
// CMPXCHG8B m64: compares the 64-bit memory operand with EDX:EAX. On a match
// ECX:EBX is written to memory; otherwise the memory value is loaded into
// EDX:EAX. A register operand (modm >= 0xc0) is reported as an invalid modrm.
1043 void I386_OPS_BASE::PENTIUMOP(cmpxchg8b_m64)() // Opcode 0x0f c7
1045 UINT8 modm = FETCH();
1046 if( modm >= 0xc0 ) {
1047 report_invalid_modrm( "cmpxchg8b_m64", modm);
1049 UINT32 ea = GetEA(modm, 0, 8);
1050 UINT64 value = READ64( ea);
// build the two 64-bit register pairs (high register in the upper 32 bits)
1051 UINT64 edx_eax = (((UINT64) REG32(EDX)) << 32) | REG32(EAX);
1052 UINT64 ecx_ebx = (((UINT64) REG32(ECX)) << 32) | REG32(EBX);
1054 if( value == edx_eax ) {
1055 WRITE64( ea, ecx_ebx);
1057 CYCLES(CYCLES_CMPXCHG_REG_MEM_T);
// mismatch: hand the current memory value back in EDX:EAX
1059 REG32(EDX) = (UINT32) (value >> 32);
1060 REG32(EAX) = (UINT32) (value >> 0);
1062 CYCLES(CYCLES_CMPXCHG_REG_MEM_F);
// MOVNTQ m64, mm: writes the MMX register named by the modrm reg field to
// memory with WRITEMMX. A register destination (modrm >= 0xc0) only consumes a cycle.
1067 void I386_OPS_BASE::PENTIUMOP(movntq_m64_r64)() // Opcode 0f e7
1069 //MMXPROLOG(); // TODO: check if needed
1070 UINT8 modrm = FETCH();
1071 if( modrm >= 0xc0 ) {
1072 CYCLES(1); // unsupported
1074 // since cache is not implemented
1075 UINT32 ea = GetEA(modrm, 0, 8);
1076 WRITEMMX( ea, MMX((modrm >> 3) & 0x7));
1077 CYCLES(1); // TODO: correct cycle count
// MASKMOVQ mm1, mm2: byte-masked store of MMX register `s` to the address
// obtained from GetEA(7, ...). Byte n is written only when the most
// significant bit of byte n in mask register `m` is set.
1081 void I386_OPS_BASE::PENTIUMOP(maskmovq_r64_r64)() // Opcode 0f f7
1084 UINT8 modm = FETCH();
1085 UINT32 ea = GetEA(7, 0, 8); // ds:di/edi/rdi register
1089 for (n=0;n <= 7;n++)
// the select bit is bit 7 of each mask byte, not the low seven bits
1090 if (MMX(m).b[n] & 0x80)
1091 WRITE8(ea+n, MMX(s).b[n]);
// MASKMOVDQU xmm1, xmm2: byte-masked store of XMM register `s` to the address
// obtained from GetEA(7, ...). Byte n is written only when the most
// significant bit of byte n in mask register `m` is set.
1094 void I386_OPS_BASE::SSEOP(maskmovdqu_r128_r128)() // Opcode 66 0f f7
1097 UINT8 modm = FETCH();
1098 UINT32 ea = GetEA(7, 0, 16); // ds:di/edi/rdi register
1101 for (n=0;n < 16;n++)
// the select bit is bit 7 of each mask byte, not the low seven bits
1102 if (XMM(m).b[n] & 0x80)
1103 WRITE8(ea+n, XMM(s).b[n]);
// POPCNT r16, r/m16: accumulates the low bit of the source over 16 loop
// iterations and stores the resulting count in the destination register.
1106 void I386_OPS_BASE::PENTIUMOP(popcnt_r16_rm16)() // Opcode f3 0f b8
1109 UINT8 modrm = FETCH();
// register source
1112 if( modrm >= 0xc0 ) {
1113 src = LOAD_RM16(modrm);
// memory source
1115 UINT32 ea = GetEA(modrm, 0, 2);
1119 for (n=0;n < 16;n++) {
1120 count=count+(src & 1);
1123 STORE_REG16(modrm, count);
1124 CYCLES(1); // TODO: correct cycle count
// POPCNT r32, r/m32: accumulates the low bit of the source over 32 loop
// iterations and stores the resulting count in the destination register.
1127 void I386_OPS_BASE::PENTIUMOP(popcnt_r32_rm32)() // Opcode f3 0f b8
1130 UINT8 modrm = FETCH();
// register source
1133 if( modrm >= 0xc0 ) {
1134 src = LOAD_RM32(modrm);
// memory source
1136 UINT32 ea = GetEA(modrm, 0, 4);
1140 for (n=0;n < 32;n++) {
1141 count=count+(src & 1);
1144 STORE_REG32(modrm, count);
1145 CYCLES(1); // TODO: correct cycle count
// TZCNT r16, r/m16: not implemented on its own; delegates to the 16-bit BSF handler.
1148 void I386_OPS_BASE::PENTIUMOP(tzcnt_r16_rm16)()
1150 // for CPUs that don't support TZCNT, fall back to BSF
1151 i386_bsf_r16_rm16();
1152 // TODO: actually implement TZCNT
// TZCNT r32, r/m32: not implemented on its own; delegates to the 32-bit BSF handler.
1155 void I386_OPS_BASE::PENTIUMOP(tzcnt_r32_rm32)()
1157 // for CPUs that don't support TZCNT, fall back to BSF
1158 i386_bsf_r32_rm32();
1159 // TODO: actually implement TZCNT
// MMX shift group 0f 71: shifts the four 16-bit lanes of the MMX register named
// by the modrm r/m field by imm8. The operation is chosen by the modrm reg field.
// Counts above 15 are handled explicitly: logical shifts clear the lane and the
// arithmetic shift fills it with the sign bit. The raw C++ shift is undefined
// for counts of 32 and above.
1163 void I386_OPS_BASE::MMXOP(group_0f71)() // Opcode 0f 71
1165 UINT8 modm = FETCH();
1166 UINT8 imm8 = FETCH();
1168 if( modm >= 0xc0 ) {
1169 switch ( (modm & 0x38) >> 3 )
// logical right shift of the unsigned word lanes
1172 MMX(modm & 7).w[0]=imm8 > 15 ? 0 : MMX(modm & 7).w[0] >> imm8;
1173 MMX(modm & 7).w[1]=imm8 > 15 ? 0 : MMX(modm & 7).w[1] >> imm8;
1174 MMX(modm & 7).w[2]=imm8 > 15 ? 0 : MMX(modm & 7).w[2] >> imm8;
1175 MMX(modm & 7).w[3]=imm8 > 15 ? 0 : MMX(modm & 7).w[3] >> imm8;
// arithmetic right shift of the signed word lanes (count saturates at 15)
1178 MMX(modm & 7).s[0]=MMX(modm & 7).s[0] >> (imm8 > 15 ? 15 : imm8);
1179 MMX(modm & 7).s[1]=MMX(modm & 7).s[1] >> (imm8 > 15 ? 15 : imm8);
1180 MMX(modm & 7).s[2]=MMX(modm & 7).s[2] >> (imm8 > 15 ? 15 : imm8);
1181 MMX(modm & 7).s[3]=MMX(modm & 7).s[3] >> (imm8 > 15 ? 15 : imm8);
// left shift of the unsigned word lanes
1184 MMX(modm & 7).w[0]=imm8 > 15 ? 0 : MMX(modm & 7).w[0] << imm8;
1185 MMX(modm & 7).w[1]=imm8 > 15 ? 0 : MMX(modm & 7).w[1] << imm8;
1186 MMX(modm & 7).w[2]=imm8 > 15 ? 0 : MMX(modm & 7).w[2] << imm8;
1187 MMX(modm & 7).w[3]=imm8 > 15 ? 0 : MMX(modm & 7).w[3] << imm8;
1190 report_invalid_modrm( "mmx_group0f71", modm);
// SSE shift group 66 0f 71: shifts the eight 16-bit lanes of the XMM register
// named by the modrm r/m field by imm8. The operation is chosen by the modrm
// reg field. Counts above 15 are handled explicitly: logical shifts clear the
// lane and the arithmetic shift fills it with the sign bit.
1195 void I386_OPS_BASE::SSEOP(group_660f71)() // Opcode 66 0f 71
1197 UINT8 modm = FETCH();
1198 UINT8 imm8 = FETCH();
1200 switch ((modm & 0x38) >> 3)
// logical right shift of the unsigned word lanes
1203 for (int n = 0; n < 8;n++)
1204 XMM(modm & 7).w[n] = imm8 > 15 ? 0 : XMM(modm & 7).w[n] >> imm8;
// arithmetic right shift of the signed word lanes (count saturates at 15)
1207 for (int n = 0; n < 8;n++)
1208 XMM(modm & 7).s[n] = XMM(modm & 7).s[n] >> (imm8 > 15 ? 15 : imm8);
// left shift of the unsigned word lanes
1211 for (int n = 0; n < 8;n++)
1212 XMM(modm & 7).w[n] = imm8 > 15 ? 0 : XMM(modm & 7).w[n] << imm8;
1215 report_invalid_modrm( "mmx_group660f71", modm);
// MMX shift group 0f 72: shifts the two 32-bit lanes of the MMX register named
// by the modrm r/m field by imm8. The operation is chosen by the modrm reg field.
// Counts above 31 are handled explicitly: logical shifts clear the lane and the
// arithmetic shift fills it with the sign bit. The raw C++ shift is undefined
// for such counts.
1220 void I386_OPS_BASE::MMXOP(group_0f72)() // Opcode 0f 72
1222 UINT8 modm = FETCH();
1223 UINT8 imm8 = FETCH();
1225 if( modm >= 0xc0 ) {
1226 switch ( (modm & 0x38) >> 3 )
// logical right shift of the unsigned dword lanes
1229 MMX(modm & 7).d[0]=imm8 > 31 ? 0 : MMX(modm & 7).d[0] >> imm8;
1230 MMX(modm & 7).d[1]=imm8 > 31 ? 0 : MMX(modm & 7).d[1] >> imm8;
// arithmetic right shift of the signed dword lanes (count saturates at 31)
1233 MMX(modm & 7).i[0]=MMX(modm & 7).i[0] >> (imm8 > 31 ? 31 : imm8);
1234 MMX(modm & 7).i[1]=MMX(modm & 7).i[1] >> (imm8 > 31 ? 31 : imm8);
// left shift of the unsigned dword lanes
1237 MMX(modm & 7).d[0]=imm8 > 31 ? 0 : MMX(modm & 7).d[0] << imm8;
1238 MMX(modm & 7).d[1]=imm8 > 31 ? 0 : MMX(modm & 7).d[1] << imm8;
1241 report_invalid_modrm( "mmx_group0f72", modm);
// SSE shift group 66 0f 72: shifts the four 32-bit lanes of the XMM register
// named by the modrm r/m field by imm8. The operation is chosen by the modrm
// reg field. Counts above 31 are handled explicitly: logical shifts clear the
// lane and the arithmetic shift fills it with the sign bit.
1246 void I386_OPS_BASE::SSEOP(group_660f72)() // Opcode 66 0f 72
1248 UINT8 modm = FETCH();
1249 UINT8 imm8 = FETCH();
1251 switch ((modm & 0x38) >> 3)
// logical right shift of the unsigned dword lanes
1254 for (int n = 0; n < 4;n++)
1255 XMM(modm & 7).d[n] = imm8 > 31 ? 0 : XMM(modm & 7).d[n] >> imm8;
// arithmetic right shift of the signed dword lanes (count saturates at 31)
1258 for (int n = 0; n < 4;n++)
1259 XMM(modm & 7).i[n] = XMM(modm & 7).i[n] >> (imm8 > 31 ? 31 : imm8);
// left shift of the unsigned dword lanes
1262 for (int n = 0; n < 4;n++)
1263 XMM(modm & 7).d[n] = imm8 > 31 ? 0 : XMM(modm & 7).d[n] << imm8;
1266 report_invalid_modrm( "mmx_group660f72", modm);
// MMX shift group 0f 73: shifts the whole 64-bit MMX register named by the
// modrm r/m field by imm8 (right or left, chosen by the modrm reg field).
// Counts above 63 produce zero.
1271 void I386_OPS_BASE::MMXOP(group_0f73)() // Opcode 0f 73
1273 UINT8 modm = FETCH();
1274 UINT8 imm8 = FETCH();
1276 if( modm >= 0xc0 ) {
1277 switch ( (modm & 0x38) >> 3 )
// logical right shift of the quadword
1280 MMX(modm & 7).q = imm8 > 63 ? 0 : MMX(modm & 7).q >> imm8;
// left shift of the quadword
1283 MMX(modm & 7).q = imm8 > 63 ? 0 : MMX(modm & 7).q << imm8;
1286 report_invalid_modrm( "mmx_group0f73", modm);
// SSE shift group 66 0f 73 on the XMM register named by the modrm r/m field:
// per-quadword bit shifts (right / left) and whole-register byte shifts
// (right / left) by imm8, chosen by the modrm reg field.
1291 void I386_OPS_BASE::SSEOP(group_660f73)() // Opcode 66 0f 73
1294 UINT8 modm = FETCH();
1295 UINT8 imm8 = FETCH();
1297 switch ((modm & 0x38) >> 3)
// logical right shift of each quadword; counts above 63 give zero
1300 XMM(modm & 7).q[0] = imm8 > 63 ? 0 : XMM(modm & 7).q[0] >> imm8;
1301 XMM(modm & 7).q[1] = imm8 > 63 ? 0 : XMM(modm & 7).q[1] >> imm8;
// byte shift right: whole register shifted out
1306 XMM(modm & 7).q[0] = 0;
1307 XMM(modm & 7).q[1] = 0;
// byte shift right by 8 bytes or more: the high quadword moves into the low one
1311 imm8 = (imm8 & 7) << 3;
1312 XMM(modm & 7).q[0] = XMM(modm & 7).q[1] >> imm8;
1313 XMM(modm & 7).q[1] = 0;
// byte shift right by less than 8 bytes: bits cross from the high quadword into the low one
1317 t0 = XMM(modm & 7).q[0];
1319 XMM(modm & 7).q[0] = (XMM(modm & 7).q[1] << (64 - imm8)) | (t0 >> imm8);
// the high quadword is its own old value shifted (q[1] has not been modified above),
// not the old low quadword held in t0
1320 XMM(modm & 7).q[1] = XMM(modm & 7).q[1] >> imm8;
// left shift of each quadword; counts above 63 give zero
1324 XMM(modm & 7).q[0] = imm8 > 63 ? 0 : XMM(modm & 7).q[0] << imm8;
1325 XMM(modm & 7).q[1] = imm8 > 63 ? 0 : XMM(modm & 7).q[1] << imm8;
// byte shift left: whole register shifted out
1330 XMM(modm & 7).q[0] = 0;
1331 XMM(modm & 7).q[1] = 0;
// byte shift left by 8 bytes or more: the low quadword moves into the high one
1335 imm8 = (imm8 & 7) << 3;
1336 XMM(modm & 7).q[1] = XMM(modm & 7).q[0] << imm8;
1337 XMM(modm & 7).q[0] = 0;
// byte shift left by less than 8 bytes: bits cross from the low quadword into the high one
1342 XMM(modm & 7).q[1] = (XMM(modm & 7).q[0] >> (64 - imm8)) | (XMM(modm & 7).q[1] << imm8);
1343 XMM(modm & 7).q[0] = XMM(modm & 7).q[0] << imm8;
1347 report_invalid_modrm( "sse_group660f73", modm);
// PSRLW mm, mm/m64: logical right shift of the four 16-bit lanes of the
// destination MMX register by the 64-bit count in the source.
// The count is compared as a full 64-bit value before narrowing so that large
// counts cannot wrap or turn negative; any count above 15 clears the lanes.
1352 void I386_OPS_BASE::MMXOP(psrlw_r64_rm64)() // Opcode 0f d1
1355 UINT8 modrm = FETCH();
1356 if( modrm >= 0xc0 ) {
// register source; 16 stands for "out of range"
1357 int count=MMX(modrm & 7).q > 15 ? 16 : (int)MMX(modrm & 7).q;
1358 MMX((modrm >> 3) & 0x7).w[0]=count > 15 ? 0 : MMX((modrm >> 3) & 0x7).w[0] >> count;
1359 MMX((modrm >> 3) & 0x7).w[1]=count > 15 ? 0 : MMX((modrm >> 3) & 0x7).w[1] >> count;
1360 MMX((modrm >> 3) & 0x7).w[2]=count > 15 ? 0 : MMX((modrm >> 3) & 0x7).w[2] >> count;
1361 MMX((modrm >> 3) & 0x7).w[3]=count > 15 ? 0 : MMX((modrm >> 3) & 0x7).w[3] >> count;
// memory source
1364 UINT32 ea = GetEA(modrm, 0, 8);
1366 int count=src.q > 15 ? 16 : (int)src.q;
1367 MMX((modrm >> 3) & 0x7).w[0]=count > 15 ? 0 : MMX((modrm >> 3) & 0x7).w[0] >> count;
1368 MMX((modrm >> 3) & 0x7).w[1]=count > 15 ? 0 : MMX((modrm >> 3) & 0x7).w[1] >> count;
1369 MMX((modrm >> 3) & 0x7).w[2]=count > 15 ? 0 : MMX((modrm >> 3) & 0x7).w[2] >> count;
1370 MMX((modrm >> 3) & 0x7).w[3]=count > 15 ? 0 : MMX((modrm >> 3) & 0x7).w[3] >> count;
1372 CYCLES(1); // TODO: correct cycle count
// PSRLD mm, mm/m64: logical right shift of the two 32-bit lanes of the
// destination MMX register by the 64-bit count in the source.
// The count is compared as a full 64-bit value before narrowing; any count
// above 31 clears the lanes instead of performing an undefined C++ shift.
1375 void I386_OPS_BASE::MMXOP(psrld_r64_rm64)() // Opcode 0f d2
1378 UINT8 modrm = FETCH();
1379 if( modrm >= 0xc0 ) {
// register source; 32 stands for "out of range"
1380 int count=MMX(modrm & 7).q > 31 ? 32 : (int)MMX(modrm & 7).q;
1381 MMX((modrm >> 3) & 0x7).d[0]=count > 31 ? 0 : MMX((modrm >> 3) & 0x7).d[0] >> count;
1382 MMX((modrm >> 3) & 0x7).d[1]=count > 31 ? 0 : MMX((modrm >> 3) & 0x7).d[1] >> count;
// memory source
1385 UINT32 ea = GetEA(modrm, 0, 8);
1387 int count=src.q > 31 ? 32 : (int)src.q;
1388 MMX((modrm >> 3) & 0x7).d[0]=count > 31 ? 0 : MMX((modrm >> 3) & 0x7).d[0] >> count;
1389 MMX((modrm >> 3) & 0x7).d[1]=count > 31 ? 0 : MMX((modrm >> 3) & 0x7).d[1] >> count;
1391 CYCLES(1); // TODO: correct cycle count
// PSRLQ mm, mm/m64: logical right shift of the 64-bit destination MMX register
// by the 64-bit count in the source.
// The count is compared as a full 64-bit value before narrowing; any count
// above 63 clears the register instead of performing an undefined C++ shift.
1394 void I386_OPS_BASE::MMXOP(psrlq_r64_rm64)() // Opcode 0f d3
1397 UINT8 modrm = FETCH();
1398 if( modrm >= 0xc0 ) {
// register source; 64 stands for "out of range"
1399 int count=MMX(modrm & 7).q > 63 ? 64 : (int)MMX(modrm & 7).q;
1400 MMX((modrm >> 3) & 0x7).q=count > 63 ? 0 : MMX((modrm >> 3) & 0x7).q >> count;
// memory source
1403 UINT32 ea = GetEA(modrm, 0, 8);
1405 int count=src.q > 63 ? 64 : (int)src.q;
1406 MMX((modrm >> 3) & 0x7).q=count > 63 ? 0 : MMX((modrm >> 3) & 0x7).q >> count;
1408 CYCLES(1); // TODO: correct cycle count
// PADDQ mm, mm/m64: adds the 64-bit source to the 64-bit destination MMX register.
1411 void I386_OPS_BASE::MMXOP(paddq_r64_rm64)() // Opcode 0f d4
1414 UINT8 modrm = FETCH();
// register source
1415 if( modrm >= 0xc0 ) {
1416 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q+MMX(modrm & 7).q;
// memory source
1419 UINT32 ea = GetEA(modrm, 0, 8);
1421 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q+src.q;
1423 CYCLES(1); // TODO: correct cycle count
// PMULLW mm, mm/m64: multiplies the four signed 16-bit lanes of destination and
// source as 32-bit values and keeps the low 16 bits of each product.
1426 void I386_OPS_BASE::MMXOP(pmullw_r64_rm64)() // Opcode 0f d5
1429 UINT8 modrm = FETCH();
// register source
1430 if( modrm >= 0xc0 ) {
1431 MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]) & 0xffff;
1432 MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1]) & 0xffff;
1433 MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]) & 0xffff;
1434 MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3]) & 0xffff;
// memory source
1437 UINT32 ea = GetEA(modrm, 0, 8);
1439 MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)src.s[0]) & 0xffff;
1440 MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)src.s[1]) & 0xffff;
1441 MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)src.s[2]) & 0xffff;
1442 MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)src.s[3]) & 0xffff;
1444 CYCLES(1); // TODO: correct cycle count
// PSUBUSB mm, mm/m64: unsigned saturating byte subtract; a lane that would go
// below zero (destination < source) becomes 0.
1447 void I386_OPS_BASE::MMXOP(psubusb_r64_rm64)() // Opcode 0f d8
1451 UINT8 modrm = FETCH();
// register source
1452 if( modrm >= 0xc0 ) {
1454 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] < MMX(modrm & 7).b[n] ? 0 : MMX((modrm >> 3) & 0x7).b[n]-MMX(modrm & 7).b[n];
// memory source
1457 UINT32 ea = GetEA(modrm, 0, 8);
1460 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] < src.b[n] ? 0 : MMX((modrm >> 3) & 0x7).b[n]-src.b[n];
1462 CYCLES(1); // TODO: correct cycle count
// PSUBUSW mm, mm/m64: unsigned saturating word subtract; a lane that would go
// below zero (destination < source) becomes 0.
1465 void I386_OPS_BASE::MMXOP(psubusw_r64_rm64)() // Opcode 0f d9
1469 UINT8 modrm = FETCH();
// register source
1470 if( modrm >= 0xc0 ) {
1472 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] < MMX(modrm & 7).w[n] ? 0 : MMX((modrm >> 3) & 0x7).w[n]-MMX(modrm & 7).w[n];
// memory source
1475 UINT32 ea = GetEA(modrm, 0, 8);
1478 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] < src.w[n] ? 0 : MMX((modrm >> 3) & 0x7).w[n]-src.w[n];
1480 CYCLES(1); // TODO: correct cycle count
// PAND mm, mm/m64: bitwise AND of the 64-bit destination MMX register with the source.
1483 void I386_OPS_BASE::MMXOP(pand_r64_rm64)() // Opcode 0f db
1486 UINT8 modrm = FETCH();
// register source
1487 if( modrm >= 0xc0 ) {
1488 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q & MMX(modrm & 7).q;
// memory source
1491 UINT32 ea = GetEA(modrm, 0, 8);
1493 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q & src.q;
1495 CYCLES(1); // TODO: correct cycle count
// PADDUSB mm, mm/m64: unsigned saturating byte add; a lane whose sum would
// exceed 0xff becomes 0xff.
1498 void I386_OPS_BASE::MMXOP(paddusb_r64_rm64)() // Opcode 0f dc
1502 UINT8 modrm = FETCH();
// register source
1503 if( modrm >= 0xc0 ) {
1505 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] > (0xff-MMX(modrm & 7).b[n]) ? 0xff : MMX((modrm >> 3) & 0x7).b[n]+MMX(modrm & 7).b[n];
// memory source
1508 UINT32 ea = GetEA(modrm, 0, 8);
1511 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] > (0xff-src.b[n]) ? 0xff : MMX((modrm >> 3) & 0x7).b[n]+src.b[n];
1513 CYCLES(1); // TODO: correct cycle count
// PADDUSW mm, mm/m64: unsigned saturating word add; a lane whose sum would
// exceed 0xffff becomes 0xffff.
1516 void I386_OPS_BASE::MMXOP(paddusw_r64_rm64)() // Opcode 0f dd
1520 UINT8 modrm = FETCH();
// register source
1521 if( modrm >= 0xc0 ) {
1523 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] > (0xffff-MMX(modrm & 7).w[n]) ? 0xffff : MMX((modrm >> 3) & 0x7).w[n]+MMX(modrm & 7).w[n];
// memory source
1526 UINT32 ea = GetEA(modrm, 0, 8);
1529 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] > (0xffff-src.w[n]) ? 0xffff : MMX((modrm >> 3) & 0x7).w[n]+src.w[n];
1531 CYCLES(1); // TODO: correct cycle count
// PANDN mm, mm/m64: destination = (NOT destination) AND source, on the full 64 bits.
1534 void I386_OPS_BASE::MMXOP(pandn_r64_rm64)() // Opcode 0f df
1537 UINT8 modrm = FETCH();
// register source
1538 if( modrm >= 0xc0 ) {
1539 MMX((modrm >> 3) & 0x7).q=(~MMX((modrm >> 3) & 0x7).q) & MMX(modrm & 7).q;
// memory source
1542 UINT32 ea = GetEA(modrm, 0, 8);
1544 MMX((modrm >> 3) & 0x7).q=(~MMX((modrm >> 3) & 0x7).q) & src.q;
1546 CYCLES(1); // TODO: correct cycle count
// PSRAW mm, mm/m64: arithmetic right shift of the four signed 16-bit lanes of
// the destination MMX register by the 64-bit count in the source.
// The count is compared as a full 64-bit value and saturated at 15, so large
// counts fill each lane with its sign bit instead of wrapping or going negative.
1549 void I386_OPS_BASE::MMXOP(psraw_r64_rm64)() // Opcode 0f e1
1552 UINT8 modrm = FETCH();
1553 if( modrm >= 0xc0 ) {
// register source
1554 int count=MMX(modrm & 7).q > 15 ? 15 : (int)MMX(modrm & 7).q;
1555 MMX((modrm >> 3) & 0x7).s[0]=MMX((modrm >> 3) & 0x7).s[0] >> count;
1556 MMX((modrm >> 3) & 0x7).s[1]=MMX((modrm >> 3) & 0x7).s[1] >> count;
1557 MMX((modrm >> 3) & 0x7).s[2]=MMX((modrm >> 3) & 0x7).s[2] >> count;
1558 MMX((modrm >> 3) & 0x7).s[3]=MMX((modrm >> 3) & 0x7).s[3] >> count;
// memory source
1561 UINT32 ea = GetEA(modrm, 0, 8);
1563 int count=src.q > 15 ? 15 : (int)src.q;
1564 MMX((modrm >> 3) & 0x7).s[0]=MMX((modrm >> 3) & 0x7).s[0] >> count;
1565 MMX((modrm >> 3) & 0x7).s[1]=MMX((modrm >> 3) & 0x7).s[1] >> count;
1566 MMX((modrm >> 3) & 0x7).s[2]=MMX((modrm >> 3) & 0x7).s[2] >> count;
1567 MMX((modrm >> 3) & 0x7).s[3]=MMX((modrm >> 3) & 0x7).s[3] >> count;
1569 CYCLES(1); // TODO: correct cycle count
// PSRAD mm, mm/m64: arithmetic right shift of the two signed 32-bit lanes of
// the destination MMX register by the 64-bit count in the source.
// The count is compared as a full 64-bit value and saturated at 31, so large
// counts fill each lane with its sign bit instead of performing an undefined shift.
1572 void I386_OPS_BASE::MMXOP(psrad_r64_rm64)() // Opcode 0f e2
1575 UINT8 modrm = FETCH();
1576 if( modrm >= 0xc0 ) {
// register source
1577 int count=MMX(modrm & 7).q > 31 ? 31 : (int)MMX(modrm & 7).q;
1578 MMX((modrm >> 3) & 0x7).i[0]=MMX((modrm >> 3) & 0x7).i[0] >> count;
1579 MMX((modrm >> 3) & 0x7).i[1]=MMX((modrm >> 3) & 0x7).i[1] >> count;
// memory source
1582 UINT32 ea = GetEA(modrm, 0, 8);
1584 int count=src.q > 31 ? 31 : (int)src.q;
1585 MMX((modrm >> 3) & 0x7).i[0]=MMX((modrm >> 3) & 0x7).i[0] >> count;
1586 MMX((modrm >> 3) & 0x7).i[1]=MMX((modrm >> 3) & 0x7).i[1] >> count;
1588 CYCLES(1); // TODO: correct cycle count
// PMULHW: per word lane, multiply the signed destination and source words as INT32
// and keep bits 31-16 of the product (the product is viewed as UINT32 before the >> 16).
1591 void I386_OPS_BASE::MMXOP(pmulhw_r64_rm64)() // Opcode 0f e5
1594 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0.
1595 if( modrm >= 0xc0 ) {
1596 MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]) >> 16;
1597 MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1]) >> 16;
1598 MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]) >> 16;
1599 MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3]) >> 16;
// Memory source: operand is the local `src` (load not visible here).
1602 UINT32 ea = GetEA(modrm, 0, 8);
1604 MMX((modrm >> 3) & 0x7).w[0]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)src.s[0]) >> 16;
1605 MMX((modrm >> 3) & 0x7).w[1]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)src.s[1]) >> 16;
1606 MMX((modrm >> 3) & 0x7).w[2]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)src.s[2]) >> 16;
1607 MMX((modrm >> 3) & 0x7).w[3]=(UINT32)((INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)src.s[3]) >> 16;
1609 CYCLES(1); // TODO: correct cycle count
// PSUBSB: signed saturating subtract on byte lanes c[n]. Both operands are widened to INT16
// and the difference is narrowed with SaturatedSignedWordToSignedByte.
1612 void I386_OPS_BASE::MMXOP(psubsb_r64_rm64)() // Opcode 0f e8
1616 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1617 if( modrm >= 0xc0 ) {
1619 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] - (INT16)MMX(modrm & 7).c[n]);
1622 UINT32 ea = GetEA(modrm, 0, 8);
1625 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] - (INT16)s.c[n]);
1627 CYCLES(1); // TODO: correct cycle count
// PSUBSW: signed saturating subtract on word lanes s[n]. Both operands are widened to INT32
// and the difference is narrowed with SaturatedSignedDwordToSignedWord.
1630 void I386_OPS_BASE::MMXOP(psubsw_r64_rm64)() // Opcode 0f e9
1634 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1635 if( modrm >= 0xc0 ) {
1637 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] - (INT32)MMX(modrm & 7).s[n]);
1640 UINT32 ea = GetEA(modrm, 0, 8);
1643 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] - (INT32)s.s[n]);
1645 CYCLES(1); // TODO: correct cycle count
// POR: destination quadword = destination OR source.
1648 void I386_OPS_BASE::MMXOP(por_r64_rm64)() // Opcode 0f eb
1651 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1652 if( modrm >= 0xc0 ) {
1653 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q | MMX(modrm & 7).q;
1656 UINT32 ea = GetEA(modrm, 0, 8);
1658 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q | s.q;
1660 CYCLES(1); // TODO: correct cycle count
// PADDSB: signed saturating add on byte lanes c[n]. Both operands are widened to INT16
// and the sum is narrowed with SaturatedSignedWordToSignedByte.
1663 void I386_OPS_BASE::MMXOP(paddsb_r64_rm64)() // Opcode 0f ec
1667 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1668 if( modrm >= 0xc0 ) {
1670 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] + (INT16)MMX(modrm & 7).c[n]);
1673 UINT32 ea = GetEA(modrm, 0, 8);
1676 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)MMX((modrm >> 3) & 0x7).c[n] + (INT16)s.c[n]);
1678 CYCLES(1); // TODO: correct cycle count
// PADDSW: signed saturating add on word lanes s[n]. Both operands are widened to INT32
// and the sum is narrowed with SaturatedSignedDwordToSignedWord.
1681 void I386_OPS_BASE::MMXOP(paddsw_r64_rm64)() // Opcode 0f ed
1685 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1686 if( modrm >= 0xc0 ) {
1688 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] + (INT32)MMX(modrm & 7).s[n]);
1691 UINT32 ea = GetEA(modrm, 0, 8);
1694 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)MMX((modrm >> 3) & 0x7).s[n] + (INT32)s.s[n]);
1696 CYCLES(1); // TODO: correct cycle count
// PXOR: destination quadword = destination XOR source.
1699 void I386_OPS_BASE::MMXOP(pxor_r64_rm64)() // Opcode 0f ef
1702 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1703 if( modrm >= 0xc0 ) {
1704 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q ^ MMX(modrm & 7).q;
1707 UINT32 ea = GetEA(modrm, 0, 8);
1709 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q ^ s.q;
1711 CYCLES(1); // TODO: correct cycle count
// PSLLW: logical left shift of the four word lanes w[0..3] of the destination by `count`.
// NOTE(review): the count is the source quadword truncated to int with no upper bound;
// counts of 32 or more shift by at least the width of int — confirm whether clamping is needed.
1714 void I386_OPS_BASE::MMXOP(psllw_r64_rm64)() // Opcode 0f f1
1717 UINT8 modrm = FETCH();
// Register form: count comes from the MMX register in modrm bits 2-0.
1718 if( modrm >= 0xc0 ) {
1719 int count=(int)MMX(modrm & 7).q;
1720 MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] << count;
1721 MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] << count;
1722 MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] << count;
1723 MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] << count;
// Memory form: the definition of `count` for this path is not visible here.
1726 UINT32 ea = GetEA(modrm, 0, 8);
1729 MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] << count;
1730 MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] << count;
1731 MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] << count;
1732 MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] << count;
1734 CYCLES(1); // TODO: correct cycle count
// PSLLD: logical left shift of the two doubleword lanes d[0..1] of the destination by `count`.
// NOTE(review): the count is the source quadword truncated to int with no upper bound;
// counts of 32 or more shift a 32-bit lane by at least its width — confirm whether clamping is needed.
1737 void I386_OPS_BASE::MMXOP(pslld_r64_rm64)() // Opcode 0f f2
1740 UINT8 modrm = FETCH();
// Register form: count comes from the MMX register in modrm bits 2-0.
1741 if( modrm >= 0xc0 ) {
1742 int count=(int)MMX(modrm & 7).q;
1743 MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] << count;
1744 MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] << count;
// Memory form: the definition of `count` for this path is not visible here.
1747 UINT32 ea = GetEA(modrm, 0, 8);
1750 MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] << count;
1751 MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] << count;
1753 CYCLES(1); // TODO: correct cycle count
// PSLLQ: logical left shift of the destination quadword by `count`.
// NOTE(review): the count is the source quadword truncated to int with no upper bound;
// counts of 64 or more shift by at least the operand width — confirm whether clamping is needed.
1756 void I386_OPS_BASE::MMXOP(psllq_r64_rm64)() // Opcode 0f f3
1759 UINT8 modrm = FETCH();
// Register form: count comes from the MMX register in modrm bits 2-0.
1760 if( modrm >= 0xc0 ) {
1761 int count=(int)MMX(modrm & 7).q;
1762 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q << count;
// Memory form: the definition of `count` for this path is not visible here.
1765 UINT32 ea = GetEA(modrm, 0, 8);
1768 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q << count;
1770 CYCLES(1); // TODO: correct cycle count
// PMADDWD: multiply corresponding signed words as INT32 and add adjacent products:
// i[0] = s[0]*src.s[0] + s[1]*src.s[1], i[1] = s[2]*src.s[2] + s[3]*src.s[3].
1773 void I386_OPS_BASE::MMXOP(pmaddwd_r64_rm64)() // Opcode 0f f5
1776 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0.
1777 if( modrm >= 0xc0 ) {
1778 MMX((modrm >> 3) & 0x7).i[0]=(INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)MMX(modrm & 7).s[0]+
1779 (INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)MMX(modrm & 7).s[1];
1780 MMX((modrm >> 3) & 0x7).i[1]=(INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)MMX(modrm & 7).s[2]+
1781 (INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)MMX(modrm & 7).s[3];
// Memory source: operand is the local `s` (load not visible here).
1784 UINT32 ea = GetEA(modrm, 0, 8);
1786 MMX((modrm >> 3) & 0x7).i[0]=(INT32)MMX((modrm >> 3) & 0x7).s[0]*(INT32)s.s[0]+
1787 (INT32)MMX((modrm >> 3) & 0x7).s[1]*(INT32)s.s[1];
1788 MMX((modrm >> 3) & 0x7).i[1]=(INT32)MMX((modrm >> 3) & 0x7).s[2]*(INT32)s.s[2]+
1789 (INT32)MMX((modrm >> 3) & 0x7).s[3]*(INT32)s.s[3];
1791 CYCLES(1); // TODO: correct cycle count
// PSUBB: subtract source from destination per byte lane b[n], without saturation.
1794 void I386_OPS_BASE::MMXOP(psubb_r64_rm64)() // Opcode 0f f8
1798 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1799 if( modrm >= 0xc0 ) {
1801 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] - MMX(modrm & 7).b[n];
1804 UINT32 ea = GetEA(modrm, 0, 8);
1807 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] - s.b[n];
1809 CYCLES(1); // TODO: correct cycle count
// PSUBW: subtract source from destination per word lane w[n], without saturation.
1812 void I386_OPS_BASE::MMXOP(psubw_r64_rm64)() // Opcode 0f f9
1816 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1817 if( modrm >= 0xc0 ) {
1819 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] - MMX(modrm & 7).w[n];
1822 UINT32 ea = GetEA(modrm, 0, 8);
1825 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] - s.w[n];
1827 CYCLES(1); // TODO: correct cycle count
// PSUBD: subtract source from destination per doubleword lane d[n], without saturation.
1830 void I386_OPS_BASE::MMXOP(psubd_r64_rm64)() // Opcode 0f fa
1834 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1835 if( modrm >= 0xc0 ) {
1837 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] - MMX(modrm & 7).d[n];
1840 UINT32 ea = GetEA(modrm, 0, 8);
1843 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] - s.d[n];
1845 CYCLES(1); // TODO: correct cycle count
// PADDB: add source to destination per byte lane b[n], without saturation.
1848 void I386_OPS_BASE::MMXOP(paddb_r64_rm64)() // Opcode 0f fc
1852 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1853 if( modrm >= 0xc0 ) {
1855 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] + MMX(modrm & 7).b[n];
1858 UINT32 ea = GetEA(modrm, 0, 8);
1861 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] + s.b[n];
1863 CYCLES(1); // TODO: correct cycle count
// PADDW: add source to destination per word lane w[n], without saturation.
1866 void I386_OPS_BASE::MMXOP(paddw_r64_rm64)() // Opcode 0f fd
1870 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1871 if( modrm >= 0xc0 ) {
1873 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] + MMX(modrm & 7).w[n];
1876 UINT32 ea = GetEA(modrm, 0, 8);
1879 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] + s.w[n];
1881 CYCLES(1); // TODO: correct cycle count
// PADDD: add source to destination per doubleword lane d[n], without saturation.
1884 void I386_OPS_BASE::MMXOP(paddd_r64_rm64)() // Opcode 0f fe
1888 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0, otherwise the local `s` (load not visible here).
1889 if( modrm >= 0xc0 ) {
1891 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] + MMX(modrm & 7).d[n];
1894 UINT32 ea = GetEA(modrm, 0, 8);
1897 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] + s.d[n];
1899 CYCLES(1); // TODO: correct cycle count
// EMMS: resets the x87 tag word to all ones (presumably marking every x87 register empty — confirm).
1902 void I386_OPS_BASE::MMXOP(emms)() // Opcode 0f 77
1904 cpustate->x87_tw = 0xffff; // tag word = 0xffff
1906 CYCLES(1); // TODO: correct cycle count
// Cyrix SVDC: stores segment register state for sreg[index] into a 10-byte memory image at ea.
// Image layout as written below: +0 limit (16 bit), +2 base (32-bit write), +5 flags (16 bit,
// overwriting the top byte of the base write), +7 base bits 31-24, +8 selector.
1909 void I386_OPS_BASE::I386OP(cyrix_svdc)() // Opcode 0f 78
1911 UINT8 modrm = FETCH();
// Only memory forms (modrm < 0xc0) are handled; the register is chosen by modrm bits 5-3.
1913 if( modrm < 0xc0 ) {
1914 UINT32 ea = GetEA(modrm, 0, 10);
1915 int index = (modrm >> 3) & 7;
// Trap vector 6; the condition guarding this trap is not visible here (presumably an unsupported index).
1951 i386_trap( 6, 0, 0);
1955 limit = cpustate->sreg[index].limit;
// The adjustment applied to `limit` when the G bit is set is not visible here.
1957 if (cpustate->sreg[index].flags & 0x8000) //G bit
1962 WRITE16(ea + 0, limit);
1963 WRITE32(ea + 2, cpustate->sreg[index].base);
1964 WRITE16(ea + 5, cpustate->sreg[index].flags); //replace top 8 bits of base
1965 WRITE8(ea + 7, cpustate->sreg[index].base >> 24);
1966 WRITE16(ea + 8, cpustate->sreg[index].selector);
// Presumably the register-form (modrm >= 0xc0) branch: trap vector 6.
1968 i386_trap( 6, 0, 0);
1970 CYCLES(1); // TODO: correct cycle count
// Cyrix RSDC: reloads sreg[index] from a 10-byte memory image at ea.
// base = low 24 bits of the dword at +2 combined with the byte at +7 as bits 31-24;
// flags = word at +5; limit = word at +0 with flags bits 1-0 as limit bits 17-16;
// selector = word at +8.
1973 void I386_OPS_BASE::I386OP(cyrix_rsdc)() // Opcode 0f 79
1975 UINT8 modrm = FETCH();
// Only memory forms (modrm < 0xc0) are handled; the register is chosen by modrm bits 5-3.
1977 if( modrm < 0xc0 ) {
1978 UINT32 ea = GetEA(modrm, 0, 10);
1979 int index = (modrm >> 3) & 7;
// Trap vector 6; the condition guarding this trap is not visible here (presumably an unsupported index).
2017 i386_trap( 6, 0, 0);
2021 base = (READ32(ea + 2) & 0x00ffffff) | (READ8(ea + 7) << 24);
2022 flags = READ16(ea + 5);
2023 limit = READ16(ea + 0) | ((flags & 3) << 16);
// With the G bit set the limit is scaled to 4 KiB units: shifted left 12 with the low 12 bits set.
2025 if (flags & 0x8000) //G bit
2027 limit = (limit << 12) | 0xfff;
2030 cpustate->sreg[index].selector = READ16(ea + 8);
2031 cpustate->sreg[index].flags = flags;
2032 cpustate->sreg[index].base = base;
2033 cpustate->sreg[index].limit = limit;
// Presumably the register-form (modrm >= 0xc0) branch: trap vector 6.
2035 i386_trap( 6, 0, 0);
2037 CYCLES(1); // TODO: correct cycle count
// Cyrix SVLDT: stores the LDTR state (limit, base, flags, segment) into a 10-byte memory image
// at ea, using the same layout as the writes in cyrix_svdc.
2040 void I386_OPS_BASE::I386OP(cyrix_svldt)() // Opcode 0f 7a
// Only executed in protected mode outside V8086 mode.
2042 if ( PROTECTED_MODE && !V8086_MODE )
2044 UINT8 modrm = FETCH();
// Accepts only modrm values whose bits 7-3 are all zero.
2046 if( !(modrm & 0xf8) ) {
2047 UINT32 ea = GetEA(modrm, 0, 10);
2048 UINT32 limit = cpustate->ldtr.limit;
// The adjustment applied to `limit` when the G bit is set is not visible here.
2050 if (cpustate->ldtr.flags & 0x8000) //G bit
2055 WRITE16(ea + 0, limit);
2056 WRITE32(ea + 2, cpustate->ldtr.base);
2057 WRITE16(ea + 5, cpustate->ldtr.flags); //replace top 8 bits of base
2058 WRITE8(ea + 7, cpustate->ldtr.base >> 24);
2059 WRITE16(ea + 8, cpustate->ldtr.segment);
// Presumably the fall-back branches (unsupported modrm, wrong CPU mode): trap vector 6.
2061 i386_trap( 6, 0, 0);
2064 i386_trap( 6, 0, 0);
2066 CYCLES(1); // TODO: correct cycle count
// Cyrix RSLDT: reloads the LDTR state from a 10-byte memory image at ea.
// base = low 24 bits of the dword at +2 combined with the byte at +7 as bits 31-24;
// flags = word at +5; limit = word at +0 with flags bits 1-0 as limit bits 17-16.
2069 void I386_OPS_BASE::I386OP(cyrix_rsldt)() // Opcode 0f 7b
// Only executed in protected mode outside V8086 mode.
2071 if ( PROTECTED_MODE && !V8086_MODE )
2076 UINT8 modrm = FETCH();
// Accepts only modrm values whose bits 7-3 are all zero.
2078 if( !(modrm & 0xf8) ) {
2079 UINT32 ea = GetEA(modrm, 0, 10);
2080 UINT16 flags = READ16(ea + 5);
// The dword at +2 overlaps the flags word at +5, so only its low 24 bits belong to the base.
// They must be masked with AND (as cyrix_rsdc does); the previous OR forced base bits 23-0 to all ones.
2081 UINT32 base = (READ32(ea + 2) & 0x00ffffff) | (READ8(ea + 7) << 24);
2082 UINT32 limit = READ16(ea + 0) | ((flags & 3) << 16);
// With the G bit set the limit is scaled to 4 KiB units: shifted left 12 with the low 12 bits set.
2085 if (flags & 0x8000) //G bit
2087 limit = (limit << 12) | 0xfff;
// `seg` is declared on a line not visible here; it is zeroed, given the stored selector and
// passed to i386_load_protected_mode_segment before the LDTR fields are overwritten.
2090 memset(&seg, 0, sizeof(seg));
2091 seg.selector = READ16(ea + 8);
2092 i386_load_protected_mode_segment(&seg,NULL);
2093 cpustate->ldtr.limit = limit;
2094 cpustate->ldtr.base = base;
2095 cpustate->ldtr.flags = flags;
// Presumably the fall-back branches (unsupported modrm, wrong CPU mode): trap vector 6.
2097 i386_trap( 6, 0, 0);
2100 i386_trap( 6, 0, 0);
2102 CYCLES(1); // TODO: correct cycle count
// Cyrix SVTS: stores the task register state (limit, base, flags, segment) into a 10-byte
// memory image at ea, using the same layout as the writes in cyrix_svdc.
2105 void I386_OPS_BASE::I386OP(cyrix_svts)() // Opcode 0f 7c
// Only executed in protected mode.
2107 if ( PROTECTED_MODE )
2109 UINT8 modrm = FETCH();
// Accepts only modrm values whose bits 7-3 are all zero.
2111 if( !(modrm & 0xf8) ) {
2112 UINT32 ea = GetEA(modrm, 0, 10);
2113 UINT32 limit = cpustate->task.limit;
// The adjustment applied to `limit` when the G bit is set is not visible here.
2115 if (cpustate->task.flags & 0x8000) //G bit
2120 WRITE16(ea + 0, limit);
2121 WRITE32(ea + 2, cpustate->task.base);
2122 WRITE16(ea + 5, cpustate->task.flags); //replace top 8 bits of base
2123 WRITE8(ea + 7, cpustate->task.base >> 24);
2124 WRITE16(ea + 8, cpustate->task.segment);
// Presumably the fall-back branches (unsupported modrm, wrong CPU mode): trap vector 6.
2126 i386_trap( 6, 0, 0);
2129 i386_trap( 6, 0, 0);
// Account for the instruction like every sibling Cyrix save/restore handler does;
// this handler was the only one that never charged a cycle.
CYCLES(1); // TODO: correct cycle count
// Cyrix RSTS: reloads the task register state from a 10-byte memory image at ea.
// base = low 24 bits of the dword at +2 combined with the byte at +7 as bits 31-24;
// flags = word at +5; limit = word at +0 with flags bits 1-0 as limit bits 17-16;
// segment = word at +8.
2133 void I386_OPS_BASE::I386OP(cyrix_rsts)() // Opcode 0f 7d
// Only executed in protected mode.
2135 if ( PROTECTED_MODE )
2140 UINT8 modrm = FETCH();
// Accepts only modrm values whose bits 7-3 are all zero.
2142 if( !(modrm & 0xf8) ) {
2143 UINT32 ea = GetEA(modrm, 0, 10);
2144 UINT16 flags = READ16(ea + 5);
// The dword at +2 overlaps the flags word at +5, so only its low 24 bits belong to the base.
// They must be masked with AND (as cyrix_rsdc does); the previous OR forced base bits 23-0 to all ones.
2145 UINT32 base = (READ32(ea + 2) & 0x00ffffff) | (READ8(ea + 7) << 24);
2146 UINT32 limit = READ16(ea + 0) | ((flags & 3) << 16);
// With the G bit set the limit is scaled to 4 KiB units: shifted left 12 with the low 12 bits set.
2148 if (flags & 0x8000) //G bit
2150 limit = (limit << 12) | 0xfff;
2152 cpustate->task.segment = READ16(ea + 8);
2153 cpustate->task.limit = limit;
2154 cpustate->task.base = base;
2155 cpustate->task.flags = flags;
// Presumably the fall-back branches (unsupported modrm, wrong CPU mode): trap vector 6.
2157 i386_trap( 6, 0, 0);
2160 i386_trap( 6, 0, 0);
2162 CYCLES(1); // TODO: correct cycle count
// MOVD mm, r/m32: loads a 32-bit value into the low doubleword d[0] of the destination MMX
// register and clears the high doubleword d[1].
2165 void I386_OPS_BASE::MMXOP(movd_r64_rm32)() // Opcode 0f 6e
2168 UINT8 modrm = FETCH();
// modrm >= 0xc0: value comes from LOAD_RM32(modrm); otherwise from memory at ea.
2169 if( modrm >= 0xc0 ) {
2170 MMX((modrm >> 3) & 0x7).d[0]=LOAD_RM32(modrm);
2172 UINT32 ea = GetEA(modrm, 0, 4);
2173 MMX((modrm >> 3) & 0x7).d[0]=READ32(ea);
2175 MMX((modrm >> 3) & 0x7).d[1]=0;
2176 CYCLES(1); // TODO: correct cycle count
// MOVQ mm, mm/m64: copies a quadword into the destination MMX register, either from the
// source MMX register (field .l) or from memory at ea through READMMX.
2179 void I386_OPS_BASE::MMXOP(movq_r64_rm64)() // Opcode 0f 6f
2182 UINT8 modrm = FETCH();
2183 if( modrm >= 0xc0 ) {
2184 MMX((modrm >> 3) & 0x7).l=MMX(modrm & 0x7).l;
2186 UINT32 ea = GetEA(modrm, 0, 8);
2187 READMMX( ea, MMX((modrm >> 3) & 0x7));
2189 CYCLES(1); // TODO: correct cycle count
// MOVD r/m32, mm: stores the low doubleword d[0] of the MMX register in modrm bits 5-3,
// via STORE_RM32 when modrm >= 0xc0, otherwise to memory at ea.
2192 void I386_OPS_BASE::MMXOP(movd_rm32_r64)() // Opcode 0f 7e
2195 UINT8 modrm = FETCH();
2196 if( modrm >= 0xc0 ) {
2197 STORE_RM32(modrm, MMX((modrm >> 3) & 0x7).d[0]);
2199 UINT32 ea = GetEA(modrm, 0, 4);
2200 WRITE32(ea, MMX((modrm >> 3) & 0x7).d[0]);
2202 CYCLES(1); // TODO: correct cycle count
// MOVQ mm/m64, mm: copies the MMX register in modrm bits 5-3 to the MMX register in
// modrm bits 2-0 (whole-register assignment), or writes it to memory at ea through WRITEMMX.
2205 void I386_OPS_BASE::MMXOP(movq_rm64_r64)() // Opcode 0f 7f
2208 UINT8 modrm = FETCH();
2209 if( modrm >= 0xc0 ) {
2210 MMX(modrm & 0x7)=MMX((modrm >> 3) & 0x7);
2212 UINT32 ea = GetEA(modrm, 0, 8);
2213 WRITEMMX( ea, MMX((modrm >> 3) & 0x7));
2215 CYCLES(1); // TODO: correct cycle count
// PCMPEQB: per byte lane (c = 0..7), sets the destination byte to 0xff when it equals the
// source byte, otherwise to 0.
2218 void I386_OPS_BASE::MMXOP(pcmpeqb_r64_rm64)() // Opcode 0f 74
2222 UINT8 modrm = FETCH();
// Register form: `s` (source register index) is set on a line not visible here.
2223 if( modrm >= 0xc0 ) {
2226 d=(modrm >> 3) & 0x7;
2227 for (c=0;c <= 7;c++)
2228 MMX(d).b[c]=(MMX(d).b[c] == MMX(s).b[c]) ? 0xff : 0;
// Memory form: `s` is a local operand whose load is not visible here.
2231 int d=(modrm >> 3) & 0x7;
2232 UINT32 ea = GetEA(modrm, 0, 8);
2234 for (c=0;c <= 7;c++)
2235 MMX(d).b[c]=(MMX(d).b[c] == s.b[c]) ? 0xff : 0;
2237 CYCLES(1); // TODO: correct cycle count
// PCMPEQW: per word lane, sets the destination word to 0xffff when it equals the source
// word, otherwise to 0.
2240 void I386_OPS_BASE::MMXOP(pcmpeqw_r64_rm64)() // Opcode 0f 75
2243 UINT8 modrm = FETCH();
// Register form: `s` (source register index) is set on a line not visible here.
2244 if( modrm >= 0xc0 ) {
2247 d=(modrm >> 3) & 0x7;
2248 MMX(d).w[0]=(MMX(d).w[0] == MMX(s).w[0]) ? 0xffff : 0;
2249 MMX(d).w[1]=(MMX(d).w[1] == MMX(s).w[1]) ? 0xffff : 0;
2250 MMX(d).w[2]=(MMX(d).w[2] == MMX(s).w[2]) ? 0xffff : 0;
2251 MMX(d).w[3]=(MMX(d).w[3] == MMX(s).w[3]) ? 0xffff : 0;
// Memory form: `s` is a local operand whose load is not visible here.
2254 int d=(modrm >> 3) & 0x7;
2255 UINT32 ea = GetEA(modrm, 0, 8);
2257 MMX(d).w[0]=(MMX(d).w[0] == s.w[0]) ? 0xffff : 0;
2258 MMX(d).w[1]=(MMX(d).w[1] == s.w[1]) ? 0xffff : 0;
2259 MMX(d).w[2]=(MMX(d).w[2] == s.w[2]) ? 0xffff : 0;
2260 MMX(d).w[3]=(MMX(d).w[3] == s.w[3]) ? 0xffff : 0;
2262 CYCLES(1); // TODO: correct cycle count
// PCMPEQD: per doubleword lane, sets the destination doubleword to 0xffffffff when it equals
// the source doubleword, otherwise to 0.
2265 void I386_OPS_BASE::MMXOP(pcmpeqd_r64_rm64)() // Opcode 0f 76
2268 UINT8 modrm = FETCH();
// Register form: `s` (source register index) is set on a line not visible here.
2269 if( modrm >= 0xc0 ) {
2272 d=(modrm >> 3) & 0x7;
2273 MMX(d).d[0]=(MMX(d).d[0] == MMX(s).d[0]) ? 0xffffffff : 0;
2274 MMX(d).d[1]=(MMX(d).d[1] == MMX(s).d[1]) ? 0xffffffff : 0;
// Memory form: `s` is a local operand whose load is not visible here.
2277 int d=(modrm >> 3) & 0x7;
2278 UINT32 ea = GetEA(modrm, 0, 8);
2280 MMX(d).d[0]=(MMX(d).d[0] == s.d[0]) ? 0xffffffff : 0;
2281 MMX(d).d[1]=(MMX(d).d[1] == s.d[1]) ? 0xffffffff : 0;
2283 CYCLES(1); // TODO: correct cycle count
// PSHUFW mm, mm/m64, imm8: each destination word k is taken from the source word selected
// by the 2-bit field of imm8 at bits 2k+1..2k.
2286 void I386_OPS_BASE::MMXOP(pshufw_r64_rm64_i8)() // Opcode 0f 70
2289 UINT8 modrm = FETCH();
// Register form: words are picked from `t`, set on a line not visible here
// (presumably a copy of the source register so source == destination works — confirm).
2290 if( modrm >= 0xc0 ) {
2293 UINT8 imm8 = FETCH();
2295 d=(modrm >> 3) & 0x7;
2297 MMX(d).w[0]=t.w[imm8 & 3];
2298 MMX(d).w[1]=t.w[(imm8 >> 2) & 3];
2299 MMX(d).w[2]=t.w[(imm8 >> 4) & 3];
2300 MMX(d).w[3]=t.w[(imm8 >> 6) & 3];
// Memory form: the effective address is decoded before the immediate byte is fetched;
// words are picked from the local `s` (load not visible here).
2303 int d=(modrm >> 3) & 0x7;
2304 UINT32 ea = GetEA(modrm, 0, 8);
2305 UINT8 imm8 = FETCH();
2307 MMX(d).w[0]=s.w[imm8 & 3];
2308 MMX(d).w[1]=s.w[(imm8 >> 2) & 3];
2309 MMX(d).w[2]=s.w[(imm8 >> 4) & 3];
2310 MMX(d).w[3]=s.w[(imm8 >> 6) & 3];
2312 CYCLES(1); // TODO: correct cycle count
// PUNPCKLBW xmm, xmm/m: interleaves the low eight bytes of destination and source.
// Result byte 2n comes from destination byte n, result byte 2n+1 from source byte n.
2315 void I386_OPS_BASE::SSEOP(punpcklbw_r128_rm128)() // Opcode 66 0f 60
2317 UINT8 modrm = FETCH();
// Register form: the low halves are first copied into the temporaries xd/xs so the
// destination can be overwritten while interleaving.
2318 if (modrm >= 0xc0) {
2322 d = (modrm >> 3) & 0x7;
2323 xd.l[0] = XMM(d).l[0];
2324 xs.l[0] = XMM(s).l[0];
2325 XMM(d).b[0] = xd.b[0];
2326 XMM(d).b[1] = xs.b[0];
2327 XMM(d).b[2] = xd.b[1];
2328 XMM(d).b[3] = xs.b[1];
2329 XMM(d).b[4] = xd.b[2];
2330 XMM(d).b[5] = xs.b[2];
2331 XMM(d).b[6] = xd.b[3];
2332 XMM(d).b[7] = xs.b[3];
2333 XMM(d).b[8] = xd.b[4];
2334 XMM(d).b[9] = xs.b[4];
2335 XMM(d).b[10] = xd.b[5];
2336 XMM(d).b[11] = xs.b[5];
2337 XMM(d).b[12] = xd.b[6];
2338 XMM(d).b[13] = xs.b[6];
2339 XMM(d).b[14] = xd.b[7];
2340 XMM(d).b[15] = xs.b[7];
// Memory form: only 64 bits are read from ea for the source.
2344 int d = (modrm >> 3) & 0x7;
2345 UINT32 ea = GetEA(modrm, 0, 8);
2346 xd.l[0] = XMM(d).l[0];
2347 xs.q[0] = READ64( ea);
2348 for (int n = 0; n < 8; n++) {
2349 XMM(d).b[n << 1] = xd.b[n];
2350 XMM(d).b[(n << 1) | 1] = xs.b[n];
2353 CYCLES(1); // TODO: correct cycle count
// PUNPCKLWD xmm, xmm/m: interleaves the low four words of destination and source.
// Result word 2n comes from destination word n, result word 2n+1 from source word n.
2356 void I386_OPS_BASE::SSEOP(punpcklwd_r128_rm128)()
2358 UINT8 modrm = FETCH();
// Register form: low halves are copied into the temporaries xd/xs before the destination is rewritten.
2359 if (modrm >= 0xc0) {
2363 d = (modrm >> 3) & 0x7;
2364 xd.l[0] = XMM(d).l[0];
2365 xs.l[0] = XMM(s).l[0];
2366 for (int n = 0; n < 4; n++) {
2367 XMM(d).w[n << 1] = xd.w[n];
2368 XMM(d).w[(n << 1) | 1] = xs.w[n];
// Memory form: only 64 bits are read from ea for the source.
2373 int d = (modrm >> 3) & 0x7;
2374 UINT32 ea = GetEA(modrm, 0, 8);
2375 xd.l[0] = XMM(d).l[0];
2376 xs.q[0] = READ64( ea);
2377 for (int n = 0; n < 4; n++) {
2378 XMM(d).w[n << 1] = xd.w[n];
2379 XMM(d).w[(n << 1) | 1] = xs.w[n];
2382 CYCLES(1); // TODO: correct cycle count
// PUNPCKLDQ xmm, xmm/m: interleaves the low two doublewords of destination and source.
// Result dword 2n comes from destination dword n, result dword 2n+1 from source dword n.
2385 void I386_OPS_BASE::SSEOP(punpckldq_r128_rm128)()
2387 UINT8 modrm = FETCH();
// Register form: low halves are copied into the temporaries xd/xs before the destination is rewritten.
2388 if (modrm >= 0xc0) {
2392 d = (modrm >> 3) & 0x7;
2393 xd.l[0] = XMM(d).l[0];
2394 xs.l[0] = XMM(s).l[0];
2395 for (int n = 0; n < 2; n++) {
2396 XMM(d).d[n << 1] = xd.d[n];
2397 XMM(d).d[(n << 1) | 1] = xs.d[n];
// Memory form: only 64 bits are read from ea for the source.
2402 int d = (modrm >> 3) & 0x7;
2403 UINT32 ea = GetEA(modrm, 0, 8);
2404 xd.l[0] = XMM(d).l[0];
2405 xs.q[0] = READ64( ea);
2406 for (int n = 0; n < 2; n++) {
2407 XMM(d).d[n << 1] = xd.d[n];
2408 XMM(d).d[(n << 1) | 1] = xs.d[n];
2411 CYCLES(1); // TODO: correct cycle count
// PUNPCKLQDQ xmm, xmm/m: keeps the destination's low quadword in q[0] and places the
// source's low quadword in q[1].
2414 void I386_OPS_BASE::SSEOP(punpcklqdq_r128_rm128)()
2416 UINT8 modrm = FETCH();
// Register form: low halves are copied into the temporaries xd/xs first.
2417 if (modrm >= 0xc0) {
2421 d = (modrm >> 3) & 0x7;
2422 xd.l[0] = XMM(d).l[0];
2423 xs.l[0] = XMM(s).l[0];
2424 XMM(d).q[0] = xd.q[0];
2425 XMM(d).q[1] = xs.q[0];
// Memory form: only 64 bits are read from ea for the source.
2429 int d = (modrm >> 3) & 0x7;
2430 UINT32 ea = GetEA(modrm, 0, 8);
2431 xd.l[0] = XMM(d).l[0];
2432 xs.q[0] = READ64( ea);
2433 XMM(d).q[0] = xd.q[0];
2434 XMM(d).q[1] = xs.q[0];
2436 CYCLES(1); // TODO: correct cycle count
// PUNPCKLBW mm, mm/m32: interleaves four bytes of `t` (even result bytes) with the low four
// source bytes (odd result bytes). `t` is set on a line not visible here
// (presumably the destination's low doubleword — confirm).
2439 void I386_OPS_BASE::MMXOP(punpcklbw_r64_r64m32)() // Opcode 0f 60
2442 UINT8 modrm = FETCH();
// Register form: source bytes come from MMX(s).b[0..3].
2443 if( modrm >= 0xc0 ) {
2447 d=(modrm >> 3) & 0x7;
2449 MMX(d).b[0]=t & 0xff;
2450 MMX(d).b[1]=MMX(s).b[0];
2451 MMX(d).b[2]=(t >> 8) & 0xff;
2452 MMX(d).b[3]=MMX(s).b[1];
2453 MMX(d).b[4]=(t >> 16) & 0xff;
2454 MMX(d).b[5]=MMX(s).b[2];
2455 MMX(d).b[6]=(t >> 24) & 0xff;
2456 MMX(d).b[7]=MMX(s).b[3];
// Memory form: source bytes are extracted from the scalar `s` (its load is not visible here).
2459 int d=(modrm >> 3) & 0x7;
2460 UINT32 ea = GetEA(modrm, 0, 4);
2463 MMX(d).b[0]=t & 0xff;
2464 MMX(d).b[1]=s & 0xff;
2465 MMX(d).b[2]=(t >> 8) & 0xff;
2466 MMX(d).b[3]=(s >> 8) & 0xff;
2467 MMX(d).b[4]=(t >> 16) & 0xff;
2468 MMX(d).b[5]=(s >> 16) & 0xff;
2469 MMX(d).b[6]=(t >> 24) & 0xff;
2470 MMX(d).b[7]=(s >> 24) & 0xff;
2472 CYCLES(1); // TODO: correct cycle count
// PUNPCKLWD mm, mm/m32: places the source's low two words in result words 1 and 3.
// Word 0 keeps its value (the assignment below is a self-assignment); the write to word 2
// is on a line not visible here.
2475 void I386_OPS_BASE::MMXOP(punpcklwd_r64_r64m32)() // Opcode 0f 61
2478 UINT8 modrm = FETCH();
// Register form: source words come from MMX(s).w[0..1].
2479 if( modrm >= 0xc0 ) {
2483 d=(modrm >> 3) & 0x7;
2485 MMX(d).w[0]=MMX(d).w[0];
2486 MMX(d).w[1]=MMX(s).w[0];
2488 MMX(d).w[3]=MMX(s).w[1];
// Memory form: source words are extracted from the scalar `s` (its load is not visible here).
2492 int d=(modrm >> 3) & 0x7;
2493 UINT32 ea = GetEA(modrm, 0, 4);
2496 MMX(d).w[0]=MMX(d).w[0];
2497 MMX(d).w[1]=s & 0xffff;
2499 MMX(d).w[3]=(s >> 16) & 0xffff;
2501 CYCLES(1); // TODO: correct cycle count
// PUNPCKLDQ mm, mm/m32: keeps the destination's low doubleword (self-assignment below) and,
// in the register form, places the source's low doubleword in d[1].
2504 void I386_OPS_BASE::MMXOP(punpckldq_r64_r64m32)() // Opcode 0f 62
2507 UINT8 modrm = FETCH();
2508 if( modrm >= 0xc0 ) {
2511 d=(modrm >> 3) & 0x7;
2512 MMX(d).d[0]=MMX(d).d[0];
2513 MMX(d).d[1]=MMX(s).d[0];
// Memory form: the write to d[1] is on a line not visible here.
2516 int d=(modrm >> 3) & 0x7;
2517 UINT32 ea = GetEA(modrm, 0, 4);
2519 MMX(d).d[0]=MMX(d).d[0];
2522 CYCLES(1); // TODO: correct cycle count
// PACKSSWB mm, mm/m64: narrows eight signed words to signed bytes with saturation.
// Result bytes 0-3 come from the destination's words 0-3, bytes 4-7 from the source's words 0-3.
2525 void I386_OPS_BASE::MMXOP(packsswb_r64_rm64)() // Opcode 0f 63
2528 UINT8 modrm = FETCH();
// Register form: `s` (source register index) is set on a line not visible here.
2529 if( modrm >= 0xc0 ) {
2532 d=(modrm >> 3) & 0x7;
// Snapshot the source words before any destination byte is written. When source and
// destination are the same register, writing c[0..3] overwrites the bytes that make up
// s[0] and s[1], so reading MMX(s).s[n] afterwards (as the code used to) packed garbage.
INT16 s0=MMX(s).s[0], s1=MMX(s).s[1], s2=MMX(s).s[2], s3=MMX(s).s[3];
// Each destination word is read before the byte writes reach it, so no copy is needed here.
2533 MMX(d).c[0]=SaturatedSignedWordToSignedByte(MMX(d).s[0]);
2534 MMX(d).c[1]=SaturatedSignedWordToSignedByte(MMX(d).s[1]);
2535 MMX(d).c[2]=SaturatedSignedWordToSignedByte(MMX(d).s[2]);
2536 MMX(d).c[3]=SaturatedSignedWordToSignedByte(MMX(d).s[3]);
2537 MMX(d).c[4]=SaturatedSignedWordToSignedByte(s0);
2538 MMX(d).c[5]=SaturatedSignedWordToSignedByte(s1);
2539 MMX(d).c[6]=SaturatedSignedWordToSignedByte(s2);
2540 MMX(d).c[7]=SaturatedSignedWordToSignedByte(s3);
// Memory form: `s` is a separate local operand (load not visible here), so no aliasing can occur.
2543 int d=(modrm >> 3) & 0x7;
2544 UINT32 ea = GetEA(modrm, 0, 8);
2546 MMX(d).c[0]=SaturatedSignedWordToSignedByte(MMX(d).s[0]);
2547 MMX(d).c[1]=SaturatedSignedWordToSignedByte(MMX(d).s[1]);
2548 MMX(d).c[2]=SaturatedSignedWordToSignedByte(MMX(d).s[2]);
2549 MMX(d).c[3]=SaturatedSignedWordToSignedByte(MMX(d).s[3]);
2550 MMX(d).c[4]=SaturatedSignedWordToSignedByte(s.s[0]);
2551 MMX(d).c[5]=SaturatedSignedWordToSignedByte(s.s[1]);
2552 MMX(d).c[6]=SaturatedSignedWordToSignedByte(s.s[2]);
2553 MMX(d).c[7]=SaturatedSignedWordToSignedByte(s.s[3]);
2555 CYCLES(1); // TODO: correct cycle count
// PCMPGTB: per byte lane (c = 0..7), compares the signed views c[] and sets the destination
// byte to 0xff when destination > source, otherwise to 0.
2558 void I386_OPS_BASE::MMXOP(pcmpgtb_r64_rm64)() // Opcode 0f 64
2562 UINT8 modrm = FETCH();
// Register form: `s` (source register index) is set on a line not visible here.
2563 if( modrm >= 0xc0 ) {
2566 d=(modrm >> 3) & 0x7;
2567 for (c=0;c <= 7;c++)
2568 MMX(d).b[c]=(MMX(d).c[c] > MMX(s).c[c]) ? 0xff : 0;
// Memory form: `s` is a local operand whose load is not visible here.
2571 int d=(modrm >> 3) & 0x7;
2572 UINT32 ea = GetEA(modrm, 0, 8);
2574 for (c=0;c <= 7;c++)
2575 MMX(d).b[c]=(MMX(d).c[c] > s.c[c]) ? 0xff : 0;
2577 CYCLES(1); // TODO: correct cycle count
// PCMPGTW: per word lane (c = 0..3), compares the signed views s[] and sets the destination
// word to 0xffff when destination > source, otherwise to 0.
2580 void I386_OPS_BASE::MMXOP(pcmpgtw_r64_rm64)() // Opcode 0f 65
2584 UINT8 modrm = FETCH();
// Register form: `s` (source register index) is set on a line not visible here.
2585 if( modrm >= 0xc0 ) {
2588 d=(modrm >> 3) & 0x7;
2589 for (c=0;c <= 3;c++)
2590 MMX(d).w[c]=(MMX(d).s[c] > MMX(s).s[c]) ? 0xffff : 0;
// Memory form: `s` is a local operand whose load is not visible here.
2593 int d=(modrm >> 3) & 0x7;
2594 UINT32 ea = GetEA(modrm, 0, 8);
2596 for (c=0;c <= 3;c++)
2597 MMX(d).w[c]=(MMX(d).s[c] > s.s[c]) ? 0xffff : 0;
2599 CYCLES(1); // TODO: correct cycle count
// PCMPGTD: per doubleword lane (c = 0..1), compares the signed views i[] and sets the
// destination doubleword to 0xffffffff when destination > source, otherwise to 0.
2602 void I386_OPS_BASE::MMXOP(pcmpgtd_r64_rm64)() // Opcode 0f 66
2606 UINT8 modrm = FETCH();
// Register form: `s` (source register index) is set on a line not visible here.
2607 if( modrm >= 0xc0 ) {
2610 d=(modrm >> 3) & 0x7;
2611 for (c=0;c <= 1;c++)
2612 MMX(d).d[c]=(MMX(d).i[c] > MMX(s).i[c]) ? 0xffffffff : 0;
// Memory form: `s` is a local operand whose load is not visible here.
2615 int d=(modrm >> 3) & 0x7;
2616 UINT32 ea = GetEA(modrm, 0, 8);
2618 for (c=0;c <= 1;c++)
2619 MMX(d).d[c]=(MMX(d).i[c] > s.i[c]) ? 0xffffffff : 0;
2621 CYCLES(1); // TODO: correct cycle count
// PACKUSWB mm, mm/m64: narrows eight signed words to unsigned bytes with saturation
// (SaturatedSignedWordToUnsignedByte). Result bytes 0-3 come from the first operand's words,
// bytes 4-7 from the second operand's words.
2624 void I386_OPS_BASE::MMXOP(packuswb_r64_rm64)() // Opcode 0f 67
2627 UINT8 modrm = FETCH();
// Register form: words are read from the temporaries `ds` and `sd`, filled on lines not
// visible here (presumably copies of the destination and source registers — confirm).
2628 if( modrm >= 0xc0 ) {
2632 d=(modrm >> 3) & 0x7;
2635 MMX(d).b[0]=SaturatedSignedWordToUnsignedByte(ds.s[0]);
2636 MMX(d).b[1]=SaturatedSignedWordToUnsignedByte(ds.s[1]);
2637 MMX(d).b[2]=SaturatedSignedWordToUnsignedByte(ds.s[2]);
2638 MMX(d).b[3]=SaturatedSignedWordToUnsignedByte(ds.s[3]);
2639 MMX(d).b[4]=SaturatedSignedWordToUnsignedByte(sd.s[0]);
2640 MMX(d).b[5]=SaturatedSignedWordToUnsignedByte(sd.s[1]);
2641 MMX(d).b[6]=SaturatedSignedWordToUnsignedByte(sd.s[2]);
2642 MMX(d).b[7]=SaturatedSignedWordToUnsignedByte(sd.s[3]);
// Memory form: words are read from the locals `t` and `s`, filled on lines not visible here.
2645 int d=(modrm >> 3) & 0x7;
2646 UINT32 ea = GetEA(modrm, 0, 8);
2649 MMX(d).b[0]=SaturatedSignedWordToUnsignedByte(t.s[0]);
2650 MMX(d).b[1]=SaturatedSignedWordToUnsignedByte(t.s[1]);
2651 MMX(d).b[2]=SaturatedSignedWordToUnsignedByte(t.s[2]);
2652 MMX(d).b[3]=SaturatedSignedWordToUnsignedByte(t.s[3]);
2653 MMX(d).b[4]=SaturatedSignedWordToUnsignedByte(s.s[0]);
2654 MMX(d).b[5]=SaturatedSignedWordToUnsignedByte(s.s[1]);
2655 MMX(d).b[6]=SaturatedSignedWordToUnsignedByte(s.s[2]);
2656 MMX(d).b[7]=SaturatedSignedWordToUnsignedByte(s.s[3]);
2658 CYCLES(1); // TODO: correct cycle count
// PUNPCKHBW mm, mm/m64: interleaves the high four bytes (4-7) of destination and source.
// Even result bytes come from the destination, odd result bytes from the source.
2661 void I386_OPS_BASE::MMXOP(punpckhbw_r64_rm64)() // Opcode 0f 68
2664 UINT8 modrm = FETCH();
// Register form: the writes go in ascending order, so each high byte is read before it is overwritten.
2665 if( modrm >= 0xc0 ) {
2668 d=(modrm >> 3) & 0x7;
2669 MMX(d).b[0]=MMX(d).b[4];
2670 MMX(d).b[1]=MMX(s).b[4];
2671 MMX(d).b[2]=MMX(d).b[5];
2672 MMX(d).b[3]=MMX(s).b[5];
2673 MMX(d).b[4]=MMX(d).b[6];
2674 MMX(d).b[5]=MMX(s).b[6];
2675 MMX(d).b[6]=MMX(d).b[7];
2676 MMX(d).b[7]=MMX(s).b[7];
// Memory form: the writes to the odd bytes are on lines not visible here.
2679 int d=(modrm >> 3) & 0x7;
2680 UINT32 ea = GetEA(modrm, 0, 8);
2682 MMX(d).b[0]=MMX(d).b[4];
2684 MMX(d).b[2]=MMX(d).b[5];
2686 MMX(d).b[4]=MMX(d).b[6];
2688 MMX(d).b[6]=MMX(d).b[7];
2691 CYCLES(1); // TODO: correct cycle count
// PUNPCKHWD mm, mm/m64: interleaves the high two words (2-3) of destination and source.
// Even result words come from the destination, odd result words from the source.
2694 void I386_OPS_BASE::MMXOP(punpckhwd_r64_rm64)() // Opcode 0f 69
2697 UINT8 modrm = FETCH();
2698 if( modrm >= 0xc0 ) {
2701 d=(modrm >> 3) & 0x7;
2702 MMX(d).w[0]=MMX(d).w[2];
2703 MMX(d).w[1]=MMX(s).w[2];
2704 MMX(d).w[2]=MMX(d).w[3];
2705 MMX(d).w[3]=MMX(s).w[3];
// Memory form: the writes to the odd words are on lines not visible here.
2708 int d=(modrm >> 3) & 0x7;
2709 UINT32 ea = GetEA(modrm, 0, 8);
2711 MMX(d).w[0]=MMX(d).w[2];
2713 MMX(d).w[2]=MMX(d).w[3];
2716 CYCLES(1); // TODO: correct cycle count
// PUNPCKHDQ mm, mm/m64: moves the destination's high doubleword into d[0] and, in the
// register form, places the source's high doubleword in d[1].
2719 void I386_OPS_BASE::MMXOP(punpckhdq_r64_rm64)() // Opcode 0f 6a
2722 UINT8 modrm = FETCH();
2723 if( modrm >= 0xc0 ) {
2726 d=(modrm >> 3) & 0x7;
2727 MMX(d).d[0]=MMX(d).d[1];
2728 MMX(d).d[1]=MMX(s).d[1];
// Memory form: the write to d[1] is on a line not visible here.
2731 int d=(modrm >> 3) & 0x7;
2732 UINT32 ea = GetEA(modrm, 0, 8);
2734 MMX(d).d[0]=MMX(d).d[1];
2737 CYCLES(1); // TODO: correct cycle count
// PACKSSDW mm, mm/m64: narrows signed doublewords to signed words with saturation
// (SaturatedSignedDwordToSignedWord) into the four word lanes of the destination.
2740 void I386_OPS_BASE::MMXOP(packssdw_r64_rm64)() // Opcode 0f 6b
2743 UINT8 modrm = FETCH();
// Register form: inputs are the temporaries t1..t4, assigned on lines not visible here
// (presumably the destination's and source's doublewords — confirm).
2744 if( modrm >= 0xc0 ) {
2746 INT32 t1, t2, t3, t4;
2748 d=(modrm >> 3) & 0x7;
2753 MMX(d).s[0] = SaturatedSignedDwordToSignedWord(t1);
2754 MMX(d).s[1] = SaturatedSignedDwordToSignedWord(t2);
2755 MMX(d).s[2] = SaturatedSignedDwordToSignedWord(t3);
2756 MMX(d).s[3] = SaturatedSignedDwordToSignedWord(t4);
// Memory form: words 0-1 come from t1/t2 (assigned on lines not visible here),
// words 2-3 from the doublewords of the local `s`.
2761 int d=(modrm >> 3) & 0x7;
2762 UINT32 ea = GetEA(modrm, 0, 8);
2766 MMX(d).s[0] = SaturatedSignedDwordToSignedWord(t1);
2767 MMX(d).s[1] = SaturatedSignedDwordToSignedWord(t2);
2768 MMX(d).s[2] = SaturatedSignedDwordToSignedWord(s.i[0]);
2769 MMX(d).s[3] = SaturatedSignedDwordToSignedWord(s.i[1]);
2771 CYCLES(1); // TODO: correct cycle count
// Opcode group 0f ae, dispatched on the modrm byte.
// 0xf8 / 0xf0 / 0xe8 are the fence instructions: each only charges one cycle (sfence also logs).
// Memory forms (modm < 0xc0) are selected by modrm bits 5-3: 2 = ldmxcsr, 3 = stmxcsr, 7 = clflush.
// The remaining visible paths call report_invalid_modrm.
2774 void I386_OPS_BASE::SSEOP(group_0fae)() // Opcode 0f ae
2776 UINT8 modm = FETCH();
2777 if( modm == 0xf8 ) {
2778 logerror("Unemulated SFENCE opcode called\n");
2779 CYCLES(1); // sfence instruction
2780 } else if( modm == 0xf0 ) {
2781 CYCLES(1); // mfence instruction
2782 } else if( modm == 0xe8 ) {
2783 CYCLES(1); // lfence instruction
2784 } else if( modm < 0xc0 ) {
2786 switch ( (modm & 0x38) >> 3 )
// Loads cpustate->mxcsr from the 32-bit memory operand.
2788 case 2: // ldmxcsr m32
2789 ea = GetEA(modm, 0, 4);
2790 cpustate->mxcsr = READ32(ea);
// Stores cpustate->mxcsr to the 32-bit memory operand.
2792 case 3: // stmxcsr m32
2793 ea = GetEA(modm, 0, 4);
2794 WRITE32(ea, cpustate->mxcsr);
// Only decodes the operand address; no memory access is performed on the visible lines.
2796 case 7: // clflush m8
2797 GetNonTranslatedEA(modm, NULL);
2800 report_invalid_modrm( "sse_group_0fae", modm);
2803 report_invalid_modrm( "sse_group_0fae", modm);
// CVTTPS2DQ: converts the four single-precision lanes f[0..3] of the source to signed
// 32-bit integers with a C++ (INT32) cast, i.e. truncation toward zero.
2807 void I386_OPS_BASE::SSEOP(cvttps2dq_r128_rm128)() // Opcode f3 0f 5b
2809 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0.
2810 if( modrm >= 0xc0 ) {
2811 XMM((modrm >> 3) & 0x7).i[0]=(INT32)XMM(modrm & 0x7).f[0];
2812 XMM((modrm >> 3) & 0x7).i[1]=(INT32)XMM(modrm & 0x7).f[1];
2813 XMM((modrm >> 3) & 0x7).i[2]=(INT32)XMM(modrm & 0x7).f[2];
2814 XMM((modrm >> 3) & 0x7).i[3]=(INT32)XMM(modrm & 0x7).f[3];
// Memory source: operand is the local `src` (load not visible here).
2817 UINT32 ea = GetEA(modrm, 0, 16);
2819 XMM((modrm >> 3) & 0x7).i[0]=(INT32)src.f[0];
2820 XMM((modrm >> 3) & 0x7).i[1]=(INT32)src.f[1];
2821 XMM((modrm >> 3) & 0x7).i[2]=(INT32)src.f[2];
2822 XMM((modrm >> 3) & 0x7).i[3]=(INT32)src.f[3];
2824 CYCLES(1); // TODO: correct cycle count
// CVTSS2SD: widens the low single-precision value of the source to double precision and
// stores it in f64[0] of the destination; other destination lanes are not written.
2827 void I386_OPS_BASE::SSEOP(cvtss2sd_r128_r128m32)() // Opcode f3 0f 5a
2829 UINT8 modrm = FETCH();
2830 if( modrm >= 0xc0 ) {
2831 XMM((modrm >> 3) & 0x7).f64[0] = XMM(modrm & 0x7).f[0];
// Memory source: the 32-bit pattern is read into s.d[0] and reinterpreted through s.f[0].
2834 UINT32 ea = GetEA(modrm, 0, 4);
2835 s.d[0] = READ32(ea);
2836 XMM((modrm >> 3) & 0x7).f64[0] = s.f[0];
2838 CYCLES(1); // TODO: correct cycle count
// CVTTSS2SI: converts the low single-precision value of the source to a signed 32-bit integer
// with a C++ (INT32) cast (truncation toward zero) and stores it with STORE_REG32.
2841 void I386_OPS_BASE::SSEOP(cvttss2si_r32_r128m32)() // Opcode f3 0f 2c
2844 UINT8 modrm = FETCH(); // get modrm byte
2845 if( modrm >= 0xc0 ) { // if bits 7-6 are 11 the source is a xmm register (low doubleword)
// NOTE(review): this is the only handler here that adjusts the float index with
// NATIVE_ENDIAN_VALUE_LE_BE; sibling handlers index f[0] directly — confirm which is intended.
2846 src = (INT32)XMM(modrm & 0x7).f[0^NATIVE_ENDIAN_VALUE_LE_BE(0,1)];
2847 } else { // otherwise is a memory address
// The 32-bit pattern is read into t.d[0] and reinterpreted through t.f[0].
2849 UINT32 ea = GetEA(modrm, 0, 4);
2850 t.d[0] = READ32(ea);
2851 src = (INT32)t.f[0];
2853 STORE_REG32(modrm, (UINT32)src);
2854 CYCLES(1); // TODO: correct cycle count
// CVTSS2SI: converts the low single-precision value of the source to a signed 32-bit integer
// and stores it with STORE_REG32.
// NOTE(review): the conversion is a plain (INT32) cast, i.e. truncation, identical to the
// cvttss2si handler; no rounding-mode handling is visible here — confirm whether that is intended.
2857 void I386_OPS_BASE::SSEOP(cvtss2si_r32_r128m32)() // Opcode f3 0f 2d
2860 UINT8 modrm = FETCH();
2861 if( modrm >= 0xc0 ) {
2862 src = (INT32)XMM(modrm & 0x7).f[0];
// Memory source: the 32-bit pattern is read into t.d[0] and reinterpreted through t.f[0].
2865 UINT32 ea = GetEA(modrm, 0, 4);
2866 t.d[0] = READ32(ea);
2867 src = (INT32)t.f[0];
2869 STORE_REG32(modrm, (UINT32)src);
2870 CYCLES(1); // TODO: correct cycle count
// CVTSI2SS: converts a signed 32-bit integer (register via LOAD_RM32 or memory via READ32)
// to single precision and stores it in f[0] of the destination XMM register.
2873 void I386_OPS_BASE::SSEOP(cvtsi2ss_r128_rm32)() // Opcode f3 0f 2a
2875 UINT8 modrm = FETCH();
2876 if( modrm >= 0xc0 ) {
2877 XMM((modrm >> 3) & 0x7).f[0] = (INT32)LOAD_RM32(modrm);
2879 UINT32 ea = GetEA(modrm, 0, 4);
2880 XMM((modrm >> 3) & 0x7).f[0] = (INT32)READ32(ea);
2882 CYCLES(1); // TODO: correct cycle count
// CVTPI2PS: converts the two signed doublewords i[0..1] of the MMX/memory source to single
// precision and stores them in f[0..1] of the destination XMM register.
2885 void I386_OPS_BASE::SSEOP(cvtpi2ps_r128_rm64)() // Opcode 0f 2a
2887 UINT8 modrm = FETCH();
2889 if( modrm >= 0xc0 ) {
2890 XMM((modrm >> 3) & 0x7).f[0] = (float)MMX(modrm & 0x7).i[0];
2891 XMM((modrm >> 3) & 0x7).f[1] = (float)MMX(modrm & 0x7).i[1];
// Memory source: operand is the local `r` (load not visible here).
2894 UINT32 ea = GetEA(modrm, 0, 8);
2896 XMM((modrm >> 3) & 0x7).f[0] = (float)r.i[0];
2897 XMM((modrm >> 3) & 0x7).f[1] = (float)r.i[1];
2899 CYCLES(1); // TODO: correct cycle count
// CVTTPS2PI mm, xmm/m64: converts the two low single-precision values of the source to signed
// doublewords (implicit float-to-int conversion, i.e. truncation) and stores them in i[0..1]
// of the destination MMX register selected by modrm bits 5-3.
2902 void I386_OPS_BASE::SSEOP(cvttps2pi_r64_r128m64)() // Opcode 0f 2c
2904 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0.
2906 if( modrm >= 0xc0 ) {
2907 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0];
2908 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1];
// Memory source: operand is the local `r` (load not visible here). The result must go to the
// MMX destination exactly as in the register form; it was previously written to an XMM register.
2911 UINT32 ea = GetEA(modrm, 0, 16);
2913 MMX((modrm >> 3) & 0x7).i[0] = r.f[0];
2914 MMX((modrm >> 3) & 0x7).i[1] = r.f[1];
2916 CYCLES(1); // TODO: correct cycle count
// CVTPS2PI mm, xmm/m64: converts the two low single-precision values of the source to signed
// doublewords and stores them in i[0..1] of the destination MMX register (modrm bits 5-3).
// NOTE(review): the conversion is the implicit float-to-int conversion (truncation), identical
// to the cvttps2pi handler; no rounding-mode handling is visible here — confirm.
2919 void I386_OPS_BASE::SSEOP(cvtps2pi_r64_r128m64)() // Opcode 0f 2d
2921 UINT8 modrm = FETCH();
// Register source when modrm >= 0xc0.
2923 if( modrm >= 0xc0 ) {
2924 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0];
2925 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1];
// Memory source: operand is the local `r` (load not visible here). The result must go to the
// MMX destination exactly as in the register form; it was previously written to an XMM register.
2928 UINT32 ea = GetEA(modrm, 0, 16);
2930 MMX((modrm >> 3) & 0x7).i[0] = r.f[0];
2931 MMX((modrm >> 3) & 0x7).i[1] = r.f[1];
2933 CYCLES(1); // TODO: correct cycle count
// CVTPS2PD: widens the two low single-precision values f[0..1] of the source to double
// precision and stores them in f64[0..1] of the destination.
2936 void I386_OPS_BASE::SSEOP(cvtps2pd_r128_r128m64)() // Opcode 0f 5a
2938 UINT8 modrm = FETCH();
2939 if( modrm >= 0xc0 ) {
2940 XMM((modrm >> 3) & 0x7).f64[0] = (double)XMM(modrm & 0x7).f[0];
2941 XMM((modrm >> 3) & 0x7).f64[1] = (double)XMM(modrm & 0x7).f[1];
// Memory source: operand is the local `r` (load not visible here).
2944 UINT32 ea = GetEA(modrm, 0, 8);
2946 XMM((modrm >> 3) & 0x7).f64[0] = (double)r.f[0];
2947 XMM((modrm >> 3) & 0x7).f64[1] = (double)r.f[1];
2949 CYCLES(1); // TODO: correct cycle count
// CVTDQ2PS: converts the four signed doublewords i[0..3] of the source to single precision
// and stores them in f[0..3] of the destination.
2952 void I386_OPS_BASE::SSEOP(cvtdq2ps_r128_rm128)() // Opcode 0f 5b
2954 UINT8 modrm = FETCH();
2955 if( modrm >= 0xc0 ) {
2956 XMM((modrm >> 3) & 0x7).f[0] = (float)XMM(modrm & 0x7).i[0];
2957 XMM((modrm >> 3) & 0x7).f[1] = (float)XMM(modrm & 0x7).i[1];
2958 XMM((modrm >> 3) & 0x7).f[2] = (float)XMM(modrm & 0x7).i[2];
2959 XMM((modrm >> 3) & 0x7).f[3] = (float)XMM(modrm & 0x7).i[3];
// Memory source: operand is the local `r` (load not visible here).
2962 UINT32 ea = GetEA(modrm, 0, 16);
2964 XMM((modrm >> 3) & 0x7).f[0] = (float)r.i[0];
2965 XMM((modrm >> 3) & 0x7).f[1] = (float)r.i[1];
2966 XMM((modrm >> 3) & 0x7).f[2] = (float)r.i[2];
2967 XMM((modrm >> 3) & 0x7).f[3] = (float)r.i[3];
2969 CYCLES(1); // TODO: correct cycle count
// CVTDQ2PD: converts the two low signed doublewords i[0..1] of the source to double precision
// and stores them in f64[0..1] of the destination.
2972 void I386_OPS_BASE::SSEOP(cvtdq2pd_r128_r128m64)() // Opcode f3 0f e6
2974 UINT8 modrm = FETCH();
2975 if( modrm >= 0xc0 ) {
2976 XMM((modrm >> 3) & 0x7).f64[0] = (double)XMM(modrm & 0x7).i[0];
2977 XMM((modrm >> 3) & 0x7).f64[1] = (double)XMM(modrm & 0x7).i[1];
// Memory source: operand is the local `s` (load not visible here).
2980 UINT32 ea = GetEA(modrm, 0, 8);
2982 XMM((modrm >> 3) & 0x7).f64[0] = (double)s.i[0];
2983 XMM((modrm >> 3) & 0x7).f64[1] = (double)s.i[1];
2985 CYCLES(1); // TODO: correct cycle count
// MOVSS xmm, xmm/m32 (f3 0f 10): loads the low dword d[0] of XMM(reg) from
// XMM(rm).d[0] (register form) or from a 32-bit read at the effective address.
// Only d[0] is written by the visible statements; the upper lanes of the
// destination are left as they were in both forms.
2988 void I386_OPS_BASE::SSEOP(movss_r128_rm128)() // Opcode f3 0f 10
2990 UINT8 modrm = FETCH();
2991 if( modrm >= 0xc0 ) {
2992 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[0];
2994 UINT32 ea = GetEA(modrm, 0, 4);
2995 XMM((modrm >> 3) & 0x7).d[0] = READ32(ea);
2997 CYCLES(1); // TODO: correct cycle count
// MOVSS xmm/m32, xmm (f3 0f 11): stores the low dword d[0] of XMM(reg) into
// XMM(rm).d[0] (register form) or writes it as 32 bits to the effective address.
3000 void I386_OPS_BASE::SSEOP(movss_rm128_r128)() // Opcode f3 0f 11
3002 UINT8 modrm = FETCH();
3003 if( modrm >= 0xc0 ) {
3004 XMM(modrm & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0];
3006 UINT32 ea = GetEA(modrm, 0, 4);
3007 WRITE32(ea, XMM((modrm >> 3) & 0x7).d[0]);
3009 CYCLES(1); // TODO: correct cycle count
// MOVSLDUP (f3 0f 12): duplicates the even dwords of the source, so the
// destination becomes { src.d[0], src.d[0], src.d[2], src.d[2] }.
// Register form reads XMM(rm); the memory form reads a temporary src
// (16-byte operand requested from GetEA).
3012 void I386_OPS_BASE::SSEOP(movsldup_r128_rm128)() // Opcode f3 0f 12
3014 UINT8 modrm = FETCH();
3015 if( modrm >= 0xc0 ) {
3016 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[0];
3017 XMM((modrm >> 3) & 0x7).d[1] = XMM(modrm & 0x7).d[0];
3018 XMM((modrm >> 3) & 0x7).d[2] = XMM(modrm & 0x7).d[2];
3019 XMM((modrm >> 3) & 0x7).d[3] = XMM(modrm & 0x7).d[2];
3022 UINT32 ea = GetEA(modrm, 0, 16);
3024 XMM((modrm >> 3) & 0x7).d[0] = src.d[0];
3025 XMM((modrm >> 3) & 0x7).d[1] = src.d[0];
3026 XMM((modrm >> 3) & 0x7).d[2] = src.d[2];
3027 XMM((modrm >> 3) & 0x7).d[3] = src.d[2];
3029 CYCLES(1); // TODO: correct cycle count
// MOVSHDUP (f3 0f 16): duplicates the odd dwords of the source, so the
// destination becomes { src.d[1], src.d[1], src.d[3], src.d[3] }.
// Register form reads XMM(rm); the memory form reads a temporary src
// (16-byte operand requested from GetEA).
3032 void I386_OPS_BASE::SSEOP(movshdup_r128_rm128)() // Opcode f3 0f 16
3034 UINT8 modrm = FETCH();
3035 if( modrm >= 0xc0 ) {
3036 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[1];
3037 XMM((modrm >> 3) & 0x7).d[1] = XMM(modrm & 0x7).d[1];
3038 XMM((modrm >> 3) & 0x7).d[2] = XMM(modrm & 0x7).d[3];
3039 XMM((modrm >> 3) & 0x7).d[3] = XMM(modrm & 0x7).d[3];
3042 UINT32 ea = GetEA(modrm, 0, 16);
3044 XMM((modrm >> 3) & 0x7).d[0] = src.d[1];
3045 XMM((modrm >> 3) & 0x7).d[1] = src.d[1];
3046 XMM((modrm >> 3) & 0x7).d[2] = src.d[3];
3047 XMM((modrm >> 3) & 0x7).d[3] = src.d[3];
3049 CYCLES(1); // TODO: correct cycle count
// MOVAPS xmm, xmm/m128 (0f 28): copies the whole source register XMM(rm) into
// XMM(reg), or loads XMM(reg) from memory with READXMM.
// NOTE(review): no alignment check is visible in the memory form - confirm
// whether GetEA/READXMM enforce the 16-byte alignment this opcode requires.
3052 void I386_OPS_BASE::SSEOP(movaps_r128_rm128)() // Opcode 0f 28
3054 UINT8 modrm = FETCH();
3055 if( modrm >= 0xc0 ) {
3056 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
3058 UINT32 ea = GetEA(modrm, 0, 16);
3059 READXMM( ea, XMM((modrm >> 3) & 0x7));
3061 CYCLES(1); // TODO: correct cycle count
// MOVAPS xmm/m128, xmm (0f 29): copies XMM(reg) into XMM(rm), or stores
// XMM(reg) to memory with WRITEXMM.
// NOTE(review): no alignment check is visible in the memory form - confirm
// whether GetEA/WRITEXMM enforce the 16-byte alignment this opcode requires.
3064 void I386_OPS_BASE::SSEOP(movaps_rm128_r128)() // Opcode 0f 29
3066 UINT8 modrm = FETCH();
3067 if( modrm >= 0xc0 ) {
3068 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
3070 UINT32 ea = GetEA(modrm, 0, 16);
3071 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
3073 CYCLES(1); // TODO: correct cycle count
// MOVUPS xmm, xmm/m128 (0f 10): copies XMM(rm) into XMM(reg), or loads
// XMM(reg) from a possibly unaligned 16-byte memory operand with READXMM.
3076 void I386_OPS_BASE::SSEOP(movups_r128_rm128)() // Opcode 0f 10
3078 UINT8 modrm = FETCH();
3079 if( modrm >= 0xc0 ) {
3080 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
3082 UINT32 ea = GetEA(modrm, 0, 16);
3083 READXMM( ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3085 CYCLES(1); // TODO: correct cycle count
// MOVUPD xmm, xmm/m128 (66 0f 10): same data movement as movups_r128_rm128 -
// copies XMM(rm) into XMM(reg), or loads XMM(reg) from memory with READXMM.
3088 void I386_OPS_BASE::SSEOP(movupd_r128_rm128)() // Opcode 66 0f 10
3090 UINT8 modrm = FETCH();
3091 if( modrm >= 0xc0 ) {
3092 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
3094 UINT32 ea = GetEA(modrm, 0, 16);
3095 READXMM( ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3097 CYCLES(1); // TODO: correct cycle count
// MOVUPS xmm/m128, xmm (0f 11): copies XMM(reg) into XMM(rm), or stores
// XMM(reg) to a possibly unaligned 16-byte memory operand with WRITEXMM.
3100 void I386_OPS_BASE::SSEOP(movups_rm128_r128)() // Opcode 0f 11
3102 UINT8 modrm = FETCH();
3103 if( modrm >= 0xc0 ) {
3104 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
3106 UINT32 ea = GetEA(modrm, 0, 16);
3107 WRITEXMM( ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3109 CYCLES(1); // TODO: correct cycle count
// MOVUPD xmm/m128, xmm (66 0f 11): same data movement as movups_rm128_r128 -
// copies XMM(reg) into XMM(rm), or stores XMM(reg) to memory with WRITEXMM.
3112 void I386_OPS_BASE::SSEOP(movupd_rm128_r128)() // Opcode 66 0f 11
3114 UINT8 modrm = FETCH();
3115 if( modrm >= 0xc0 ) {
3116 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
3118 UINT32 ea = GetEA(modrm, 0, 16);
3119 WRITEXMM( ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3121 CYCLES(1); // TODO: correct cycle count
// Opcode 0f 12.
// Register form (modrm >= 0xc0): copies the high qword of XMM(rm) into the
// low qword of XMM(reg), which is the MOVHLPS data movement.
// Memory form (MOVLPS xmm, m64): loads the low qword of XMM(reg) from memory
// with READXMM_LO64.
3124 void I386_OPS_BASE::SSEOP(movlps_r128_m64)() // Opcode 0f 12
3126 UINT8 modrm = FETCH();
3127 if( modrm >= 0xc0 ) {
3129 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[1];
3130 CYCLES(1); // TODO: correct cycle count
3133 UINT32 ea = GetEA(modrm, 0, 8);
3134 READXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
3135 CYCLES(1); // TODO: correct cycle count
// MOVLPD xmm, m64 (66 0f 12): loads the low qword of XMM(reg) from memory
// with READXMM_LO64.
// In the register form (modrm >= 0xc0) the only visible statement charges
// one cycle.
3139 void I386_OPS_BASE::SSEOP(movlpd_r128_m64)() // Opcode 66 0f 12
3141 UINT8 modrm = FETCH();
3142 if( modrm >= 0xc0 ) {
3143 CYCLES(1); // TODO: correct cycle count
3146 UINT32 ea = GetEA(modrm, 0, 8);
3147 READXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
3148 CYCLES(1); // TODO: correct cycle count
// MOVLPS m64, xmm (0f 13): stores the low qword of XMM(reg) to memory with
// WRITEXMM_LO64.
// The register form is treated as unsupported: it only charges one cycle.
3152 void I386_OPS_BASE::SSEOP(movlps_m64_r128)() // Opcode 0f 13
3154 UINT8 modrm = FETCH();
3155 if( modrm >= 0xc0 ) {
3156 // unsupported by cpu
3157 CYCLES(1); // TODO: correct cycle count
3159 UINT32 ea = GetEA(modrm, 0, 8);
3160 WRITEXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
3161 CYCLES(1); // TODO: correct cycle count
// MOVLPD m64, xmm (66 0f 13): stores the low qword of XMM(reg) to memory with
// WRITEXMM_LO64.
// The register form is treated as unsupported: it only charges one cycle.
3165 void I386_OPS_BASE::SSEOP(movlpd_m64_r128)() // Opcode 66 0f 13
3167 UINT8 modrm = FETCH();
3168 if( modrm >= 0xc0 ) {
3169 // unsupported by cpu
3170 CYCLES(1); // TODO: correct cycle count
3172 UINT32 ea = GetEA(modrm, 0, 8);
3173 WRITEXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
3174 CYCLES(1); // TODO: correct cycle count
// Opcode 0f 16.
// Register form (modrm >= 0xc0): copies the low qword of XMM(rm) into the
// high qword of XMM(reg), which is the MOVLHPS data movement.
// Memory form (MOVHPS xmm, m64): loads the high qword of XMM(reg) from memory
// with READXMM_HI64.
3178 void I386_OPS_BASE::SSEOP(movhps_r128_m64)() // Opcode 0f 16
3180 UINT8 modrm = FETCH();
3181 if( modrm >= 0xc0 ) {
3183 XMM((modrm >> 3) & 0x7).q[1] = XMM(modrm & 0x7).q[0];
3184 CYCLES(1); // TODO: correct cycle count
3187 UINT32 ea = GetEA(modrm, 0, 8);
3188 READXMM_HI64( ea, XMM((modrm >> 3) & 0x7));
3189 CYCLES(1); // TODO: correct cycle count
// MOVHPD xmm, m64 (66 0f 16): loads the high qword of XMM(reg) from memory
// with READXMM_HI64.
// The register form is treated as unsupported: it only charges one cycle.
3193 void I386_OPS_BASE::SSEOP(movhpd_r128_m64)() // Opcode 66 0f 16
3195 UINT8 modrm = FETCH();
3196 if( modrm >= 0xc0 ) {
3197 // unsupported by cpu
3198 CYCLES(1); // TODO: correct cycle count
3201 UINT32 ea = GetEA(modrm, 0, 8);
3202 READXMM_HI64( ea, XMM((modrm >> 3) & 0x7));
3203 CYCLES(1); // TODO: correct cycle count
// MOVHPS m64, xmm (0f 17): stores the high qword of XMM(reg) to memory with
// WRITEXMM_HI64.
// The register form is treated as unsupported: it only charges one cycle.
3207 void I386_OPS_BASE::SSEOP(movhps_m64_r128)() // Opcode 0f 17
3209 UINT8 modrm = FETCH();
3210 if( modrm >= 0xc0 ) {
3211 // unsupported by cpu
3212 CYCLES(1); // TODO: correct cycle count
3214 UINT32 ea = GetEA(modrm, 0, 8);
3215 WRITEXMM_HI64( ea, XMM((modrm >> 3) & 0x7));
3216 CYCLES(1); // TODO: correct cycle count
// MOVHPD m64, xmm (66 0f 17): stores the high qword of XMM(reg) to memory
// with WRITEXMM_HI64.
// The register form is treated as unsupported: it only charges one cycle.
3220 void I386_OPS_BASE::SSEOP(movhpd_m64_r128)() // Opcode 66 0f 17
3222 UINT8 modrm = FETCH();
3223 if( modrm >= 0xc0 ) {
3224 // unsupported by cpu
3225 CYCLES(1); // TODO: correct cycle count
3227 UINT32 ea = GetEA(modrm, 0, 8);
3228 WRITEXMM_HI64( ea, XMM((modrm >> 3) & 0x7));
3229 CYCLES(1); // TODO: correct cycle count
// MOVNTPS m128, xmm (0f 2b): stores XMM(reg) to memory with WRITEXMM, i.e.
// exactly like an ordinary 16-byte store; the non-temporal hint has no effect
// because the cache is not modelled.
// The register form is treated as unsupported: it only charges one cycle.
3233 void I386_OPS_BASE::SSEOP(movntps_m128_r128)() // Opcode 0f 2b
3235 UINT8 modrm = FETCH();
3236 if( modrm >= 0xc0 ) {
3237 // unsupported by cpu
3238 CYCLES(1); // TODO: correct cycle count
3240 // since cache is not implemented
3241 UINT32 ea = GetEA(modrm, 0, 16);
3242 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
3243 CYCLES(1); // TODO: correct cycle count
// MOVMSKPS r16, xmm (0f 50, 16-bit destination): collects the sign bit
// (bit 31) of each of the four dwords of XMM(rm) into bits 0..3 of b and
// stores b with STORE_REG16.
// Only the register form (modrm >= 0xc0) does any work.
3247 void I386_OPS_BASE::SSEOP(movmskps_r16_r128)() // Opcode 0f 50
3249 UINT8 modrm = FETCH();
3250 if( modrm >= 0xc0 ) {
3252 b=(XMM(modrm & 0x7).d[0] >> 31) & 1;
3253 b=b | ((XMM(modrm & 0x7).d[1] >> 30) & 2);
3254 b=b | ((XMM(modrm & 0x7).d[2] >> 29) & 4);
3255 b=b | ((XMM(modrm & 0x7).d[3] >> 28) & 8);
3256 STORE_REG16(modrm, b);
3258 CYCLES(1); // TODO: correct cycle count
// MOVMSKPS r32, xmm (0f 50, 32-bit destination): collects the sign bit
// (bit 31) of each of the four dwords of XMM(rm) into bits 0..3 of b and
// stores b with STORE_REG32.
// Only the register form (modrm >= 0xc0) does any work.
3261 void I386_OPS_BASE::SSEOP(movmskps_r32_r128)() // Opcode 0f 50
3263 UINT8 modrm = FETCH();
3264 if( modrm >= 0xc0 ) {
3266 b=(XMM(modrm & 0x7).d[0] >> 31) & 1;
3267 b=b | ((XMM(modrm & 0x7).d[1] >> 30) & 2);
3268 b=b | ((XMM(modrm & 0x7).d[2] >> 29) & 4);
3269 b=b | ((XMM(modrm & 0x7).d[3] >> 28) & 8);
3270 STORE_REG32(modrm, b);
3272 CYCLES(1); // TODO: correct cycle count
// MOVMSKPD r32, xmm (66 0f 50): collects the sign bit (bit 63) of each of the
// two qwords of XMM(rm) into bits 0..1 of b and stores b with STORE_REG32.
// Only the register form (modrm >= 0xc0) does any work.
3275 void I386_OPS_BASE::SSEOP(movmskpd_r32_r128)() // Opcode 66 0f 50
3277 UINT8 modrm = FETCH();
3278 if( modrm >= 0xc0 ) {
3280 b=(XMM(modrm & 0x7).q[0] >> 63) & 1;
3281 b=b | ((XMM(modrm & 0x7).q[1] >> 62) & 2);
3282 STORE_REG32(modrm, b);
3284 CYCLES(1); // TODO: correct cycle count
// MOVQ2DQ xmm, mm (f3 0f d6): copies the 64-bit MMX(rm) value into the low
// qword of XMM(reg) and clears the high qword.
// Only the register form (modrm >= 0xc0) does any work.
3287 void I386_OPS_BASE::SSEOP(movq2dq_r128_r64)() // Opcode f3 0f d6
3290 UINT8 modrm = FETCH();
3291 if( modrm >= 0xc0 ) {
3292 XMM((modrm >> 3) & 0x7).q[0] = MMX(modrm & 7).q;
3293 XMM((modrm >> 3) & 0x7).q[1] = 0;
3295 CYCLES(1); // TODO: correct cycle count
// MOVDQU xmm, xmm/m128 (f3 0f 6f): copies both qwords of XMM(rm) into
// XMM(reg), or loads XMM(reg) from a 16-byte memory operand with READXMM.
3298 void I386_OPS_BASE::SSEOP(movdqu_r128_rm128)() // Opcode f3 0f 6f
3301 UINT8 modrm = FETCH();
3302 if( modrm >= 0xc0 ) {
3303 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
3304 XMM((modrm >> 3) & 0x7).q[1] = XMM(modrm & 0x7).q[1];
3306 UINT32 ea = GetEA(modrm, 0, 16);
3307 READXMM( ea, XMM((modrm >> 3) & 0x7));
3309 CYCLES(1); // TODO: correct cycle count
// MOVDQU xmm/m128, xmm (f3 0f 7f): copies both qwords of XMM(reg) into
// XMM(rm), or stores XMM(reg) to a 16-byte memory operand with WRITEXMM.
3312 void I386_OPS_BASE::SSEOP(movdqu_rm128_r128)() // Opcode f3 0f 7f
3315 UINT8 modrm = FETCH();
3316 if( modrm >= 0xc0 ) {
3317 XMM(modrm & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0];
3318 XMM(modrm & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1];
3320 UINT32 ea = GetEA(modrm, 0, 16);
3321 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
3323 CYCLES(1); // TODO: correct cycle count
// MOVD xmm, r/m32 (66 0f 6e): loads d[0] of XMM(reg) from a general register
// (LOAD_RM32) or from a 32-bit memory read, then clears d[1] and q[1].
// NOTE(review): zeroing d[1] and q[1] covers the upper 96 bits only if d[2],
// d[3] alias q[1] - presumably true; confirm against the XMM_REG definition.
3326 void I386_OPS_BASE::SSEOP(movd_m128_rm32)() // Opcode 66 0f 6e
3328 UINT8 modrm = FETCH();
3329 if (modrm >= 0xc0) {
3330 XMM((modrm >> 3) & 0x7).d[0] = LOAD_RM32(modrm);
3333 UINT32 ea = GetEA(modrm, 0, 4);
3334 XMM((modrm >> 3) & 0x7).d[0] = READ32(ea);
3336 XMM((modrm >> 3) & 0x7).d[1] = 0;
3337 XMM((modrm >> 3) & 0x7).q[1] = 0;
3338 CYCLES(1); // TODO: correct cycle count
// MOVDQA xmm, xmm/m128 (66 0f 6f): copies both qwords of XMM(rm) into
// XMM(reg), or loads XMM(reg) from a 16-byte memory operand with READXMM.
// NOTE(review): no alignment check is visible in the memory form - confirm
// whether GetEA/READXMM enforce the 16-byte alignment this opcode requires.
3341 void I386_OPS_BASE::SSEOP(movdqa_m128_rm128)() // Opcode 66 0f 6f
3343 UINT8 modrm = FETCH();
3344 if (modrm >= 0xc0) {
3345 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
3346 XMM((modrm >> 3) & 0x7).q[1] = XMM(modrm & 0x7).q[1];
3349 UINT32 ea = GetEA(modrm, 0, 16);
3350 READXMM( ea, XMM((modrm >> 3) & 0x7));
3352 CYCLES(1); // TODO: correct cycle count
// MOVQ xmm, xmm/m64 (f3 0f 7e): loads the low qword of XMM(reg) from
// XMM(rm).q[0] or from a 64-bit memory read, and clears the high qword in
// both forms.
3355 void I386_OPS_BASE::SSEOP(movq_r128_r128m64)() // Opcode f3 0f 7e
3358 UINT8 modrm = FETCH();
3359 if( modrm >= 0xc0 ) {
3360 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
3361 XMM((modrm >> 3) & 0x7).q[1] = 0;
3363 UINT32 ea = GetEA(modrm, 0, 8);
3364 XMM((modrm >> 3) & 0x7).q[0] = READ64( ea);
3365 XMM((modrm >> 3) & 0x7).q[1] = 0;
3367 CYCLES(1); // TODO: correct cycle count
// MOVD r/m32, xmm (66 0f 7e): stores the low dword d[0] of XMM(reg) into a
// general register (STORE_RM32) or writes it as 32 bits to memory.
3370 void I386_OPS_BASE::SSEOP(movd_rm32_r128)() // Opcode 66 0f 7e
3372 UINT8 modrm = FETCH();
3373 if (modrm >= 0xc0) {
3374 STORE_RM32(modrm, XMM((modrm >> 3) & 0x7).d[0]);
3377 UINT32 ea = GetEA(modrm, 0, 4);
3378 WRITE32(ea, XMM((modrm >> 3) & 0x7).d[0]);
3380 CYCLES(1); // TODO: correct cycle count
// MOVDQA xmm/m128, xmm (66 0f 7f): copies both qwords of XMM(reg) into
// XMM(rm), or stores XMM(reg) to a 16-byte memory operand with WRITEXMM.
// NOTE(review): no alignment check is visible in the memory form - confirm
// whether GetEA/WRITEXMM enforce the 16-byte alignment this opcode requires.
3383 void I386_OPS_BASE::SSEOP(movdqa_rm128_r128)() // Opcode 66 0f 7f
3385 UINT8 modrm = FETCH();
3386 if (modrm >= 0xc0) {
3387 XMM(modrm & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0];
3388 XMM(modrm & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1];
3391 UINT32 ea = GetEA(modrm, 0, 16);
3392 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
3394 CYCLES(1); // TODO: correct cycle count
// PMOVMSKB r16, mm (0f d7, 16-bit destination): builds an 8-bit mask in b
// where bit k is the top bit (bit 7) of byte k of MMX(rm), then stores b
// with STORE_REG16.
// Only the register form (modrm >= 0xc0) does any work.
3397 void I386_OPS_BASE::SSEOP(pmovmskb_r16_r64)() // Opcode 0f d7
3400 UINT8 modrm = FETCH();
3401 if( modrm >= 0xc0 ) {
3403 b=(MMX(modrm & 0x7).b[0] >> 7) & 1;
3404 b=b | ((MMX(modrm & 0x7).b[1] >> 6) & 2);
3405 b=b | ((MMX(modrm & 0x7).b[2] >> 5) & 4);
3406 b=b | ((MMX(modrm & 0x7).b[3] >> 4) & 8);
3407 b=b | ((MMX(modrm & 0x7).b[4] >> 3) & 16);
3408 b=b | ((MMX(modrm & 0x7).b[5] >> 2) & 32);
3409 b=b | ((MMX(modrm & 0x7).b[6] >> 1) & 64);
3410 b=b | ((MMX(modrm & 0x7).b[7] >> 0) & 128);
3411 STORE_REG16(modrm, b);
3413 CYCLES(1); // TODO: correct cycle count
// PMOVMSKB r32, mm (0f d7, 32-bit destination): builds an 8-bit mask in b
// where bit k is the top bit (bit 7) of byte k of MMX(rm), then stores b
// with STORE_REG32.
// Only the register form (modrm >= 0xc0) does any work.
3416 void I386_OPS_BASE::SSEOP(pmovmskb_r32_r64)() // Opcode 0f d7
3419 UINT8 modrm = FETCH();
3420 if( modrm >= 0xc0 ) {
3422 b=(MMX(modrm & 0x7).b[0] >> 7) & 1;
3423 b=b | ((MMX(modrm & 0x7).b[1] >> 6) & 2);
3424 b=b | ((MMX(modrm & 0x7).b[2] >> 5) & 4);
3425 b=b | ((MMX(modrm & 0x7).b[3] >> 4) & 8);
3426 b=b | ((MMX(modrm & 0x7).b[4] >> 3) & 16);
3427 b=b | ((MMX(modrm & 0x7).b[5] >> 2) & 32);
3428 b=b | ((MMX(modrm & 0x7).b[6] >> 1) & 64);
3429 b=b | ((MMX(modrm & 0x7).b[7] >> 0) & 128);
3430 STORE_REG32(modrm, b);
3432 CYCLES(1); // TODO: correct cycle count
// PMOVMSKB r32, xmm (66 0f d7): builds a 16-bit mask in b where bit k is the
// top bit (bit 7) of byte k of XMM(rm), then stores b with STORE_REG32.
// Bytes 0..7 are shifted right into place, bytes 8..15 are shifted left.
// Only the register form (modrm >= 0xc0) does any work.
3435 void I386_OPS_BASE::SSEOP(pmovmskb_r32_r128)() // Opcode 66 0f d7
3437 UINT8 modrm = FETCH();
3438 if( modrm >= 0xc0 ) {
3440 b=(XMM(modrm & 0x7).b[0] >> 7) & 1;
3441 b=b | ((XMM(modrm & 0x7).b[1] >> 6) & 2);
3442 b=b | ((XMM(modrm & 0x7).b[2] >> 5) & 4);
3443 b=b | ((XMM(modrm & 0x7).b[3] >> 4) & 8);
3444 b=b | ((XMM(modrm & 0x7).b[4] >> 3) & 16);
3445 b=b | ((XMM(modrm & 0x7).b[5] >> 2) & 32);
3446 b=b | ((XMM(modrm & 0x7).b[6] >> 1) & 64);
3447 b=b | ((XMM(modrm & 0x7).b[7] >> 0) & 128);
3448 b=b | ((XMM(modrm & 0x7).b[8] << 1) & 256);
3449 b=b | ((XMM(modrm & 0x7).b[9] << 2) & 512);
3450 b=b | ((XMM(modrm & 0x7).b[10] << 3) & 1024);
3451 b=b | ((XMM(modrm & 0x7).b[11] << 4) & 2048);
3452 b=b | ((XMM(modrm & 0x7).b[12] << 5) & 4096);
3453 b=b | ((XMM(modrm & 0x7).b[13] << 6) & 8192);
3454 b=b | ((XMM(modrm & 0x7).b[14] << 7) & 16384);
3455 b=b | ((XMM(modrm & 0x7).b[15] << 8) & 32768);
3456 STORE_REG32(modrm, b);
3458 CYCLES(1); // TODO: correct cycle count
// XORPS xmm, xmm/m128 (0f 57): XMM(reg) ^= source, performed dword by dword
// on d[0..3].
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3461 void I386_OPS_BASE::SSEOP(xorps)() // Opcode 0f 57
3463 UINT8 modrm = FETCH();
3464 if( modrm >= 0xc0 ) {
3465 XMM((modrm >> 3) & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0] ^ XMM(modrm & 0x7).d[0];
3466 XMM((modrm >> 3) & 0x7).d[1] = XMM((modrm >> 3) & 0x7).d[1] ^ XMM(modrm & 0x7).d[1];
3467 XMM((modrm >> 3) & 0x7).d[2] = XMM((modrm >> 3) & 0x7).d[2] ^ XMM(modrm & 0x7).d[2];
3468 XMM((modrm >> 3) & 0x7).d[3] = XMM((modrm >> 3) & 0x7).d[3] ^ XMM(modrm & 0x7).d[3];
3471 UINT32 ea = GetEA(modrm, 0, 16);
3473 XMM((modrm >> 3) & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0] ^ src.d[0];
3474 XMM((modrm >> 3) & 0x7).d[1] = XMM((modrm >> 3) & 0x7).d[1] ^ src.d[1];
3475 XMM((modrm >> 3) & 0x7).d[2] = XMM((modrm >> 3) & 0x7).d[2] ^ src.d[2];
3476 XMM((modrm >> 3) & 0x7).d[3] = XMM((modrm >> 3) & 0x7).d[3] ^ src.d[3];
3478 CYCLES(1); // TODO: correct cycle count
// XORPD xmm, xmm/m128 (66 0f 57): XMM(reg) ^= source, performed qword by
// qword on q[0..1].
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3481 void I386_OPS_BASE::SSEOP(xorpd_r128_rm128)() // Opcode 66 0f 57
3483 UINT8 modrm = FETCH();
3484 if( modrm >= 0xc0 ) {
3485 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] ^ XMM(modrm & 0x7).q[0];
3486 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] ^ XMM(modrm & 0x7).q[1];
3489 UINT32 ea = GetEA(modrm, 0, 16);
3491 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] ^ src.q[0];
3492 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] ^ src.q[1];
3494 CYCLES(1); // TODO: correct cycle count
// ADDPS xmm, xmm/m128 (0f 58): adds the four float lanes of the source to the
// matching lanes of XMM(reg) using host float arithmetic.
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3497 void I386_OPS_BASE::SSEOP(addps)() // Opcode 0f 58
3499 UINT8 modrm = FETCH();
3500 if( modrm >= 0xc0 ) {
3501 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + XMM(modrm & 0x7).f[0];
3502 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] + XMM(modrm & 0x7).f[1];
3503 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] + XMM(modrm & 0x7).f[2];
3504 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] + XMM(modrm & 0x7).f[3];
3507 UINT32 ea = GetEA(modrm, 0, 16);
3509 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + src.f[0];
3510 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] + src.f[1];
3511 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] + src.f[2];
3512 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] + src.f[3];
3514 CYCLES(1); // TODO: correct cycle count
// SQRTPS xmm, xmm/m128 (0f 51): stores sqrt() of each of the four float lanes
// of the source into the matching lane of XMM(reg).
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3517 void I386_OPS_BASE::SSEOP(sqrtps_r128_rm128)() // Opcode 0f 51
3519 UINT8 modrm = FETCH();
3520 if( modrm >= 0xc0 ) {
3521 XMM((modrm >> 3) & 0x7).f[0] = sqrt(XMM(modrm & 0x7).f[0]);
3522 XMM((modrm >> 3) & 0x7).f[1] = sqrt(XMM(modrm & 0x7).f[1]);
3523 XMM((modrm >> 3) & 0x7).f[2] = sqrt(XMM(modrm & 0x7).f[2]);
3524 XMM((modrm >> 3) & 0x7).f[3] = sqrt(XMM(modrm & 0x7).f[3]);
3527 UINT32 ea = GetEA(modrm, 0, 16);
3529 XMM((modrm >> 3) & 0x7).f[0] = sqrt(src.f[0]);
3530 XMM((modrm >> 3) & 0x7).f[1] = sqrt(src.f[1]);
3531 XMM((modrm >> 3) & 0x7).f[2] = sqrt(src.f[2]);
3532 XMM((modrm >> 3) & 0x7).f[3] = sqrt(src.f[3]);
3534 CYCLES(1); // TODO: correct cycle count
// RSQRTPS xmm, xmm/m128 (0f 52): stores 1.0 / sqrt() of each of the four
// float lanes of the source into the matching lane of XMM(reg). The value is
// computed exactly rather than as a hardware-style approximation.
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3537 void I386_OPS_BASE::SSEOP(rsqrtps_r128_rm128)() // Opcode 0f 52
3539 UINT8 modrm = FETCH();
3540 if( modrm >= 0xc0 ) {
3541 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(XMM(modrm & 0x7).f[0]);
3542 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / sqrt(XMM(modrm & 0x7).f[1]);
3543 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / sqrt(XMM(modrm & 0x7).f[2]);
3544 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / sqrt(XMM(modrm & 0x7).f[3]);
3547 UINT32 ea = GetEA(modrm, 0, 16);
3549 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(src.f[0]);
3550 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / sqrt(src.f[1]);
3551 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / sqrt(src.f[2]);
3552 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / sqrt(src.f[3]);
3554 CYCLES(1); // TODO: correct cycle count
// RCPPS xmm, xmm/m128 (0f 53): stores 1.0f / x for each of the four float
// lanes of the source into the matching lane of XMM(reg). The value is
// computed exactly rather than as a hardware-style approximation.
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3557 void I386_OPS_BASE::SSEOP(rcpps_r128_rm128)() // Opcode 0f 53
3559 UINT8 modrm = FETCH();
3560 if( modrm >= 0xc0 ) {
3561 XMM((modrm >> 3) & 0x7).f[0] = 1.0f / XMM(modrm & 0x7).f[0];
3562 XMM((modrm >> 3) & 0x7).f[1] = 1.0f / XMM(modrm & 0x7).f[1];
3563 XMM((modrm >> 3) & 0x7).f[2] = 1.0f / XMM(modrm & 0x7).f[2];
3564 XMM((modrm >> 3) & 0x7).f[3] = 1.0f / XMM(modrm & 0x7).f[3];
3567 UINT32 ea = GetEA(modrm, 0, 16);
3569 XMM((modrm >> 3) & 0x7).f[0] = 1.0f / src.f[0];
3570 XMM((modrm >> 3) & 0x7).f[1] = 1.0f / src.f[1];
3571 XMM((modrm >> 3) & 0x7).f[2] = 1.0f / src.f[2];
3572 XMM((modrm >> 3) & 0x7).f[3] = 1.0f / src.f[3];
3574 CYCLES(1); // TODO: correct cycle count
// ANDPS xmm, xmm/m128 (0f 54): XMM(reg) &= source, performed on the two
// qwords q[0], q[1].
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3577 void I386_OPS_BASE::SSEOP(andps_r128_rm128)() // Opcode 0f 54
3579 UINT8 modrm = FETCH();
3580 if( modrm >= 0xc0 ) {
3581 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & XMM(modrm & 0x7).q[0];
3582 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & XMM(modrm & 0x7).q[1];
3585 UINT32 ea = GetEA(modrm, 0, 16);
3587 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & src.q[0];
3588 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & src.q[1];
3590 CYCLES(1); // TODO: correct cycle count
// ANDPD xmm, xmm/m128 (66 0f 54): XMM(reg) &= source, performed on the two
// qwords q[0], q[1]; the visible statements match andps_r128_rm128.
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3593 void I386_OPS_BASE::SSEOP(andpd_r128_rm128)() // Opcode 66 0f 54
3595 UINT8 modrm = FETCH();
3596 if( modrm >= 0xc0 ) {
3597 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & XMM(modrm & 0x7).q[0];
3598 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & XMM(modrm & 0x7).q[1];
3601 UINT32 ea = GetEA(modrm, 0, 16);
3603 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & src.q[0];
3604 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & src.q[1];
3606 CYCLES(1); // TODO: correct cycle count
// ANDNPS xmm, xmm/m128 (0f 55): XMM(reg) = ~XMM(reg) & source, performed on
// the two qwords q[0], q[1]. Note that the destination, not the source, is
// the operand that is inverted.
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3609 void I386_OPS_BASE::SSEOP(andnps_r128_rm128)() // Opcode 0f 55
3611 UINT8 modrm = FETCH();
3612 if( modrm >= 0xc0 ) {
3613 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & XMM(modrm & 0x7).q[0];
3614 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & XMM(modrm & 0x7).q[1];
3617 UINT32 ea = GetEA(modrm, 0, 16);
3619 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & src.q[0];
3620 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & src.q[1];
3622 CYCLES(1); // TODO: correct cycle count
// ANDNPD xmm, xmm/m128 (66 0f 55): XMM(reg) = ~XMM(reg) & source, performed
// on the two qwords q[0], q[1]; the visible statements match
// andnps_r128_rm128.
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3625 void I386_OPS_BASE::SSEOP(andnpd_r128_rm128)() // Opcode 66 0f 55
3627 UINT8 modrm = FETCH();
3628 if( modrm >= 0xc0 ) {
3629 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & XMM(modrm & 0x7).q[0];
3630 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & XMM(modrm & 0x7).q[1];
3633 UINT32 ea = GetEA(modrm, 0, 16);
3635 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & src.q[0];
3636 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & src.q[1];
3638 CYCLES(1); // TODO: correct cycle count
// ORPS xmm, xmm/m128 (0f 56): XMM(reg) |= source, performed on the two
// qwords q[0], q[1].
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3641 void I386_OPS_BASE::SSEOP(orps_r128_rm128)() // Opcode 0f 56
3643 UINT8 modrm = FETCH();
3644 if( modrm >= 0xc0 ) {
3645 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | XMM(modrm & 0x7).q[0];
3646 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | XMM(modrm & 0x7).q[1];
3649 UINT32 ea = GetEA(modrm, 0, 16);
3651 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | src.q[0];
3652 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | src.q[1];
3654 CYCLES(1); // TODO: correct cycle count
// ORPD xmm, xmm/m128 (66 0f 56): XMM(reg) |= source, performed on the two
// qwords q[0], q[1]; the visible statements match orps_r128_rm128.
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3657 void I386_OPS_BASE::SSEOP(orpd_r128_rm128)() // Opcode 66 0f 56
3659 UINT8 modrm = FETCH();
3660 if( modrm >= 0xc0 ) {
3661 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | XMM(modrm & 0x7).q[0];
3662 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | XMM(modrm & 0x7).q[1];
3665 UINT32 ea = GetEA(modrm, 0, 16);
3667 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | src.q[0];
3668 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | src.q[1];
3670 CYCLES(1); // TODO: correct cycle count
// MULPS xmm, xmm/m128: multiplies the four float lanes of XMM(reg) by the
// matching lanes of the source using host float arithmetic.
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3673 void I386_OPS_BASE::SSEOP(mulps)() // Opcode 0f 59 ????
3675 UINT8 modrm = FETCH();
3676 if( modrm >= 0xc0 ) {
3677 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * XMM(modrm & 0x7).f[0];
3678 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] * XMM(modrm & 0x7).f[1];
3679 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] * XMM(modrm & 0x7).f[2];
3680 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] * XMM(modrm & 0x7).f[3];
3683 UINT32 ea = GetEA(modrm, 0, 16);
3685 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * src.f[0];
3686 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] * src.f[1];
3687 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] * src.f[2];
3688 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] * src.f[3];
3690 CYCLES(1); // TODO: correct cycle count
// SUBPS xmm, xmm/m128 (0f 5c): subtracts the four float lanes of the source
// from the matching lanes of XMM(reg) using host float arithmetic.
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3693 void I386_OPS_BASE::SSEOP(subps)() // Opcode 0f 5c
3695 UINT8 modrm = FETCH();
3696 if( modrm >= 0xc0 ) {
3697 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0];
3698 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] - XMM(modrm & 0x7).f[1];
3699 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] - XMM(modrm & 0x7).f[2];
3700 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] - XMM(modrm & 0x7).f[3];
3703 UINT32 ea = GetEA(modrm, 0, 16);
3705 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - src.f[0];
3706 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] - src.f[1];
3707 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] - src.f[2];
3708 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] - src.f[3];
3710 CYCLES(1); // TODO: correct cycle count
// Helper used by minps to pick the minimum of two single-precision values.
// It starts with a commented-out special case for both operands being zero.
// NOTE(review): confirm the live selection logic and its NaN/zero behaviour
// against the full body.
3713 INLINE float sse_min_single(float src1, float src2)
3715 /*if ((src1 == 0) && (src2 == 0))
// Double-precision counterpart of sse_min_single.
// It starts with a commented-out special case for both operands being zero.
// NOTE(review): confirm the live selection logic and its NaN/zero behaviour
// against the full body.
3726 INLINE double sse_min_double(double src1, double src2)
3728 /*if ((src1 == 0) && (src2 == 0))
// MINPS xmm, xmm/m128 (0f 5d): replaces each of the four float lanes of
// XMM(reg) with sse_min_single(destination lane, source lane).
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3739 void I386_OPS_BASE::SSEOP(minps)() // Opcode 0f 5d
3741 UINT8 modrm = FETCH();
3742 if( modrm >= 0xc0 ) {
3743 XMM((modrm >> 3) & 0x7).f[0] = sse_min_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
3744 XMM((modrm >> 3) & 0x7).f[1] = sse_min_single(XMM((modrm >> 3) & 0x7).f[1], XMM(modrm & 0x7).f[1]);
3745 XMM((modrm >> 3) & 0x7).f[2] = sse_min_single(XMM((modrm >> 3) & 0x7).f[2], XMM(modrm & 0x7).f[2]);
3746 XMM((modrm >> 3) & 0x7).f[3] = sse_min_single(XMM((modrm >> 3) & 0x7).f[3], XMM(modrm & 0x7).f[3]);
3749 UINT32 ea = GetEA(modrm, 0, 16);
3751 XMM((modrm >> 3) & 0x7).f[0] = sse_min_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3752 XMM((modrm >> 3) & 0x7).f[1] = sse_min_single(XMM((modrm >> 3) & 0x7).f[1], src.f[1]);
3753 XMM((modrm >> 3) & 0x7).f[2] = sse_min_single(XMM((modrm >> 3) & 0x7).f[2], src.f[2]);
3754 XMM((modrm >> 3) & 0x7).f[3] = sse_min_single(XMM((modrm >> 3) & 0x7).f[3], src.f[3]);
3756 CYCLES(1); // TODO: correct cycle count
// DIVPS xmm, xmm/m128 (0f 5e): divides the four float lanes of XMM(reg) by
// the matching lanes of the source using host float arithmetic.
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3759 void I386_OPS_BASE::SSEOP(divps)() // Opcode 0f 5e
3761 UINT8 modrm = FETCH();
3762 if( modrm >= 0xc0 ) {
3763 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / XMM(modrm & 0x7).f[0];
3764 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] / XMM(modrm & 0x7).f[1];
3765 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] / XMM(modrm & 0x7).f[2];
3766 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] / XMM(modrm & 0x7).f[3];
3769 UINT32 ea = GetEA(modrm, 0, 16);
3771 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / src.f[0];
3772 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] / src.f[1];
3773 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] / src.f[2];
3774 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] / src.f[3];
3776 CYCLES(1); // TODO: correct cycle count
// Helper used by maxps and maxss to pick the maximum of two single-precision
// values.
// It starts with a commented-out special case for both operands being zero.
// NOTE(review): confirm the live selection logic and its NaN/zero behaviour
// against the full body.
3779 INLINE float sse_max_single(float src1, float src2)
3781 /*if ((src1 == 0) && (src2 == 0))
// Double-precision counterpart of sse_max_single.
// It starts with a commented-out special case for both operands being zero.
// NOTE(review): confirm the live selection logic and its NaN/zero behaviour
// against the full body.
3792 INLINE double sse_max_double(double src1, double src2)
3794 /*if ((src1 == 0) && (src2 == 0))
// MAXPS xmm, xmm/m128 (0f 5f): replaces each of the four float lanes of
// XMM(reg) with sse_max_single(destination lane, source lane).
// Register form uses XMM(rm); the memory form uses a temporary src
// (16-byte operand requested from GetEA).
3805 void I386_OPS_BASE::SSEOP(maxps)() // Opcode 0f 5f
3807 UINT8 modrm = FETCH();
3808 if( modrm >= 0xc0 ) {
3809 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
3810 XMM((modrm >> 3) & 0x7).f[1] = sse_max_single(XMM((modrm >> 3) & 0x7).f[1], XMM(modrm & 0x7).f[1]);
3811 XMM((modrm >> 3) & 0x7).f[2] = sse_max_single(XMM((modrm >> 3) & 0x7).f[2], XMM(modrm & 0x7).f[2]);
3812 XMM((modrm >> 3) & 0x7).f[3] = sse_max_single(XMM((modrm >> 3) & 0x7).f[3], XMM(modrm & 0x7).f[3]);
3815 UINT32 ea = GetEA(modrm, 0, 16);
3817 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3818 XMM((modrm >> 3) & 0x7).f[1] = sse_max_single(XMM((modrm >> 3) & 0x7).f[1], src.f[1]);
3819 XMM((modrm >> 3) & 0x7).f[2] = sse_max_single(XMM((modrm >> 3) & 0x7).f[2], src.f[2]);
3820 XMM((modrm >> 3) & 0x7).f[3] = sse_max_single(XMM((modrm >> 3) & 0x7).f[3], src.f[3]);
3822 CYCLES(1); // TODO: correct cycle count
// MAXSS xmm, xmm/m32 (f3 0f 5f): replaces the low float f[0] of XMM(reg) with
// sse_max_single(f[0], source). The source is XMM(rm).f[0] in the register
// form, or a 32-bit memory read placed in src.d[0] in the memory form.
// Upper lanes of the destination are not touched.
3825 void I386_OPS_BASE::SSEOP(maxss_r128_r128m32)() // Opcode f3 0f 5f
3827 UINT8 modrm = FETCH();
3828 if( modrm >= 0xc0 ) {
3829 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
3832 UINT32 ea = GetEA(modrm, 0, 4);
3833 src.d[0]=READ32(ea);
3834 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3836 CYCLES(1); // TODO: correct cycle count
// ADDSS xmm, xmm/m32 (f3 0f 58): adds the low float of the source to f[0] of
// XMM(reg); upper lanes of the destination are not touched.
// NOTE(review): the memory form passes 16 to GetEA although only a 32-bit
// scalar is consumed (maxss, rcpss and sqrtss pass 4) - confirm whether the
// operand is also being read as a full 16 bytes.
3839 void I386_OPS_BASE::SSEOP(addss)() // Opcode f3 0f 58
3841 UINT8 modrm = FETCH();
3842 if( modrm >= 0xc0 ) {
3843 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + XMM(modrm & 0x7).f[0];
3846 UINT32 ea = GetEA(modrm, 0, 16);
3848 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + src.f[0];
3850 CYCLES(1); // TODO: correct cycle count
// SUBSS xmm, xmm/m32 (f3 0f 5c): subtracts the low float of the source from
// f[0] of XMM(reg); upper lanes of the destination are not touched.
// NOTE(review): the memory form passes 16 to GetEA although only a 32-bit
// scalar is consumed (maxss, rcpss and sqrtss pass 4) - confirm whether the
// operand is also being read as a full 16 bytes.
3853 void I386_OPS_BASE::SSEOP(subss)() // Opcode f3 0f 5c
3855 UINT8 modrm = FETCH();
3856 if( modrm >= 0xc0 ) {
3857 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0];
3860 UINT32 ea = GetEA(modrm, 0, 16);
3862 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - src.f[0];
3864 CYCLES(1); // TODO: correct cycle count
// MULSS xmm, xmm/m32: multiplies f[0] of XMM(reg) by the low float of the
// source; upper lanes of the destination are not touched.
// The scalar form shares its final opcode byte (59) with mulps and adds the
// f3 prefix.
// NOTE(review): the memory form passes 16 to GetEA although only a 32-bit
// scalar is consumed (maxss, rcpss and sqrtss pass 4) - confirm whether the
// operand is also being read as a full 16 bytes.
3867 void I386_OPS_BASE::SSEOP(mulss)() // Opcode f3 0f 59
3869 UINT8 modrm = FETCH();
3870 if( modrm >= 0xc0 ) {
3871 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * XMM(modrm & 0x7).f[0];
3874 UINT32 ea = GetEA(modrm, 0, 16);
3876 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * src.f[0];
3878 CYCLES(1); // TODO: correct cycle count
// DIVSS xmm, xmm/m32: divides f[0] of XMM(reg) by the low float of the
// source; upper lanes of the destination are not touched.
// The scalar form shares its final opcode byte (5e) with divps and adds the
// f3 prefix.
// NOTE(review): the memory form passes 16 to GetEA although only a 32-bit
// scalar is consumed (maxss, rcpss and sqrtss pass 4) - confirm whether the
// operand is also being read as a full 16 bytes.
3881 void I386_OPS_BASE::SSEOP(divss)() // Opcode f3 0f 5e
3883 UINT8 modrm = FETCH();
3884 if( modrm >= 0xc0 ) {
3885 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / XMM(modrm & 0x7).f[0];
3888 UINT32 ea = GetEA(modrm, 0, 16);
3890 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / src.f[0];
3892 CYCLES(1); // TODO: correct cycle count
// RCPSS xmm, xmm/m32 (f3 0f 53): stores 1.0f / x into f[0] of XMM(reg), where
// x is XMM(rm).f[0] or the low float of a temporary s in the memory form.
// The reciprocal is computed exactly rather than approximated; upper lanes of
// the destination are not touched.
3895 void I386_OPS_BASE::SSEOP(rcpss_r128_r128m32)() // Opcode f3 0f 53
3897 UINT8 modrm = FETCH();
3898 if( modrm >= 0xc0 ) {
3899 XMM((modrm >> 3) & 0x7).f[0] = 1.0f / XMM(modrm & 0x7).f[0];
3902 UINT32 ea = GetEA(modrm, 0, 4);
3904 XMM((modrm >> 3) & 0x7).f[0] = 1.0f / s.f[0];
3906 CYCLES(1); // TODO: correct cycle count
// SQRTSS xmm, xmm/m32 (f3 0f 51): stores sqrt(x) into f[0] of XMM(reg), where
// x is XMM(rm).f[0] or the low float of a temporary s in the memory form.
// Upper lanes of the destination are not touched.
3909 void I386_OPS_BASE::SSEOP(sqrtss_r128_r128m32)() // Opcode f3 0f 51
3911 UINT8 modrm = FETCH();
3912 if( modrm >= 0xc0 ) {
3913 XMM((modrm >> 3) & 0x7).f[0] = sqrt(XMM(modrm & 0x7).f[0]);
3916 UINT32 ea = GetEA(modrm, 0, 4);
3918 XMM((modrm >> 3) & 0x7).f[0] = sqrt(s.f[0]);
3920 CYCLES(1); // TODO: correct cycle count
// RSQRTSS xmm, xmm/m32 (f3 0f 52): stores 1.0 / sqrt(x) into f[0] of
// XMM(reg), where x is XMM(rm).f[0] or the low float of a temporary s in the
// memory form. The value is computed exactly rather than approximated; upper
// lanes of the destination are not touched.
3923 void I386_OPS_BASE::SSEOP(rsqrtss_r128_r128m32)() // Opcode f3 0f 52
3925 UINT8 modrm = FETCH();
3926 if( modrm >= 0xc0 ) {
3927 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(XMM(modrm & 0x7).f[0]);
3930 UINT32 ea = GetEA(modrm, 0, 4);
3932 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(s.f[0]);
3934 CYCLES(1); // TODO: correct cycle count
// MINSS xmm, xmm/m32 (f3 0f 5d): keeps f[0] of XMM(reg) when it is strictly
// less than the source scalar, otherwise replaces it with the source. Because
// the test is a plain '<', the source is chosen whenever the comparison is
// false (equal values, or any NaN operand).
// The source is XMM(rm).f[0], or a 32-bit memory read placed in s.d[0].
// NOTE(review): minps goes through sse_min_single while this handler inlines
// the comparison - confirm the two are meant to behave the same.
3937 void I386_OPS_BASE::SSEOP(minss_r128_r128m32)() // Opcode f3 0f 5d
3939 UINT8 modrm = FETCH();
3940 if( modrm >= 0xc0 ) {
3941 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] < XMM(modrm & 0x7).f[0] ? XMM((modrm >> 3) & 0x7).f[0] : XMM(modrm & 0x7).f[0];
3944 UINT32 ea = GetEA(modrm, 0, 4);
3945 s.d[0] = READ32(ea);
3946 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] < s.f[0] ? XMM((modrm >> 3) & 0x7).f[0] : s.f[0];
3948 CYCLES(1); // TODO: correct cycle count
// COMISS xmm, xmm/m32 (0f 2f): compares the low single of XMM(reg) (a) with
// the low single of the source (b). The raw 32-bit patterns are taken from
// d[0] and tested with float32_is_nan, float32_eq and float32_lt.
// NOTE(review): the memory form passes 16 to GetEA although the operand is a
// 32-bit scalar - confirm.
3951 void I386_OPS_BASE::SSEOP(comiss_r128_r128m32)() // Opcode 0f 2f
3954 UINT8 modrm = FETCH();
3955 if( modrm >= 0xc0 ) {
3956 a = XMM((modrm >> 3) & 0x7).d[0];
3957 b = XMM(modrm & 0x7).d[0];
3960 UINT32 ea = GetEA(modrm, 0, 16);
3962 a = XMM((modrm >> 3) & 0x7).d[0];
3968 if (float32_is_nan(a) || float32_is_nan(b))
3979 if (float32_eq(a, b))
3981 if (float32_lt(a, b))
3984 // should generate exception when at least one of the operands is either QNaN or SNaN
3985 CYCLES(1); // TODO: correct cycle count
// COMISD xmm, xmm/m64 (66 0f 2f): compares the low double of XMM(reg) (a)
// with the low double of the source (b). The raw 64-bit patterns are taken
// from q[0] and tested with float64_is_nan, float64_eq and float64_lt.
// NOTE(review): the memory form passes 16 to GetEA although the operand is a
// 64-bit scalar - confirm.
3988 void I386_OPS_BASE::SSEOP(comisd_r128_r128m64)() // Opcode 66 0f 2f
3991 UINT8 modrm = FETCH();
3992 if( modrm >= 0xc0 ) {
3993 a = XMM((modrm >> 3) & 0x7).q[0];
3994 b = XMM(modrm & 0x7).q[0];
3997 UINT32 ea = GetEA(modrm, 0, 16);
3999 a = XMM((modrm >> 3) & 0x7).q[0];
4005 if (float64_is_nan(a) || float64_is_nan(b))
4016 if (float64_eq(a, b))
4018 if (float64_lt(a, b))
4021 // should generate exception when at least one of the operands is either QNaN or SNaN
4022 CYCLES(1); // TODO: correct cycle count
// UCOMISS xmm, xmm/m32 (0f 2e): compares the low single of XMM(reg) (a) with
// the low single of the source (b). The raw 32-bit patterns are taken from
// d[0] and tested with float32_is_nan, float32_eq and float32_lt; the visible
// statements are the same as in comiss_r128_r128m32.
// NOTE(review): the memory form passes 16 to GetEA although the operand is a
// 32-bit scalar - confirm.
4025 void I386_OPS_BASE::SSEOP(ucomiss_r128_r128m32)() // Opcode 0f 2e
4028 UINT8 modrm = FETCH();
4029 if( modrm >= 0xc0 ) {
4030 a = XMM((modrm >> 3) & 0x7).d[0];
4031 b = XMM(modrm & 0x7).d[0];
4034 UINT32 ea = GetEA(modrm, 0, 16);
4036 a = XMM((modrm >> 3) & 0x7).d[0];
4042 if (float32_is_nan(a) || float32_is_nan(b))
4053 if (float32_eq(a, b))
4055 if (float32_lt(a, b))
4058 // should generate exception when at least one of the operands is SNaN
4059 CYCLES(1); // TODO: correct cycle count
// UCOMISD xmm, xmm/m64 (66 0f 2e): compares the low double of XMM(reg) (a)
// with the low double of the source (b). The raw 64-bit patterns are taken
// from q[0] and tested with float64_is_nan, float64_eq and float64_lt; the
// visible statements are the same as in comisd_r128_r128m64.
// NOTE(review): the memory form passes 16 to GetEA although the operand is a
// 64-bit scalar - confirm.
4062 void I386_OPS_BASE::SSEOP(ucomisd_r128_r128m64)() // Opcode 66 0f 2e
4065 UINT8 modrm = FETCH();
4066 if( modrm >= 0xc0 ) {
4067 a = XMM((modrm >> 3) & 0x7).q[0];
4068 b = XMM(modrm & 0x7).q[0];
4071 UINT32 ea = GetEA(modrm, 0, 16);
4073 a = XMM((modrm >> 3) & 0x7).q[0];
4079 if (float64_is_nan(a) || float64_is_nan(b))
4090 if (float64_eq(a, b))
4092 if (float64_lt(a, b))
4095 // should generate exception when at least one of the operands is SNaN
4096 CYCLES(1); // TODO: correct cycle count
// SHUFPS xmm, xmm/m128, imm8 (0f c6): dword shuffle into XMM(d), where d is
// the modrm reg field and sel is the selector byte.
// In the memory form the upper two destination dwords are taken from the
// memory operand src at indices m3 and m4.
// NOTE(review): sel is fetched immediately after modrm, before GetEA consumes
// any SIB/displacement bytes of a memory operand; in the instruction encoding
// the immediate follows the displacement - confirm the fetch order is right.
4099 void I386_OPS_BASE::SSEOP(shufps)() // Opcode 0f c6
4101 UINT8 modrm = FETCH();
4102 UINT8 sel = FETCH();
4110 d=(modrm >> 3) & 0x7;
4111 if( modrm >= 0xc0 ) {
4124 UINT32 ea = GetEA(modrm, 0, 16);
4130 XMM(d).d[2]=src.d[m3];
4131 XMM(d).d[3]=src.d[m4];
4133 CYCLES(1); // TODO: correct cycle count
// SHUFPD xmm, xmm/m128, imm8 (66 0f c6): qword shuffle into XMM(d), where d
// is the modrm reg field and sel is the selector byte.
// In the memory form the high destination qword is taken from the memory
// operand src at index m2.
// NOTE(review): sel is fetched immediately after modrm, before GetEA consumes
// any SIB/displacement bytes of a memory operand; in the instruction encoding
// the immediate follows the displacement - confirm the fetch order is right.
4136 void I386_OPS_BASE::SSEOP(shufpd_r128_rm128_i8)() // Opcode 66 0f c6
4138 UINT8 modrm = FETCH();
4139 UINT8 sel = FETCH();
4145 d=(modrm >> 3) & 0x7;
4146 if( modrm >= 0xc0 ) {
4155 UINT32 ea = GetEA(modrm, 0, 16);
4159 XMM(d).q[1]=src.q[m2];
4161 CYCLES(1); // TODO: correct cycle count
// UNPCKLPS xmm, xmm/m128 (0f 14): interleaves the low dwords of destination
// and source into XMM(d), d being the modrm reg field.
// In the memory form the odd destination dwords d[1] and d[3] receive
// src.d[0] and src.d[1]; d[3] is written before d[1].
// t1..t4 are 32-bit temporaries.
4164 void I386_OPS_BASE::SSEOP(unpcklps_r128_rm128)() // Opcode 0f 14
4166 UINT8 modrm = FETCH();
4168 UINT32 t1, t2, t3, t4;
4170 d=(modrm >> 3) & 0x7;
4171 if( modrm >= 0xc0 ) {
4182 UINT32 ea = GetEA(modrm, 0, 16);
4185 XMM(d).d[3]=src.d[1];
4187 XMM(d).d[1]=src.d[0];
4189 CYCLES(1); // TODO: correct cycle count
// UNPCKLPD handler: the destination keeps its own low qword and receives the
// source operand's low qword in its high qword.
4192 void I386_OPS_BASE::SSEOP(unpcklpd_r128_rm128)() // Opcode 66 0f 14
4194 UINT8 modrm = FETCH();
4197 d=(modrm >> 3) & 0x7; // destination register index
4198 if( modrm >= 0xc0 ) { // register source
4199 XMM(d).q[1]=XMM(s).q[0];
4200 XMM(d).q[0]=XMM(d).q[0]; // self-assignment: low qword is left unchanged
4203 UINT32 ea = GetEA(modrm, 0, 16); // memory source
4205 XMM(d).q[1]=src.q[0];
4206 XMM(d).q[0]=XMM(d).q[0]; // self-assignment: low qword is left unchanged
4208 CYCLES(1); // TODO: correct cycle count
// UNPCKHPS handler: interleaves dwords into the destination XMM register
// (index d). In the memory form shown below, source dwords 2 and 3 land in
// destination dwords 1 and 3 respectively.
4211 void I386_OPS_BASE::SSEOP(unpckhps_r128_rm128)() // Opcode 0f 15
4213 UINT8 modrm = FETCH();
4215 UINT32 t1, t2, t3, t4;
4217 d=(modrm >> 3) & 0x7; // destination register index
4218 if( modrm >= 0xc0 ) { // register source
4229 UINT32 ea = GetEA(modrm, 0, 16); // memory source
4234 XMM(d).d[1]=src.d[2];
4236 XMM(d).d[3]=src.d[3];
4238 CYCLES(1); // TODO: correct cycle count
// UNPCKHPD handler: the destination's high qword moves to its low qword, then
// the source operand's high qword is stored in the destination's high qword.
4241 void I386_OPS_BASE::SSEOP(unpckhpd_r128_rm128)() // Opcode 66 0f 15
4243 UINT8 modrm = FETCH();
4246 d=(modrm >> 3) & 0x7; // destination register index
4247 if( modrm >= 0xc0 ) { // register source
4248 XMM(d).q[0]=XMM(d).q[1];
4249 XMM(d).q[1]=XMM(s).q[1];
4252 UINT32 ea = GetEA(modrm, 0, 16); // memory source
4254 XMM(d).q[0]=XMM(d).q[1];
4255 XMM(d).q[1]=src.q[1];
4257 CYCLES(1); // TODO: correct cycle count
// Returns true when the two single-precision operands are "ordered", i.e.
// neither of them is a NaN (the ORD predicate of CMPPS/CMPSS).
// A NaN is the only value that compares unequal to itself, so each operand is
// tested with a self-comparison; no floating-point classification header is needed.
INLINE bool sse_issingleordered(float op1, float op2)
{
	return (op1 == op1) && (op2 == op2);
}
// Returns true when the two single-precision operands are "unordered", i.e.
// at least one of them is a NaN (the UNORD predicate of CMPPS/CMPSS).
// A NaN is the only value that compares unequal to itself, so each operand is
// tested with a self-comparison; no floating-point classification header is needed.
INLINE bool sse_issingleunordered(float op1, float op2)
{
	return (op1 != op1) || (op2 != op2);
}
// Returns true when the two double-precision operands are "ordered", i.e.
// neither of them is a NaN (the ORD predicate of CMPPD/CMPSD).
// A NaN is the only value that compares unequal to itself, so each operand is
// tested with a self-comparison; no floating-point classification header is needed.
INLINE bool sse_isdoubleordered(double op1, double op2)
{
	return (op1 == op1) && (op2 == op2);
}
// Returns true when the two double-precision operands are "unordered", i.e.
// at least one of them is a NaN (the UNORD predicate of CMPPD/CMPSD).
// A NaN is the only value that compares unequal to itself, so each operand is
// tested with a self-comparison; no floating-point classification header is needed.
INLINE bool sse_isdoubleunordered(double op1, double op2)
{
	return (op1 != op1) || (op2 != op2);
}
// Packed single-precision predicate compare: for each of the four float lanes
// writes an all-ones or all-zero dword mask into d depending on how d compares
// with s. The groups below are presumably selected by imm8 (the dispatch is not
// part of the lines listed here).
// NOTE(review): d is received by value; unless XMM_REG has reference semantics
// these stores only modify the local copy and never reach the caller's
// register - confirm against the declaration.
4284 void I386_OPS_BASE::SSEOP(predicate_compare_single)(UINT8 imm8, XMM_REG d, XMM_REG s)
// equal
4289 d.d[0]=d.f[0] == s.f[0] ? 0xffffffff : 0;
4290 d.d[1]=d.f[1] == s.f[1] ? 0xffffffff : 0;
4291 d.d[2]=d.f[2] == s.f[2] ? 0xffffffff : 0;
4292 d.d[3]=d.f[3] == s.f[3] ? 0xffffffff : 0;
// less-than
4295 d.d[0]=d.f[0] < s.f[0] ? 0xffffffff : 0;
4296 d.d[1]=d.f[1] < s.f[1] ? 0xffffffff : 0;
4297 d.d[2]=d.f[2] < s.f[2] ? 0xffffffff : 0;
4298 d.d[3]=d.f[3] < s.f[3] ? 0xffffffff : 0;
// less-than-or-equal
4301 d.d[0]=d.f[0] <= s.f[0] ? 0xffffffff : 0;
4302 d.d[1]=d.f[1] <= s.f[1] ? 0xffffffff : 0;
4303 d.d[2]=d.f[2] <= s.f[2] ? 0xffffffff : 0;
4304 d.d[3]=d.f[3] <= s.f[3] ? 0xffffffff : 0;
// unordered (delegated to sse_issingleunordered)
4307 d.d[0]=sse_issingleunordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
4308 d.d[1]=sse_issingleunordered(d.f[1], s.f[1]) ? 0xffffffff : 0;
4309 d.d[2]=sse_issingleunordered(d.f[2], s.f[2]) ? 0xffffffff : 0;
4310 d.d[3]=sse_issingleunordered(d.f[3], s.f[3]) ? 0xffffffff : 0;
// not-equal
4313 d.d[0]=d.f[0] != s.f[0] ? 0xffffffff : 0;
4314 d.d[1]=d.f[1] != s.f[1] ? 0xffffffff : 0;
4315 d.d[2]=d.f[2] != s.f[2] ? 0xffffffff : 0;
4316 d.d[3]=d.f[3] != s.f[3] ? 0xffffffff : 0;
// not-less-than: mask is set whenever "<" is false
4319 d.d[0]=d.f[0] < s.f[0] ? 0 : 0xffffffff;
4320 d.d[1]=d.f[1] < s.f[1] ? 0 : 0xffffffff;
4321 d.d[2]=d.f[2] < s.f[2] ? 0 : 0xffffffff;
4322 d.d[3]=d.f[3] < s.f[3] ? 0 : 0xffffffff;
// not-less-than-or-equal: mask is set whenever "<=" is false
4325 d.d[0]=d.f[0] <= s.f[0] ? 0 : 0xffffffff;
4326 d.d[1]=d.f[1] <= s.f[1] ? 0 : 0xffffffff;
4327 d.d[2]=d.f[2] <= s.f[2] ? 0 : 0xffffffff;
4328 d.d[3]=d.f[3] <= s.f[3] ? 0 : 0xffffffff;
// ordered (delegated to sse_issingleordered)
4331 d.d[0]=sse_issingleordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
4332 d.d[1]=sse_issingleordered(d.f[1], s.f[1]) ? 0xffffffff : 0;
4333 d.d[2]=sse_issingleordered(d.f[2], s.f[2]) ? 0xffffffff : 0;
4334 d.d[3]=sse_issingleordered(d.f[3], s.f[3]) ? 0xffffffff : 0;
// Packed double-precision predicate compare: for each of the two double lanes
// writes an all-ones or all-zero qword mask into d depending on how d compares
// with s. The groups below are presumably selected by imm8 (the dispatch is not
// part of the lines listed here).
// NOTE(review): d is received by value; unless XMM_REG has reference semantics
// these stores only modify the local copy and never reach the caller's
// register - confirm against the declaration.
4339 void I386_OPS_BASE::SSEOP(predicate_compare_double)(UINT8 imm8, XMM_REG d, XMM_REG s)
// equal
4344 d.q[0]=d.f64[0] == s.f64[0] ? U64(0xffffffffffffffff) : 0;
4345 d.q[1]=d.f64[1] == s.f64[1] ? U64(0xffffffffffffffff) : 0;
// less-than
4348 d.q[0]=d.f64[0] < s.f64[0] ? U64(0xffffffffffffffff) : 0;
4349 d.q[1]=d.f64[1] < s.f64[1] ? U64(0xffffffffffffffff) : 0;
// less-than-or-equal
4352 d.q[0]=d.f64[0] <= s.f64[0] ? U64(0xffffffffffffffff) : 0;
4353 d.q[1]=d.f64[1] <= s.f64[1] ? U64(0xffffffffffffffff) : 0;
// unordered (delegated to sse_isdoubleunordered)
4356 d.q[0]=sse_isdoubleunordered(d.f64[0], s.f64[0]) ? U64(0xffffffffffffffff) : 0;
4357 d.q[1]=sse_isdoubleunordered(d.f64[1], s.f64[1]) ? U64(0xffffffffffffffff) : 0;
// not-equal
4360 d.q[0]=d.f64[0] != s.f64[0] ? U64(0xffffffffffffffff) : 0;
4361 d.q[1]=d.f64[1] != s.f64[1] ? U64(0xffffffffffffffff) : 0;
// not-less-than: mask is set whenever "<" is false
4364 d.q[0]=d.f64[0] < s.f64[0] ? 0 : U64(0xffffffffffffffff);
4365 d.q[1]=d.f64[1] < s.f64[1] ? 0 : U64(0xffffffffffffffff);
// not-less-than-or-equal: mask is set whenever "<=" is false
4368 d.q[0]=d.f64[0] <= s.f64[0] ? 0 : U64(0xffffffffffffffff);
4369 d.q[1]=d.f64[1] <= s.f64[1] ? 0 : U64(0xffffffffffffffff);
// ordered (delegated to sse_isdoubleordered)
4372 d.q[0]=sse_isdoubleordered(d.f64[0], s.f64[0]) ? U64(0xffffffffffffffff) : 0;
4373 d.q[1]=sse_isdoubleordered(d.f64[1], s.f64[1]) ? U64(0xffffffffffffffff) : 0;
// Scalar single-precision predicate compare: writes an all-ones or all-zero
// mask into dword 0 of d only, depending on how float lane 0 of d compares with
// that of s. The alternatives below are presumably selected by imm8 (the
// dispatch is not part of the lines listed here).
// NOTE(review): d is received by value; unless XMM_REG has reference semantics
// these stores only modify the local copy and never reach the caller's
// register - confirm against the declaration.
4378 void I386_OPS_BASE::SSEOP(predicate_compare_single_scalar)(UINT8 imm8, XMM_REG d, XMM_REG s)
4383 d.d[0]=d.f[0] == s.f[0] ? 0xffffffff : 0; // equal
4386 d.d[0]=d.f[0] < s.f[0] ? 0xffffffff : 0; // less-than
4389 d.d[0]=d.f[0] <= s.f[0] ? 0xffffffff : 0; // less-than-or-equal
4392 d.d[0]=sse_issingleunordered(d.f[0], s.f[0]) ? 0xffffffff : 0; // unordered
4395 d.d[0]=d.f[0] != s.f[0] ? 0xffffffff : 0; // not-equal
4398 d.d[0]=d.f[0] < s.f[0] ? 0 : 0xffffffff; // not-less-than
4401 d.d[0]=d.f[0] <= s.f[0] ? 0 : 0xffffffff; // not-less-than-or-equal
4404 d.d[0]=sse_issingleordered(d.f[0], s.f[0]) ? 0xffffffff : 0; // ordered
// Scalar double-precision predicate compare: writes an all-ones or all-zero
// mask into qword 0 of d only, depending on how double lane 0 of d compares
// with that of s. The alternatives below are presumably selected by imm8 (the
// dispatch is not part of the lines listed here).
// NOTE(review): d is received by value; unless XMM_REG has reference semantics
// these stores only modify the local copy and never reach the caller's
// register - confirm against the declaration.
4409 void I386_OPS_BASE::SSEOP(predicate_compare_double_scalar)(UINT8 imm8, XMM_REG d, XMM_REG s)
4414 d.q[0]=d.f64[0] == s.f64[0] ? 0xffffffffffffffff : 0; // equal
4417 d.q[0]=d.f64[0] < s.f64[0] ? 0xffffffffffffffff : 0; // less-than
4420 d.q[0]=d.f64[0] <= s.f64[0] ? 0xffffffffffffffff : 0; // less-than-or-equal
4423 d.q[0]=sse_isdoubleunordered(d.f64[0], s.f64[0]) ? 0xffffffffffffffff : 0; // unordered
4426 d.q[0]=d.f64[0] != s.f64[0] ? 0xffffffffffffffff : 0; // not-equal
4429 d.q[0]=d.f64[0] < s.f64[0] ? 0 : 0xffffffffffffffff; // not-less-than
4432 d.q[0]=d.f64[0] <= s.f64[0] ? 0 : 0xffffffffffffffff; // not-less-than-or-equal
4435 d.q[0]=sse_isdoubleordered(d.f64[0], s.f64[0]) ? 0xffffffffffffffff : 0; // ordered
// CMPPS handler: decodes modrm and the predicate byte, then hands the
// destination register and the source operand to sse_predicate_compare_single.
4440 void I386_OPS_BASE::SSEOP(cmpps_r128_rm128_i8)() // Opcode 0f c2
4442 UINT8 modrm = FETCH();
4443 if( modrm >= 0xc0 ) { // register source
4445 UINT8 imm8 = FETCH();
4447 d=(modrm >> 3) & 0x7; // destination register index
4448 sse_predicate_compare_single(imm8, XMM(d), XMM(s));
4452 UINT32 ea = GetEA(modrm, 0, 16); // memory source; the immediate is fetched after address decoding
4453 UINT8 imm8 = FETCH();
4455 d=(modrm >> 3) & 0x7; // destination register index
4456 sse_predicate_compare_single(imm8, XMM(d), s);
4458 CYCLES(1); // TODO: correct cycle count
// CMPPD handler: decodes modrm and the predicate byte, then hands the
// destination register and the source operand to sse_predicate_compare_double.
4461 void I386_OPS_BASE::SSEOP(cmppd_r128_rm128_i8)() // Opcode 66 0f c2
4463 UINT8 modrm = FETCH();
4464 if( modrm >= 0xc0 ) { // register source
4466 UINT8 imm8 = FETCH();
4468 d=(modrm >> 3) & 0x7; // destination register index
4469 sse_predicate_compare_double(imm8, XMM(d), XMM(s));
4473 UINT32 ea = GetEA(modrm, 0, 16); // memory source; the immediate is fetched after address decoding
4474 UINT8 imm8 = FETCH();
4476 d=(modrm >> 3) & 0x7; // destination register index
4477 sse_predicate_compare_double(imm8, XMM(d), s);
4479 CYCLES(1); // TODO: correct cycle count
// CMPSS handler: decodes modrm and the predicate byte, then hands the
// destination register and the source operand to
// sse_predicate_compare_single_scalar.
4482 void I386_OPS_BASE::SSEOP(cmpss_r128_r128m32_i8)() // Opcode f3 0f c2
4484 UINT8 modrm = FETCH();
4485 if( modrm >= 0xc0 ) { // register source
4487 UINT8 imm8 = FETCH();
4489 d=(modrm >> 3) & 0x7; // destination register index
4490 sse_predicate_compare_single_scalar(imm8, XMM(d), XMM(s));
4494 UINT32 ea = GetEA(modrm, 0, 4); // memory source; the immediate is fetched after address decoding
4495 UINT8 imm8 = FETCH();
4497 d=(modrm >> 3) & 0x7; // destination register index
4498 sse_predicate_compare_single_scalar(imm8, XMM(d), s);
4500 CYCLES(1); // TODO: correct cycle count
// PINSRW handler (16-bit operand form): inserts a 16-bit value into the word
// lane chosen by imm8. When cpustate->xmm_operand_size is set the target is an
// XMM register (lane = imm8 & 7); the other store targets an MMX register
// (lane = imm8 & 3), presumably as the else branch of that test.
4503 void I386_OPS_BASE::SSEOP(pinsrw_r64_r16m16_i8)() // Opcode 0f c4, 16bit register
4506 UINT8 modrm = FETCH();
4507 if( modrm >= 0xc0 ) { // value comes from a general-purpose register
4508 UINT8 imm8 = FETCH();
4509 UINT16 v = LOAD_RM16(modrm);
4510 if (cpustate->xmm_operand_size)
4511 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4513 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4515 UINT32 ea = GetEA(modrm, 0, 2); // value comes from memory
4516 UINT8 imm8 = FETCH();
4517 UINT16 v = READ16(ea);
4518 if (cpustate->xmm_operand_size)
4519 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4521 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4523 CYCLES(1); // TODO: correct cycle count
// PINSRW handler (32-bit operand form, MMX target): inserts a 16-bit value
// into word lane (imm8 & 3) of the reg-field MMX register.
4526 void I386_OPS_BASE::SSEOP(pinsrw_r64_r32m16_i8)() // Opcode 0f c4, 32bit register
4529 UINT8 modrm = FETCH();
4530 if( modrm >= 0xc0 ) { // value comes from a general-purpose register
4531 UINT8 imm8 = FETCH();
4532 UINT16 v = (UINT16)LOAD_RM32(modrm); // only the low 16 bits of the register are used
4533 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4535 UINT32 ea = GetEA(modrm, 0, 2); // value comes from memory
4536 UINT8 imm8 = FETCH();
4537 UINT16 v = READ16(ea);
4538 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4540 CYCLES(1); // TODO: correct cycle count
// PINSRW handler (XMM target): inserts a 16-bit value into word lane
// (imm8 & 7) of the reg-field XMM register.
4543 void I386_OPS_BASE::SSEOP(pinsrw_r128_r32m16_i8)() // Opcode 66 0f c4
4545 UINT8 modrm = FETCH();
4546 if (modrm >= 0xc0) { // value comes from a general-purpose register
4547 UINT8 imm8 = FETCH();
4548 UINT16 v = (UINT16)LOAD_RM32(modrm); // only the low 16 bits of the register are used
4549 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4552 UINT32 ea = GetEA(modrm, 0, 2); // value comes from memory
4553 UINT8 imm8 = FETCH();
4554 UINT16 v = READ16(ea);
4555 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4557 CYCLES(1); // TODO: correct cycle count
// PEXTRW handler (16-bit destination): stores the word lane chosen by imm8
// into a 16-bit general-purpose register. When cpustate->xmm_operand_size is
// set the source is an XMM register (lane = imm8 & 7); the other store reads an
// MMX register (lane = imm8 & 3), presumably as the else branch of that test.
// Only the register form is handled; other encodings are reported as invalid.
4560 void I386_OPS_BASE::SSEOP(pextrw_r16_r64_i8)() // Opcode 0f c5
4563 UINT8 modrm = FETCH();
4564 if( modrm >= 0xc0 ) {
4565 UINT8 imm8 = FETCH();
4566 if (cpustate->xmm_operand_size)
4567 STORE_REG16(modrm, XMM(modrm & 0x7).w[imm8 & 7]);
4569 STORE_REG16(modrm, MMX(modrm & 0x7).w[imm8 & 3]);
4571 //UINT8 imm8 = FETCH();
4572 report_invalid_modrm( "pextrw_r16_r64_i8", modrm);
4574 CYCLES(1); // TODO: correct cycle count
// PEXTRW handler (32-bit destination, MMX source): stores word lane (imm8 & 3)
// of the r/m-field MMX register into a 32-bit general-purpose register.
// Only the register form is handled; other encodings are reported as invalid.
4577 void I386_OPS_BASE::SSEOP(pextrw_r32_r64_i8)() // Opcode 0f c5
4580 UINT8 modrm = FETCH();
4581 if( modrm >= 0xc0 ) {
4582 UINT8 imm8 = FETCH();
4583 STORE_REG32(modrm, MMX(modrm & 0x7).w[imm8 & 3]);
4585 //UINT8 imm8 = FETCH();
4586 report_invalid_modrm( "pextrw_r32_r64_i8", modrm);
4588 CYCLES(1); // TODO: correct cycle count
// PEXTRW handler (XMM source): stores word lane (imm8 & 7) of the r/m-field
// XMM register into a 32-bit general-purpose register.
// Only the register form is handled; other encodings are reported as invalid.
4591 void I386_OPS_BASE::SSEOP(pextrw_reg_r128_i8)() // Opcode 66 0f c5
4593 UINT8 modrm = FETCH();
4594 if (modrm >= 0xc0) {
4595 UINT8 imm8 = FETCH();
4596 STORE_REG32(modrm, XMM(modrm & 0x7).w[imm8 & 7]);
4599 //UINT8 imm8 = FETCH();
4600 report_invalid_modrm( "sse_pextrw_reg_r128_i8", modrm);
4602 CYCLES(1); // TODO: correct cycle count
// PMINUB handler (MMX): for byte lane n, keeps the smaller of the destination
// byte and the source byte in the reg-field MMX register.
// NOTE(review): the iteration over n is not part of the lines listed here.
4605 void I386_OPS_BASE::SSEOP(pminub_r64_rm64)() // Opcode 0f da
4609 UINT8 modrm = FETCH();
4610 if( modrm >= 0xc0 ) { // register source
4612 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] < MMX(modrm & 0x7).b[n] ? MMX((modrm >> 3) & 0x7).b[n] : MMX(modrm & 0x7).b[n];
4615 UINT32 ea = GetEA(modrm, 0, 8); // memory source
4618 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] < s.b[n] ? MMX((modrm >> 3) & 0x7).b[n] : s.b[n];
4620 CYCLES(1); // TODO: correct cycle count
// PMINUB handler (XMM): for each of the 16 byte lanes, keeps the smaller of the
// destination byte and the source byte in the reg-field XMM register.
4623 void I386_OPS_BASE::SSEOP(pminub_r128_rm128)() // Opcode 66 0f da
4625 UINT8 modrm = FETCH();
4626 if( modrm >= 0xc0 ) { // register source
4627 for (int n=0;n < 16;n++)
4628 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] < XMM(modrm & 0x7).b[n] ? XMM((modrm >> 3) & 0x7).b[n] : XMM(modrm & 0x7).b[n];
4631 UINT32 ea = GetEA(modrm, 0, 16); // memory source
4633 for (int n=0;n < 16;n++)
4634 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] < s.b[n] ? XMM((modrm >> 3) & 0x7).b[n] : s.b[n];
4636 CYCLES(1); // TODO: correct cycle count
// PMAXUB handler (MMX): for byte lane n, keeps the larger of the destination
// byte and the source byte in the reg-field MMX register.
// NOTE(review): the iteration over n is not part of the lines listed here.
4639 void I386_OPS_BASE::SSEOP(pmaxub_r64_rm64)() // Opcode 0f de
4643 UINT8 modrm = FETCH();
4644 if( modrm >= 0xc0 ) { // register source
4646 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] > MMX(modrm & 0x7).b[n] ? MMX((modrm >> 3) & 0x7).b[n] : MMX(modrm & 0x7).b[n];
4649 UINT32 ea = GetEA(modrm, 0, 8); // memory source
4652 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] > s.b[n] ? MMX((modrm >> 3) & 0x7).b[n] : s.b[n];
4654 CYCLES(1); // TODO: correct cycle count
// PAVGB handler (MMX): for byte lane n, stores the rounded-up average
// (a + b + 1) >> 1 of the destination and source bytes; the sum is widened to
// 16 bits first so the carry is not lost.
// NOTE(review): the iteration over n is not part of the lines listed here.
4657 void I386_OPS_BASE::SSEOP(pavgb_r64_rm64)() // Opcode 0f e0
4661 UINT8 modrm = FETCH();
4662 if( modrm >= 0xc0 ) { // register source
4664 MMX((modrm >> 3) & 0x7).b[n] = ((UINT16)MMX((modrm >> 3) & 0x7).b[n] + (UINT16)MMX(modrm & 0x7).b[n] + 1) >> 1;
4667 UINT32 ea = GetEA(modrm, 0, 8); // memory source
4670 MMX((modrm >> 3) & 0x7).b[n] = ((UINT16)MMX((modrm >> 3) & 0x7).b[n] + (UINT16)s.b[n] + 1) >> 1;
4672 CYCLES(1); // TODO: correct cycle count
// PAVGW handler (MMX): for word lane n, stores the rounded-up average
// (a + b + 1) >> 1 of the destination and source words; the sum is widened to
// 32 bits first so the carry is not lost.
// NOTE(review): the iteration over n is not part of the lines listed here.
4675 void I386_OPS_BASE::SSEOP(pavgw_r64_rm64)() // Opcode 0f e3
4679 UINT8 modrm = FETCH();
4680 if( modrm >= 0xc0 ) { // register source
4682 MMX((modrm >> 3) & 0x7).w[n] = ((UINT32)MMX((modrm >> 3) & 0x7).w[n] + (UINT32)MMX(modrm & 0x7).w[n] + 1) >> 1;
4685 UINT32 ea = GetEA(modrm, 0, 8); // memory source
4688 MMX((modrm >> 3) & 0x7).w[n] = ((UINT32)MMX((modrm >> 3) & 0x7).w[n] + (UINT32)s.w[n] + 1) >> 1;
4690 CYCLES(1); // TODO: correct cycle count
// PMULHUW handler (MMX): for each of the four word lanes, multiplies the
// destination and source words as 32-bit unsigned values and keeps the upper
// 16 bits of the product.
4693 void I386_OPS_BASE::SSEOP(pmulhuw_r64_rm64)() // Opcode 0f e4
4696 UINT8 modrm = FETCH();
4697 if( modrm >= 0xc0 ) { // register source
4698 MMX((modrm >> 3) & 0x7).w[0]=((UINT32)MMX((modrm >> 3) & 0x7).w[0]*(UINT32)MMX(modrm & 7).w[0]) >> 16;
4699 MMX((modrm >> 3) & 0x7).w[1]=((UINT32)MMX((modrm >> 3) & 0x7).w[1]*(UINT32)MMX(modrm & 7).w[1]) >> 16;
4700 MMX((modrm >> 3) & 0x7).w[2]=((UINT32)MMX((modrm >> 3) & 0x7).w[2]*(UINT32)MMX(modrm & 7).w[2]) >> 16;
4701 MMX((modrm >> 3) & 0x7).w[3]=((UINT32)MMX((modrm >> 3) & 0x7).w[3]*(UINT32)MMX(modrm & 7).w[3]) >> 16;
4704 UINT32 ea = GetEA(modrm, 0, 8); // memory source
4706 MMX((modrm >> 3) & 0x7).w[0]=((UINT32)MMX((modrm >> 3) & 0x7).w[0]*(UINT32)s.w[0]) >> 16;
4707 MMX((modrm >> 3) & 0x7).w[1]=((UINT32)MMX((modrm >> 3) & 0x7).w[1]*(UINT32)s.w[1]) >> 16;
4708 MMX((modrm >> 3) & 0x7).w[2]=((UINT32)MMX((modrm >> 3) & 0x7).w[2]*(UINT32)s.w[2]) >> 16;
4709 MMX((modrm >> 3) & 0x7).w[3]=((UINT32)MMX((modrm >> 3) & 0x7).w[3]*(UINT32)s.w[3]) >> 16;
4711 CYCLES(1); // TODO: correct cycle count
// PMINSW handler (MMX): for word lane n (accessed through the .s view), keeps
// the smaller of the destination and source values in the reg-field MMX register.
// NOTE(review): the iteration over n is not part of the lines listed here.
4714 void I386_OPS_BASE::SSEOP(pminsw_r64_rm64)() // Opcode 0f ea
4718 UINT8 modrm = FETCH();
4719 if( modrm >= 0xc0 ) { // register source
4721 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] < MMX(modrm & 0x7).s[n] ? MMX((modrm >> 3) & 0x7).s[n] : MMX(modrm & 0x7).s[n];
4724 UINT32 ea = GetEA(modrm, 0, 8); // memory source
4727 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] < s.s[n] ? MMX((modrm >> 3) & 0x7).s[n] : s.s[n];
4729 CYCLES(1); // TODO: correct cycle count
// PMAXSW handler (MMX): for word lane n (accessed through the .s view), keeps
// the larger of the destination and source values in the reg-field MMX register.
// NOTE(review): the iteration over n is not part of the lines listed here.
4732 void I386_OPS_BASE::SSEOP(pmaxsw_r64_rm64)() // Opcode 0f ee
4736 UINT8 modrm = FETCH();
4737 if( modrm >= 0xc0 ) { // register source
4739 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] > MMX(modrm & 0x7).s[n] ? MMX((modrm >> 3) & 0x7).s[n] : MMX(modrm & 0x7).s[n];
4742 UINT32 ea = GetEA(modrm, 0, 8); // memory source
4745 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] > s.s[n] ? MMX((modrm >> 3) & 0x7).s[n] : s.s[n];
4747 CYCLES(1); // TODO: correct cycle count
// PMULUDQ handler (MMX): multiplies dword 0 of the destination by dword 0 of
// the source, both widened to 64 bits, and stores the full product in the
// destination's .q member.
4750 void I386_OPS_BASE::SSEOP(pmuludq_r64_rm64)() // Opcode 0f f4
4753 UINT8 modrm = FETCH();
4754 if( modrm >= 0xc0 ) { // register source
4755 MMX((modrm >> 3) & 0x7).q = (UINT64)MMX((modrm >> 3) & 0x7).d[0] * (UINT64)MMX(modrm & 0x7).d[0];
4758 UINT32 ea = GetEA(modrm, 0, 8); // memory source
4760 MMX((modrm >> 3) & 0x7).q = (UINT64)MMX((modrm >> 3) & 0x7).d[0] * (UINT64)s.d[0];
4762 CYCLES(1); // TODO: correct cycle count
// PMULUDQ handler (XMM): multiplies the even dwords (0 and 2) of the
// destination by the matching dwords of the source, widened to 64 bits, and
// stores the two full products in destination qwords 0 and 1.
4765 void I386_OPS_BASE::SSEOP(pmuludq_r128_rm128)() // Opcode 66 0f f4
4767 UINT8 modrm = FETCH();
4768 if( modrm >= 0xc0 ) { // register source
4769 XMM((modrm >> 3) & 0x7).q[0] = (UINT64)XMM((modrm >> 3) & 0x7).d[0] * (UINT64)XMM(modrm & 0x7).d[0];
4770 XMM((modrm >> 3) & 0x7).q[1] = (UINT64)XMM((modrm >> 3) & 0x7).d[2] * (UINT64)XMM(modrm & 0x7).d[2];
4773 UINT32 ea = GetEA(modrm, 0, 16); // memory source
4775 XMM((modrm >> 3) & 0x7).q[0] = (UINT64)XMM((modrm >> 3) & 0x7).d[0] * (UINT64)s.d[0];
4776 XMM((modrm >> 3) & 0x7).q[1] = (UINT64)XMM((modrm >> 3) & 0x7).d[2] * (UINT64)s.d[2];
4778 CYCLES(1); // TODO: correct cycle count
// PSADBW handler (MMX): accumulates into temp the absolute difference of
// destination and source byte lane n, then stores the low 16 bits of the sum
// in the destination's .l member.
// NOTE(review): the initialisation of temp and the iteration over n are not
// part of the lines listed here.
4781 void I386_OPS_BASE::SSEOP(psadbw_r64_rm64)() // Opcode 0f f6
4786 UINT8 modrm = FETCH();
4787 if( modrm >= 0xc0 ) { // register source
4790 temp += abs((INT32)MMX((modrm >> 3) & 0x7).b[n] - (INT32)MMX(modrm & 0x7).b[n]);
4791 MMX((modrm >> 3) & 0x7).l=(UINT64)temp & 0xffff;
4794 UINT32 ea = GetEA(modrm, 0, 8); // memory source
4798 temp += abs((INT32)MMX((modrm >> 3) & 0x7).b[n] - (INT32)s.b[n]);
4799 MMX((modrm >> 3) & 0x7).l=(UINT64)temp & 0xffff;
4801 CYCLES(1); // TODO: correct cycle count
// PSUBQ handler (MMX): subtracts the source qword from the destination qword
// and stores the difference in the reg-field MMX register.
4804 void I386_OPS_BASE::SSEOP(psubq_r64_rm64)() // Opcode 0f fb
4807 UINT8 modrm = FETCH();
4808 if( modrm >= 0xc0 ) { // register source
4809 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q - MMX(modrm & 7).q;
4812 UINT32 ea = GetEA(modrm, 0, 8); // memory source
4814 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q - s.q;
4816 CYCLES(1); // TODO: correct cycle count
// PSUBQ handler (XMM): subtracts each source qword from the matching
// destination qword and stores both differences in the reg-field XMM register.
4819 void I386_OPS_BASE::SSEOP(psubq_r128_rm128)() // Opcode 66 0f fb
4821 UINT8 modrm = FETCH();
4822 if( modrm >= 0xc0 ) { // register source
4823 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] - XMM(modrm & 7).q[0];
4824 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] - XMM(modrm & 7).q[1];
4827 UINT32 ea = GetEA(modrm, 0, 16); // memory source
4829 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] - s.q[0];
4830 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] - s.q[1];
4832 CYCLES(1); // TODO: correct cycle count
// PSHUFD handler: fills the four destination dwords with source dwords chosen
// by the four 2-bit fields of imm8 (bits 1:0 pick dword 0, bits 3:2 dword 1,
// bits 5:4 dword 2, bits 7:6 dword 3).
// In the register form the selection reads from t, presumably a copy of the
// source taken first so that source == destination still works - confirm.
4835 void I386_OPS_BASE::SSEOP(pshufd_r128_rm128_i8)() // Opcode 66 0f 70
4837 UINT8 modrm = FETCH();
4838 if( modrm >= 0xc0 ) { // register source
4841 UINT8 imm8 = FETCH();
4843 d=(modrm >> 3) & 0x7; // destination register index
4846 XMM(d).d[0]=t.d[imm8 & 3];
4847 XMM(d).d[1]=t.d[(imm8 >> 2) & 3];
4848 XMM(d).d[2]=t.d[(imm8 >> 4) & 3];
4849 XMM(d).d[3]=t.d[(imm8 >> 6) & 3];
4852 int d=(modrm >> 3) & 0x7;
4853 UINT32 ea = GetEA(modrm, 0, 16); // memory source; the immediate is fetched after address decoding
4854 UINT8 imm8 = FETCH();
4856 XMM(d).d[0]=s.d[(imm8 & 3)];
4857 XMM(d).d[1]=s.d[((imm8 >> 2) & 3)];
4858 XMM(d).d[2]=s.d[((imm8 >> 4) & 3)];
4859 XMM(d).d[3]=s.d[((imm8 >> 6) & 3)];
4861 CYCLES(1); // TODO: correct cycle count
// PSHUFLW handler: fills destination words 0-3 with words chosen by the four
// 2-bit fields of imm8; in the register form the destination's high qword is
// copied from the source's high qword.
// NOTE(review): the contents of t (register form) and the handling of the high
// qword in the memory form are set on lines not listed here.
4864 void I386_OPS_BASE::SSEOP(pshuflw_r128_rm128_i8)() // Opcode f2 0f 70
4866 UINT8 modrm = FETCH();
4867 if( modrm >= 0xc0 ) { // register source
4870 UINT8 imm8 = FETCH();
4872 d=(modrm >> 3) & 0x7; // destination register index
4874 XMM(d).q[1]=XMM(s).q[1];
4875 XMM(d).w[0]=t.w[imm8 & 3];
4876 XMM(d).w[1]=t.w[(imm8 >> 2) & 3];
4877 XMM(d).w[2]=t.w[(imm8 >> 4) & 3];
4878 XMM(d).w[3]=t.w[(imm8 >> 6) & 3];
4881 int d=(modrm >> 3) & 0x7;
4882 UINT32 ea = GetEA(modrm, 0, 16); // memory source; the immediate is fetched after address decoding
4883 UINT8 imm8 = FETCH();
4886 XMM(d).w[0]=s.w[imm8 & 3];
4887 XMM(d).w[1]=s.w[(imm8 >> 2) & 3];
4888 XMM(d).w[2]=s.w[(imm8 >> 4) & 3];
4889 XMM(d).w[3]=s.w[(imm8 >> 6) & 3];
4891 CYCLES(1); // TODO: correct cycle count
// PSHUFHW handler: fills destination words 4-7 with words chosen by the four
// 2-bit fields of imm8; in the register form the destination's low qword is
// copied from the source's low qword.
// The memory form indexes the source's upper half directly (4 + selector),
// while the register form indexes t with the bare selector.
// NOTE(review): t is filled on a line not listed here - presumably with the
// source's high qword placed in its low half; confirm.
4894 void I386_OPS_BASE::SSEOP(pshufhw_r128_rm128_i8)() // Opcode f3 0f 70
4896 UINT8 modrm = FETCH();
4897 if( modrm >= 0xc0 ) { // register source
4900 UINT8 imm8 = FETCH();
4902 d=(modrm >> 3) & 0x7; // destination register index
4904 XMM(d).q[0]=XMM(s).q[0];
4905 XMM(d).w[4]=t.w[imm8 & 3];
4906 XMM(d).w[5]=t.w[(imm8 >> 2) & 3];
4907 XMM(d).w[6]=t.w[(imm8 >> 4) & 3];
4908 XMM(d).w[7]=t.w[(imm8 >> 6) & 3];
4911 int d=(modrm >> 3) & 0x7;
4912 UINT32 ea = GetEA(modrm, 0, 16); // memory source; the immediate is fetched after address decoding
4913 UINT8 imm8 = FETCH();
4916 XMM(d).w[4]=s.w[4 + (imm8 & 3)];
4917 XMM(d).w[5]=s.w[4 + ((imm8 >> 2) & 3)];
4918 XMM(d).w[6]=s.w[4 + ((imm8 >> 4) & 3)];
4919 XMM(d).w[7]=s.w[4 + ((imm8 >> 6) & 3)];
4921 CYCLES(1); // TODO: correct cycle count
// PACKSSWB handler (XMM): narrows the eight destination words into result
// bytes 0-7 and the eight source words into result bytes 8-15, each through
// SaturatedSignedWordToSignedByte.
4924 void I386_OPS_BASE::SSEOP(packsswb_r128_rm128)() // Opcode 66 0f 63
4926 UINT8 modrm = FETCH();
4927 if (modrm >= 0xc0) { // register source
4931 d = (modrm >> 3) & 0x7; // destination register index
// snapshot the source first: the first loop overwrites the destination, which may be the same register
4932 t.q[0] = XMM(s).q[0];
4933 t.q[1] = XMM(s).q[1];
4934 for (int n = 0; n < 8; n++)
4935 XMM(d).c[n] = SaturatedSignedWordToSignedByte(XMM(d).s[n]);
4936 for (int n = 0; n < 8; n++)
4937 XMM(d).c[n+8] = SaturatedSignedWordToSignedByte(t.s[n]);
4941 int d = (modrm >> 3) & 0x7;
4942 UINT32 ea = GetEA(modrm, 0, 16); // memory source
4944 for (int n = 0; n < 8; n++)
4945 XMM(d).c[n] = SaturatedSignedWordToSignedByte(XMM(d).s[n]);
4946 for (int n = 0; n < 8; n++)
4947 XMM(d).c[n + 8] = SaturatedSignedWordToSignedByte(s.s[n]);
4949 CYCLES(1); // TODO: correct cycle count
// PACKSSDW handler (XMM): narrows the four destination dwords into result
// words 0-3 and the four source dwords into result words 4-7, each through
// SaturatedSignedDwordToSignedWord.
4952 void I386_OPS_BASE::SSEOP(packssdw_r128_rm128)() // Opcode 66 0f 6b
4954 UINT8 modrm = FETCH();
4955 if (modrm >= 0xc0) { // register source
4959 d = (modrm >> 3) & 0x7; // destination register index
// snapshot the source first: the stores below overwrite the destination, which may be the same register
4960 t.q[0] = XMM(s).q[0];
4961 t.q[1] = XMM(s).q[1];
4962 XMM(d).s[0] = SaturatedSignedDwordToSignedWord(XMM(d).i[0]);
4963 XMM(d).s[1] = SaturatedSignedDwordToSignedWord(XMM(d).i[1]);
4964 XMM(d).s[2] = SaturatedSignedDwordToSignedWord(XMM(d).i[2]);
4965 XMM(d).s[3] = SaturatedSignedDwordToSignedWord(XMM(d).i[3]);
4966 XMM(d).s[4] = SaturatedSignedDwordToSignedWord(t.i[0]);
4967 XMM(d).s[5] = SaturatedSignedDwordToSignedWord(t.i[1]);
4968 XMM(d).s[6] = SaturatedSignedDwordToSignedWord(t.i[2]);
4969 XMM(d).s[7] = SaturatedSignedDwordToSignedWord(t.i[3]);
4973 int d = (modrm >> 3) & 0x7;
4974 UINT32 ea = GetEA(modrm, 0, 16); // memory source
4976 XMM(d).s[0] = SaturatedSignedDwordToSignedWord(XMM(d).i[0]);
4977 XMM(d).s[1] = SaturatedSignedDwordToSignedWord(XMM(d).i[1]);
4978 XMM(d).s[2] = SaturatedSignedDwordToSignedWord(XMM(d).i[2]);
4979 XMM(d).s[3] = SaturatedSignedDwordToSignedWord(XMM(d).i[3]);
4980 XMM(d).s[4] = SaturatedSignedDwordToSignedWord(s.i[0]);
4981 XMM(d).s[5] = SaturatedSignedDwordToSignedWord(s.i[1]);
4982 XMM(d).s[6] = SaturatedSignedDwordToSignedWord(s.i[2]);
4983 XMM(d).s[7] = SaturatedSignedDwordToSignedWord(s.i[3]);
4985 CYCLES(1); // TODO: correct cycle count
// PCMPGTB handler (XMM): for each of the 16 byte lanes writes 0xff when the
// destination value (read through the .c view) is greater than the source
// value, otherwise 0.
4988 void I386_OPS_BASE::SSEOP(pcmpgtb_r128_rm128)() // Opcode 66 0f 64
4990 UINT8 modrm = FETCH();
4991 if( modrm >= 0xc0 ) { // register source
4994 d=(modrm >> 3) & 0x7; // destination register index
4995 for (int c=0;c <= 15;c++)
4996 XMM(d).b[c]=(XMM(d).c[c] > XMM(s).c[c]) ? 0xff : 0;
4999 int d=(modrm >> 3) & 0x7;
5000 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5002 for (int c=0;c <= 15;c++)
5003 XMM(d).b[c]=(XMM(d).c[c] > s.c[c]) ? 0xff : 0;
5005 CYCLES(1); // TODO: correct cycle count
// PCMPGTW handler (XMM): for each of the 8 word lanes writes 0xffff when the
// destination value (read through the .s view) is greater than the source
// value, otherwise 0.
5008 void I386_OPS_BASE::SSEOP(pcmpgtw_r128_rm128)() // Opcode 66 0f 65
5010 UINT8 modrm = FETCH();
5011 if( modrm >= 0xc0 ) { // register source
5014 d=(modrm >> 3) & 0x7; // destination register index
5015 for (int c=0;c <= 7;c++)
5016 XMM(d).w[c]=(XMM(d).s[c] > XMM(s).s[c]) ? 0xffff : 0;
5019 int d=(modrm >> 3) & 0x7;
5020 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5022 for (int c=0;c <= 7;c++)
5023 XMM(d).w[c]=(XMM(d).s[c] > s.s[c]) ? 0xffff : 0;
5025 CYCLES(1); // TODO: correct cycle count
// PCMPGTD handler (XMM): for each of the 4 dword lanes writes 0xffffffff when
// the destination value (read through the .i view) is greater than the source
// value, otherwise 0.
5028 void I386_OPS_BASE::SSEOP(pcmpgtd_r128_rm128)() // Opcode 66 0f 66
5030 UINT8 modrm = FETCH();
5031 if( modrm >= 0xc0 ) { // register source
5034 d=(modrm >> 3) & 0x7; // destination register index
5035 for (int c=0;c <= 3;c++)
5036 XMM(d).d[c]=(XMM(d).i[c] > XMM(s).i[c]) ? 0xffffffff : 0;
5039 int d=(modrm >> 3) & 0x7;
5040 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5042 for (int c=0;c <= 3;c++)
5043 XMM(d).d[c]=(XMM(d).i[c] > s.i[c]) ? 0xffffffff : 0;
5045 CYCLES(1); // TODO: correct cycle count
// PACKUSWB handler (XMM): narrows the eight destination words into result
// bytes 0-7 and the eight source words into result bytes 8-15, each through
// SaturatedSignedWordToUnsignedByte.
5048 void I386_OPS_BASE::SSEOP(packuswb_r128_rm128)() // Opcode 66 0f 67
5050 UINT8 modrm = FETCH();
5051 if( modrm >= 0xc0 ) { // register source
5055 d=(modrm >> 3) & 0x7; // destination register index
// snapshot the source first: the first loop overwrites the destination, which may be the same register
5056 t.q[0] = XMM(s).q[0];
5057 t.q[1] = XMM(s).q[1];
5058 for (int n = 0; n < 8;n++)
5059 XMM(d).b[n]=SaturatedSignedWordToUnsignedByte(XMM(d).s[n]);
5060 for (int n = 0; n < 8;n++)
5061 XMM(d).b[n+8]=SaturatedSignedWordToUnsignedByte(t.s[n]);
5064 int d=(modrm >> 3) & 0x7;
5065 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5067 for (int n = 0; n < 8;n++)
5068 XMM(d).b[n]=SaturatedSignedWordToUnsignedByte(XMM(d).s[n]);
5069 for (int n = 0; n < 8;n++)
5070 XMM(d).b[n+8]=SaturatedSignedWordToUnsignedByte(s.s[n]);
5072 CYCLES(1); // TODO: correct cycle count
// PUNPCKHBW handler (XMM): interleaves the upper eight bytes of the
// destination (even result bytes) with the upper eight bytes of the source
// (odd result bytes).
// The in-place loop is safe because the byte read at 8+(n>>1) always lies at
// or above the position n being written.
5075 void I386_OPS_BASE::SSEOP(punpckhbw_r128_rm128)() // Opcode 66 0f 68
5077 UINT8 modrm = FETCH();
5078 if( modrm >= 0xc0 ) { // register source
5082 d=(modrm >> 3) & 0x7; // destination register index
5083 t.q[1] = XMM(s).q[1]; // snapshot the source's high qword; source may be the destination
5084 for (int n = 0; n < 16; n += 2) {
5085 XMM(d).b[n]=XMM(d).b[8+(n >> 1)];
5086 XMM(d).b[n+1]=t.b[8+(n >> 1)];
5090 int d=(modrm >> 3) & 0x7;
5091 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5093 for (int n = 0; n < 16; n += 2) {
5094 XMM(d).b[n]=XMM(d).b[8+(n >> 1)];
5095 XMM(d).b[n+1]=s.b[8+(n >> 1)];
5098 CYCLES(1); // TODO: correct cycle count
// PUNPCKHWD handler (XMM): interleaves the upper four words of the destination
// (even result words) with the upper four words of the source (odd result words).
// The in-place loop is safe because the word read at 4+(n>>1) always lies at
// or above the position n being written.
5101 void I386_OPS_BASE::SSEOP(punpckhwd_r128_rm128)() // Opcode 66 0f 69
5103 UINT8 modrm = FETCH();
5104 if( modrm >= 0xc0 ) { // register source
5108 d=(modrm >> 3) & 0x7; // destination register index
5109 t.q[1] = XMM(s).q[1]; // snapshot the source's high qword; source may be the destination
5110 for (int n = 0; n < 8; n += 2) {
5111 XMM(d).w[n]=XMM(d).w[4+(n >> 1)];
5112 XMM(d).w[n+1]=t.w[4+(n >> 1)];
5116 int d=(modrm >> 3) & 0x7;
5117 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5119 for (int n = 0; n < 8; n += 2) {
5120 XMM(d).w[n]=XMM(d).w[4+(n >> 1)];
5121 XMM(d).w[n+1]=s.w[4+(n >> 1)];
5124 CYCLES(1); // TODO: correct cycle count
// PUNPCKHDQ handler (XMM): moves the destination's upper dwords down
// (dword 2 -> dword 0, dword 3 -> dword 2).
// NOTE(review): the stores to result dwords 1 and 3 are not part of the lines
// listed here - presumably they take the source's upper dwords; confirm.
5127 void I386_OPS_BASE::SSEOP(unpckhdq_r128_rm128)() // Opcode 66 0f 6a
5129 UINT8 modrm = FETCH();
5130 if( modrm >= 0xc0 ) { // register source
5134 d=(modrm >> 3) & 0x7; // destination register index
5135 t.q[1] = XMM(s).q[1]; // snapshot the source's high qword; source may be the destination
5136 XMM(d).d[0]=XMM(d).d[2];
5138 XMM(d).d[2]=XMM(d).d[3];
5142 int d=(modrm >> 3) & 0x7;
5143 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5145 XMM(d).d[0]=XMM(d).d[2];
5147 XMM(d).d[2]=XMM(d).d[3];
5150 CYCLES(1); // TODO: correct cycle count
// PUNPCKHQDQ handler (XMM): moves the destination's high qword into its low
// qword.
// NOTE(review): the store to the result's high qword is not part of the lines
// listed here - presumably it takes the source's high qword; confirm.
5153 void I386_OPS_BASE::SSEOP(punpckhqdq_r128_rm128)() // Opcode 66 0f 6d
5155 UINT8 modrm = FETCH();
5156 if( modrm >= 0xc0 ) { // register source
5160 d=(modrm >> 3) & 0x7; // destination register index
5161 t.q[1] = XMM(s).q[1]; // snapshot the source's high qword; source may be the destination
5162 XMM(d).q[0]=XMM(d).q[1];
5166 int d=(modrm >> 3) & 0x7;
5167 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5169 XMM(d).q[0]=XMM(d).q[1];
5172 CYCLES(1); // TODO: correct cycle count
// PCMPEQB handler (XMM): for each of the 16 byte lanes writes 0xff when the
// destination and source bytes are equal, otherwise 0.
5175 void I386_OPS_BASE::SSEOP(pcmpeqb_r128_rm128)() // Opcode 66 0f 74
5177 UINT8 modrm = FETCH();
5178 if( modrm >= 0xc0 ) { // register source
5181 d=(modrm >> 3) & 0x7; // destination register index
5182 for (int c=0;c <= 15;c++)
5183 XMM(d).b[c]=(XMM(d).c[c] == XMM(s).c[c]) ? 0xff : 0;
5186 int d=(modrm >> 3) & 0x7;
5187 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5189 for (int c=0;c <= 15;c++)
5190 XMM(d).b[c]=(XMM(d).c[c] == s.c[c]) ? 0xff : 0;
5192 CYCLES(1); // TODO: correct cycle count
// PCMPEQW handler (XMM): for each of the 8 word lanes writes 0xffff when the
// destination and source words are equal, otherwise 0.
5195 void I386_OPS_BASE::SSEOP(pcmpeqw_r128_rm128)() // Opcode 66 0f 75
5197 UINT8 modrm = FETCH();
5198 if( modrm >= 0xc0 ) { // register source
5201 d=(modrm >> 3) & 0x7; // destination register index
5202 for (int c=0;c <= 7;c++)
5203 XMM(d).w[c]=(XMM(d).s[c] == XMM(s).s[c]) ? 0xffff : 0;
5206 int d=(modrm >> 3) & 0x7;
5207 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5209 for (int c=0;c <= 7;c++)
5210 XMM(d).w[c]=(XMM(d).s[c] == s.s[c]) ? 0xffff : 0;
5212 CYCLES(1); // TODO: correct cycle count
// PCMPEQD handler (XMM): for each of the 4 dword lanes writes 0xffffffff when
// the destination and source dwords are equal, otherwise 0.
5215 void I386_OPS_BASE::SSEOP(pcmpeqd_r128_rm128)() // Opcode 66 0f 76
5217 UINT8 modrm = FETCH();
5218 if( modrm >= 0xc0 ) { // register source
5221 d=(modrm >> 3) & 0x7; // destination register index
5222 for (int c=0;c <= 3;c++)
5223 XMM(d).d[c]=(XMM(d).i[c] == XMM(s).i[c]) ? 0xffffffff : 0;
5226 int d=(modrm >> 3) & 0x7;
5227 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5229 for (int c=0;c <= 3;c++)
5230 XMM(d).d[c]=(XMM(d).i[c] == s.i[c]) ? 0xffffffff : 0;
5232 CYCLES(1); // TODO: correct cycle count
// PADDQ handler (XMM): adds each source qword to the matching destination
// qword and stores both sums in the reg-field XMM register.
5235 void I386_OPS_BASE::SSEOP(paddq_r128_rm128)() // Opcode 66 0f d4
5237 UINT8 modrm = FETCH();
5238 if( modrm >= 0xc0 ) { // register source
5241 d=(modrm >> 3) & 0x7; // destination register index
5242 XMM(d).q[0]=XMM(d).q[0]+XMM(s).q[0];
5243 XMM(d).q[1]=XMM(d).q[1]+XMM(s).q[1];
5246 int d=(modrm >> 3) & 0x7;
5247 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5249 XMM(d).q[0]=XMM(d).q[0]+src.q[0];
5250 XMM(d).q[1]=XMM(d).q[1]+src.q[1];
5252 CYCLES(1); // TODO: correct cycle count
// PMULLW handler (XMM): for each of the 8 word lanes multiplies the
// destination and source values (read through the .s view, widened to INT32)
// and keeps the low 16 bits of the product.
5255 void I386_OPS_BASE::SSEOP(pmullw_r128_rm128)() // Opcode 66 0f d5
5257 UINT8 modrm = FETCH();
5258 if( modrm >= 0xc0 ) { // register source
5261 d=(modrm >> 3) & 0x7; // destination register index
5262 for (int n = 0; n < 8;n++)
5263 XMM(d).w[n]=(UINT32)((INT32)XMM(d).s[n]*(INT32)XMM(s).s[n]) & 0xffff;
5267 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5269 d=(modrm >> 3) & 0x7; // destination register index
5270 for (int n = 0; n < 8;n++)
5271 XMM(d).w[n]=(UINT32)((INT32)XMM(d).s[n]*(INT32)src.s[n]) & 0xffff;
5273 CYCLES(1); // TODO: correct cycle count
// PADDB handler (XMM): adds the source to the destination in each of the 16
// byte lanes; the sum is stored back into the byte lane without saturation.
5276 void I386_OPS_BASE::SSEOP(paddb_r128_rm128)() // Opcode 66 0f fc
5278 UINT8 modrm = FETCH();
5279 if( modrm >= 0xc0 ) { // register source
5280 for (int n=0;n < 16;n++)
5281 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] + XMM(modrm & 7).b[n];
5284 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5286 for (int n=0;n < 16;n++)
5287 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] + s.b[n];
5289 CYCLES(1); // TODO: correct cycle count
// PADDW handler (XMM): adds the source to the destination in each of the 8
// word lanes; the sum is stored back into the word lane without saturation.
5292 void I386_OPS_BASE::SSEOP(paddw_r128_rm128)() // Opcode 66 0f fd
5294 UINT8 modrm = FETCH();
5295 if( modrm >= 0xc0 ) { // register source
5296 for (int n=0;n < 8;n++)
5297 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] + XMM(modrm & 7).w[n];
5300 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5302 for (int n=0;n < 8;n++)
5303 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] + s.w[n];
5305 CYCLES(1); // TODO: correct cycle count
// PADDD handler (XMM): adds the source to the destination in each of the 4
// dword lanes; the sum is stored back into the dword lane without saturation.
5308 void I386_OPS_BASE::SSEOP(paddd_r128_rm128)() // Opcode 66 0f fe
5310 UINT8 modrm = FETCH();
5311 if( modrm >= 0xc0 ) { // register source
5312 for (int n=0;n < 4;n++)
5313 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] + XMM(modrm & 7).d[n];
5316 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5318 for (int n=0;n < 4;n++)
5319 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] + s.d[n];
5321 CYCLES(1); // TODO: correct cycle count
// PSUBUSB handler (XMM): subtracts the source from the destination in each of
// the 16 byte lanes, clamping to 0 when the destination byte is smaller than
// the source byte.
5324 void I386_OPS_BASE::SSEOP(psubusb_r128_rm128)() // Opcode 66 0f d8
5326 UINT8 modrm = FETCH();
5327 if( modrm >= 0xc0 ) { // register source
5328 for (int n=0;n < 16;n++)
5329 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] < XMM(modrm & 7).b[n] ? 0 : XMM((modrm >> 3) & 0x7).b[n]-XMM(modrm & 7).b[n];
5332 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5334 for (int n=0;n < 16;n++)
5335 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] < src.b[n] ? 0 : XMM((modrm >> 3) & 0x7).b[n]-src.b[n];
5337 CYCLES(1); // TODO: correct cycle count
// PSUBUSW handler (XMM): subtracts the source from the destination in each of
// the 8 word lanes, clamping to 0 when the destination word is smaller than
// the source word.
5340 void I386_OPS_BASE::SSEOP(psubusw_r128_rm128)() // Opcode 66 0f d9
5342 UINT8 modrm = FETCH();
5343 if( modrm >= 0xc0 ) { // register source
5344 for (int n=0;n < 8;n++)
5345 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] < XMM(modrm & 7).w[n] ? 0 : XMM((modrm >> 3) & 0x7).w[n]-XMM(modrm & 7).w[n];
5348 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5350 for (int n=0;n < 8;n++)
5351 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] < src.w[n] ? 0 : XMM((modrm >> 3) & 0x7).w[n]-src.w[n];
5353 CYCLES(1); // TODO: correct cycle count
// PAND handler (XMM): bitwise AND of destination and source, performed on
// both qwords and stored in the reg-field XMM register.
5356 void I386_OPS_BASE::SSEOP(pand_r128_rm128)() // Opcode 66 0f db
5358 UINT8 modrm = FETCH();
5359 if( modrm >= 0xc0 ) { // register source
5360 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] & XMM(modrm & 7).q[0];
5361 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] & XMM(modrm & 7).q[1];
5364 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5366 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] & src.q[0];
5367 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] & src.q[1];
5369 CYCLES(1); // TODO: correct cycle count
// PANDN handler (XMM): complements the destination and ANDs it with the
// source, performed on both qwords and stored in the reg-field XMM register.
5372 void I386_OPS_BASE::SSEOP(pandn_r128_rm128)() // Opcode 66 0f df
5374 UINT8 modrm = FETCH();
5375 if( modrm >= 0xc0 ) { // register source
5376 XMM((modrm >> 3) & 0x7).q[0]=(~XMM((modrm >> 3) & 0x7).q[0]) & XMM(modrm & 7).q[0];
5377 XMM((modrm >> 3) & 0x7).q[1]=(~XMM((modrm >> 3) & 0x7).q[1]) & XMM(modrm & 7).q[1];
5380 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5382 XMM((modrm >> 3) & 0x7).q[0]=(~XMM((modrm >> 3) & 0x7).q[0]) & src.q[0];
5383 XMM((modrm >> 3) & 0x7).q[1]=(~XMM((modrm >> 3) & 0x7).q[1]) & src.q[1];
5385 CYCLES(1); // TODO: correct cycle count
// PADDUSB handler (XMM): adds the source to the destination in each of the 16
// byte lanes, clamping to 0xff. Overflow is detected before adding by testing
// whether the destination exceeds (0xff - source).
5388 void I386_OPS_BASE::SSEOP(paddusb_r128_rm128)() // Opcode 66 0f dc
5390 UINT8 modrm = FETCH();
5391 if( modrm >= 0xc0 ) { // register source
5392 for (int n=0;n < 16;n++)
5393 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] > (0xff-XMM(modrm & 7).b[n]) ? 0xff : XMM((modrm >> 3) & 0x7).b[n]+XMM(modrm & 7).b[n];
5396 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5398 for (int n=0;n < 16;n++)
5399 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] > (0xff-src.b[n]) ? 0xff : XMM((modrm >> 3) & 0x7).b[n]+src.b[n];
5401 CYCLES(1); // TODO: correct cycle count
// PADDUSW handler (XMM): adds the source to the destination in each of the 8
// word lanes, clamping to 0xffff. Overflow is detected before adding by
// testing whether the destination exceeds (0xffff - source).
5404 void I386_OPS_BASE::SSEOP(paddusw_r128_rm128)() // Opcode 66 0f dd
5406 UINT8 modrm = FETCH();
5407 if( modrm >= 0xc0 ) { // register source
5408 for (int n=0;n < 8;n++)
5409 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] > (0xffff-XMM(modrm & 7).w[n]) ? 0xffff : XMM((modrm >> 3) & 0x7).w[n]+XMM(modrm & 7).w[n];
5412 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5414 for (int n=0;n < 8;n++)
5415 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] > (0xffff-src.w[n]) ? 0xffff : XMM((modrm >> 3) & 0x7).w[n]+src.w[n];
5417 CYCLES(1); // TODO: correct cycle count
// PMAXUB handler (XMM): for each of the 16 byte lanes, keeps the larger of the
// destination byte and the source byte in the reg-field XMM register.
5420 void I386_OPS_BASE::SSEOP(pmaxub_r128_rm128)() // Opcode 66 0f de
5422 UINT8 modrm = FETCH();
5423 if( modrm >= 0xc0 ) { // register source
5424 for (int n=0;n < 16;n++)
5425 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] > XMM(modrm & 0x7).b[n] ? XMM((modrm >> 3) & 0x7).b[n] : XMM(modrm & 0x7).b[n];
5428 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5430 for (int n=0;n < 16;n++)
5431 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] > s.b[n] ? XMM((modrm >> 3) & 0x7).b[n] : s.b[n];
5433 CYCLES(1); // TODO: correct cycle count
// PMULHUW handler (XMM): for each of the 8 word lanes, multiplies the
// destination and source words as 32-bit unsigned values and keeps the upper
// 16 bits of the product.
5436 void I386_OPS_BASE::SSEOP(pmulhuw_r128_rm128)() // Opcode 66 0f e4
5438 UINT8 modrm = FETCH();
5439 if( modrm >= 0xc0 ) { // register source
5440 for (int n=0;n < 8;n++)
5441 XMM((modrm >> 3) & 0x7).w[n]=((UINT32)XMM((modrm >> 3) & 0x7).w[n]*(UINT32)XMM(modrm & 7).w[n]) >> 16;
5444 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5446 for (int n=0;n < 8;n++)
5447 XMM((modrm >> 3) & 0x7).w[n]=((UINT32)XMM((modrm >> 3) & 0x7).w[n]*(UINT32)s.w[n]) >> 16;
5449 CYCLES(1); // TODO: correct cycle count
// PMULHW handler (XMM): for each of the 8 word lanes, multiplies the
// destination and source values (read through the .s view, widened to INT32)
// and keeps bits 31:16 of the product.
5452 void I386_OPS_BASE::SSEOP(pmulhw_r128_rm128)() // Opcode 66 0f e5
5454 UINT8 modrm = FETCH();
5455 if( modrm >= 0xc0 ) { // register source
5456 for (int n=0;n < 8;n++)
5457 XMM((modrm >> 3) & 0x7).w[n]=(UINT32)((INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)XMM(modrm & 7).s[n]) >> 16;
5460 UINT32 ea = GetEA(modrm, 0, 16); // memory source
5462 for (int n=0;n < 8;n++)
5463 XMM((modrm >> 3) & 0x7).w[n]=(UINT32)((INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)src.s[n]) >> 16;
5465 CYCLES(1); // TODO: correct cycle count
// PSUBSB (66 0F E8): subtracts the sixteen signed byte lanes of the source
// from the destination, passing each difference through
// SaturatedSignedWordToSignedByte before storing it.
5468 void I386_OPS_BASE::SSEOP(psubsb_r128_rm128)() // Opcode 66 0f e8
5470 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5471 if( modrm >= 0xc0 ) {
5472 for (int n=0;n < 16;n++)
// Both lanes are widened to INT16 so the difference cannot wrap before the saturation helper sees it.
5473 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)XMM((modrm >> 3) & 0x7).c[n] - (INT16)XMM(modrm & 7).c[n]);
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5476 UINT32 ea = GetEA(modrm, 0, 16);
5478 for (int n=0;n < 16;n++)
5479 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)XMM((modrm >> 3) & 0x7).c[n] - (INT16)s.c[n]);
5481 CYCLES(1); // TODO: correct cycle count
// PSUBSW (66 0F E9): subtracts the eight signed 16-bit lanes of the source
// from the destination, passing each difference through
// SaturatedSignedDwordToSignedWord before storing it.
5484 void I386_OPS_BASE::SSEOP(psubsw_r128_rm128)() // Opcode 66 0f e9
5486 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5487 if( modrm >= 0xc0 ) {
5488 for (int n=0;n < 8;n++)
// Lanes are widened to INT32 so the raw difference is exact before saturation.
5489 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)XMM((modrm >> 3) & 0x7).s[n] - (INT32)XMM(modrm & 7).s[n]);
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5492 UINT32 ea = GetEA(modrm, 0, 16);
5494 for (int n=0;n < 8;n++)
5495 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)XMM((modrm >> 3) & 0x7).s[n] - (INT32)s.s[n]);
5497 CYCLES(1); // TODO: correct cycle count
// PMINSW (66 0F EA): keeps, for each of the eight signed 16-bit lanes, the
// smaller of the destination and source values in the destination register.
5500 void I386_OPS_BASE::SSEOP(pminsw_r128_rm128)() // Opcode 66 0f ea
5502 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5503 if( modrm >= 0xc0 ) {
5504 for (int n=0;n < 8;n++)
5505 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] < XMM(modrm & 0x7).s[n] ? XMM((modrm >> 3) & 0x7).s[n] : XMM(modrm & 0x7).s[n];
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5508 UINT32 ea = GetEA(modrm, 0, 16);
5510 for (int n=0;n < 8;n++)
5511 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] < s.s[n] ? XMM((modrm >> 3) & 0x7).s[n] : s.s[n];
5513 CYCLES(1); // TODO: correct cycle count
// PMAXSW (66 0F EE): keeps, for each of the eight signed 16-bit lanes, the
// larger of the destination and source values in the destination register.
5516 void I386_OPS_BASE::SSEOP(pmaxsw_r128_rm128)() // Opcode 66 0f ee
5518 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5519 if( modrm >= 0xc0 ) {
5520 for (int n=0;n < 8;n++)
5521 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] > XMM(modrm & 0x7).s[n] ? XMM((modrm >> 3) & 0x7).s[n] : XMM(modrm & 0x7).s[n];
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5524 UINT32 ea = GetEA(modrm, 0, 16);
5526 for (int n=0;n < 8;n++)
5527 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] > s.s[n] ? XMM((modrm >> 3) & 0x7).s[n] : s.s[n];
5529 CYCLES(1); // TODO: correct cycle count
// PADDSB (66 0F EC): adds the sixteen signed byte lanes of the source to the
// destination, passing each sum through SaturatedSignedWordToSignedByte.
5532 void I386_OPS_BASE::SSEOP(paddsb_r128_rm128)() // Opcode 66 0f ec
5534 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5535 if( modrm >= 0xc0 ) {
5536 for (int n=0;n < 16;n++)
// Both lanes are widened to INT16 so the sum cannot wrap before the saturation helper sees it.
5537 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)XMM((modrm >> 3) & 0x7).c[n] + (INT16)XMM(modrm & 7).c[n]);
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5540 UINT32 ea = GetEA(modrm, 0, 16);
5542 for (int n=0;n < 16;n++)
5543 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((INT16)XMM((modrm >> 3) & 0x7).c[n] + (INT16)s.c[n]);
5545 CYCLES(1); // TODO: correct cycle count
// PADDSW (66 0F ED): adds the eight signed 16-bit lanes of the source to the
// destination, passing each sum through SaturatedSignedDwordToSignedWord.
5548 void I386_OPS_BASE::SSEOP(paddsw_r128_rm128)() // Opcode 66 0f ed
5550 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5551 if( modrm >= 0xc0 ) {
5552 for (int n=0;n < 8;n++)
// Lanes are widened to INT32 so the raw sum is exact before saturation.
5553 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)XMM((modrm >> 3) & 0x7).s[n] + (INT32)XMM(modrm & 7).s[n]);
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5556 UINT32 ea = GetEA(modrm, 0, 16);
5558 for (int n=0;n < 8;n++)
5559 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((INT32)XMM((modrm >> 3) & 0x7).s[n] + (INT32)s.s[n]);
5561 CYCLES(1); // TODO: correct cycle count
// POR (66 0F EB): bitwise OR of the 128-bit source into the destination XMM
// register, performed as two 64-bit halves.
5564 void I386_OPS_BASE::SSEOP(por_r128_rm128)() // Opcode 66 0f eb
5566 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5567 if( modrm >= 0xc0 ) {
5568 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] | XMM(modrm & 7).q[0];
5569 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] | XMM(modrm & 7).q[1];
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5572 UINT32 ea = GetEA(modrm, 0, 16);
5574 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] | s.q[0];
5575 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] | s.q[1];
5577 CYCLES(1); // TODO: correct cycle count
// PXOR (66 0F EF): bitwise XOR of the 128-bit source into the destination XMM
// register, performed as two 64-bit halves.
5580 void I386_OPS_BASE::SSEOP(pxor_r128_rm128)() // Opcode 66 0f ef
5582 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5583 if( modrm >= 0xc0 ) {
5584 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] ^ XMM(modrm & 7).q[0];
5585 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] ^ XMM(modrm & 7).q[1];
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5588 UINT32 ea = GetEA(modrm, 0, 16);
5590 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] ^ s.q[0];
5591 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] ^ s.q[1];
5593 CYCLES(1); // TODO: correct cycle count
5596 void I386_OPS_BASE::SSEOP(pmaddwd_r128_rm128)() // Opcode 66 0f f5
5598 UINT8 modrm = FETCH();
5599 if( modrm >= 0xc0 ) {
5600 for (int n=0;n < 4;n++)
5601 XMM((modrm >> 3) & 0x7).i[n]=(INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)XMM(modrm & 7).s[n]+
5602 (INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)XMM(modrm & 7).s[n];
5605 UINT32 ea = GetEA(modrm, 0, 16);
5607 for (int n=0;n < 4;n++)
5608 XMM((modrm >> 3) & 0x7).i[n]=(INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)s.s[n]+
5609 (INT32)XMM((modrm >> 3) & 0x7).s[n]*(INT32)s.s[n];
5611 CYCLES(1); // TODO: correct cycle count
// PSUBB (66 0F F8): wrapping subtraction of the sixteen byte lanes of the
// source from the destination XMM register.
5614 void I386_OPS_BASE::SSEOP(psubb_r128_rm128)() // Opcode 66 0f f8
5616 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5617 if( modrm >= 0xc0 ) {
5618 for (int n=0;n < 16;n++)
5619 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] - XMM(modrm & 7).b[n];
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5622 UINT32 ea = GetEA(modrm, 0, 16);
5624 for (int n=0;n < 16;n++)
5625 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] - s.b[n];
5627 CYCLES(1); // TODO: correct cycle count
// PSUBW (66 0F F9): wrapping subtraction of the eight 16-bit lanes of the
// source from the destination XMM register.
5630 void I386_OPS_BASE::SSEOP(psubw_r128_rm128)() // Opcode 66 0f f9
5632 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5633 if( modrm >= 0xc0 ) {
5634 for (int n=0;n < 8;n++)
5635 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] - XMM(modrm & 7).w[n];
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5638 UINT32 ea = GetEA(modrm, 0, 16);
5640 for (int n=0;n < 8;n++)
5641 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] - s.w[n];
5643 CYCLES(1); // TODO: correct cycle count
// PSUBD (66 0F FA): wrapping subtraction of the four 32-bit lanes of the
// source from the destination XMM register.
5646 void I386_OPS_BASE::SSEOP(psubd_r128_rm128)() // Opcode 66 0f fa
5648 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5649 if( modrm >= 0xc0 ) {
5650 for (int n=0;n < 4;n++)
5651 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] - XMM(modrm & 7).d[n];
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5654 UINT32 ea = GetEA(modrm, 0, 16);
5656 for (int n=0;n < 4;n++)
5657 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] - s.d[n];
5659 CYCLES(1); // TODO: correct cycle count
// PSADBW (66 0F F6): sums the absolute differences of the unsigned byte lanes
// separately for bytes 0-7 and bytes 8-15, storing each sum (masked to 16
// bits) in the corresponding 64-bit half of the destination register.
// NOTE(review): `temp` is declared, and presumably reset to 0 before each
// loop, on lines not visible in this excerpt - confirm.
5662 void I386_OPS_BASE::SSEOP(psadbw_r128_rm128)() // Opcode 66 0f f6
5665 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5666 if( modrm >= 0xc0 ) {
5668 for (int n=0;n < 8;n++)
5669 temp += abs((INT32)XMM((modrm >> 3) & 0x7).b[n] - (INT32)XMM(modrm & 0x7).b[n]);
// Writing l[0] only replaces bytes 0-7; the second loop reads bytes 8-15, which are still intact.
5670 XMM((modrm >> 3) & 0x7).l[0]=(UINT64)temp & 0xffff;
5672 for (int n=8;n < 16;n++)
5673 temp += abs((INT32)XMM((modrm >> 3) & 0x7).b[n] - (INT32)XMM(modrm & 0x7).b[n]);
5674 XMM((modrm >> 3) & 0x7).l[1]=(UINT64)temp & 0xffff;
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5677 UINT32 ea = GetEA(modrm, 0, 16);
5680 for (int n=0;n < 8;n++)
5681 temp += abs((INT32)XMM((modrm >> 3) & 0x7).b[n] - (INT32)s.b[n]);
5682 XMM((modrm >> 3) & 0x7).l[0]=(UINT64)temp & 0xffff;
5684 for (int n=8;n < 16;n++)
5685 temp += abs((INT32)XMM((modrm >> 3) & 0x7).b[n] - (INT32)s.b[n]);
5686 XMM((modrm >> 3) & 0x7).l[1]=(UINT64)temp & 0xffff;
5688 CYCLES(1); // TODO: correct cycle count
// PAVGB (66 0F E0): rounded average of the sixteen unsigned byte lanes,
// computed per lane as (dst + src + 1) >> 1.
5691 void I386_OPS_BASE::SSEOP(pavgb_r128_rm128)() // Opcode 66 0f e0
5693 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5694 if( modrm >= 0xc0 ) {
5695 for (int n=0;n < 16;n++)
// Lanes are widened to UINT16 so the 9-bit intermediate sum is not truncated.
5696 XMM((modrm >> 3) & 0x7).b[n] = ((UINT16)XMM((modrm >> 3) & 0x7).b[n] + (UINT16)XMM(modrm & 0x7).b[n] + 1) >> 1;
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5699 UINT32 ea = GetEA(modrm, 0, 16);
5701 for (int n=0;n < 16;n++)
5702 XMM((modrm >> 3) & 0x7).b[n] = ((UINT16)XMM((modrm >> 3) & 0x7).b[n] + (UINT16)s.b[n] + 1) >> 1;
5704 CYCLES(1); // TODO: correct cycle count
// PAVGW (66 0F E3): rounded average of the eight unsigned 16-bit lanes,
// computed per lane as (dst + src + 1) >> 1.
5707 void I386_OPS_BASE::SSEOP(pavgw_r128_rm128)() // Opcode 66 0f e3
5709 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
5710 if( modrm >= 0xc0 ) {
5711 for (int n=0;n < 8;n++)
// Lanes are widened to UINT32 so the 17-bit intermediate sum is not truncated.
5712 XMM((modrm >> 3) & 0x7).w[n] = ((UINT32)XMM((modrm >> 3) & 0x7).w[n] + (UINT32)XMM(modrm & 0x7).w[n] + 1) >> 1;
// Memory form: `s` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5715 UINT32 ea = GetEA(modrm, 0, 16);
5717 for (int n=0;n < 8;n++)
5718 XMM((modrm >> 3) & 0x7).w[n] = ((UINT32)XMM((modrm >> 3) & 0x7).w[n] + (UINT32)s.w[n] + 1) >> 1;
5720 CYCLES(1); // TODO: correct cycle count
5723 void I386_OPS_BASE::SSEOP(psrlw_r128_rm128)() // Opcode 66 0f d1
5725 UINT8 modrm = FETCH();
5726 if( modrm >= 0xc0 ) {
5727 int count=(int)XMM(modrm & 7).q[0];
5728 for (int n=0; n < 8;n++)
5729 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] >> count;
5732 UINT32 ea = GetEA(modrm, 0, 16);
5734 int count=(int)src.q[0];
5735 for (int n=0; n < 8;n++)
5736 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] >> count;
5738 CYCLES(1); // TODO: correct cycle count
5741 void I386_OPS_BASE::SSEOP(psrld_r128_rm128)() // Opcode 66 0f d2
5743 UINT8 modrm = FETCH();
5744 if( modrm >= 0xc0 ) {
5745 int count=(int)XMM(modrm & 7).q[0];
5746 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] >> count;
5747 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] >> count;
5748 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] >> count;
5749 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] >> count;
5752 UINT32 ea = GetEA(modrm, 0, 16);
5754 int count=(int)src.q[0];
5755 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] >> count;
5756 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] >> count;
5757 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] >> count;
5758 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] >> count;
5760 CYCLES(1); // TODO: correct cycle count
5763 void I386_OPS_BASE::SSEOP(psrlq_r128_rm128)() // Opcode 66 0f d3
5765 UINT8 modrm = FETCH();
5766 if( modrm >= 0xc0 ) {
5767 int count=(int)XMM(modrm & 7).q[0];
5768 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] >> count;
5769 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] >> count;
5772 UINT32 ea = GetEA(modrm, 0, 16);
5774 int count=(int)src.q[0];
5775 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] >> count;
5776 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] >> count;
5778 CYCLES(1); // TODO: correct cycle count
5781 void I386_OPS_BASE::SSEOP(psllw_r128_rm128)() // Opcode 66 0f f1
5783 UINT8 modrm = FETCH();
5784 if( modrm >= 0xc0 ) {
5785 int count=(int)XMM(modrm & 7).q[0];
5786 for (int n=0; n < 8;n++)
5787 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] << count;
5790 UINT32 ea = GetEA(modrm, 0, 16);
5792 int count=(int)s.q[0];
5793 for (int n=0; n < 8;n++)
5794 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] << count;
5796 CYCLES(1); // TODO: correct cycle count
5799 void I386_OPS_BASE::SSEOP(pslld_r128_rm128)() // Opcode 66 0f f2
5801 UINT8 modrm = FETCH();
5802 if( modrm >= 0xc0 ) {
5803 int count=(int)XMM(modrm & 7).q[0];
5804 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] << count;
5805 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] << count;
5806 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] << count;
5807 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] << count;
5810 UINT32 ea = GetEA(modrm, 0, 16);
5812 int count=(int)s.q[0];
5813 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] << count;
5814 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] << count;
5815 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] << count;
5816 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] << count;
5818 CYCLES(1); // TODO: correct cycle count
5821 void I386_OPS_BASE::SSEOP(psllq_r128_rm128)() // Opcode 66 0f f3
5823 UINT8 modrm = FETCH();
5824 if( modrm >= 0xc0 ) {
5825 int count=(int)XMM(modrm & 7).q[0];
5826 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] << count;
5827 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] << count;
5830 UINT32 ea = GetEA(modrm, 0, 16);
5832 int count=(int)s.q[0];
5833 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] << count;
5834 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] << count;
5836 CYCLES(1); // TODO: correct cycle count
5839 void I386_OPS_BASE::SSEOP(psraw_r128_rm128)() // Opcode 66 0f e1
5841 UINT8 modrm = FETCH();
5842 if( modrm >= 0xc0 ) {
5843 int count=(int)XMM(modrm & 7).q[0];
5844 for (int n=0; n < 8;n++)
5845 XMM((modrm >> 3) & 0x7).s[n]=XMM((modrm >> 3) & 0x7).s[n] >> count;
5848 UINT32 ea = GetEA(modrm, 0, 16);
5850 int count=(int)src.q[0];
5851 for (int n=0; n < 8;n++)
5852 XMM((modrm >> 3) & 0x7).s[n]=XMM((modrm >> 3) & 0x7).s[n] >> count;
5854 CYCLES(1); // TODO: correct cycle count
5857 void I386_OPS_BASE::SSEOP(psrad_r128_rm128)() // Opcode 66 0f e2
5859 UINT8 modrm = FETCH();
5860 if( modrm >= 0xc0 ) {
5861 int count=(int)XMM(modrm & 7).q[0];
5862 XMM((modrm >> 3) & 0x7).i[0]=XMM((modrm >> 3) & 0x7).i[0] >> count;
5863 XMM((modrm >> 3) & 0x7).i[1]=XMM((modrm >> 3) & 0x7).i[1] >> count;
5864 XMM((modrm >> 3) & 0x7).i[2]=XMM((modrm >> 3) & 0x7).i[2] >> count;
5865 XMM((modrm >> 3) & 0x7).i[3]=XMM((modrm >> 3) & 0x7).i[3] >> count;
5868 UINT32 ea = GetEA(modrm, 0, 16);
5870 int count=(int)src.q[0];
5871 XMM((modrm >> 3) & 0x7).i[0]=XMM((modrm >> 3) & 0x7).i[0] >> count;
5872 XMM((modrm >> 3) & 0x7).i[1]=XMM((modrm >> 3) & 0x7).i[1] >> count;
5873 XMM((modrm >> 3) & 0x7).i[2]=XMM((modrm >> 3) & 0x7).i[2] >> count;
5874 XMM((modrm >> 3) & 0x7).i[3]=XMM((modrm >> 3) & 0x7).i[3] >> count;
5876 CYCLES(1); // TODO: correct cycle count
// MOVNTDQ (66 0F E7): stores the XMM register selected by modrm bits 3-5 to
// the 128-bit memory operand.
5879 void I386_OPS_BASE::SSEOP(movntdq_m128_r128)() // Opcode 66 0f e7
5881 UINT8 modrm = FETCH();
// A register destination is not a supported encoding; only cycles are charged.
5882 if( modrm >= 0xc0 ) {
5883 CYCLES(1); // unsupported
// The non-temporal hint has no effect here, so this is a plain 128-bit store.
5885 // since cache is not implemented
5886 UINT32 ea = GetEA(modrm, 0, 16);
5887 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
5888 CYCLES(1); // TODO: correct cycle count
5892 void I386_OPS_BASE::SSEOP(cvttpd2dq_r128_rm128)() // Opcode 66 0f e6
5894 UINT8 modrm = FETCH();
5895 if( modrm >= 0xc0 ) {
5896 XMM((modrm >> 3) & 0x7).i[0]=(INT32)XMM((modrm >> 3) & 0x7).f64[0];
5897 XMM((modrm >> 3) & 0x7).i[1]=(INT32)XMM((modrm >> 3) & 0x7).f64[1];
5898 XMM((modrm >> 3) & 0x7).q[1] = 0;
5901 UINT32 ea = GetEA(modrm, 0, 16);
5903 XMM((modrm >> 3) & 0x7).i[0]=(INT32)src.f64[0];
5904 XMM((modrm >> 3) & 0x7).i[1]=(INT32)src.f64[1];
5905 XMM((modrm >> 3) & 0x7).q[1] = 0;
5907 CYCLES(1); // TODO: correct cycle count
// MOVQ (66 0F D6): moves the low 64 bits of the XMM register selected by
// modrm bits 3-5 to another XMM register or to memory.
5910 void I386_OPS_BASE::SSEOP(movq_r128m64_r128)() // Opcode 66 0f d6
5912 UINT8 modrm = FETCH();
// Register form: the destination is XMM(modrm & 7) and its upper 64 bits are cleared.
5913 if( modrm >= 0xc0 ) {
5914 XMM(modrm & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0];
5915 XMM(modrm & 0x7).q[1] = 0;
// Memory form: only 64 bits are written.
// NOTE(review): GetEA is passed 16 here while other 64-bit accesses in this file (e.g. movsd) pass 8 - confirm.
5917 UINT32 ea = GetEA(modrm, 0, 16);
5918 WRITE64( ea, XMM((modrm >> 3) & 0x7).q[0]);
5920 CYCLES(1); // TODO: correct cycle count
// ADDSUBPD (66 0F D0): subtracts the source from the low double and adds the
// source to the high double of the destination XMM register.
5923 void I386_OPS_BASE::SSEOP(addsubpd_r128_rm128)() // Opcode 66 0f d0
5925 UINT8 modrm = FETCH();
// Register form: `s` and `d` are declared, and `s` assigned, on lines not visible here (presumably s = modrm & 7 - confirm).
5926 if( modrm >= 0xc0 ) {
5929 d=(modrm >> 3) & 0x7;
5930 XMM(d).f64[0]=XMM(d).f64[0]-XMM(s).f64[0];
5931 XMM(d).f64[1]=XMM(d).f64[1]+XMM(s).f64[1];
// Memory form: `src` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5935 UINT32 ea = GetEA(modrm, 0, 16);
5936 d=(modrm >> 3) & 0x7;
5938 XMM(d).f64[0]=XMM(d).f64[0]-src.f64[0];
5939 XMM(d).f64[1]=XMM(d).f64[1]+src.f64[1];
5941 CYCLES(1); // TODO: correct cycle count
5944 void I386_OPS_BASE::SSEOP(haddpd_r128_rm128)() // Opcode 66 0f 7c
5946 UINT8 modrm = FETCH();
5947 if( modrm >= 0xc0 ) {
5950 d=(modrm >> 3) & 0x7;
5951 XMM(d).f64[0]=XMM(d).f64[0]+XMM(d).f64[1];
5952 XMM(d).f64[1]=XMM(s).f64[0]+XMM(s).f64[1];
5956 UINT32 ea = GetEA(modrm, 0, 16);
5957 d=(modrm >> 3) & 0x7;
5959 XMM(d).f64[0]=XMM(d).f64[0]+XMM(d).f64[1];
5960 XMM(d).f64[1]=src.f64[0]+src.f64[1];
5962 CYCLES(1); // TODO: correct cycle count
5965 void I386_OPS_BASE::SSEOP(hsubpd_r128_rm128)() // Opcode 66 0f 7d
5967 UINT8 modrm = FETCH();
5968 if( modrm >= 0xc0 ) {
5971 d=(modrm >> 3) & 0x7;
5972 XMM(d).f64[0]=XMM(d).f64[0]-XMM(d).f64[1];
5973 XMM(d).f64[1]=XMM(s).f64[0]-XMM(s).f64[1];
5977 UINT32 ea = GetEA(modrm, 0, 16);
5978 d=(modrm >> 3) & 0x7;
5980 XMM(d).f64[0]=XMM(d).f64[0]-XMM(d).f64[1];
5981 XMM(d).f64[1]=src.f64[0]-src.f64[1];
5983 CYCLES(1); // TODO: correct cycle count
// SQRTPD (66 0F 51): stores the square root of each of the two source doubles
// in the corresponding lane of the destination XMM register.
5986 void I386_OPS_BASE::SSEOP(sqrtpd_r128_rm128)() // Opcode 66 0f 51
5988 UINT8 modrm = FETCH();
// Register form: `s` and `d` are declared, and `s` assigned, on lines not visible here (presumably s = modrm & 7 - confirm).
5989 if( modrm >= 0xc0 ) {
5992 d=(modrm >> 3) & 0x7;
5993 XMM(d).f64[0]=sqrt(XMM(s).f64[0]);
5994 XMM(d).f64[1]=sqrt(XMM(s).f64[1]);
// Memory form: `src` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
5998 UINT32 ea = GetEA(modrm, 0, 16);
5999 d=(modrm >> 3) & 0x7;
6001 XMM(d).f64[0]=sqrt(src.f64[0]);
6002 XMM(d).f64[1]=sqrt(src.f64[1]);
6004 CYCLES(1); // TODO: correct cycle count
// CVTPI2PD (66 0F 2A): converts two signed 32-bit integers from an MMX
// register or 64-bit memory operand to the two doubles of the destination XMM
// register.
6007 void I386_OPS_BASE::SSEOP(cvtpi2pd_r128_rm64)() // Opcode 66 0f 2a
6009 UINT8 modrm = FETCH();
// Register form: the source is MMX(modrm & 7).
6010 if( modrm >= 0xc0 ) {
6012 XMM((modrm >> 3) & 0x7).f64[0] = (double)MMX(modrm & 0x7).i[0];
6013 XMM((modrm >> 3) & 0x7).f64[1] = (double)MMX(modrm & 0x7).i[1];
// Memory form: `r` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6016 UINT32 ea = GetEA(modrm, 0, 8);
6018 XMM((modrm >> 3) & 0x7).f64[0] = (double)r.i[0];
6019 XMM((modrm >> 3) & 0x7).f64[1] = (double)r.i[1];
6021 CYCLES(1); // TODO: correct cycle count
// CVTTPD2PI (66 0F 2C): converts the two source doubles to signed 32-bit
// integers in the MMX register selected by modrm bits 3-5. The implicit
// double-to-integer assignment truncates toward zero.
6024 void I386_OPS_BASE::SSEOP(cvttpd2pi_r64_rm128)() // Opcode 66 0f 2c
6026 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
6028 if( modrm >= 0xc0 ) {
6029 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f64[0];
6030 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f64[1];
// Memory form: `r` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6033 UINT32 ea = GetEA(modrm, 0, 16);
6035 MMX((modrm >> 3) & 0x7).i[0] = r.f64[0];
6036 MMX((modrm >> 3) & 0x7).i[1] = r.f64[1];
6038 CYCLES(1); // TODO: correct cycle count
// CVTPD2PI (66 0F 2D): converts the two source doubles to signed 32-bit
// integers in the MMX register selected by modrm bits 3-5.
// NOTE(review): the visible statements are identical to cvttpd2pi; the implicit
// double-to-integer assignment truncates, so no separate rounding behaviour
// is implemented here - confirm whether that is intended.
6041 void I386_OPS_BASE::SSEOP(cvtpd2pi_r64_rm128)() // Opcode 66 0f 2d
6043 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
6045 if( modrm >= 0xc0 ) {
6046 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f64[0];
6047 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f64[1];
// Memory form: `r` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6050 UINT32 ea = GetEA(modrm, 0, 16);
6052 MMX((modrm >> 3) & 0x7).i[0] = r.f64[0];
6053 MMX((modrm >> 3) & 0x7).i[1] = r.f64[1];
6055 CYCLES(1); // TODO: correct cycle count
// CVTPD2PS (66 0F 5A): narrows the two source doubles to floats in the low two
// float lanes of the destination XMM register and clears its upper 64 bits.
6058 void I386_OPS_BASE::SSEOP(cvtpd2ps_r128_rm128)() // Opcode 66 0f 5a
6060 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
6061 if( modrm >= 0xc0 ) {
6062 XMM((modrm >> 3) & 0x7).f[0] = (float)XMM(modrm & 0x7).f64[0];
6063 XMM((modrm >> 3) & 0x7).f[1] = (float)XMM(modrm & 0x7).f64[1];
6064 XMM((modrm >> 3) & 0x7).q[1] = 0;
// Memory form: `r` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6067 UINT32 ea = GetEA(modrm, 0, 16);
6069 XMM((modrm >> 3) & 0x7).f[0] = (float)r.f64[0];
6070 XMM((modrm >> 3) & 0x7).f[1] = (float)r.f64[1];
6071 XMM((modrm >> 3) & 0x7).q[1] = 0;
6073 CYCLES(1); // TODO: correct cycle count
// CVTPS2DQ (66 0F 5B): converts the four source floats to signed 32-bit
// integers in the destination XMM register.
// NOTE(review): the implicit float-to-integer assignment truncates toward zero;
// no rounding-mode handling is visible here - confirm whether that is intended.
6076 void I386_OPS_BASE::SSEOP(cvtps2dq_r128_rm128)() // Opcode 66 0f 5b
6078 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
6079 if( modrm >= 0xc0 ) {
6080 XMM((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0];
6081 XMM((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1];
6082 XMM((modrm >> 3) & 0x7).i[2] = XMM(modrm & 0x7).f[2];
6083 XMM((modrm >> 3) & 0x7).i[3] = XMM(modrm & 0x7).f[3];
// Memory form: `r` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6086 UINT32 ea = GetEA(modrm, 0, 16);
6088 XMM((modrm >> 3) & 0x7).i[0] = r.f[0];
6089 XMM((modrm >> 3) & 0x7).i[1] = r.f[1];
6090 XMM((modrm >> 3) & 0x7).i[2] = r.f[2];
6091 XMM((modrm >> 3) & 0x7).i[3] = r.f[3];
6093 CYCLES(1); // TODO: correct cycle count
// ADDPD (66 0F 58): adds the two source doubles to the corresponding lanes of
// the destination XMM register.
6096 void I386_OPS_BASE::SSEOP(addpd_r128_rm128)() // Opcode 66 0f 58
6098 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
6099 if( modrm >= 0xc0 ) {
6100 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + XMM(modrm & 0x7).f64[0];
6101 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] + XMM(modrm & 0x7).f64[1];
// Memory form: `src` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6104 UINT32 ea = GetEA(modrm, 0, 16);
6106 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + src.f64[0];
6107 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] + src.f64[1];
6109 CYCLES(1); // TODO: correct cycle count
// MULPD (66 0F 59): multiplies the two doubles of the destination XMM register
// by the corresponding source doubles.
6112 void I386_OPS_BASE::SSEOP(mulpd_r128_rm128)() // Opcode 66 0f 59
6114 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
6115 if( modrm >= 0xc0 ) {
6116 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * XMM(modrm & 0x7).f64[0];
6117 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] * XMM(modrm & 0x7).f64[1];
// Memory form: `src` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6120 UINT32 ea = GetEA(modrm, 0, 16);
6122 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * src.f64[0];
6123 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] * src.f64[1];
6125 CYCLES(1); // TODO: correct cycle count
// SUBPD (66 0F 5C): subtracts the two source doubles from the corresponding
// lanes of the destination XMM register.
6128 void I386_OPS_BASE::SSEOP(subpd_r128_rm128)() // Opcode 66 0f 5c
6130 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
6131 if( modrm >= 0xc0 ) {
6132 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - XMM(modrm & 0x7).f64[0];
6133 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] - XMM(modrm & 0x7).f64[1];
// Memory form: `src` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6136 UINT32 ea = GetEA(modrm, 0, 16);
6138 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - src.f64[0];
6139 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] - src.f64[1];
6141 CYCLES(1); // TODO: correct cycle count
// MINPD (66 0F 5D): replaces each double lane of the destination with the
// result of sse_min_double(destination lane, source lane).
6144 void I386_OPS_BASE::SSEOP(minpd_r128_rm128)() // Opcode 66 0f 5d
6146 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
6147 if( modrm >= 0xc0 ) {
6148 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6149 XMM((modrm >> 3) & 0x7).f64[1] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[1], XMM(modrm & 0x7).f64[1]);
// Memory form: `src` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6152 UINT32 ea = GetEA(modrm, 0, 16);
6154 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6155 XMM((modrm >> 3) & 0x7).f64[1] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[1], src.f64[1]);
6157 CYCLES(1); // TODO: correct cycle count
// DIVPD (66 0F 5E): divides the two doubles of the destination XMM register by
// the corresponding source doubles.
6160 void I386_OPS_BASE::SSEOP(divpd_r128_rm128)() // Opcode 66 0f 5e
6162 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
6163 if( modrm >= 0xc0 ) {
6164 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / XMM(modrm & 0x7).f64[0];
6165 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] / XMM(modrm & 0x7).f64[1];
// Memory form: `src` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6168 UINT32 ea = GetEA(modrm, 0, 16);
6170 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / src.f64[0];
6171 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] / src.f64[1];
6173 CYCLES(1); // TODO: correct cycle count
// MAXPD (66 0F 5F): replaces each double lane of the destination with the
// result of sse_max_double(destination lane, source lane).
6176 void I386_OPS_BASE::SSEOP(maxpd_r128_rm128)() // Opcode 66 0f 5f
6178 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
6179 if( modrm >= 0xc0 ) {
6180 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6181 XMM((modrm >> 3) & 0x7).f64[1] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[1], XMM(modrm & 0x7).f64[1]);
// Memory form: `src` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6184 UINT32 ea = GetEA(modrm, 0, 16);
6186 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6187 XMM((modrm >> 3) & 0x7).f64[1] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[1], src.f64[1]);
6189 CYCLES(1); // TODO: correct cycle count
// MOVNTPD (66 0F 2B): stores the XMM register selected by modrm bits 3-5 to
// the 128-bit memory operand.
6192 void I386_OPS_BASE::SSEOP(movntpd_m128_r128)() // Opcode 66 0f 2b
6194 UINT8 modrm = FETCH();
// A register destination is not a supported encoding; only cycles are charged.
6195 if( modrm >= 0xc0 ) {
6196 // unsupported by cpu
6197 CYCLES(1); // TODO: correct cycle count
// The non-temporal hint has no effect here, so this is a plain 128-bit store.
6199 // since cache is not implemented
6200 UINT32 ea = GetEA(modrm, 0, 16);
6201 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
6202 CYCLES(1); // TODO: correct cycle count
// MOVAPD load form (66 0F 28): copies 128 bits from an XMM register or memory
// into the XMM register selected by modrm bits 3-5.
// NOTE(review): no alignment check on the memory operand is visible in this excerpt.
6206 void I386_OPS_BASE::SSEOP(movapd_r128_rm128)() // Opcode 66 0f 28
6208 UINT8 modrm = FETCH();
// Register form: whole-register copy from XMM(modrm & 7).
6209 if( modrm >= 0xc0 ) {
6210 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
// Memory form: read straight into the destination register.
6212 UINT32 ea = GetEA(modrm, 0, 16);
6213 READXMM( ea, XMM((modrm >> 3) & 0x7));
6215 CYCLES(1); // TODO: correct cycle count
// MOVAPD store form (66 0F 29): copies the XMM register selected by modrm
// bits 3-5 to another XMM register or to a 128-bit memory operand.
// NOTE(review): no alignment check on the memory operand is visible in this excerpt.
6218 void I386_OPS_BASE::SSEOP(movapd_rm128_r128)() // Opcode 66 0f 29
6220 UINT8 modrm = FETCH();
// Register form: the destination is XMM(modrm & 7).
6221 if( modrm >= 0xc0 ) {
6222 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
// Memory form: 128-bit store at the effective address.
6224 UINT32 ea = GetEA(modrm, 0, 16);
6225 WRITEXMM( ea, XMM((modrm >> 3) & 0x7));
6227 CYCLES(1); // TODO: correct cycle count
// MOVSD load form (F2 0F 10): moves a scalar double into the low 64 bits of
// the XMM register selected by modrm bits 3-5.
6230 void I386_OPS_BASE::SSEOP(movsd_r128_r128m64)() // Opcode f2 0f 10
6232 UINT8 modrm = FETCH();
// Register form: only the low quadword is copied; the destination's upper half is left unchanged.
6233 if( modrm >= 0xc0 ) {
6234 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
// Memory form: load 64 bits and clear the destination's upper half.
6236 UINT32 ea = GetEA(modrm, 0, 8);
6237 READXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
6238 XMM((modrm >> 3) & 0x7).q[1] = 0;
6240 CYCLES(1); // TODO: correct cycle count
// MOVSD store form (F2 0F 11): moves the low 64 bits of the XMM register
// selected by modrm bits 3-5 to another XMM register or to memory.
6243 void I386_OPS_BASE::SSEOP(movsd_r128m64_r128)() // Opcode f2 0f 11
6245 UINT8 modrm = FETCH();
// Register form: only the low quadword of XMM(modrm & 7) is replaced.
6246 if( modrm >= 0xc0 ) {
6247 XMM(modrm & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0];
// Memory form: 64-bit store at the effective address.
6249 UINT32 ea = GetEA(modrm, 0, 8);
6250 WRITEXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
6252 CYCLES(1); // TODO: correct cycle count
// MOVDDUP (F2 0F 12): loads a 64-bit value from the low half of an XMM
// register or from memory and duplicates it into both halves of the
// destination XMM register.
6255 void I386_OPS_BASE::SSEOP(movddup_r128_r128m64)() // Opcode f2 0f 12
6257 UINT8 modrm = FETCH();
// Register form: the source is the low quadword of XMM(modrm & 7).
6258 if( modrm >= 0xc0 ) {
6259 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
// The upper half is copied from the destination's freshly written low half.
6260 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[0];
// Memory form: load 64 bits into the low half, then duplicate it.
6262 UINT32 ea = GetEA(modrm, 0, 8);
6263 READXMM_LO64( ea, XMM((modrm >> 3) & 0x7));
6264 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[0];
6266 CYCLES(1); // TODO: correct cycle count
// CVTSI2SD (F2 0F 2A): converts a signed 32-bit integer from a general
// register or memory to a double in the low lane of the destination XMM
// register; the upper lane is not touched.
6269 void I386_OPS_BASE::SSEOP(cvtsi2sd_r128_rm32)() // Opcode f2 0f 2a
6271 UINT8 modrm = FETCH();
// Register form: the value comes from LOAD_RM32 and is reinterpreted as signed before conversion.
6272 if( modrm >= 0xc0 ) {
6273 XMM((modrm >> 3) & 0x7).f64[0] = (INT32)LOAD_RM32(modrm);
// Memory form: 32-bit read at the effective address.
6275 UINT32 ea = GetEA(modrm, 0, 4);
6276 XMM((modrm >> 3) & 0x7).f64[0] = (INT32)READ32(ea);
6278 CYCLES(1); // TODO: correct cycle count
// CVTTSD2SI (F2 0F 2C): converts the scalar double of the source to a signed
// 32-bit integer with truncation (C++ cast) and stores it in the 32-bit
// register selected by modrm.
// NOTE(review): `src` and `t` are declared on lines not visible in this excerpt.
6281 void I386_OPS_BASE::SSEOP(cvttsd2si_r32_r128m64)() // Opcode f2 0f 2c
6284 UINT8 modrm = FETCH();
// Register form: the source is the low double of XMM(modrm & 7).
6285 if( modrm >= 0xc0 ) {
6286 src = (INT32)XMM(modrm & 0x7).f64[0];
6287 } else { // otherwise is a memory address
6289 UINT32 ea = GetEA(modrm, 0, 8);
// Only the low 64 bits of the temporary are loaded from memory.
6290 READXMM_LO64( ea, t);
6291 src = (INT32)t.f64[0];
6293 STORE_REG32(modrm, (UINT32)src);
6294 CYCLES(1); // TODO: correct cycle count
// CVTSD2SI (F2 0F 2D): converts the scalar double of the source to a signed
// 32-bit integer and stores it in the 32-bit register selected by modrm.
// NOTE(review): the visible statements are identical to cvttsd2si; the (INT32)
// cast truncates, so no separate rounding behaviour is implemented here -
// confirm whether that is intended. `src` and `t` are declared on lines not
// visible in this excerpt.
6297 void I386_OPS_BASE::SSEOP(cvtsd2si_r32_r128m64)() // Opcode f2 0f 2d
6300 UINT8 modrm = FETCH();
// Register form: the source is the low double of XMM(modrm & 7).
6301 if( modrm >= 0xc0 ) {
6302 src = (INT32)XMM(modrm & 0x7).f64[0];
6303 } else { // otherwise is a memory address
6305 UINT32 ea = GetEA(modrm, 0, 8);
// Only the low 64 bits of the temporary are loaded from memory.
6306 READXMM_LO64( ea, t);
6307 src = (INT32)t.f64[0];
6309 STORE_REG32(modrm, (UINT32)src);
6310 CYCLES(1); // TODO: correct cycle count
6313 void I386_OPS_BASE::SSEOP(sqrtsd_r128_r128m64)() // Opcode f2 0f 51
6315 UINT8 modrm = FETCH();
6316 if( modrm >= 0xc0 ) {
6319 d=(modrm >> 3) & 0x7;
6320 XMM(d).f64[0]=sqrt(XMM(s).f64[0]);
6324 UINT32 ea = GetEA(modrm, 0, 16);
6325 d=(modrm >> 3) & 0x7;
6327 XMM(d).f64[0]=sqrt(src.f64[0]);
6329 CYCLES(1); // TODO: correct cycle count
6332 void I386_OPS_BASE::SSEOP(addsd_r128_r128m64)() // Opcode f2 0f 58
6334 UINT8 modrm = FETCH();
6335 if( modrm >= 0xc0 ) {
6336 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + XMM(modrm & 0x7).f64[0];
6339 UINT32 ea = GetEA(modrm, 0, 16);
6341 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + src.f64[0];
6343 CYCLES(1); // TODO: correct cycle count
6346 void I386_OPS_BASE::SSEOP(mulsd_r128_r128m64)() // Opcode f2 0f 59
6348 UINT8 modrm = FETCH();
6349 if( modrm >= 0xc0 ) {
6350 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * XMM(modrm & 0x7).f64[0];
6353 UINT32 ea = GetEA(modrm, 0, 16);
6355 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * src.f64[0];
6357 CYCLES(1); // TODO: correct cycle count
// CVTSD2SS (F2 0F 5A): narrows the source scalar double to a float in the
// lowest float lane of the destination XMM register; other lanes are not
// touched.
6360 void I386_OPS_BASE::SSEOP(cvtsd2ss_r128_r128m64)() // Opcode f2 0f 5a
6362 UINT8 modrm = FETCH();
// Register form: the source is the low double of XMM(modrm & 7).
6363 if( modrm >= 0xc0 ) {
6364 XMM((modrm >> 3) & 0x7).f[0] = XMM(modrm & 0x7).f64[0];
// Memory form: `s` is declared on a line not visible in this excerpt; only its low 64 bits are loaded.
6367 UINT32 ea = GetEA(modrm, 0, 8);
6368 READXMM_LO64( ea, s);
6369 XMM((modrm >> 3) & 0x7).f[0] = s.f64[0];
6371 CYCLES(1); // TODO: correct cycle count
6374 void I386_OPS_BASE::SSEOP(subsd_r128_r128m64)() // Opcode f2 0f 5c
6376 UINT8 modrm = FETCH();
6377 if( modrm >= 0xc0 ) {
6378 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - XMM(modrm & 0x7).f64[0];
6381 UINT32 ea = GetEA(modrm, 0, 16);
6383 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - src.f64[0];
6385 CYCLES(1); // TODO: correct cycle count
6388 void I386_OPS_BASE::SSEOP(minsd_r128_r128m64)() // Opcode f2 0f 5d
6390 UINT8 modrm = FETCH();
6391 if( modrm >= 0xc0 ) {
6392 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6395 UINT32 ea = GetEA(modrm, 0, 16);
6397 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6399 CYCLES(1); // TODO: correct cycle count
6402 void I386_OPS_BASE::SSEOP(divsd_r128_r128m64)() // Opcode f2 0f 5e
6404 UINT8 modrm = FETCH();
6405 if( modrm >= 0xc0 ) {
6406 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / XMM(modrm & 0x7).f64[0];
6409 UINT32 ea = GetEA(modrm, 0, 16);
6411 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / src.f64[0];
6413 CYCLES(1); // TODO: correct cycle count
6416 void I386_OPS_BASE::SSEOP(maxsd_r128_r128m64)() // Opcode f2 0f 5f
6418 UINT8 modrm = FETCH();
6419 if( modrm >= 0xc0 ) {
6420 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6423 UINT32 ea = GetEA(modrm, 0, 16);
6425 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6427 CYCLES(1); // TODO: correct cycle count
// HADDPS (F2 0F 7C): horizontal add of packed floats - adjacent float pairs of
// the destination and of the source are summed.
// NOTE(review): the stores of the temporaries into the destination lanes are on
// lines not visible in this excerpt (presumably f[0..3] = f1..f4 in the
// register form and f[0..1] = f1, f2 in the memory form) - confirm.
6430 void I386_OPS_BASE::SSEOP(haddps_r128_rm128)() // Opcode f2 0f 7c
6432 UINT8 modrm = FETCH();
// Register form: all four sums go through temporaries, so they are formed before any visible write to XMM(d).
6433 if( modrm >= 0xc0 ) {
6435 float f1, f2, f3, f4;
6437 d=(modrm >> 3) & 0x7;
6438 f1=XMM(d).f[0]+XMM(d).f[1];
6439 f2=XMM(d).f[2]+XMM(d).f[3];
6440 f3=XMM(s).f[0]+XMM(s).f[1];
6441 f4=XMM(s).f[2]+XMM(s).f[3];
// Memory form: `src` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6450 UINT32 ea = GetEA(modrm, 0, 16);
6451 d=(modrm >> 3) & 0x7;
6453 f1=XMM(d).f[0]+XMM(d).f[1];
6454 f2=XMM(d).f[2]+XMM(d).f[3];
6457 XMM(d).f[2]=src.f[0]+src.f[1];
6458 XMM(d).f[3]=src.f[2]+src.f[3];
6460 CYCLES(1); // TODO: correct cycle count
// HSUBPS (F2 0F 7D): horizontal subtract of packed floats - within each
// adjacent float pair of the destination and of the source, the second
// element is subtracted from the first.
// NOTE(review): the stores of the temporaries into the destination lanes are on
// lines not visible in this excerpt (presumably f[0..3] = f1..f4 in the
// register form and f[0..1] = f1, f2 in the memory form) - confirm.
6463 void I386_OPS_BASE::SSEOP(hsubps_r128_rm128)() // Opcode f2 0f 7d
6465 UINT8 modrm = FETCH();
// Register form: all four differences go through temporaries, so they are formed before any visible write to XMM(d).
6466 if( modrm >= 0xc0 ) {
6468 float f1, f2, f3, f4;
6470 d=(modrm >> 3) & 0x7;
6471 f1=XMM(d).f[0]-XMM(d).f[1];
6472 f2=XMM(d).f[2]-XMM(d).f[3];
6473 f3=XMM(s).f[0]-XMM(s).f[1];
6474 f4=XMM(s).f[2]-XMM(s).f[3];
// Memory form: `src` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6483 UINT32 ea = GetEA(modrm, 0, 16);
6484 d=(modrm >> 3) & 0x7;
6486 f1=XMM(d).f[0]-XMM(d).f[1];
6487 f2=XMM(d).f[2]-XMM(d).f[3];
6490 XMM(d).f[2]=src.f[0]-src.f[1];
6491 XMM(d).f[3]=src.f[2]-src.f[3];
6493 CYCLES(1); // TODO: correct cycle count
// CMPSD (F2 0F C2 /r ib): compares scalar doubles using the predicate in the
// immediate byte; the work is delegated to
// sse_predicate_compare_double_scalar, which receives the destination register.
6496 void I386_OPS_BASE::SSEOP(cmpsd_r128_r128m64_i8)() // Opcode f2 0f c2
6498 UINT8 modrm = FETCH();
// Register form: `s` and `d` are declared, and `s` assigned, on lines not visible here (presumably s = modrm & 7 - confirm).
6499 if( modrm >= 0xc0 ) {
6501 UINT8 imm8 = FETCH();
6503 d=(modrm >> 3) & 0x7;
6504 sse_predicate_compare_double_scalar(imm8, XMM(d), XMM(s));
// Memory form: the effective address is decoded before the immediate byte is fetched,
// because the immediate follows the addressing bytes in the instruction stream.
6508 UINT32 ea = GetEA(modrm, 0, 8);
6509 UINT8 imm8 = FETCH();
6510 READXMM_LO64( ea, s);
6511 d=(modrm >> 3) & 0x7;
6512 sse_predicate_compare_double_scalar(imm8, XMM(d), s);
6514 CYCLES(1); // TODO: correct cycle count
// ADDSUBPS (F2 0F D0): subtracts the source from the even float lanes (0, 2)
// and adds the source to the odd float lanes (1, 3) of the destination.
6517 void I386_OPS_BASE::SSEOP(addsubps_r128_rm128)() // Opcode f2 0f d0
6519 UINT8 modrm = FETCH();
// Register form: the source is XMM(modrm & 7).
6520 if( modrm >= 0xc0 ) {
6521 XMM((modrm >> 3) & 0x7).f[0]=XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0];
6522 XMM((modrm >> 3) & 0x7).f[1]=XMM((modrm >> 3) & 0x7).f[1] + XMM(modrm & 0x7).f[1];
6523 XMM((modrm >> 3) & 0x7).f[2]=XMM((modrm >> 3) & 0x7).f[2] - XMM(modrm & 0x7).f[2];
6524 XMM((modrm >> 3) & 0x7).f[3]=XMM((modrm >> 3) & 0x7).f[3] + XMM(modrm & 0x7).f[3];
// Memory form: `src` is declared and filled on lines not visible in this excerpt (presumably read from ea - confirm).
6527 UINT32 ea = GetEA(modrm, 0, 16);
6529 XMM((modrm >> 3) & 0x7).f[0]=XMM((modrm >> 3) & 0x7).f[0] - src.f[0];
6530 XMM((modrm >> 3) & 0x7).f[1]=XMM((modrm >> 3) & 0x7).f[1] + src.f[1];
6531 XMM((modrm >> 3) & 0x7).f[2]=XMM((modrm >> 3) & 0x7).f[2] - src.f[2];
6532 XMM((modrm >> 3) & 0x7).f[3]=XMM((modrm >> 3) & 0x7).f[3] + src.f[3];
6534 CYCLES(1); // TODO: correct cycle count
// MOVDQ2Q (F2 0F D6): copies the low 64 bits of XMM(modrm & 7) into the MMX
// register selected by modrm bits 3-5. Only the register form does any work.
6537 void I386_OPS_BASE::SSEOP(movdq2q_r64_r128)() // Opcode f2 0f d6
6539 UINT8 modrm = FETCH();
6541 if( modrm >= 0xc0 ) {
6542 MMX((modrm >> 3) & 0x7).q = XMM(modrm & 0x7).q[0];
6543 CYCLES(1); // TODO: correct cycle count
// A memory operand is not a supported encoding; only cycles are charged.
6545 // unsupported by cpu
6546 CYCLES(1); // TODO: correct cycle count
6550 void I386_OPS_BASE::SSEOP(cvtpd2dq_r128_rm128)() // Opcode f2 0f e6
6552 UINT8 modrm = FETCH();
6553 if( modrm >= 0xc0 ) {
6554 XMM((modrm >> 3) & 0x7).i[0]=(INT32)XMM((modrm >> 3) & 0x7).f64[0];
6555 XMM((modrm >> 3) & 0x7).i[1]=(INT32)XMM((modrm >> 3) & 0x7).f64[1];
6556 XMM((modrm >> 3) & 0x7).q[1] = 0;
6559 UINT32 ea = GetEA(modrm, 0, 16);
6561 XMM((modrm >> 3) & 0x7).i[0]=(INT32)src.f64[0];
6562 XMM((modrm >> 3) & 0x7).i[1]=(INT32)src.f64[1];
6563 XMM((modrm >> 3) & 0x7).q[1] = 0;
6565 CYCLES(1); // TODO: correct cycle count
6568 void I386_OPS_BASE::SSEOP(lddqu_r128_m128)() // Opcode f2 0f f0
6570 UINT8 modrm = FETCH();
6571 if( modrm >= 0xc0 ) {
6572 // unsupported by cpu
6573 CYCLES(1); // TODO: correct cycle count
6575 UINT32 ea = GetEA(modrm, 0, 16);
6576 READXMM(ea, XMM((modrm >> 3) & 0x7));