return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
+ case X86::MOVHLPSrr:
+ case X86::UNPCKHPDrr: {
+ if (!Subtarget.hasSSE2())
+ return nullptr;
+
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::MOVHLPSrr: Opc = X86::UNPCKHPDrr; break;
+ case X86::UNPCKHPDrr: Opc = X86::MOVHLPSrr; break;
+ }
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(Opc));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
+ let isCommutable = 1 in
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
PatFrag mem_frag, RegisterClass RC,
X86MemOperand x86memop, string asm,
- Domain d> {
+ Domain d, bit IsCommutable = 0> {
+ let isCommutable = IsCommutable in
def rr : PI<opc, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
SSEPackedSingle>, PS;
defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
- SSEPackedDouble>, PD;
+ SSEPackedDouble, 1>, PD;
defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
SSEPackedSingle>, PS;
; CHECK-NEXT: movapd 0, %xmm0
; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; CHECK-NEXT: xorpd %xmm2, %xmm2
-; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
-; CHECK-NEXT: addps %xmm1, %xmm0
-; CHECK-NEXT: movaps %xmm0, 0
+; CHECK-NEXT: xorps %xmm2, %xmm2
+; CHECK-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; CHECK-NEXT: addps %xmm1, %xmm2
+; CHECK-NEXT: movaps %xmm2, 0
; CHECK-NEXT: retl
%tmp1 = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=2]
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
; X32-LABEL: t3:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movapd (%eax), %xmm0
-; X32-NEXT: xorpd %xmm1, %xmm1
-; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X32-NEXT: retl
;
; X64-LABEL: t3:
; X64: # BB#0:
-; X64-NEXT: movapd (%rdi), %xmm0
-; X64-NEXT: xorpd %xmm1, %xmm1
-; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X64-NEXT: retq
%tmp1 = load <4 x float>, <4 x float>* %P
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_11:
; SSE: # BB#0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_11:
define <4 x float> @shuffle_v4f32_6723(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_6723:
; SSE: # BB#0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4f32_6723:
define <4 x float> @combine_test4(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test4:
; SSE: # BB#0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test4:
define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test4:
; SSE: # BB#0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test4:
define <4 x float> @combine_undef_input_test9(<4 x float> %a) {
; SSE-LABEL: combine_undef_input_test9:
; SSE: # BB#0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test9:
define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test14:
; SSE: # BB#0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test14:
define <4 x float> @combine_undef_input_test19(<4 x float> %a) {
; SSE-LABEL: combine_undef_input_test19:
; SSE: # BB#0:
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test19: