From 04a45948a05d75ed6b0b2ab52fa2516c74f6a987 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 5 Nov 2014 06:43:02 +0000 Subject: [PATCH] Improve logic that decides if it's profitable to commute when some of the virtual registers involved have use/def chains connecting them to physical registers. Fix up the tests that this change improves. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221336 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 19 ++++++++--- test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll | 2 +- test/CodeGen/X86/fma_patterns.ll | 4 +-- test/CodeGen/X86/sse-minmax.ll | 48 ++++++++++------------------ test/CodeGen/X86/sse41.ll | 20 +++++------- test/CodeGen/X86/vec_setcc.ll | 6 ++-- test/CodeGen/X86/vector-shuffle-128-v2.ll | 12 +++---- test/CodeGen/X86/vector-shuffle-128-v4.ll | 18 ++++------- test/CodeGen/X86/vector-shuffle-combining.ll | 6 ++-- test/CodeGen/X86/vselect.ll | 3 +- 10 files changed, 57 insertions(+), 81 deletions(-) diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 4ed9bf2fff5..428846a3ce2 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -545,10 +545,21 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, if (ToRegA) { unsigned FromRegB = getMappedReg(regB, SrcRegMap); unsigned FromRegC = getMappedReg(regC, SrcRegMap); - bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI); - bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI); - if (BComp != CComp) - return !BComp && CComp; + bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI); + bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI); + + // Commute if any of the following are true: + // -RegB is not tied to a register and RegC is compatible with RegA. + // -RegB is tied to the wrong physical register, but RegC is tied to a compatible one. 
+ // -RegB is tied to the wrong physical register, and RegC isn't tied. + if ((!FromRegB && CompC) || (FromRegB && !CompB && (!FromRegC || CompC))) + return true; + // Don't commute if any of the following are true: + // -RegC is not tied to a register and RegB is compatible with RegA. + // -RegC is tied to the wrong physical register, but RegB is tied to a compatible one. + // -RegC is tied to the wrong physical register, and RegB isn't tied. + if ((!FromRegC && CompB) || (FromRegC && !CompC && (!FromRegB || CompB))) + return false; } // If there is a use of regC between its last def (could be livein) and this diff --git a/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll b/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll index b39978b9d44..369ac96a205 100644 --- a/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll +++ b/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll @@ -5,7 +5,7 @@ define i32 @t(i32 %a) nounwind { ; CHECK-LABEL: t: ; CHECK: asrs [[REG1:(r[0-9]+)]], [[REG2:(r[0-9]+)]], #31 -; CHECK: eors [[REG1]], [[REG2]] +; CHECK: eors [[REG2]], [[REG1]] %tmp0 = ashr i32 %a, 31 %tmp1 = xor i32 %tmp0, %a ret i32 %tmp1 diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll index cfb598df634..9b52db9f14e 100644 --- a/test/CodeGen/X86/fma_patterns.ll +++ b/test/CodeGen/X86/fma_patterns.ll @@ -184,7 +184,7 @@ define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) { ; CHECK: test_x86_fmadd_ps_load ; CHECK: vmovaps (%rdi), %xmm2 -; CHECK: vfmadd213ps %xmm1, %xmm0, %xmm2 +; CHECK: vfmadd213ps %xmm1, %xmm2, %xmm0 ; CHECK: ret ; CHECK_FMA4: test_x86_fmadd_ps_load ; CHECK_FMA4: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 @@ -198,7 +198,7 @@ define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 ; CHECK: test_x86_fmsub_ps_load ; CHECK: vmovaps (%rdi), %xmm2 -; CHECK: fmsub213ps %xmm1, %xmm0, %xmm2 +; CHECK: fmsub213ps %xmm1, %xmm2, %xmm0 ; CHECK: ret ; CHECK_FMA4: test_x86_fmsub_ps_load ; CHECK_FMA4: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 diff --git 
a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index 5122c44131a..da36a423286 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -138,8 +138,7 @@ define double @ole_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: ret ; UNSAFE-LABEL: ogt_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: ogt_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -157,8 +156,7 @@ define double @ogt_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE-LABEL: olt_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: olt_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -177,8 +175,7 @@ define double @olt_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE-LABEL: ogt_inverse_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: ogt_inverse_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -198,8 +195,7 @@ define double @ogt_inverse_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE-LABEL: olt_inverse_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: olt_inverse_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -217,8 +213,7 @@ define double @olt_inverse_x(double %x) nounwind { ; CHECK-NEXT: andpd ; UNSAFE-LABEL: oge_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: oge_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -235,8 +230,7 @@ define double @oge_x(double 
%x) nounwind { ; CHECK-NEXT: andpd ; UNSAFE-LABEL: ole_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: ole_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -253,8 +247,7 @@ define double @ole_x(double %x) nounwind { ; CHECK-NEXT: andnpd ; UNSAFE-LABEL: oge_inverse_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: oge_inverse_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -271,8 +264,7 @@ define double @oge_inverse_x(double %x) nounwind { ; CHECK: cmplesd %xmm ; UNSAFE-LABEL: ole_inverse_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: ole_inverse_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -412,8 +404,7 @@ define double @ule_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: andpd ; UNSAFE-LABEL: ugt_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: ugt_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -430,8 +421,7 @@ define double @ugt_x(double %x) nounwind { ; CHECK-NEXT: andpd ; UNSAFE-LABEL: ult_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: ult_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -448,8 +438,7 @@ define double @ult_x(double %x) nounwind { ; CHECK-NEXT: andnpd ; UNSAFE-LABEL: ugt_inverse_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; 
UNSAFE-NEXT: ret ; FINITE-LABEL: ugt_inverse_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -467,8 +456,7 @@ define double @ugt_inverse_x(double %x) nounwind { ; CHECK-NEXT: andnpd ; UNSAFE-LABEL: ult_inverse_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: ult_inverse_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -488,8 +476,7 @@ define double @ult_inverse_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE-LABEL: uge_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: uge_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -508,8 +495,7 @@ define double @uge_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE-LABEL: ule_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: ule_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -527,8 +513,7 @@ define double @ule_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE-LABEL: uge_inverse_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: uge_inverse_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -547,8 +532,7 @@ define double @uge_inverse_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE-LABEL: ule_inverse_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE-LABEL: ule_inverse_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index cbeb20e859a..6dc2286869d 100644 --- 
a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -524,17 +524,15 @@ define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) { ; X32-LABEL: shuf_X00A: ; X32: ## BB#0: ; X32-NEXT: xorps %xmm2, %xmm2 -; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] -; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],zero,zero,xmm1[0] -; X32-NEXT: movaps %xmm2, %xmm0 +; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] +; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: shuf_X00A: ; X64: ## BB#0: ; X64-NEXT: xorps %xmm2, %xmm2 -; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] -; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],zero,zero,xmm1[0] -; X64-NEXT: movaps %xmm2, %xmm0 +; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] +; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] ; X64-NEXT: retq %vecext = extractelement <4 x float> %x, i32 0 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 @@ -694,17 +692,15 @@ define <4 x i32> @i32_shuf_X00A(<4 x i32> %x, <4 x i32> %a) { ; X32-LABEL: i32_shuf_X00A: ; X32: ## BB#0: ; X32-NEXT: xorps %xmm2, %xmm2 -; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] -; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[0] -; X32-NEXT: movaps %xmm2, %xmm0 +; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] +; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: i32_shuf_X00A: ; X64: ## BB#0: ; X64-NEXT: xorps %xmm2, %xmm2 -; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] -; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[0] -; X64-NEXT: movaps %xmm2, %xmm0 +; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] +; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; X64-NEXT: retq %vecext = extractelement <4 x i32> %x, i32 0 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 diff --git a/test/CodeGen/X86/vec_setcc.ll b/test/CodeGen/X86/vec_setcc.ll index 322dbae0c89..b69f90cd6e2 
100644 --- a/test/CodeGen/X86/vec_setcc.ll +++ b/test/CodeGen/X86/vec_setcc.ll @@ -62,8 +62,7 @@ define <8 x i16> @v8i16_icmp_ule(<8 x i16> %a, <8 x i16> %b) nounwind readnone s ; SSE2-LABEL: v8i16_icmp_ule: ; SSE2: psubusw %xmm1, %xmm0 ; SSE2: pxor %xmm1, %xmm1 -; SSE2: pcmpeqw %xmm0, %xmm1 -; SSE2: movdqa %xmm1, %xmm0 +; SSE2: pcmpeqw %xmm1, %xmm0 ; SSE41-LABEL: v8i16_icmp_ule: ; SSE41: pminuw %xmm0, %xmm1 @@ -106,8 +105,7 @@ define <4 x i32> @v4i32_icmp_ule(<4 x i32> %a, <4 x i32> %b) nounwind readnone s ; SSE2: pxor %xmm2, %xmm0 ; SSE2: pcmpgtd %xmm1, %xmm0 ; SSE2: pcmpeqd %xmm1, %xmm1 -; SSE2: pxor %xmm0, %xmm1 -; SSE2: movdqa %xmm1, %xmm0 +; SSE2: pxor %xmm1, %xmm0 ; SSE41-LABEL: v4i32_icmp_ule: ; SSE41: pminud %xmm0, %xmm1 diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll index 6e11c0e1079..9affee91700 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -718,8 +718,7 @@ define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) { ; SSE41-LABEL: shuffle_v2i64_z1: ; SSE41: # BB#0: ; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: shuffle_v2i64_z1: @@ -806,8 +805,7 @@ define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) { ; SSE41-LABEL: shuffle_v2f64_z1: ; SSE41: # BB#0: ; SSE41-NEXT: xorpd %xmm1, %xmm1 -; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] -; SSE41-NEXT: movapd %xmm1, %xmm0 +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_z1: @@ -903,8 +901,7 @@ define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) { ; SSE41-LABEL: insert_reg_lo_v2i64: ; SSE41: # BB#0: ; SSE41-NEXT: movd %rdi, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendw 
{{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: insert_reg_lo_v2i64: @@ -942,8 +939,7 @@ define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) { ; SSE41-LABEL: insert_mem_lo_v2i64: ; SSE41: # BB#0: ; SSE41-NEXT: movq (%rdi), %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: insert_mem_lo_v2i64: diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll index 019988b3762..833b8225700 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -462,8 +462,7 @@ define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) { ; SSE41-LABEL: shuffle_v4f32_4zzz: ; SSE41: # BB#0: ; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] -; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v4f32_4zzz: @@ -611,8 +610,7 @@ define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) { ; SSE41-LABEL: shuffle_v4f32_zzz7: ; SSE41: # BB#0: ; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] -; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v4f32_zzz7: @@ -684,8 +682,7 @@ define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) { ; SSE41-LABEL: shuffle_v4i32_4zzz: ; SSE41: # BB#0: ; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] -; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_v4i32_4zzz: @@ -1160,8 +1157,7 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) { ; SSE41-LABEL: insert_reg_and_zero_v4f32: ; SSE41: # BB#0: ; SSE41-NEXT: xorps %xmm1, 
%xmm1 -; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] -; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; SSE41-NEXT: retq ; ; AVX-LABEL: insert_reg_and_zero_v4f32: @@ -1212,8 +1208,7 @@ define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) { ; SSE41-LABEL: insert_reg_lo_v4i32: ; SSE41: # BB#0: ; SSE41-NEXT: movd %rdi, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: insert_reg_lo_v4i32: @@ -1252,8 +1247,7 @@ define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { ; SSE41-LABEL: insert_mem_lo_v4i32: ; SSE41: # BB#0: ; SSE41-NEXT: movq (%rdi), %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: insert_mem_lo_v4i32: diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll index 446812df50e..f43f4051758 100644 --- a/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/test/CodeGen/X86/vector-shuffle-combining.ll @@ -369,8 +369,7 @@ define <4 x i32> @combine_bitwise_ops_test3b(<4 x i32> %a, <4 x i32> %b, <4 x i3 ; SSE41: # BB#0: ; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: combine_bitwise_ops_test3b: @@ -495,8 +494,7 @@ define <4 x i32> @combine_bitwise_ops_test6b(<4 x i32> %a, <4 x i32> %b, <4 x i3 ; SSE41: # BB#0: ; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; 
SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: combine_bitwise_ops_test6b: diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll index a98b49a688c..3bd1dc4cb97 100644 --- a/test/CodeGen/X86/vselect.ll +++ b/test/CodeGen/X86/vselect.ll @@ -107,8 +107,7 @@ define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) { ; CHECK-NEXT: movaps {{.*#+}} xmm2 = <0,65535,65535,0,u,65535,65535,u> ; CHECK-NEXT: andps %xmm2, %xmm0 ; CHECK-NEXT: andnps %xmm1, %xmm2 -; CHECK-NEXT: orps %xmm0, %xmm2 -; CHECK-NEXT: movaps %xmm2, %xmm0 +; CHECK-NEXT: orps %xmm2, %xmm0 ; CHECK-NEXT: retq %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b ret <8 x i16> %1 -- 2.11.0