From df369b21b9919ed30c10b5243202400f2c322c41 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 19 Jan 2016 23:04:56 +0000
Subject: [PATCH] [X86][SSE] Add VZEXT_MOVL target shuffle decoding.

Add support for decoding VZEXT_MOVL target shuffle masks, allowing it to
be used as a source in target shuffle combines.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258215 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp   | 5 +++++
 test/CodeGen/X86/insertps-combine.ll | 8 ++------
 test/CodeGen/X86/sse41.ll            | 8 ++------
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fa1a5301bb7..15e6037ee0a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3939,6 +3939,7 @@ static bool isTargetShuffle(unsigned Opcode) {
   case X86ISD::VPERMI:
   case X86ISD::VPERMV:
   case X86ISD::VPERMV3:
+  case X86ISD::VZEXT_MOVL:
     return true;
   }
 }
@@ -4886,6 +4887,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
     DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
     IsUnary = true;
     break;
+  case X86ISD::VZEXT_MOVL:
+    DecodeZeroMoveLowMask(VT, Mask);
+    IsUnary = true;
+    break;
   case X86ISD::PSHUFB: {
     IsUnary = true;
     SDValue MaskNode = N->getOperand(1);
diff --git a/test/CodeGen/X86/insertps-combine.ll b/test/CodeGen/X86/insertps-combine.ll
index 690707b6870..78bae28762c 100644
--- a/test/CodeGen/X86/insertps-combine.ll
+++ b/test/CodeGen/X86/insertps-combine.ll
@@ -24,16 +24,12 @@ define <4 x float> @shuffle_v4f32_0z27(<4 x float> %x, <4 x float> %a) {
 define <4 x float> @shuffle_v4f32_0zz4(<4 x float> %xyzw, <4 x float> %abcd) {
 ; SSE-LABEL: shuffle_v4f32_0zz4:
 ; SSE:       # BB#0:
-; SSE-NEXT:    xorps %xmm2, %xmm2
-; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
-; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v4f32_0zz4:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
-; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
 ; AVX-NEXT:    retq
   %vecext = extractelement <4 x float> %xyzw, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 0a83a9753b8..16e43f26b67 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -507,16 +507,12 @@ define <4 x float> @shuf_W00W(<4 x float> %x, <4 x float> %a) {
 define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) {
 ; X32-LABEL: shuf_X00A:
 ; X32:       ## BB#0:
-; X32-NEXT:    xorps %xmm2, %xmm2
-; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
-; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: shuf_X00A:
 ; X64:       ## BB#0:
-; X64-NEXT:    xorps %xmm2, %xmm2
-; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
-; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
 ; X64-NEXT:    retq
   %vecext = extractelement <4 x float> %x, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
-- 
2.11.0
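
For reference, the mask that the DecodeZeroMoveLowMask call added above
produces can be sketched as below: a VZEXT_MOVL node keeps element 0 of
its source and zeroes every remaining lane, which is what lets the
shuffle combiner fold the xorps+blendps+insertps sequences in the tests
into a single masked insertps. This is an illustrative sketch only, not
the verbatim LLVM implementation: it assumes the SmallVectorImpl<int>
mask representation, SM_SentinelZero is redeclared locally so the
snippet is self-contained (in LLVM it comes from X86ShuffleDecode.h),
and the function name is hypothetical.

  #include "llvm/ADT/SmallVector.h"

  // Local stand-in for the marker defined in X86ShuffleDecode.h: a mask
  // entry with this value denotes a lane that is known to be zero.
  enum { SM_SentinelZero = -2 };

  // Sketch: VZEXT_MOVL keeps element 0 of the source and zeroes all
  // other lanes, so the decoded mask is <0, Z, Z, ..., Z>.
  static void decodeZeroMoveLowMaskSketch(unsigned NumElts,
                                          llvm::SmallVectorImpl<int> &Mask) {
    Mask.push_back(0);                   // lane 0 comes from the source
    for (unsigned i = 1; i != NumElts; ++i)
      Mask.push_back(SM_SentinelZero);   // remaining lanes are zero
  }

For a v4f32 node this yields <0, Z, Z, Z>, exactly the
<x,zero,zero,...> pattern that the updated insertps test checks now
match in one instruction.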