From ec283e5c485daadc9cd3ea1821d793e99b4131ce Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 12 May 2019 15:16:29 +0000 Subject: [PATCH] [X86][AVX] Split VZEXT_MOVL ymm/zmm if the upper elements are not demanded. Removes unnecessary vzeroupper noted in D61806 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360543 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 12 ++++++++++++ test/CodeGen/X86/insertelement-zero.ll | 2 -- test/CodeGen/X86/sad.ll | 1 - 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b538694b49b..10632434c3d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -33593,6 +33593,18 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( ExtSizeInBits = SizeInBits / 4; switch (Opc) { + // Zero upper elements. + case X86ISD::VZEXT_MOVL: { + SDLoc DL(Op); + SDValue Ext0 = + extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); + SDValue ExtOp = + TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0); + SDValue UndefVec = TLO.DAG.getUNDEF(VT); + SDValue Insert = + insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); + return TLO.CombineTo(Op, Insert); + } // Byte shifts by immediate. case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: diff --git a/test/CodeGen/X86/insertelement-zero.ll b/test/CodeGen/X86/insertelement-zero.ll index 8f4d23dba0a..0a5965053a2 100644 --- a/test/CodeGen/X86/insertelement-zero.ll +++ b/test/CodeGen/X86/insertelement-zero.ll @@ -586,8 +586,6 @@ define <8 x float> @PR41512_v8f32(float %x, float %y) { ; ; AVX-LABEL: PR41512_v8f32: ; AVX: # %bb.0: -; AVX-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 ; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] ; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] diff --git a/test/CodeGen/X86/sad.ll b/test/CodeGen/X86/sad.ll index 7197c1cdaa0..1f7ec8d22be 100644 --- a/test/CodeGen/X86/sad.ll +++ b/test/CodeGen/X86/sad.ll @@ -1449,7 +1449,6 @@ define i32 @sad_unroll_nonzero_initial(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: sad_unroll_nonzero_initial: -- 2.11.0