From 5702dadf6f574aa2a9dbfe1dfa44023cd37fc696 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 31 Jan 2020 18:02:21 +0000 Subject: [PATCH] [DAG] Enable ISD::INSERT_SUBVECTOR SimplifyMultipleUseDemandedBits handling This allows SimplifyDemandedBits to call SimplifyMultipleUseDemandedBits to create a simpler ISD::INSERT_SUBVECTOR, which is particularly useful for cases where we're splitting into subvectors anyhow. --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 16 +++++++++ llvm/test/CodeGen/X86/horizontal-reduce-smin.ll | 24 +++++++------- llvm/test/CodeGen/X86/insertelement-ones.ll | 3 -- llvm/test/CodeGen/X86/vector-reduce-smin.ll | 42 ++++++++++++------------ 4 files changed, 49 insertions(+), 36 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9c49f71fd1b..a095dfb795f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -947,6 +947,22 @@ bool TargetLowering::SimplifyDemandedBits( Known.One &= KnownBase.One; Known.Zero &= KnownBase.Zero; } + + // Attempt to avoid multi-use src if we don't need anything from it. + if (!DemandedBits.isAllOnesValue() || !SubElts.isAllOnesValue() || + !BaseElts.isAllOnesValue()) { + SDValue NewSub = SimplifyMultipleUseDemandedBits( + Sub, DemandedBits, SubElts, TLO.DAG, Depth + 1); + SDValue NewBase = SimplifyMultipleUseDemandedBits( + Base, DemandedBits, BaseElts, TLO.DAG, Depth + 1); + if (NewSub || NewBase) { + NewSub = NewSub ? NewSub : Sub; + NewBase = NewBase ? NewBase : Base; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewBase, NewSub, + Op.getOperand(2)); + return TLO.CombineTo(Op, NewOp); + } + } break; } case ISD::EXTRACT_SUBVECTOR: { diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll index df7aaf64047..4153e2ffef2 100644 --- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll @@ -1118,14 +1118,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) { ; ; X86-AVX1-LABEL: test_reduce_v8i64: ; X86-AVX1: ## %bb.0: -; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4 -; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm5 -; X86-AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm1, %xmm0 -; X86-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm3, %xmm1 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5 +; X86-AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3 ; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm1 +; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -1236,14 +1236,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) { ; ; X64-AVX1-LABEL: test_reduce_v8i64: ; X64-AVX1: ## %bb.0: -; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4 -; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm5 -; X64-AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm1, %xmm0 -; X64-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm3, %xmm1 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 
+; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5 +; X64-AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3 ; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm1 +; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll index d468c56ed04..19659ed5377 100644 --- a/llvm/test/CodeGen/X86/insertelement-ones.ll +++ b/llvm/test/CodeGen/X86/insertelement-ones.ll @@ -425,7 +425,6 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) { ; AVX1-NEXT: movl $255, %eax ; AVX1-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 -; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 @@ -437,7 +436,6 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) { ; AVX2-NEXT: movl $255, %eax ; AVX2-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 -; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 @@ -449,7 +447,6 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) { ; AVX512-NEXT: movl $255, %eax ; AVX512-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1 ; AVX512-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 -; AVX512-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll index 116b2ed18ff..884b59d72c7 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll @@ -309,14 +309,14 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; ; AVX1-LABEL: test_v8i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm5 -; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm3, %xmm1 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5 +; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm1 +; AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 @@ -589,24 +589,24 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; ; AVX1-LABEL: test_v16i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm8 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm9 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm11 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7 -; AVX1-NEXT: vpcmpgtq %xmm11, %xmm7, %xmm10 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vpcmpgtq 
%xmm5, %xmm4, %xmm6 -; AVX1-NEXT: vblendvpd %xmm6, %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vblendvpd %xmm10, %xmm11, %xmm7, %xmm5 -; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm6 -; AVX1-NEXT: vblendvpd %xmm9, %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vblendvpd %xmm8, %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 -; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vblendvpd %xmm6, %xmm4, %xmm5, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5 +; AVX1-NEXT: vpcmpgtq %xmm11, %xmm5, %xmm8 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6 +; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm9 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm10 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vblendvpd %xmm10, %xmm1, %xmm3, %xmm1 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm9, %xmm7, %xmm6, %xmm3 +; AVX1-NEXT: vblendvpd %xmm8, %xmm11, %xmm5, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5 +; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm1 +; AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -- 2.11.0
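
For readers skimming the patch, the sketch below restates the new ISD::INSERT_SUBVECTOR handling from the TargetLowering.cpp hunk above with explanatory comments; it is not extra code to apply. It assumes the variables already set up earlier in the ISD::INSERT_SUBVECTOR case of TargetLowering::SimplifyDemandedBits (Op, Sub, Base, SubElts, BaseElts, DemandedBits, VT, dl, TLO, Depth), so it is not standalone-compilable outside that function.

```cpp
// If the caller does not demand every bit of every element from both
// operands, ask SimplifyMultipleUseDemandedBits whether a simpler existing
// node can stand in for the (possibly multi-use) Sub/Base operands. Unlike
// SimplifyDemandedBits, this helper never mutates or creates nodes for the
// operand itself; it only returns an existing simpler value, or a null
// SDValue if nothing better is available.
if (!DemandedBits.isAllOnesValue() || !SubElts.isAllOnesValue() ||
    !BaseElts.isAllOnesValue()) {
  SDValue NewSub = SimplifyMultipleUseDemandedBits(
      Sub, DemandedBits, SubElts, TLO.DAG, Depth + 1);
  SDValue NewBase = SimplifyMultipleUseDemandedBits(
      Base, DemandedBits, BaseElts, TLO.DAG, Depth + 1);
  if (NewSub || NewBase) {
    // Keep whichever operand could not be simplified, and rebuild the
    // INSERT_SUBVECTOR from the simpler operands; the insertion index
    // (operand 2) is unchanged.
    NewSub = NewSub ? NewSub : Sub;
    NewBase = NewBase ? NewBase : Base;
    SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewBase, NewSub,
                                    Op.getOperand(2));
    return TLO.CombineTo(Op, NewOp);
  }
}
```

The test diffs show the intended effect: in insertelement-ones.ll the 256-bit blend whose upper lanes were never demanded is dropped, and in the smin reduction tests the AVX1 sequences operate on the extracted 128-bit halves directly instead of blending full 256-bit values first.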