From bacfc66c2eab9eadb6f4e9681ea08d1eab881863 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 13 May 2017 19:57:10 +0000 Subject: [PATCH] [SelectionDAG] Add VECTOR_SHUFFLE support to ComputeNumSignBits git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302993 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 34 +++++++++++++++++++++ test/CodeGen/X86/known-signbits-vector.ll | 50 ++++--------------------------- 2 files changed, 39 insertions(+), 45 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 453f95a697e..c5e5193421b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2855,6 +2855,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarSizeInBits(); + unsigned NumElts = DemandedElts.getBitWidth(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -2898,6 +2899,39 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } return Tmp; + case ISD::VECTOR_SHUFFLE: { + // Collect the minimum number of sign bits that are shared by every vector + // element referenced by the shuffle. + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); + for (unsigned i = 0; i != NumElts; ++i) { + int M = SVN->getMaskElt(i); + if (!DemandedElts[i]) + continue; + // For UNDEF elements, we don't know anything about the common state of + // the shuffle result. 
+ if (M < 0) + return 1; + if ((unsigned)M < NumElts) + DemandedLHS.setBit((unsigned)M % NumElts); + else + DemandedRHS.setBit((unsigned)M % NumElts); + } + Tmp = UINT_MAX; + if (!!DemandedLHS) + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); + if (!!DemandedRHS) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + // If we don't know anything, early out and try computeKnownBits fall-back. + if (Tmp == 1) + break; + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + case ISD::SIGN_EXTEND: case ISD::SIGN_EXTEND_VECTOR_INREG: Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); diff --git a/test/CodeGen/X86/known-signbits-vector.ll b/test/CodeGen/X86/known-signbits-vector.ll index b69589c2dde..60c466166b4 100644 --- a/test/CodeGen/X86/known-signbits-vector.ll +++ b/test/CodeGen/X86/known-signbits-vector.ll @@ -141,46 +141,15 @@ define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwin define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1) nounwind { ; X32-LABEL: signbits_sext_shuffle_sitofp: ; X32: # BB#0: -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $64, %esp ; X32-NEXT: vpmovsxdq %xmm0, %xmm1 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; X32-NEXT: vpmovsxdq %xmm0, %xmm0 ; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; X32-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] ; X32-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] -; X32-NEXT: vpextrd $1, %xmm0, %eax -; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm1 -; X32-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp) -; X32-NEXT: vpextrd $3, %xmm0, %eax -; X32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; X32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 -; X32-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp) -; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X32-NEXT: vpextrd $1, %xmm0, %eax -; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm1 -; 
X32-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp) -; X32-NEXT: vpextrd $3, %xmm0, %eax -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; X32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp) -; X32-NEXT: fildll {{[0-9]+}}(%esp) -; X32-NEXT: fstpl {{[0-9]+}}(%esp) -; X32-NEXT: fildll {{[0-9]+}}(%esp) -; X32-NEXT: fstpl {{[0-9]+}}(%esp) -; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X32-NEXT: fildll {{[0-9]+}}(%esp) -; X32-NEXT: fstpl {{[0-9]+}}(%esp) -; X32-NEXT: fildll {{[0-9]+}}(%esp) -; X32-NEXT: fstpl (%esp) -; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; X32-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp +; X32-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; X32-NEXT: vcvtdq2pd %xmm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: signbits_sext_shuffle_sitofp: @@ -192,17 +161,8 @@ define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1) ; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] ; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] ; X64-NEXT: vextractf128 $1, %ymm0, %xmm1 -; X64-NEXT: vpextrq $1, %xmm1, %rax -; X64-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 -; X64-NEXT: vmovq %xmm1, %rax -; X64-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; X64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; X64-NEXT: vpextrq $1, %xmm0, %rax -; X64-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 -; X64-NEXT: vmovq %xmm0, %rax -; X64-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 -; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; X64-NEXT: vcvtdq2pd %xmm0, %ymm0 ; X64-NEXT: retq %1 = sext <4 x i32> %a0 to <4 x i64> %2 = shufflevector <4 x i64> %1, <4 x i64> %a1, <4 x i32> <i32 3, i32 2, i32 1, i32 0> %3 = sitofp <4 x i64> %2 to <4 x double> ret <4 x double> %3 } -- 2.11.0