From de0c5e9f2563eaf26aea734b96ea8a6b566f4b5d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 8 Feb 2019 17:19:01 +0000 Subject: [PATCH] [TargetLowering] Add SimplifyDemandedBits funnel shift support git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353539 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp | 39 +++++++++++++++++++++++++++++ test/CodeGen/X86/funnel-shift.ll | 22 ++++++---------- 3 files changed, 50 insertions(+), 15 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5a933dd8be2..b462aabad2f 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7144,6 +7144,10 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { if (N0 == N1 && hasOperation(RotOpc, VT)) return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2); + // Simplify, based on bits shifted out of N0/N1. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 550fab1bda6..c8b66d05a01 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1025,6 +1025,45 @@ bool TargetLowering::SimplifyDemandedBits( } break; } + case ISD::FSHL: + case ISD::FSHR: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + bool IsFSHL = (Op.getOpcode() == ISD::FSHL); + + if (ConstantSDNode *SA = isConstOrConstSplat(Op2)) { + unsigned Amt = SA->getAPIntValue().urem(BitWidth); + + // For fshl, 0-shift returns the 1st arg. + // For fshr, 0-shift returns the 2nd arg. + if (Amt == 0) { + if (SimplifyDemandedBits(IsFSHL ? 
Op0 : Op1, DemandedBits, DemandedElts, + Known, TLO, Depth + 1)) + return true; + break; + } + + // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt)) + // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt) + APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt)); + APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt); + if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO, + Depth + 1)) + return true; + + Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt)); + Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt)); + Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); + Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); + Known.One |= Known2.One; + Known.Zero |= Known2.Zero; + } + break; + } case ISD::SIGN_EXTEND_INREG: { SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); diff --git a/test/CodeGen/X86/funnel-shift.ll b/test/CodeGen/X86/funnel-shift.ll index ab6acabded9..9feab00516b 100644 --- a/test/CodeGen/X86/funnel-shift.ll +++ b/test/CodeGen/X86/funnel-shift.ll @@ -323,19 +323,15 @@ define i7 @fshr_i7_const_fold() nounwind { define i32 @fshl_i32_demandedbits(i32 %a0, i32 %a1) nounwind { ; X32-SSE2-LABEL: fshl_i32_demandedbits: ; X32-SSE2: # %bb.0: +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X32-SSE2-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X32-SSE2-NEXT: orl $1, %eax -; X32-SSE2-NEXT: shrdl $23, %ecx, %eax +; X32-SSE2-NEXT: shldl $9, %ecx, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshl_i32_demandedbits: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: movl %esi, %eax -; X64-AVX2-NEXT: orl $-2147483648, %edi # imm = 0x80000000 -; X64-AVX2-NEXT: orl $1, %eax -; X64-AVX2-NEXT: shrdl $23, %edi, %eax +; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shldl $9, %esi, %eax ; X64-AVX2-NEXT: retq %x = or i32 %a0, 2147483648 
%y = or i32 %a1, 1 @@ -346,19 +342,15 @@ define i32 @fshl_i32_demandedbits(i32 %a0, i32 %a1) nounwind { define i32 @fshr_i32_demandedbits(i32 %a0, i32 %a1) nounwind { ; X32-SSE2-LABEL: fshr_i32_demandedbits: ; X32-SSE2: # %bb.0: +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X32-SSE2-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X32-SSE2-NEXT: orl $1, %eax ; X32-SSE2-NEXT: shrdl $9, %ecx, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshr_i32_demandedbits: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: movl %esi, %eax -; X64-AVX2-NEXT: orl $-2147483648, %edi # imm = 0x80000000 -; X64-AVX2-NEXT: orl $1, %eax -; X64-AVX2-NEXT: shrdl $9, %edi, %eax +; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shldl $23, %esi, %eax ; X64-AVX2-NEXT: retq %x = or i32 %a0, 2147483648 %y = or i32 %a1, 1 -- 2.11.0