From 0607c8df7faf71bd726e9d18bafd2f7566984e35 Mon Sep 17 00:00:00 2001 From: Esme-Yi Date: Mon, 6 Jul 2020 11:47:31 +0000 Subject: [PATCH] [PowerPC] Legalize SREM/UREM directly on P9. Summary: As Bugzilla-35090 reported, the rationale for using custom lowering SREM/UREM should no longer be true. At the IR level, the div-rem-pairs pass performs the transformation where the remainder is computed from the result of the division when both are required. We should now be able to lower these directly on P9. And the pass also fixed the problem that divide is in a different block than the remainder. This is a patch to remove redundant code and make SREM/UREM legal directly on P9. Reviewed By: lkail Differential Revision: https://reviews.llvm.org/D82145 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 32 ++++++++--------------------- llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 - llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll | 30 ++++++++++++++++----------- 3 files changed, 27 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 532e2659eae..a31b3fef2ab 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -261,15 +261,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // PowerPC has no SREM/UREM instructions unless we are on P9 // On P9 we may use a hardware instruction to compute the remainder. - // The instructions are not legalized directly because in the cases where the - // result of both the remainder and the division is required it is more - // efficient to compute the remainder from the result of the division rather - // than use the remainder instruction. + // When the result of both the remainder and the division is required it is + // more efficient to compute the remainder from the result of the division + // rather than use the remainder instruction. 
The instructions are legalized + // directly because the DivRemPairsPass performs the transformation at the IR + // level. if (Subtarget.isISA3_0()) { - setOperationAction(ISD::SREM, MVT::i32, Custom); - setOperationAction(ISD::UREM, MVT::i32, Custom); - setOperationAction(ISD::SREM, MVT::i64, Custom); - setOperationAction(ISD::UREM, MVT::i64, Custom); + setOperationAction(ISD::SREM, MVT::i32, Legal); + setOperationAction(ISD::UREM, MVT::i32, Legal); + setOperationAction(ISD::SREM, MVT::i64, Legal); + setOperationAction(ISD::UREM, MVT::i64, Legal); } else { setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); @@ -10492,18 +10493,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, return SDValue(); } -SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const { - // Check for a DIV with the same operands as this REM. - for (auto UI : Op.getOperand(1)->uses()) { - if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) || - (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV)) - if (UI->getOperand(0) == Op.getOperand(0) && - UI->getOperand(1) == Op.getOperand(1)) - return SDValue(); - } - return Op; -} - // Lower scalar BSWAP64 to xxbrd. 
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -11121,9 +11110,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG); - case ISD::SREM: - case ISD::UREM: - return LowerREM(Op, DAG); case ISD::BSWAP: return LowerBSWAP(Op, DAG); case ISD::ATOMIC_CMP_SWAP: diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index b3f309693e1..98256ae0c35 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1119,7 +1119,6 @@ namespace llvm { SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll index e3dcf8e5491..e99074e7f90 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll @@ -88,13 +88,16 @@ entry: store i32 %div, i32* @div_resultsw, align 4 ret void ; CHECK-LABEL: modulo_div_sw -; CHECK-NOT: modsw -; CHECK: div -; CHECK-NOT: modsw -; CHECK: mull -; CHECK-NOT: modsw -; CHECK: sub +; CHECK: modsw {{[0-9]+}}, 3, 4 ; CHECK: blr +; CHECK-DRP-LABEL: modulo_div_sw +; CHECK-DRP-NOT: modsw +; CHECK-DRP: div +; CHECK-DRP-NOT: modsw +; CHECK-DRP: mull +; CHECK-DRP-NOT: modsw +; CHECK-DRP: sub +; CHECK-DRP: blr ; CHECK-PWR8-LABEL: modulo_div_sw ; CHECK-PWR8: div ; CHECK-PWR8: mull @@ -129,13 +132,16 @@ entry: store i32 %div, i32* @div_resultuw, align 4 ret void ; CHECK-LABEL: modulo_div_uw -; CHECK-NOT: modsw -; 
CHECK: div -; CHECK-NOT: modsw -; CHECK: mull -; CHECK-NOT: modsw -; CHECK: sub +; CHECK: moduw {{[0-9]+}}, 3, 4 ; CHECK: blr +; CHECK-DRP-LABEL: modulo_div_uw +; CHECK-DRP-NOT: moduw +; CHECK-DRP: div +; CHECK-DRP-NOT: moduw +; CHECK-DRP: mull +; CHECK-DRP-NOT: moduw +; CHECK-DRP: sub +; CHECK-DRP: blr ; CHECK-PWR8-LABEL: modulo_div_uw ; CHECK-PWR8: div ; CHECK-PWR8: mull -- 2.11.0