From 180a2008449f84cdd6d08138aa7b87b36a622d2a Mon Sep 17 00:00:00 2001 From: Artyom Skrobov Date: Fri, 10 Mar 2017 12:41:33 +0000 Subject: [PATCH] Refactor the multiply-accumulate combines to act on ARMISD::ADD[CE] nodes, instead of the generic ISD::ADD[CE]. Summary: This allows for some simplification because the combines are no longer limited to just one go at the node before it gets legalized into an ARM target-specific one. Reviewers: jmolloy, rogfer01 Subscribers: aemerson, llvm-commits, rengolin Differential Revision: https://reviews.llvm.org/D30401 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297453 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 32 --------- lib/Target/ARM/ARMISelLowering.cpp | 140 +++++++++++++++++-------------------- 2 files changed, 64 insertions(+), 108 deletions(-) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index e44256b837a..d4bc4767e6a 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3095,38 +3095,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } case ARMISD::UMLAL:{ - // UMAAL is similar to UMLAL but it adds two 32-bit values to the - // 64-bit multiplication result. 
- if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && - N->getOperand(2).getOpcode() == ARMISD::ADDC && - N->getOperand(3).getOpcode() == ARMISD::ADDE) { - - SDValue Addc = N->getOperand(2); - SDValue Adde = N->getOperand(3); - - if (Adde.getOperand(2).getNode() == Addc.getNode()) { - - ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0)); - ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1)); - - if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0) - { - // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm - // RdLo = one operand to be added, lower 32-bits of res - // RdHi = other operand to be added, upper 32-bits of res - // Rn = first multiply operand - // Rm = second multiply operand - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - Addc.getOperand(0), Addc.getOperand(1), - getAL(CurDAG, dl), - CurDAG->getRegister(0, MVT::i32) }; - unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; - CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops); - return; - } - } - } - if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG, dl), diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e7cfbba98b1..598ed1ae481 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -724,10 +724,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } } - // ARM and Thumb2 support UMLAL/SMLAL. 
- if (!Subtarget->isThumb1Only()) - setTargetDAGCombine(ISD::ADDC); - if (Subtarget->isFPOnlySP()) { // When targeting a floating-point unit with only single-precision // operations, f64 is legal for the few double-precision instructions which @@ -9465,9 +9461,8 @@ static SDValue findMUL_LOHI(SDValue V) { return SDValue(); } -static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, - TargetLowering::DAGCombinerInfo &DCI, - const ARMSubtarget *Subtarget) { +static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, + TargetLowering::DAGCombinerInfo &DCI) { // Look for multiply add opportunities. // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where // each add nodes consumes a value from ISD::UMUL_LOHI and there is @@ -9482,7 +9477,17 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // \ / // ADDC <- hiAdd // - assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC"); + assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE"); + + assert(AddeNode->getNumOperands() == 3 && + AddeNode->getOperand(2).getValueType() == MVT::i32 && + "ADDE node has the wrong inputs"); + + // Check that we have a glued ADDC node. + SDNode* AddcNode = AddeNode->getOperand(2).getNode(); + if (AddcNode->getOpcode() != ARMISD::ADDC) + return SDValue(); + SDValue AddcOp0 = AddcNode->getOperand(0); SDValue AddcOp1 = AddcNode->getOperand(1); @@ -9494,10 +9499,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, AddcNode->getValueType(0) == MVT::i32 && "Expect ADDC with two result values. First: i32"); - // Check that we have a glued ADDC node. - if (AddcNode->getValueType(1) != MVT::Glue) - return SDValue(); - // Check that the ADDC adds the low result of the S/UMUL_LOHI. if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && AddcOp0->getOpcode() != ISD::SMUL_LOHI && @@ -9505,19 +9506,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, AddcOp1->getOpcode() != ISD::SMUL_LOHI) return SDValue(); - // Look for the glued ADDE. 
- SDNode* AddeNode = AddcNode->getGluedUser(); - if (!AddeNode) - return SDValue(); - - // Make sure it is really an ADDE. - if (AddeNode->getOpcode() != ISD::ADDE) - return SDValue(); - - assert(AddeNode->getNumOperands() == 3 && - AddeNode->getOperand(2).getValueType() == MVT::Glue && - "ADDE node has the wrong inputs"); - // Check for the triangle shape. SDValue AddeOp0 = AddeNode->getOperand(0); SDValue AddeOp1 = AddeNode->getOperand(1); @@ -9591,38 +9579,25 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); // Return original node to notify the driver to stop replacing. - SDValue resNode(AddcNode, 0); - return resNode; + return SDValue(AddeNode, 0); } -static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, +static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // UMAAL is similar to UMLAL except that it adds two unsigned values. // While trying to combine for the other MLAL nodes, first search for the - // chance to use UMAAL. Check if Addc uses another addc node which can first - // be combined into a UMLAL. The other pattern is AddcNode being combined - // into an UMLAL and then using another addc is handled in ISelDAGToDAG. - - if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || - (Subtarget->isThumb() && !Subtarget->hasThumb2())) - return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); - - SDNode *PrevAddc = nullptr; - if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC) - PrevAddc = AddcNode->getOperand(0).getNode(); - else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC) - PrevAddc = AddcNode->getOperand(1).getNode(); - - // If there's no addc chains, just return a search for any MLAL. - if (PrevAddc == nullptr) - return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); - - // Try to convert the addc operand to an MLAL and if that fails try to - // combine AddcNode. 
- SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget); - if (MLAL != SDValue(PrevAddc, 0)) - return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); + // chance to use UMAAL. Check if Addc uses a node which has already + // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde + // as the addend, and it's handled in PerformUMLALCombine. + + if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) + return AddCombineTo64bitMLAL(AddeNode, DCI); + + // Check that we have a glued ADDC node. + SDNode* AddcNode = AddeNode->getOperand(2).getNode(); + if (AddcNode->getOpcode() != ARMISD::ADDC) + return SDValue(); // Find the converted UMAAL or quit if it doesn't exist. SDNode *UmlalNode = nullptr; @@ -9634,29 +9609,18 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, UmlalNode = AddcNode->getOperand(1).getNode(); AddHi = AddcNode->getOperand(0); } else { - return SDValue(); + return AddCombineTo64bitMLAL(AddeNode, DCI); } // The ADDC should be glued to an ADDE node, which uses the same UMLAL as // the ADDC as well as Zero. - auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3)); - - if (!Zero || Zero->getZExtValue() != 0) + if (!isNullConstant(UmlalNode->getOperand(3))) return SDValue(); - // Check that we have a glued ADDC node. - if (AddcNode->getValueType(1) != MVT::Glue) - return SDValue(); - - // Look for the glued ADDE. 
- SDNode* AddeNode = AddcNode->getGluedUser(); - if (!AddeNode) - return SDValue(); - - if ((AddeNode->getOperand(0).getNode() == Zero && + if ((isNullConstant(AddeNode->getOperand(0)) && AddeNode->getOperand(1).getNode() == UmlalNode) || (AddeNode->getOperand(0).getNode() == UmlalNode && - AddeNode->getOperand(1).getNode() == Zero)) { + isNullConstant(AddeNode->getOperand(1)))) { SelectionDAG &DAG = DCI.DAG; SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1), @@ -9669,11 +9633,33 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0)); // Return original node to notify the driver to stop replacing. - return SDValue(AddcNode, 0); + return SDValue(AddeNode, 0); } return SDValue(); } +static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) + return SDValue(); + + // Check that we have a pair of ADDC and ADDE as operands. + // Both addends of the ADDE must be zero. 
+ SDNode* AddcNode = N->getOperand(2).getNode(); + SDNode* AddeNode = N->getOperand(3).getNode(); + if ((AddcNode->getOpcode() == ARMISD::ADDC) && + (AddeNode->getOpcode() == ARMISD::ADDE) && + isNullConstant(AddeNode->getOperand(0)) && + isNullConstant(AddeNode->getOperand(1)) && + (AddeNode->getOperand(2).getNode() == AddcNode)) + return DAG.getNode(ARMISD::UMAAL, SDLoc(N), + DAG.getVTList(MVT::i32, MVT::i32), + {N->getOperand(0), N->getOperand(1), + AddcNode->getOperand(0), AddcNode->getOperand(1)}); + else + return SDValue(); +} + static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { if (Subtarget->isThumb1Only()) { @@ -9698,13 +9684,15 @@ static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// PerformADDCCombine - Target-specific dag combine transform from -/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or -/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL -static SDValue PerformADDCCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const ARMSubtarget *Subtarget) { - if (Subtarget->isThumb1Only()) return SDValue(); +/// PerformADDECombine - Target-specific dag combine transform from +/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or +/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL +static SDValue PerformADDECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Only ARM and Thumb2 support UMLAL/SMLAL. + if (Subtarget->isThumb1Only()) + return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); // Only perform the checks after legalize when the pattern is available. 
if (DCI.isBeforeLegalize()) return SDValue(); @@ -11733,14 +11721,14 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; - case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); + case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); + case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); - case ARMISD::ADDE: case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); -- 2.11.0