From 77ce0e565d784065c8b3b588a02fa6bd7a985e97 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 12 Jun 2018 12:49:36 +0000 Subject: [PATCH] [SelectionDAG] Provide default expansion for rotates Implement default legalization of rotates: either in terms of the rotation in the opposite direction (if legal), or in terms of shifts and ors. Implement generating of rotate instructions for Hexagon. Hexagon only supports rotates by an immediate value, so implement custom lowering of ROTL/ROTR on Hexagon. If a rotate is not legal, use the default expansion. Differential Revision: https://reviews.llvm.org/D47725 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@334497 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 40 ++++++ lib/Target/Hexagon/HexagonISelLowering.cpp | 12 ++ lib/Target/Hexagon/HexagonISelLowering.h | 1 + lib/Target/Hexagon/HexagonPatterns.td | 19 +++ test/CodeGen/Hexagon/rotate.ll | 205 +++++++++++++++++++++++++++++ test/CodeGen/Hexagon/rotl-i64.ll | 4 +- 7 files changed, 291 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/Hexagon/rotate.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 99340ca3388..da46a847490 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -568,6 +568,16 @@ namespace { /// single-use) and if missed an empty SDValue is returned. SDValue distributeTruncateThroughAnd(SDNode *N); + /// Helper function to determine whether the target supports operation + /// given by \p Opcode for type \p VT, that is, whether the operation + /// is legal or custom before legalizing operations, and whether it is + /// legal (but not custom) after legalization. 
+ bool hasOperation(unsigned Opcode, EVT VT) { + if (LegalOperations) + return TLI.isOperationLegal(Opcode, VT); + return TLI.isOperationLegalOrCustom(Opcode, VT); + } + public: /// Runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); @@ -5050,8 +5060,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { if (!TLI.isTypeLegal(VT)) return nullptr; // The target must have at least one rotate flavor. - bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); - bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); + bool HasROTL = hasOperation(ISD::ROTL, VT); + bool HasROTR = hasOperation(ISD::ROTR, VT); if (!HasROTL && !HasROTR) return nullptr; // Check for truncated rotate. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 4fe55f790d4..c98d03f70a1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3899,6 +3899,46 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { ReplaceNode(SDValue(Node, 0), Result); break; } + case ISD::ROTL: + case ISD::ROTR: { + bool IsLeft = Node->getOpcode() == ISD::ROTL; + SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1); + EVT ResVT = Node->getValueType(0); + EVT OpVT = Op0.getValueType(); + assert(OpVT == ResVT && + "The result and the operand types of rotate should match"); + EVT ShVT = Op1.getValueType(); + SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT); + + // If a rotate in the other direction is legal, use it. + unsigned RevRot = IsLeft ? 
ISD::ROTR : ISD::ROTL; + if (TLI.isOperationLegal(RevRot, ResVT)) { + SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1); + Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub)); + break; + } + + // Otherwise, + // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1))) + // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1))) + // + assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) && + "Expecting the type bitwidth to be a power of 2"); + unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; + unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL; + SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT, + Width, DAG.getConstant(1, dl, ShVT)); + SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1); + SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1); + SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1); + + SDValue Or = DAG.getNode(ISD::OR, dl, ResVT, + DAG.getNode(ShOpc, dl, ResVT, Op0, And0), + DAG.getNode(HsOpc, dl, ResVT, Op0, And1)); + Results.push_back(Or); + break; + } + case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 9f75fddc316..4fe6e3a9621 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1507,6 +1507,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Subtarget-specific operation actions. 
// + if (Subtarget.hasV60TOps()) { + setOperationAction(ISD::ROTL, MVT::i32, Custom); + setOperationAction(ISD::ROTL, MVT::i64, Custom); + } if (Subtarget.hasV5TOps()) { setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FADD, MVT::f64, Expand); @@ -2092,6 +2096,13 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { } SDValue +HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { + if (isa<ConstantSDNode>(Op.getOperand(1).getNode())) + return Op; + return SDValue(); +} + +SDValue HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); SDValue InpV = Op.getOperand(0); @@ -2792,6 +2803,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRA: case ISD::SHL: case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG); + case ISD::ROTL: return LowerROTL(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 6dd193231e5..3d94bd1ff6e 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -159,6 +159,7 @@ namespace HexagonISD { SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index f94ca9b1d9b..30ee2fb89f4 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -300,6 
+300,7 @@ def Add: pf2; def And: pf2; def Sra: pf2; def Sub: pf2; def Or: pf2; def Srl: pf2; def Mul: pf2; def Xor: pf2; def Shl: pf2; +def Rol: pf2; // --(1) Immediate ------------------------------------------------------- // @@ -988,6 +989,10 @@ def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; +let Predicates = [HasV60T] in { + def: OpR_RI_pat; + def: OpR_RI_pat; +} def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)), (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>; @@ -1033,6 +1038,20 @@ let AddedComplexity = 100 in { def: AccRRI_pat, I64, u6_0ImmPred>; def: AccRRI_pat, I64, u6_0ImmPred>; def: AccRRI_pat, I64, u6_0ImmPred>; + + let Predicates = [HasV60T] in { + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + } } let AddedComplexity = 100 in { diff --git a/test/CodeGen/Hexagon/rotate.ll b/test/CodeGen/Hexagon/rotate.ll new file mode 100644 index 00000000000..bcc978fdce6 --- /dev/null +++ b/test/CodeGen/Hexagon/rotate.ll @@ -0,0 +1,205 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +target triple = "hexagon" + +; CHECK-LABEL: f0 +; CHECK: r0 = rol(r0,#7) +define i32 @f0(i32 %a0) #0 { +b0: + %v0 = shl i32 %a0, 7 + %v1 = lshr i32 %a0, 25 + %v2 = or i32 %v0, %v1 + ret i32 %v2 +} + +; CHECK-LABEL: f1 +; No variable-shift rotates. Check for the default expansion code. +; This is a rotate left by %a1(r1). 
+; CHECK: r[[R10:[0-9]+]] = sub(#32,r1) +; CHECK: r[[R11:[0-9]+]] = and(r1,#31) +; CHECK: r[[R12:[0-9]+]] = and(r[[R10]],#31) +; CHECK: r[[R13:[0-9]+]] = asl(r0,r[[R11]]) +; CHECK: r[[R13]] |= lsr(r0,r[[R12]]) +define i32 @f1(i32 %a0, i32 %a1) #0 { +b0: + %v0 = shl i32 %a0, %a1 + %v1 = sub i32 32, %a1 + %v2 = lshr i32 %a0, %v1 + %v3 = or i32 %v2, %v0 + ret i32 %v3 +} + +; CHECK-LABEL: f2 +; CHECK: r0 = rol(r0,#25) +define i32 @f2(i32 %a0) #0 { +b0: + %v0 = lshr i32 %a0, 7 + %v1 = shl i32 %a0, 25 + %v2 = or i32 %v0, %v1 + ret i32 %v2 +} + +; CHECK-LABEL: f3 +; No variable-shift rotates. Check for the default expansion code. +; This is a rotate right by %a1(r1) that became a rotate left by 32-%a1. +; CHECK: r[[R30:[0-9]+]] = sub(#32,r1) +; CHECK: r[[R31:[0-9]+]] = and(r1,#31) +; CHECK: r[[R32:[0-9]+]] = and(r[[R30]],#31) +; CHECK: r[[R33:[0-9]+]] = asl(r0,r[[R32]]) +; CHECK: r[[R33]] |= lsr(r0,r[[R31]]) +define i32 @f3(i32 %a0, i32 %a1) #0 { +b0: + %v0 = lshr i32 %a0, %a1 + %v1 = sub i32 32, %a1 + %v2 = shl i32 %a0, %v1 + %v3 = or i32 %v2, %v0 + ret i32 %v3 +} + +; CHECK-LABEL: f4 +; CHECK: r1:0 = rol(r1:0,#7) +define i64 @f4(i64 %a0) #0 { +b0: + %v0 = shl i64 %a0, 7 + %v1 = lshr i64 %a0, 57 + %v2 = or i64 %v0, %v1 + ret i64 %v2 +} + +; CHECK-LABEL: f5 +; No variable-shift rotates. Check for the default expansion code. +; This is a rotate left by %a1(r2). 
+; CHECK: r[[R50:[0-9]+]] = sub(#64,r2) +; CHECK: r[[R51:[0-9]+]] = and(r2,#63) +; CHECK: r[[R52:[0-9]+]] = and(r[[R50]],#63) +; CHECK: r[[R53:[0-9]+]]:[[R54:[0-9]+]] = asl(r1:0,r[[R51]]) +; CHECK: r[[R53]]:[[R54]] |= lsr(r1:0,r[[R52]]) +define i64 @f5(i64 %a0, i32 %a1) #0 { +b0: + %v0 = zext i32 %a1 to i64 + %v1 = shl i64 %a0, %v0 + %v2 = sub i32 64, %a1 + %v3 = zext i32 %v2 to i64 + %v4 = lshr i64 %a0, %v3 + %v5 = or i64 %v4, %v1 + ret i64 %v5 +} + +; CHECK-LABEL: f6 +; CHECK: r1:0 = rol(r1:0,#57) +define i64 @f6(i64 %a0) #0 { +b0: + %v0 = lshr i64 %a0, 7 + %v1 = shl i64 %a0, 57 + %v2 = or i64 %v0, %v1 + ret i64 %v2 +} + +; CHECK-LABEL: f7 +; No variable-shift rotates. Check for the default expansion code. +; This is a rotate right by %a1(r2) that became a rotate left by 64-%a1. +; CHECK: r[[R70:[0-9]+]] = sub(#64,r2) +; CHECK: r[[R71:[0-9]+]] = and(r2,#63) +; CHECK: r[[R72:[0-9]+]] = and(r[[R70]],#63) +; CHECK: r[[R73:[0-9]+]]:[[R75:[0-9]+]] = asl(r1:0,r[[R72]]) +; CHECK: r[[R73]]:[[R75]] |= lsr(r1:0,r[[R71]]) +define i64 @f7(i64 %a0, i32 %a1) #0 { +b0: + %v0 = zext i32 %a1 to i64 + %v1 = lshr i64 %a0, %v0 + %v2 = sub i32 64, %a1 + %v3 = zext i32 %v2 to i64 + %v4 = shl i64 %a0, %v3 + %v5 = or i64 %v4, %v1 + ret i64 %v5 +} + +; CHECK-LABEL: f8 +; CHECK: r0 += rol(r1,#7) +define i32 @f8(i32 %a0, i32 %a1) #0 { +b0: + %v0 = shl i32 %a1, 7 + %v1 = lshr i32 %a1, 25 + %v2 = or i32 %v0, %v1 + %v3 = add i32 %v2, %a0 + ret i32 %v3 +} + +; CHECK-LABEL: f9 +; CHECK: r0 -= rol(r1,#7) +define i32 @f9(i32 %a0, i32 %a1) #0 { +b0: + %v0 = shl i32 %a1, 7 + %v1 = lshr i32 %a1, 25 + %v2 = or i32 %v0, %v1 + %v3 = sub i32 %a0, %v2 + ret i32 %v3 +} + +; CHECK-LABEL: f10 +; CHECK: r0 &= rol(r1,#7) +define i32 @f10(i32 %a0, i32 %a1) #0 { +b0: + %v0 = shl i32 %a1, 7 + %v1 = lshr i32 %a1, 25 + %v2 = or i32 %v0, %v1 + %v3 = and i32 %v2, %a0 + ret i32 %v3 +} + +; CHECK-LABEL: f12 +; CHECK: r0 ^= rol(r1,#7) +define i32 @f12(i32 %a0, i32 %a1) #0 { +b0: + %v0 = shl i32 %a1, 7 + %v1 = lshr i32 
%a1, 25 + %v2 = or i32 %v0, %v1 + %v3 = xor i32 %v2, %a0 + ret i32 %v3 +} + +; CHECK-LABEL: f13 +; CHECK: r1:0 += rol(r3:2,#7) +define i64 @f13(i64 %a0, i64 %a1) #0 { +b0: + %v0 = shl i64 %a1, 7 + %v1 = lshr i64 %a1, 57 + %v2 = or i64 %v0, %v1 + %v3 = add i64 %v2, %a0 + ret i64 %v3 +} + +; CHECK-LABEL: f14 +; CHECK: r1:0 -= rol(r3:2,#7) +define i64 @f14(i64 %a0, i64 %a1) #0 { +b0: + %v0 = shl i64 %a1, 7 + %v1 = lshr i64 %a1, 57 + %v2 = or i64 %v0, %v1 + %v3 = sub i64 %a0, %v2 + ret i64 %v3 +} + +; CHECK-LABEL: f15 +; CHECK: r1:0 &= rol(r3:2,#7) +define i64 @f15(i64 %a0, i64 %a1) #0 { +b0: + %v0 = shl i64 %a1, 7 + %v1 = lshr i64 %a1, 57 + %v2 = or i64 %v0, %v1 + %v3 = and i64 %v2, %a0 + ret i64 %v3 +} + +; CHECK-LABEL: f17 +; CHECK: r1:0 ^= rol(r3:2,#7) +define i64 @f17(i64 %a0, i64 %a1) #0 { +b0: + %v0 = shl i64 %a1, 7 + %v1 = lshr i64 %a1, 57 + %v2 = or i64 %v0, %v1 + %v3 = xor i64 %v2, %a0 + ret i64 %v3 +} + +attributes #0 = { norecurse nounwind readnone "target-cpu"="hexagonv60" "target-features"="-packets" } diff --git a/test/CodeGen/Hexagon/rotl-i64.ll b/test/CodeGen/Hexagon/rotl-i64.ll index b9e59198c47..31e5d5c43bb 100644 --- a/test/CodeGen/Hexagon/rotl-i64.ll +++ b/test/CodeGen/Hexagon/rotl-i64.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s -; CHECK: asl +; CHECK: rol ; Function Attrs: nounwind define fastcc void @f0() #0 { @@ -24,7 +24,7 @@ b3: ; preds = %b3, %b2 br label %b3 } -attributes #0 = { nounwind } +attributes #0 = { nounwind "target-cpu"="hexagonv60" } !0 = !{!1, !1, i64 0} !1 = !{!"long long", !2} -- 2.11.0