[Pipeliner] Add two pragmas to control software pipelining optimization

author Brendon Cahoon <bcahoon@codeaurora.org>

Wed, 23 Jan 2019 03:26:10 +0000 (03:26 +0000)

committer Brendon Cahoon <bcahoon@codeaurora.org>

Wed, 23 Jan 2019 03:26:10 +0000 (03:26 +0000)
author Brendon Cahoon <bcahoon@codeaurora.org>
Wed, 23 Jan 2019 03:26:10 +0000 (03:26 +0000)
committer Brendon Cahoon <bcahoon@codeaurora.org>
Wed, 23 Jan 2019 03:26:10 +0000 (03:26 +0000)
diff --git a/include/llvm/CodeGen/MachinePipeliner.h b/include/llvm/CodeGen/MachinePipeliner.h

index 02e01e1..a30e4b9 100644 (file)
--- a/include/llvm/CodeGen/MachinePipeliner.h
+++ b/include/llvm/CodeGen/MachinePipeliner.h
@@ -62,6 +62,8 @@ public:
    const InstrItineraryData *InstrItins;
    const TargetInstrInfo *TII = nullptr;
    RegisterClassInfo RegClassInfo;
+  bool disabledByPragma = false;
+  unsigned II_setByPragma = 0;
  
  #ifndef NDEBUG
    static int NumTries;
@@ -99,6 +101,7 @@ private:
    bool canPipelineLoop(MachineLoop &L);
    bool scheduleLoop(MachineLoop &L);
    bool swingModuloScheduler(MachineLoop &L);
+  void setPragmaPipelineOptions(MachineLoop &L);
  };
  
  /// This class builds the dependence graph for the instructions in a loop,
@@ -107,11 +110,14 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
    MachinePipeliner &Pass;
    /// The minimum initiation interval between iterations for this schedule.
    unsigned MII = 0;
+  /// The maximum initiation interval between iterations for this schedule.
+  unsigned MAX_II = 0;
    /// Set to true if a valid pipelined schedule is found for the loop.
    bool Scheduled = false;
    MachineLoop &Loop;
    LiveIntervals &LIS;
    const RegisterClassInfo &RegClassInfo;
+  unsigned II_setByPragma = 0;
  
    /// A toplogical ordering of the SUnits, which is needed for changing
    /// dependences and iterating over the SUnits.
@@ -189,9 +195,9 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
  
  public:
    SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
-                    const RegisterClassInfo &rci)
+                    const RegisterClassInfo &rci, unsigned II)
        : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
-        RegClassInfo(rci), Topo(SUnits, &ExitSU) {
+        RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) {
      P.MF->getSubtarget().getSMSMutations(Mutations);
      if (SwpEnableCopyToPhi)
        Mutations.push_back(llvm::make_unique<CopyToPhiMutation>());
@@ -252,9 +258,6 @@ public:
      return 0;
    }
  
-  /// Set the Minimum Initiation Interval for this schedule attempt.
-  void setMII(unsigned mii) { MII = mii; }
-
    void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
  
    void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
@@ -345,6 +348,10 @@ private:
                               unsigned &OffsetPos, unsigned &NewBase,
                               int64_t &NewOffset);
    void postprocessDAG();
+  /// Set the Minimum Initiation Interval for this schedule attempt.
+  void setMII(unsigned ResMII, unsigned RecMII);
+  /// Set the Maximum Initiation Interval for this schedule attempt.
+  void setMAX_II();
  };
  
  /// A NodeSet contains a set of SUnit DAG nodes with additional information
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp

index 6e62cb5..90a54e4 100644 (file)
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -210,8 +210,11 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
    }
  #endif
  
-  if (!canPipelineLoop(L))
+  setPragmaPipelineOptions(L);
+  if (!canPipelineLoop(L)) {
+    LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
      return Changed;
+  }
  
    ++NumTrytoPipeline;
  
@@ -220,6 +223,50 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
    return Changed;
  }
  
+void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
+  MachineBasicBlock *LBLK = L.getTopBlock();
+
+  if (LBLK == nullptr)
+    return;
+
+  const BasicBlock *BBLK = LBLK->getBasicBlock();
+  if (BBLK == nullptr)
+    return;
+
+  const Instruction *TI = BBLK->getTerminator();
+  if (TI == nullptr)
+    return;
+
+  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
+  if (LoopID == nullptr)
+    return;
+
+  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
+  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
+
+  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+
+    if (MD == nullptr)
+      continue;
+
+    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+
+    if (S == nullptr)
+      continue;
+
+    if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
+      assert(MD->getNumOperands() == 2 &&
+             "Pipeline initiation interval hint metadata should have two operands.");
+      II_setByPragma =
+          mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+      assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
+    } else if (S->getString() == "llvm.loop.pipeline.disable") {
+      disabledByPragma = true;
+    }
+  }
+}
+
  /// Return true if the loop can be software pipelined.  The algorithm is
  /// restricted to loops with a single basic block.  Make sure that the
  /// branch in the loop can be analyzed.
@@ -227,6 +274,9 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
    if (L.getNumBlocks() != 1)
      return false;
  
+  if (disabledByPragma)
+    return false;
+
    // Check if the branch can't be understood because we can't do pipelining
    // if that's the case.
    LI.TBB = nullptr;
@@ -285,7 +335,8 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
  bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
    assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
  
-  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
+  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
+                        II_setByPragma);
  
    MachineBasicBlock *MBB = L.getHeader();
    // The kernel should not include any terminator instructions.  These
@@ -308,6 +359,20 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
    return SMS.hasNewSchedule();
  }
  
+void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
+  // An initiation interval requested by pragma takes precedence; otherwise
+  // the MII is the larger of the resource and recurrence bounds.
+  const bool HasPragmaII = II_setByPragma > 0;
+  MII = HasPragmaII ? II_setByPragma : std::max(ResMII, RecMII);
+}
+
+void SwingSchedulerDAG::setMAX_II() {
+  // An initiation interval requested by pragma is also the upper bound;
+  // otherwise the upper bound is ten above the current MII.
+  const bool HasPragmaII = II_setByPragma > 0;
+  MAX_II = HasPragmaII ? II_setByPragma : MII + 10;
+}
+
  /// We override the schedule function in ScheduleDAGInstrs to implement the
  /// scheduling part of the Swing Modulo Scheduling algorithm.
  void SwingSchedulerDAG::schedule() {
@@ -334,9 +399,11 @@ void SwingSchedulerDAG::schedule() {
    if (SwpIgnoreRecMII)
      RecMII = 0;
  
-  MII = std::max(ResMII, RecMII);
-  LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII
-                    << ", res=" << ResMII << ")\n");
+  setMII(ResMII, RecMII);
+  setMAX_II();
+
+  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
+                    << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
  
    // Can't schedule a loop without a valid MII.
    if (MII == 0)
@@ -1744,8 +1811,9 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
      return false;
  
    bool scheduleFound = false;
+  unsigned II = 0;
    // Keep increasing II until a valid schedule is found.
-  for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
+  for (II = MII; II <= MAX_II && !scheduleFound; ++II) {
      Schedule.reset();
      Schedule.setInitiationInterval(II);
      LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
@@ -1817,7 +1885,8 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
        scheduleFound = Schedule.isValidSchedule(this);
    }
  
-  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
+  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
+                    << ")\n");
  
    if (scheduleFound)
      Schedule.finalizeSchedule(this);
diff --git a/test/CodeGen/Hexagon/swp-pragma-disable.ii b/test/CodeGen/Hexagon/swp-pragma-disable.ii

new file mode 100644 (file)

index 0000000..80494f5
--- /dev/null
+++ b/test/CodeGen/Hexagon/swp-pragma-disable.ii
@@ -0,0 +1,49 @@
+; RUN: llc -disable-lsr -march=hexagon -enable-pipeliner  \
+; RUN:     -debug-only=pipeliner < %s 2>&1 > /dev/null | FileCheck %s
+; REQUIRES: asserts
+;
+; Test that checks that the pipeliner is disabled by the pragma.
+
+; CHECK: Can not pipeline loop 
+
+; Function Attrs: nounwind
+define void @f0(i32* nocapture %a0, i32 %a1) #0 {
+b0:
+  %v0 = icmp sgt i32 %a1, 1
+  br i1 %v0, label %b1, label %b4
+
+b1:                                               ; preds = %b0
+  %v1 = load i32, i32* %a0, align 4
+  %v2 = add i32 %v1, 10
+  %v3 = getelementptr i32, i32* %a0, i32 1
+  %v4 = add i32 %a1, -1
+  br label %b2
+
+b2:                                               ; preds = %b2, %b1
+  %v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ]
+  %v6 = phi i32* [ %v11, %b2 ], [ %v3, %b1 ]
+  %v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ]
+  store i32 %v7, i32* %v6, align 4
+  %v8 = add i32 %v7, 10
+  %v9 = getelementptr i32, i32* %v6, i32 -1
+  store i32 %v8, i32* %v9, align 4
+  %v10 = add i32 %v7, 10
+  %v11 = getelementptr i32, i32* %v6, i32 1
+  %v12 = add i32 %v5, -1
+  %v13 = icmp eq i32 %v12, 0
+  ; The loop metadata must be on the terminator of the loop block (b2):
+  ; the pipeliner reads it from the terminator of the loop's top block.
+  br i1 %v13, label %b3, label %b2, !llvm.loop !2
+
+b3:                                               ; preds = %b2
+  br label %b4
+
+b4:                                               ; preds = %b3, %b0
+  ret void
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!2, !2, i64 0}
+!2 = distinct !{!2, !3}
+!3 = !{!"llvm.loop.pipeline.disable", i1 true}
+
diff --git a/test/CodeGen/Hexagon/swp-pragma-initiation-interval.ii b/test/CodeGen/Hexagon/swp-pragma-initiation-interval.ii

new file mode 100644 (file)

index 0000000..6a4ba7e
--- /dev/null
+++ b/test/CodeGen/Hexagon/swp-pragma-initiation-interval.ii
@@ -0,0 +1,49 @@
+; RUN: llc -disable-lsr -march=hexagon -enable-pipeliner  \
+; RUN:     -debug-only=pipeliner < %s 2>&1 > /dev/null | FileCheck %s
+; REQUIRES: asserts
+;
+; Test that checks that the II set by the pragma is used by the pipeliner.
+
+; CHECK: MII = 2 MAX_II = 2 
+
+; Function Attrs: nounwind
+define void @f0(i32* nocapture %a0, i32 %a1) #0 {
+b0:
+  %v0 = icmp sgt i32 %a1, 1
+  br i1 %v0, label %b1, label %b4
+
+b1:                                               ; preds = %b0
+  %v1 = load i32, i32* %a0, align 4
+  %v2 = add i32 %v1, 10
+  %v3 = getelementptr i32, i32* %a0, i32 1
+  %v4 = add i32 %a1, -1
+  br label %b2
+
+b2:                                               ; preds = %b2, %b1
+  %v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ]
+  %v6 = phi i32* [ %v11, %b2 ], [ %v3, %b1 ]
+  %v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ]
+  store i32 %v7, i32* %v6, align 4
+  %v8 = add i32 %v7, 10
+  %v9 = getelementptr i32, i32* %v6, i32 -1
+  store i32 %v8, i32* %v9, align 4
+  %v10 = add i32 %v7, 10
+  %v11 = getelementptr i32, i32* %v6, i32 1
+  %v12 = add i32 %v5, -1
+  %v13 = icmp eq i32 %v12, 0
+  ; The loop metadata must be on the terminator of the loop block (b2):
+  ; the pipeliner reads it from the terminator of the loop's top block.
+  br i1 %v13, label %b3, label %b2, !llvm.loop !2
+
+b3:                                               ; preds = %b2
+  br label %b4
+
+b4:                                               ; preds = %b3, %b0
+  ret void
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!2, !2, i64 0}
+!2 = distinct !{!2, !3}
+!3 = !{!"llvm.loop.pipeline.initiationinterval", i32 2}
+
diff --git a/test/CodeGen/Hexagon/swp-resmii-1.ll b/test/CodeGen/Hexagon/swp-resmii-1.ll

index ca9bcff..1a97348 100644 (file)
--- a/test/CodeGen/Hexagon/swp-resmii-1.ll
+++ b/test/CodeGen/Hexagon/swp-resmii-1.ll
@@ -3,7 +3,7 @@
  
  ; Test that checks that we compute the correct ResMII for haar.
  
-; CHECK: MII = 4 (rec=1, res=4)
+; CHECK: MII = 4 MAX_II = 14 (rec=1, res=4)
  
  ; Function Attrs: nounwind
  define void @f0(i16* noalias nocapture readonly %a0, i32 %a1, i32 %a2, i32 %a3, i8* noalias nocapture %a4, i32 %a5) #0 {
diff --git a/test/CodeGen/Hexagon/swp-resmii.ll b/test/CodeGen/Hexagon/swp-resmii.ll

index 9cf3b1e..851d82e 100644 (file)
--- a/test/CodeGen/Hexagon/swp-resmii.ll
+++ b/test/CodeGen/Hexagon/swp-resmii.ll
@@ -4,7 +4,7 @@
  ;
  ; Test that checks if the ResMII is 1.
  
-; CHECK: MII = 1 (rec=1, res=1)
+; CHECK: MII = 1 MAX_II = 11 (rec=1, res=1)
  
  ; Function Attrs: nounwind
  define void @f0(i32* nocapture %a0, i32 %a1) #0 {
author	Brendon Cahoon <bcahoon@codeaurora.org>
	Wed, 23 Jan 2019 03:26:10 +0000 (03:26 +0000)
committer	Brendon Cahoon <bcahoon@codeaurora.org>
	Wed, 23 Jan 2019 03:26:10 +0000 (03:26 +0000)
include/llvm/CodeGen/MachinePipeliner.h		patch \| blob \| history
lib/CodeGen/MachinePipeliner.cpp		patch \| blob \| history
test/CodeGen/Hexagon/swp-pragma-disable.ii	[new file with mode: 0644]	patch \| blob
test/CodeGen/Hexagon/swp-pragma-initiation-interval.ii	[new file with mode: 0644]	patch \| blob
test/CodeGen/Hexagon/swp-resmii-1.ll		patch \| blob \| history
test/CodeGen/Hexagon/swp-resmii.ll		patch \| blob \| history