[vectorize] Initial version of respecting PGO in the vectorizer: treat

author Chandler Carruth <chandlerc@gmail.com>

Mon, 27 Jan 2014 13:11:50 +0000 (13:11 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Mon, 27 Jan 2014 13:11:50 +0000 (13:11 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Mon, 27 Jan 2014 13:11:50 +0000 (13:11 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Mon, 27 Jan 2014 13:11:50 +0000 (13:11 +0000)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index 750fc40..5d1f85f 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -56,6 +56,7 @@
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/LoopIterator.h"
  #include "llvm/Analysis/LoopPass.h"
@@ -78,6 +79,7 @@
  #include "llvm/IR/Value.h"
  #include "llvm/IR/Verifier.h"
  #include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/PatternMatch.h"
@@ -980,18 +982,27 @@ struct LoopVectorize : public FunctionPass {
    LoopInfo *LI;
    TargetTransformInfo *TTI;
    DominatorTree *DT;
+  BlockFrequencyInfo *BFI;
    TargetLibraryInfo *TLI;
    bool DisableUnrolling;
    bool AlwaysVectorize;
  
+  BlockFrequency ColdEntryFreq;
+
    virtual bool runOnFunction(Function &F) {
      SE = &getAnalysis<ScalarEvolution>();
      DL = getAnalysisIfAvailable<DataLayout>();
      LI = &getAnalysis<LoopInfo>();
      TTI = &getAnalysis<TargetTransformInfo>();
      DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    BFI = &getAnalysis<BlockFrequencyInfo>();
      TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
  
+    // Compute some weights outside of the loop over the loops. Compute this
+    // using a BranchProbability to re-use its scaling math.
+    const BranchProbability ColdProb(1, 5); // 20%
+    ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb;
+
      // If the target claims to have no vector registers don't attempt
      // vectorization.
      if (!TTI->getNumberOfRegisters(true))
@@ -1064,6 +1075,13 @@ struct LoopVectorize : public FunctionPass {
      bool OptForSize =
          Hints.Force != 1 && F->hasFnAttribute(Attribute::OptimizeForSize);
  
+    // Compute the weighted frequency of this loop being executed and see if it
+    // is less than 20% of the function entry baseline frequency. Note that we
+    // always have a canonical loop here because we think we *can* vectorize.
+    BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
+    if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq)
+      OptForSize = true;
+
      // Check the function attributes to see if implicit floats are allowed.
      // FIXME: This check doesn't seem possibly correct -- what if the loop is
      // an integer loop and the vector instructions selected are purely integer
@@ -1109,6 +1127,7 @@ struct LoopVectorize : public FunctionPass {
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequiredID(LoopSimplifyID);
      AU.addRequiredID(LCSSAID);
+    AU.addRequired<BlockFrequencyInfo>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addRequired<LoopInfo>();
      AU.addRequired<ScalarEvolution>();
@@ -5469,6 +5488,7 @@ char LoopVectorize::ID = 0;
  static const char lv_name[] = "Loop Vectorization";
  INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
  INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
  INITIALIZE_PASS_DEPENDENCY(LCSSA)
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll

index 14ac417..1d46366 100644 (file)
--- a/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -115,6 +115,31 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
    ret void
  }
  
+; N is unknown, we need a tail. Can't vectorize because the loop is cold.
+;CHECK-LABEL: @example4(
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) {
+  %1 = icmp eq i32 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph, !prof !0
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ]
+  %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
+  %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
+  %2 = add nsw i32 %.05, -1
+  %3 = getelementptr inbounds i32* %.023, i64 1
+  %4 = load i32* %.023, align 16
+  %5 = getelementptr inbounds i32* %.014, i64 1
+  store i32 %4, i32* %.014, align 16
+  %6 = icmp eq i32 %2, 0
+  br i1 %6, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
  
  ; We can't vectorize this one because we need a runtime ptr check.
  ;CHECK-LABEL: @example23(
author	Chandler Carruth <chandlerc@gmail.com>
	Mon, 27 Jan 2014 13:11:50 +0000 (13:11 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Mon, 27 Jan 2014 13:11:50 +0000 (13:11 +0000)
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
test/Transforms/LoopVectorize/X86/small-size.ll		patch \| blob \| history