#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PatternMatch.h"
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
+ BlockFrequencyInfo *BFI;
TargetLibraryInfo *TLI;
bool DisableUnrolling;
bool AlwaysVectorize;
+ BlockFrequency ColdEntryFreq;
+
virtual bool runOnFunction(Function &F) {
SE = &getAnalysis<ScalarEvolution>();
DL = getAnalysisIfAvailable<DataLayout>();
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ BFI = &getAnalysis<BlockFrequencyInfo>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ // Compute some weights outside of the loop over the loops. Compute this
+ // using a BranchProbability to re-use its scaling math.
+ const BranchProbability ColdProb(1, 5); // 20%
+ ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb;
+
// If the target claims to have no vector registers don't attempt
// vectorization.
if (!TTI->getNumberOfRegisters(true))
bool OptForSize =
Hints.Force != 1 && F->hasFnAttribute(Attribute::OptimizeForSize);
+ // Compute the weighted frequency of this loop being executed and see if it
+ // is less than 20% of the function entry baseline frequency. Note that we
+ // always have a canonical loop here because we think we *can* vectorize.
+ BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
+ if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq)
+ OptForSize = true;
+
// Check the function attributes to see if implicit floats are allowed.
// FIXME: This check doesn't seem possibly correct -- what if the loop is
// an integer loop and the vector instructions selected are purely integer
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
+ AU.addRequired<BlockFrequencyInfo>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
ret void
}
+; N is unknown, we need a tail. Can't vectorize because the loop is cold.
+;CHECK-LABEL: @example4(
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) {
+ %1 = icmp eq i32 %n, 0
+ br i1 %1, label %._crit_edge, label %.lr.ph, !prof !0
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ]
+ %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
+ %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
+ %2 = add nsw i32 %.05, -1
+ %3 = getelementptr inbounds i32* %.023, i64 1
+ %4 = load i32* %.023, align 16
+ %5 = getelementptr inbounds i32* %.014, i64 1
+ store i32 %4, i32* %.014, align 16
+ %6 = icmp eq i32 %2, 0
+ br i1 %6, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
; We can't vectorize this one because we need a runtime ptr check.
;CHECK-LABEL: @example23(