--- /dev/null
+//===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file defines the LoopVectorizationLegality class. Original code
+/// in Loop Vectorizer has been moved out to its own file for modularity
+/// and reusability.
+///
+/// Currently, it works for innermost loop vectorization. Extending this to
+/// outer loop vectorization is a TODO item.
+///
+/// Also provides:
+/// 1) LoopVectorizeHints class which keeps a number of loop annotations
+/// locally for easy look up. It has the ability to write them back as
+/// loop metadata, upon request.
+/// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
+/// of reporting useful failure to vectorize message.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
+#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+namespace llvm {
+
+/// Create an analysis remark that explains why vectorization failed
+///
+/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
+/// RemarkName is the identifier for the remark. If \p I is passed it is an
+/// instruction that prevents vectorization. Otherwise \p TheLoop is used for
+/// the location of the remark. \return the remark object that can be
+/// streamed to.
+OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
+ StringRef RemarkName,
+ Loop *TheLoop,
+ Instruction *I = nullptr);
+
+/// Utility class for getting and setting loop vectorizer hints in the form
+/// of loop metadata.
+/// This class keeps a number of loop annotations locally (as member variables)
+/// and can, upon request, write them back as metadata on the loop. It will
+/// initially scan the loop for existing metadata, and will update the local
+/// values based on information in the loop.
+/// We cannot write all values to metadata, as the mere presence of some info,
+/// for example 'force', means a decision has been made. So, we need to be
+/// careful NOT to add them if the user hasn't specifically asked so.
+class LoopVectorizeHints {
+ enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED };
+
+ /// Hint - associates name and validation with the hint value.
+ struct Hint {
+ const char *Name;
+ unsigned Value; // This may have to change for non-numeric values.
+ HintKind Kind;
+
+ Hint(const char *Name, unsigned Value, HintKind Kind)
+ : Name(Name), Value(Value), Kind(Kind) {}
+
+ bool validate(unsigned Val);
+ };
+
+ /// Vectorization width.
+ Hint Width;
+
+ /// Vectorization interleave factor.
+ Hint Interleave;
+
+ /// Vectorization forced
+ Hint Force;
+
+ /// Already Vectorized
+ Hint IsVectorized;
+
+ /// Return the loop metadata prefix.
+ static StringRef Prefix() { return "llvm.loop."; }
+
+ /// True if there is any unsafe math in the loop.
+ bool PotentiallyUnsafe = false;
+
+public:
+ enum ForceKind {
+ FK_Undefined = -1, ///< Not selected.
+ FK_Disabled = 0, ///< Forcing disabled.
+ FK_Enabled = 1, ///< Forcing enabled.
+ };
+
+ LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
+ OptimizationRemarkEmitter &ORE);
+
+ /// Mark the loop L as already vectorized by setting the width to 1.
+ void setAlreadyVectorized() {
+ IsVectorized.Value = 1;
+ Hint Hints[] = {IsVectorized};
+ writeHintsToMetadata(Hints);
+ }
+
+ bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const;
+
+ /// Dumps all the hint information.
+ void emitRemarkWithHints() const;
+
+ unsigned getWidth() const { return Width.Value; }
+ unsigned getInterleave() const { return Interleave.Value; }
+ unsigned getIsVectorized() const { return IsVectorized.Value; }
+ enum ForceKind getForce() const { return (ForceKind)Force.Value; }
+
+ /// \brief If hints are provided that force vectorization, use the AlwaysPrint
+ /// pass name to force the frontend to print the diagnostic.
+ const char *vectorizeAnalysisPassName() const;
+
+ bool allowReordering() const {
+ // When enabling loop hints are provided we allow the vectorizer to change
+ // the order of operations that is given by the scalar loop. This is not
+ // enabled by default because can be unsafe or inefficient. For example,
+ // reordering floating-point operations will change the way round-off
+ // error accumulates in the loop.
+ return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
+ }
+
+ bool isPotentiallyUnsafe() const {
+ // Avoid FP vectorization if the target is unsure about proper support.
+ // This may be related to the SIMD unit in the target not handling
+ // IEEE 754 FP ops properly, or bad single-to-double promotions.
+ // Otherwise, a sequence of vectorized loops, even without reduction,
+ // could lead to different end results on the destination vectors.
+ return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
+ }
+
+ void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
+
+private:
+ /// Find hints specified in the loop metadata and update local values.
+ void getHintsFromMetadata();
+
+ /// Checks string hint with one operand and set value if valid.
+ void setHint(StringRef Name, Metadata *Arg);
+
+ /// Create a new hint from name / value pair.
+ MDNode *createHintMetadata(StringRef Name, unsigned V) const;
+
+ /// Matches metadata with hint name.
+ bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes);
+
+ /// Sets current hints into loop metadata, keeping other values intact.
+ void writeHintsToMetadata(ArrayRef<Hint> HintTypes);
+
+ /// The loop these hints belong to.
+ const Loop *TheLoop;
+
+ /// Interface to emit optimization remarks.
+ OptimizationRemarkEmitter &ORE;
+};
+
+/// \brief This holds vectorization requirements that must be verified late in
+/// the process. The requirements are set by legalize and costmodel. Once
+/// vectorization has been determined to be possible and profitable the
+/// requirements can be verified by looking for metadata or compiler options.
+/// For example, some loops require FP commutativity which is only allowed if
+/// vectorization is explicitly specified or if the fast-math compiler option
+/// has been provided.
+/// Late evaluation of these requirements allows helpful diagnostics to be
+/// composed that tells the user what need to be done to vectorize the loop. For
+/// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
+/// evaluation should be used only when diagnostics can generated that can be
+/// followed by a non-expert user.
+class LoopVectorizationRequirements {
+public:
+ LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
+
+ void addUnsafeAlgebraInst(Instruction *I) {
+ // First unsafe algebra instruction.
+ if (!UnsafeAlgebraInst)
+ UnsafeAlgebraInst = I;
+ }
+
+ void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
+
+ bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
+
+private:
+ unsigned NumRuntimePointerChecks = 0;
+ Instruction *UnsafeAlgebraInst = nullptr;
+
+ /// Interface to emit optimization remarks.
+ OptimizationRemarkEmitter &ORE;
+};
+
+/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
+/// to what vectorization factor.
+/// This class does not look at the profitability of vectorization, only the
+/// legality. This class has two main kinds of checks:
+/// * Memory checks - The code in canVectorizeMemory checks if vectorization
+/// will change the order of memory accesses in a way that will change the
+/// correctness of the program.
+/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
+/// checks for a number of different conditions, such as the availability of a
+/// single induction variable, that all types are supported and vectorize-able,
+/// etc. This code reflects the capabilities of InnerLoopVectorizer.
+/// This class is also used by InnerLoopVectorizer for identifying
+/// induction variable and the different reduction variables.
+class LoopVectorizationLegality {
+public:
+ LoopVectorizationLegality(
+ Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
+ TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F,
+ std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
+ OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
+ LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
+ : TheLoop(L), LI(LI), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA),
+ ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
+
+ /// ReductionList contains the reduction descriptors for all
+ /// of the reductions that were found in the loop.
+ using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>;
+
+ /// InductionList saves induction variables and maps them to the
+ /// induction descriptor.
+ using InductionList = MapVector<PHINode *, InductionDescriptor>;
+
+ /// RecurrenceSet contains the phi nodes that are recurrences other than
+ /// inductions and reductions.
+ using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
+
+ /// Returns true if it is legal to vectorize this loop.
+ /// This does not mean that it is profitable to vectorize this
+ /// loop, only that it is legal to do so.
+ /// Temporarily taking UseVPlanNativePath parameter. If true, take
+ /// the new code path being implemented for outer loop vectorization
+ /// (should be functional for inner loop vectorization) based on VPlan.
+ /// If false, good old LV code.
+ bool canVectorize(bool UseVPlanNativePath);
+
+ /// Returns the primary induction variable.
+ PHINode *getPrimaryInduction() { return PrimaryInduction; }
+
+ /// Returns the reduction variables found in the loop.
+ ReductionList *getReductionVars() { return &Reductions; }
+
+ /// Returns the induction variables found in the loop.
+ InductionList *getInductionVars() { return &Inductions; }
+
+ /// Return the first-order recurrences found in the loop.
+ RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
+
+ /// Return the set of instructions to sink to handle first-order recurrences.
+ DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
+
+ /// Returns the widest induction type.
+ Type *getWidestInductionType() { return WidestIndTy; }
+
+ /// Returns True if V is a Phi node of an induction variable in this loop.
+ bool isInductionPhi(const Value *V);
+
+ /// Returns True if V is a cast that is part of an induction def-use chain,
+ /// and had been proven to be redundant under a runtime guard (in other
+ /// words, the cast has the same SCEV expression as the induction phi).
+ bool isCastedInductionVariable(const Value *V);
+
+ /// Returns True if V can be considered as an induction variable in this
+ /// loop. V can be the induction phi, or some redundant cast in the def-use
+ /// chain of the inducion phi.
+ bool isInductionVariable(const Value *V);
+
+ /// Returns True if PN is a reduction variable in this loop.
+ bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
+
+ /// Returns True if Phi is a first-order recurrence in this loop.
+ bool isFirstOrderRecurrence(const PHINode *Phi);
+
+ /// Return true if the block BB needs to be predicated in order for the loop
+ /// to be vectorized.
+ bool blockNeedsPredication(BasicBlock *BB);
+
+ /// Check if this pointer is consecutive when vectorizing. This happens
+ /// when the last index of the GEP is the induction variable, or that the
+ /// pointer itself is an induction variable.
+ /// This check allows us to vectorize A[idx] into a wide load/store.
+ /// Returns:
+ /// 0 - Stride is unknown or non-consecutive.
+ /// 1 - Address is consecutive.
+ /// -1 - Address is consecutive, and decreasing.
+ /// NOTE: This method must only be used before modifying the original scalar
+ /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
+ int isConsecutivePtr(Value *Ptr);
+
+ /// Returns true if the value V is uniform within the loop.
+ bool isUniform(Value *V);
+
+ /// Returns the information that we collected about runtime memory check.
+ const RuntimePointerChecking *getRuntimePointerChecking() const {
+ return LAI->getRuntimePointerChecking();
+ }
+
+ const LoopAccessInfo *getLAI() const { return LAI; }
+
+ unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
+
+ uint64_t getMaxSafeRegisterWidth() const {
+ return LAI->getDepChecker().getMaxSafeRegisterWidth();
+ }
+
+ bool hasStride(Value *V) { return LAI->hasStride(V); }
+
+ /// Returns true if vector representation of the instruction \p I
+ /// requires mask.
+ bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
+
+ unsigned getNumStores() const { return LAI->getNumStores(); }
+ unsigned getNumLoads() const { return LAI->getNumLoads(); }
+
+ // Returns true if the NoNaN attribute is set on the function.
+ bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
+
+private:
+ /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
+ /// its nested loops are considered legal for vectorization. These legal
+ /// checks are common for inner and outer loop vectorization.
+ /// Temporarily taking UseVPlanNativePath parameter. If true, take
+ /// the new code path being implemented for outer loop vectorization
+ /// (should be functional for inner loop vectorization) based on VPlan.
+ /// If false, good old LV code.
+ bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
+
+ /// Return true if the pre-header, exiting and latch blocks of \p Lp
+ /// (non-recursive) are considered legal for vectorization.
+ /// Temporarily taking UseVPlanNativePath parameter. If true, take
+ /// the new code path being implemented for outer loop vectorization
+ /// (should be functional for inner loop vectorization) based on VPlan.
+ /// If false, good old LV code.
+ bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
+
+ /// Check if a single basic block loop is vectorizable.
+ /// At this point we know that this is a loop with a constant trip count
+ /// and we only need to check individual instructions.
+ bool canVectorizeInstrs();
+
+ /// When we vectorize loops we may change the order in which
+ /// we read and write from memory. This method checks if it is
+ /// legal to vectorize the code, considering only memory constrains.
+ /// Returns true if the loop is vectorizable
+ bool canVectorizeMemory();
+
+ /// Return true if we can vectorize this loop using the IF-conversion
+ /// transformation.
+ bool canVectorizeWithIfConvert();
+
+ /// Return true if we can vectorize this outer loop. The method performs
+ /// specific checks for outer loop vectorization.
+ bool canVectorizeOuterLoop();
+
+ /// Return true if all of the instructions in the block can be speculatively
+ /// executed. \p SafePtrs is a list of addresses that are known to be legal
+ /// and we know that we can read from them without segfault.
+ bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
+
+ /// Updates the vectorization state by adding \p Phi to the inductions list.
+ /// This can set \p Phi as the main induction of the loop if \p Phi is a
+ /// better choice for the main induction than the existing one.
+ void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
+ SmallPtrSetImpl<Value *> &AllowedExit);
+
+ /// Create an analysis remark that explains why vectorization failed
+ ///
+ /// \p RemarkName is the identifier for the remark. If \p I is passed it is
+ /// an instruction that prevents vectorization. Otherwise the loop is used
+ /// for the location of the remark. \return the remark object that can be
+ /// streamed to.
+ OptimizationRemarkAnalysis
+ createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const {
+ return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
+ RemarkName, TheLoop, I);
+ }
+
+ /// \brief If an access has a symbolic strides, this maps the pointer value to
+ /// the stride symbol.
+ const ValueToValueMap *getSymbolicStrides() {
+ // FIXME: Currently, the set of symbolic strides is sometimes queried before
+ // it's collected. This happens from canVectorizeWithIfConvert, when the
+ // pointer is checked to reference consecutive elements suitable for a
+ // masked access.
+ return LAI ? &LAI->getSymbolicStrides() : nullptr;
+ }
+
+ /// The loop that we evaluate.
+ Loop *TheLoop;
+
+ /// Loop Info analysis.
+ LoopInfo *LI;
+
+ /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
+ /// Applies dynamic knowledge to simplify SCEV expressions in the context
+ /// of existing SCEV assumptions. The analysis will also add a minimal set
+ /// of new predicates if this is required to enable vectorization and
+ /// unrolling.
+ PredicatedScalarEvolution &PSE;
+
+ /// Target Library Info.
+ TargetLibraryInfo *TLI;
+
+ /// Dominator Tree.
+ DominatorTree *DT;
+
+ // LoopAccess analysis.
+ std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
+
+ // And the loop-accesses info corresponding to this loop. This pointer is
+ // null until canVectorizeMemory sets it up.
+ const LoopAccessInfo *LAI = nullptr;
+
+ /// Interface to emit optimization remarks.
+ OptimizationRemarkEmitter *ORE;
+
+ // --- vectorization state --- //
+
+ /// Holds the primary induction variable. This is the counter of the
+ /// loop.
+ PHINode *PrimaryInduction = nullptr;
+
+ /// Holds the reduction variables.
+ ReductionList Reductions;
+
+ /// Holds all of the induction variables that we found in the loop.
+ /// Notice that inductions don't need to start at zero and that induction
+ /// variables can be pointers.
+ InductionList Inductions;
+
+ /// Holds all the casts that participate in the update chain of the induction
+ /// variables, and that have been proven to be redundant (possibly under a
+ /// runtime guard). These casts can be ignored when creating the vectorized
+ /// loop body.
+ SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
+
+ /// Holds the phi nodes that are first-order recurrences.
+ RecurrenceSet FirstOrderRecurrences;
+
+ /// Holds instructions that need to sink past other instructions to handle
+ /// first-order recurrences.
+ DenseMap<Instruction *, Instruction *> SinkAfter;
+
+ /// Holds the widest induction type encountered.
+ Type *WidestIndTy = nullptr;
+
+ /// Allowed outside users. This holds the induction and reduction
+ /// vars which can be accessed from outside the loop.
+ SmallPtrSet<Value *, 4> AllowedExit;
+
+ /// Can we assume the absence of NaNs.
+ bool HasFunNoNaNAttr = false;
+
+ /// Vectorization requirements that will go through late-evaluation.
+ LoopVectorizationRequirements *Requirements;
+
+ /// Used to emit an analysis of any legality issues.
+ LoopVectorizeHints *Hints;
+
+ /// The demanded bits analsyis is used to compute the minimum type size in
+ /// which a reduction can be computed.
+ DemandedBits *DB;
+
+ /// The assumption cache analysis is used to compute the minimum type size in
+ /// which a reduction can be computed.
+ AssumptionCache *AC;
+
+ /// While vectorizing these instructions we have to generate a
+ /// call to the appropriate masked intrinsic
+ SmallPtrSet<const Instruction *, 8> MaskedOp;
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
--- /dev/null
+//===- LoopVectorizationLegality.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides loop vectorization legality analysis. Original code
+// resided in LoopVectorize.cpp for a long time.
+//
+// At this point, it is implemented as a utility class, not as an analysis
+// pass. It should be easy to create an analysis pass around it if there
+// is a need (but D45420 needs to happen first).
+//
+#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/IntrinsicInst.h"
+
+using namespace llvm;
+
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+
+static cl::opt<bool>
+ EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
+ cl::desc("Enable if-conversion during vectorization."));
+
+static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
+ "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
+ cl::desc("The maximum allowed number of runtime memory checks with a "
+ "vectorize(enable) pragma."));
+
+static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
+ "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed."));
+
+static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
+ "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed with a "
+ "vectorize(enable) pragma"));
+
+/// Maximum vectorization interleave count.
+static const unsigned MaxInterleaveFactor = 16;
+
+namespace llvm {
+
+OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
+ StringRef RemarkName,
+ Loop *TheLoop,
+ Instruction *I) {
+ Value *CodeRegion = TheLoop->getHeader();
+ DebugLoc DL = TheLoop->getStartLoc();
+
+ if (I) {
+ CodeRegion = I->getParent();
+ // If there is no debug location attached to the instruction, revert back to
+ // using the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ }
+
+ OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
+ R << "loop not vectorized: ";
+ return R;
+}
+
+bool LoopVectorizeHints::Hint::validate(unsigned Val) {
+ switch (Kind) {
+ case HK_WIDTH:
+ return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
+ case HK_UNROLL:
+ return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
+ case HK_FORCE:
+ return (Val <= 1);
+ case HK_ISVECTORIZED:
+ return (Val == 0 || Val == 1);
+ }
+ return false;
+}
+
+LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
+ OptimizationRemarkEmitter &ORE)
+ : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
+ Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
+ Force("vectorize.enable", FK_Undefined, HK_FORCE),
+ IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
+ // Populate values with existing loop metadata.
+ getHintsFromMetadata();
+
+ // force-vector-interleave overrides DisableInterleaving.
+ if (VectorizerParams::isInterleaveForced())
+ Interleave.Value = VectorizerParams::VectorizationInterleave;
+
+ if (IsVectorized.Value != 1)
+ // If the vectorization width and interleaving count are both 1 then
+ // consider the loop to have been already vectorized because there's
+ // nothing more that we can do.
+ IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
+ DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
+ << "LV: Interleaving disabled by the pass manager\n");
+}
+
+bool LoopVectorizeHints::allowVectorization(Function *F, Loop *L,
+ bool AlwaysVectorize) const {
+ if (getForce() == LoopVectorizeHints::FK_Disabled) {
+ DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
+ emitRemarkWithHints();
+ return false;
+ }
+
+ if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
+ DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
+ emitRemarkWithHints();
+ return false;
+ }
+
+ if (getIsVectorized() == 1) {
+ DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
+ // FIXME: Add interleave.disable metadata. This will allow
+ // vectorize.disable to be used without disabling the pass and errors
+ // to differentiate between disabled vectorization and a width of 1.
+ ORE.emit([&]() {
+ return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
+ "AllDisabled", L->getStartLoc(),
+ L->getHeader())
+ << "loop not vectorized: vectorization and interleaving are "
+ "explicitly disabled, or the loop has already been "
+ "vectorized";
+ });
+ return false;
+ }
+
+ return true;
+}
+
+void LoopVectorizeHints::emitRemarkWithHints() const {
+ using namespace ore;
+
+ ORE.emit([&]() {
+ if (Force.Value == LoopVectorizeHints::FK_Disabled)
+ return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
+ TheLoop->getStartLoc(),
+ TheLoop->getHeader())
+ << "loop not vectorized: vectorization is explicitly disabled";
+ else {
+ OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
+ TheLoop->getStartLoc(), TheLoop->getHeader());
+ R << "loop not vectorized";
+ if (Force.Value == LoopVectorizeHints::FK_Enabled) {
+ R << " (Force=" << NV("Force", true);
+ if (Width.Value != 0)
+ R << ", Vector Width=" << NV("VectorWidth", Width.Value);
+ if (Interleave.Value != 0)
+ R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
+ R << ")";
+ }
+ return R;
+ }
+ });
+}
+
+const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
+ if (getWidth() == 1)
+ return LV_NAME;
+ if (getForce() == LoopVectorizeHints::FK_Disabled)
+ return LV_NAME;
+ if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
+ return LV_NAME;
+ return OptimizationRemarkAnalysis::AlwaysPrint;
+}
+
+void LoopVectorizeHints::getHintsFromMetadata() {
+ MDNode *LoopID = TheLoop->getLoopID();
+ if (!LoopID)
+ return;
+
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ const MDString *S = nullptr;
+ SmallVector<Metadata *, 4> Args;
+
+ // The expected hint is either a MDString or a MDNode with the first
+ // operand a MDString.
+ if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
+ if (!MD || MD->getNumOperands() == 0)
+ continue;
+ S = dyn_cast<MDString>(MD->getOperand(0));
+ for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
+ Args.push_back(MD->getOperand(i));
+ } else {
+ S = dyn_cast<MDString>(LoopID->getOperand(i));
+ assert(Args.size() == 0 && "too many arguments for MDString");
+ }
+
+ if (!S)
+ continue;
+
+ // Check if the hint starts with the loop metadata prefix.
+ StringRef Name = S->getString();
+ if (Args.size() == 1)
+ setHint(Name, Args[0]);
+ }
+}
+
+void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
+ if (!Name.startswith(Prefix()))
+ return;
+ Name = Name.substr(Prefix().size(), StringRef::npos);
+
+ const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
+ if (!C)
+ return;
+ unsigned Val = C->getZExtValue();
+
+ Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized};
+ for (auto H : Hints) {
+ if (Name == H->Name) {
+ if (H->validate(Val))
+ H->Value = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
+ break;
+ }
+ }
+}
+
+MDNode *LoopVectorizeHints::createHintMetadata(StringRef Name,
+ unsigned V) const {
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ Metadata *MDs[] = {
+ MDString::get(Context, Name),
+ ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
+ return MDNode::get(Context, MDs);
+}
+
+bool LoopVectorizeHints::matchesHintMetadataName(MDNode *Node,
+ ArrayRef<Hint> HintTypes) {
+ MDString *Name = dyn_cast<MDString>(Node->getOperand(0));
+ if (!Name)
+ return false;
+
+ for (auto H : HintTypes)
+ if (Name->getString().endswith(H.Name))
+ return true;
+ return false;
+}
+
+void LoopVectorizeHints::writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
+ if (HintTypes.empty())
+ return;
+
+ // Reserve the first element to LoopID (see below).
+ SmallVector<Metadata *, 4> MDs(1);
+ // If the loop already has metadata, then ignore the existing operands.
+ MDNode *LoopID = TheLoop->getLoopID();
+ if (LoopID) {
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
+ // If node in update list, ignore old value.
+ if (!matchesHintMetadataName(Node, HintTypes))
+ MDs.push_back(Node);
+ }
+ }
+
+ // Now, add the missing hints.
+ for (auto H : HintTypes)
+ MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
+
+ // Replace current metadata node with new one.
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+
+ TheLoop->setLoopID(NewLoopID);
+}
+
+bool LoopVectorizationRequirements::doesNotMeet(
+ Function *F, Loop *L, const LoopVectorizeHints &Hints) {
+ const char *PassName = Hints.vectorizeAnalysisPassName();
+ bool Failed = false;
+ if (UnsafeAlgebraInst && !Hints.allowReordering()) {
+ ORE.emit([&]() {
+ return OptimizationRemarkAnalysisFPCommute(
+ PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
+ UnsafeAlgebraInst->getParent())
+ << "loop not vectorized: cannot prove it is safe to reorder "
+ "floating-point operations";
+ });
+ Failed = true;
+ }
+
+ // Test if runtime memcheck thresholds are exceeded.
+ bool PragmaThresholdReached =
+ NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
+ bool ThresholdReached =
+ NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
+ if ((ThresholdReached && !Hints.allowReordering()) ||
+ PragmaThresholdReached) {
+ ORE.emit([&]() {
+ return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
+ L->getStartLoc(),
+ L->getHeader())
+ << "loop not vectorized: cannot prove it is safe to reorder "
+ "memory operations";
+ });
+ DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
+ Failed = true;
+ }
+
+ return Failed;
+}
+
+// Return true if the inner loop \p Lp is uniform with regard to the outer loop
+// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
+// executing the inner loop will execute the same iterations). This check is
+// very constrained for now but it will be relaxed in the future. \p Lp is
+// considered uniform if it meets all the following conditions:
+// 1) it has a canonical IV (starting from 0 and with stride 1),
+// 2) its latch terminator is a conditional branch and,
+// 3) its latch condition is a compare instruction whose operands are the
+// canonical IV and an OuterLp invariant.
+// This check doesn't take into account the uniformity of other conditions not
+// related to the loop latch because they don't affect the loop uniformity.
+//
+// NOTE: We decided to keep all these checks and its associated documentation
+// together so that we can easily have a picture of the current supported loop
+// nests. However, some of the current checks don't depend on \p OuterLp and
+// would be redundantly executed for each \p Lp if we invoked this function for
+// different candidate outer loops. This is not the case for now because we
+// don't currently have the infrastructure to evaluate multiple candidate outer
+// loops and \p OuterLp will be a fixed parameter while we only support explicit
+// outer loop vectorization. It's also very likely that these checks go away
+// before introducing the aforementioned infrastructure. However, if this is not
+// the case, we should move the \p OuterLp independent checks to a separate
+// function that is only executed once for each \p Lp.
+static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
+ assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
+
+ // If Lp is the outer loop, it's uniform by definition.
+ if (Lp == OuterLp)
+ return true;
+ assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
+
+ // 1.
+ PHINode *IV = Lp->getCanonicalInductionVariable();
+ if (!IV) {
+ DEBUG(dbgs() << "LV: Canonical IV not found.\n");
+ return false;
+ }
+
+ // 2.
+ BasicBlock *Latch = Lp->getLoopLatch();
+ auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LatchBr || LatchBr->isUnconditional()) {
+ DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
+ return false;
+ }
+
+ // 3.
+ auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
+ if (!LatchCmp) {
+ DEBUG(dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
+ return false;
+ }
+
+ Value *CondOp0 = LatchCmp->getOperand(0);
+ Value *CondOp1 = LatchCmp->getOperand(1);
+ Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
+ if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
+ !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
+ DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
+ return false;
+ }
+
+ return true;
+}
+
+// Return true if \p Lp and all its nested loops are uniform with regard to \p
+// OuterLp.
+static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
+ if (!isUniformLoop(Lp, OuterLp))
+ return false;
+
+ // Check if nested loops are uniform.
+ for (Loop *SubLp : *Lp)
+ if (!isUniformLoopNest(SubLp, OuterLp))
+ return false;
+
+ return true;
+}
+
+/// \brief Check whether it is safe to if-convert this phi node.
+///
+/// Phi nodes with constant expressions that can trap are not safe to if
+/// convert.
+static bool canIfConvertPHINodes(BasicBlock *BB) {
+ for (PHINode &Phi : BB->phis()) {
+ for (Value *V : Phi.incoming_values())
+ if (auto *C = dyn_cast<Constant>(V))
+ if (C->canTrap())
+ return false;
+ }
+ return true;
+}
+
+static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
+ if (Ty->isPointerTy())
+ return DL.getIntPtrType(Ty);
+
+ // It is possible that char's or short's overflow when we ask for the loop's
+ // trip count, work around this by changing the type size.
+ if (Ty->getScalarSizeInBits() < 32)
+ return Type::getInt32Ty(Ty->getContext());
+
+ return Ty;
+}
+
+static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
+ Ty0 = convertPointerToIntegerType(DL, Ty0);
+ Ty1 = convertPointerToIntegerType(DL, Ty1);
+ if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
+ return Ty0;
+ return Ty1;
+}
+
+/// \brief Check that the instruction has outside loop users and is not an
+/// identified reduction variable.
+static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
+ SmallPtrSetImpl<Value *> &AllowedExit) {
+ // Reduction and Induction instructions are allowed to have exit users. All
+ // other instructions must not have external users.
+ if (!AllowedExit.count(Inst))
+ // Check that all of the users of the loop are inside the BB.
+ for (User *U : Inst->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ // This user may be a reduction exit value.
+ if (!TheLoop->contains(UI)) {
+ DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
+ return true;
+ }
+ }
+ return false;
+}
+
+int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
+ const ValueToValueMap &Strides =
+ getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
+
+ int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
+ if (Stride == 1 || Stride == -1)
+ return Stride;
+ return 0;
+}
+
+bool LoopVectorizationLegality::isUniform(Value *V) {
+ return LAI->isUniform(V);
+}
+
+bool LoopVectorizationLegality::canVectorizeOuterLoop() {
+ assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
+ // Store the result and return it at the end instead of exiting early, in case
+ // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
+ bool Result = true;
+ bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
+
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ // Check whether the BB terminator is a BranchInst. Any other terminator is
+ // not supported yet.
+ auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!Br) {
+ DEBUG(dbgs() << "LV: Unsupported basic block terminator.\n");
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by vectorizer");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ // Check whether the BranchInst is a supported one. Only unconditional
+ // branches, conditional branches with an outer loop invariant condition or
+ // backedges are supported.
+ if (Br && Br->isConditional() &&
+ !TheLoop->isLoopInvariant(Br->getCondition()) &&
+ !LI->isLoopHeader(Br->getSuccessor(0)) &&
+ !LI->isLoopHeader(Br->getSuccessor(1))) {
+ DEBUG(dbgs() << "LV: Unsupported conditional branch.\n");
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by vectorizer");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+ }
+
+ // Check whether inner loops are uniform. At this point, we only support
+ // simple outer loops scenarios with uniform nested loops.
+ if (!isUniformLoopNest(TheLoop /*loop nest*/,
+ TheLoop /*context outer loop*/)) {
+ DEBUG(dbgs()
+ << "LV: Not vectorizing: Outer loop contains divergent loops.\n");
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by vectorizer");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ return Result;
+}
+
+void LoopVectorizationLegality::addInductionPhi(
+ PHINode *Phi, const InductionDescriptor &ID,
+ SmallPtrSetImpl<Value *> &AllowedExit) {
+ Inductions[Phi] = ID;
+
+ // In case this induction also comes with casts that we know we can ignore
+ // in the vectorized loop body, record them here. All casts could be recorded
+ // here for ignoring, but suffices to record only the first (as it is the
+ // only one that may bw used outside the cast sequence).
+ const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
+ if (!Casts.empty())
+ InductionCastsToIgnore.insert(*Casts.begin());
+
+ Type *PhiTy = Phi->getType();
+ const DataLayout &DL = Phi->getModule()->getDataLayout();
+
+ // Get the widest type.
+ if (!PhiTy->isFloatingPointTy()) {
+ if (!WidestIndTy)
+ WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
+ else
+ WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
+ }
+
+ // Int inductions are special because we only allow one IV.
+ if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
+ ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
+ isa<Constant>(ID.getStartValue()) &&
+ cast<Constant>(ID.getStartValue())->isNullValue()) {
+
+ // Use the phi node with the widest type as induction. Use the last
+ // one if there are multiple (no good reason for doing this other
+ // than it is expedient). We've checked that it begins at zero and
+ // steps by one, so this is a canonical induction variable.
+ if (!PrimaryInduction || PhiTy == WidestIndTy)
+ PrimaryInduction = Phi;
+ }
+
+ // Both the PHI node itself, and the "post-increment" value feeding
+ // back into the PHI node may have external users.
+ // We can allow those uses, except if the SCEVs we have for them rely
+ // on predicates that only hold within the loop, since allowing the exit
+ // currently means re-using this SCEV outside the loop.
+ if (PSE.getUnionPredicate().isAlwaysTrue()) {
+ AllowedExit.insert(Phi);
+ AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
+ }
+
+ DEBUG(dbgs() << "LV: Found an induction variable.\n");
+}
+
+bool LoopVectorizationLegality::canVectorizeInstrs() {
+ BasicBlock *Header = TheLoop->getHeader();
+
+ // Look for the attribute signaling the absence of NaNs.
+ Function &F = *Header->getParent();
+ HasFunNoNaNAttr =
+ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
+
+ // For each block in the loop.
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ // Scan the instructions in the block and look for hazards.
+ for (Instruction &I : *BB) {
+ if (auto *Phi = dyn_cast<PHINode>(&I)) {
+ Type *PhiTy = Phi->getType();
+ // Check that this PHI type is allowed.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
+ !PhiTy->isPointerTy()) {
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
+ << "loop control flow is not understood by vectorizer");
+ DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
+ return false;
+ }
+
+ // If this PHINode is not in the header block, then we know that we
+ // can convert it to select during if-conversion. No need to check if
+ // the PHIs in this block are induction or reduction variables.
+ if (BB != Header) {
+ // Check that this instruction has no outside users or is an
+ // identified reduction value with an outside user.
+ if (!hasOutsideLoopUser(TheLoop, Phi, AllowedExit))
+ continue;
+ ORE->emit(createMissedAnalysis("NeitherInductionNorReduction", Phi)
+ << "value could not be identified as "
+ "an induction or reduction variable");
+ return false;
+ }
+
+ // We only allow if-converted PHIs with exactly two incoming values.
+ if (Phi->getNumIncomingValues() != 2) {
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
+ << "control flow not understood by vectorizer");
+ DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ return false;
+ }
+
+ RecurrenceDescriptor RedDes;
+ if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
+ DT)) {
+ if (RedDes.hasUnsafeAlgebra())
+ Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
+ AllowedExit.insert(RedDes.getLoopExitInstr());
+ Reductions[Phi] = RedDes;
+ continue;
+ }
+
+ InductionDescriptor ID;
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
+ addInductionPhi(Phi, ID, AllowedExit);
+ if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
+ Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
+ continue;
+ }
+
+ if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
+ SinkAfter, DT)) {
+ FirstOrderRecurrences.insert(Phi);
+ continue;
+ }
+
+ // As a last resort, coerce the PHI to a AddRec expression
+ // and re-try classifying it a an induction PHI.
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
+ addInductionPhi(Phi, ID, AllowedExit);
+ continue;
+ }
+
+ ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi)
+ << "value that could not be identified as "
+ "reduction is used outside the loop");
+ DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n");
+ return false;
+ } // end of PHI handling
+
+ // We handle calls that:
+ // * Are debug info intrinsics.
+ // * Have a mapping to an IR intrinsic.
+ // * Have a vector version available.
+ auto *CI = dyn_cast<CallInst>(&I);
+ if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
+ !isa<DbgInfoIntrinsic>(CI) &&
+ !(CI->getCalledFunction() && TLI &&
+ TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
+ ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
+ << "call instruction cannot be vectorized");
+ DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
+ return false;
+ }
+
+ // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
+ // second argument is the same (i.e. loop invariant)
+ if (CI && hasVectorInstrinsicScalarOpd(
+ getVectorIntrinsicIDForCall(CI, TLI), 1)) {
+ auto *SE = PSE.getSE();
+ if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
+ ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI)
+ << "intrinsic instruction cannot be vectorized");
+ DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
+ return false;
+ }
+ }
+
+ // Check that the instruction return type is vectorizable.
+ // Also, we can't vectorize extractelement instructions.
+ if ((!VectorType::isValidElementType(I.getType()) &&
+ !I.getType()->isVoidTy()) ||
+ isa<ExtractElementInst>(I)) {
+ ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I)
+ << "instruction return type cannot be vectorized");
+ DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
+ return false;
+ }
+
+ // Check that the stored type is vectorizable.
+ if (auto *ST = dyn_cast<StoreInst>(&I)) {
+ Type *T = ST->getValueOperand()->getType();
+ if (!VectorType::isValidElementType(T)) {
+ ORE->emit(createMissedAnalysis("CantVectorizeStore", ST)
+ << "store instruction cannot be vectorized");
+ return false;
+ }
+
+ // FP instructions can allow unsafe algebra, thus vectorizable by
+ // non-IEEE-754 compliant SIMD units.
+ // This applies to floating-point math operations and calls, not memory
+ // operations, shuffles, or casts, as they don't change precision or
+ // semantics.
+ } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
+ !I.isFast()) {
+ DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
+ Hints->setPotentiallyUnsafe();
+ }
+
+ // Reduction instructions are allowed to have exit users.
+ // All other instructions must not have external users.
+ if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
+ ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
+ << "value cannot be used outside the loop");
+ return false;
+ }
+ } // next instr.
+ }
+
+ if (!PrimaryInduction) {
+ DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
+ if (Inductions.empty()) {
+ ORE->emit(createMissedAnalysis("NoInductionVariable")
+ << "loop induction variable could not be identified");
+ return false;
+ }
+ }
+
+ // Now we know the widest induction type, check if our found induction
+ // is the same size. If it's not, unset it here and InnerLoopVectorizer
+ // will create another.
+ if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
+ PrimaryInduction = nullptr;
+
+ return true;
+}
+
+bool LoopVectorizationLegality::canVectorizeMemory() {
+ LAI = &(*GetLAA)(*TheLoop);
+ const OptimizationRemarkAnalysis *LAR = LAI->getReport();
+ if (LAR) {
+ ORE->emit([&]() {
+ return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
+ "loop not vectorized: ", *LAR);
+ });
+ }
+ if (!LAI->canVectorizeMemory())
+ return false;
+
+ if (LAI->hasStoreToLoopInvariantAddress()) {
+ ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
+ << "write to a loop invariant address could not be vectorized");
+ DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+ return false;
+ }
+
+ Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
+ PSE.addPredicate(LAI->getPSE().getUnionPredicate());
+
+ return true;
+}
+
+bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
+ Value *In0 = const_cast<Value *>(V);
+ PHINode *PN = dyn_cast_or_null<PHINode>(In0);
+ if (!PN)
+ return false;
+
+ return Inductions.count(PN);
+}
+
+bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
+ auto *Inst = dyn_cast<Instruction>(V);
+ return (Inst && InductionCastsToIgnore.count(Inst));
+}
+
+bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
+ return isInductionPhi(V) || isCastedInductionVariable(V);
+}
+
+bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
+ return FirstOrderRecurrences.count(Phi);
+}
+
+bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
+ return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
+}
+
+bool LoopVectorizationLegality::blockCanBePredicated(
+ BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
+ const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+
+ for (Instruction &I : *BB) {
+ // Check that we don't have a constant expression that can trap as operand.
+ for (Value *Operand : I.operands()) {
+ if (auto *C = dyn_cast<Constant>(Operand))
+ if (C->canTrap())
+ return false;
+ }
+ // We might be able to hoist the load.
+ if (I.mayReadFromMemory()) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ if (!LI)
+ return false;
+ if (!SafePtrs.count(LI->getPointerOperand())) {
+ // !llvm.mem.parallel_loop_access implies if-conversion safety.
+ // Otherwise, record that the load needs (real or emulated) masking
+ // and let the cost model decide.
+ if (!IsAnnotatedParallel)
+ MaskedOp.insert(LI);
+ continue;
+ }
+ }
+
+ if (I.mayWriteToMemory()) {
+ auto *SI = dyn_cast<StoreInst>(&I);
+ if (!SI)
+ return false;
+ // Predicated store requires some form of masking:
+ // 1) masked store HW instruction,
+ // 2) emulation via load-blend-store (only if safe and legal to do so,
+ // be aware on the race conditions), or
+ // 3) element-by-element predicate check and scalar store.
+ MaskedOp.insert(SI);
+ continue;
+ }
+ if (I.mayThrow())
+ return false;
+ }
+
+ return true;
+}
+
+bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
+ if (!EnableIfConversion) {
+ ORE->emit(createMissedAnalysis("IfConversionDisabled")
+ << "if-conversion is disabled");
+ return false;
+ }
+
+ assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
+
+ // A list of pointers that we can safely read and write to.
+ SmallPtrSet<Value *, 8> SafePointes;
+
+ // Collect safe addresses.
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ if (blockNeedsPredication(BB))
+ continue;
+
+ for (Instruction &I : *BB)
+ if (auto *Ptr = getLoadStorePointerOperand(&I))
+ SafePointes.insert(Ptr);
+ }
+
+ // Collect the blocks that need predication.
+ BasicBlock *Header = TheLoop->getHeader();
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ // We don't support switch statements inside loops.
+ if (!isa<BranchInst>(BB->getTerminator())) {
+ ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator())
+ << "loop contains a switch statement");
+ return false;
+ }
+
+ // We must be able to predicate all blocks that need to be predicated.
+ if (blockNeedsPredication(BB)) {
+ if (!blockCanBePredicated(BB, SafePointes)) {
+ ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
+ << "control flow cannot be substituted for a select");
+ return false;
+ }
+ } else if (BB != Header && !canIfConvertPHINodes(BB)) {
+ ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
+ << "control flow cannot be substituted for a select");
+ return false;
+ }
+ }
+
+ // We can if-convert this loop.
+ return true;
+}
+
+// Helper function to canVectorizeLoopNestCFG.
+bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
+ bool UseVPlanNativePath) {
+ assert((UseVPlanNativePath || Lp->empty()) &&
+ "VPlan-native path is not enabled.");
+
+ // TODO: ORE should be improved to show more accurate information when an
+ // outer loop can't be vectorized because a nested loop is not understood or
+ // legal. Something like: "outer_loop_location: loop not vectorized:
+ // (inner_loop_location) loop control flow is not understood by vectorizer".
+
+ // Store the result and return it at the end instead of exiting early, in case
+ // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
+ bool Result = true;
+ bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
+
+ // We must have a loop in canonical form. Loops with indirectbr in them cannot
+ // be canonicalized.
+ if (!Lp->getLoopPreheader()) {
+ DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n");
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by vectorizer");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ // We must have a single backedge.
+ if (Lp->getNumBackEdges() != 1) {
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by vectorizer");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ // We must have a single exiting block.
+ if (!Lp->getExitingBlock()) {
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by vectorizer");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ // We only handle bottom-tested loops, i.e. loop in which the condition is
+ // checked at the end of each iteration. With that we can assume that all
+ // instructions in the loop are executed the same number of times.
+ if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by vectorizer");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ return Result;
+}
+
+bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
+ Loop *Lp, bool UseVPlanNativePath) {
+ // Store the result and return it at the end instead of exiting early, in case
+ // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
+ bool Result = true;
+ bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
+ if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ // Recursively check whether the loop control flow of nested loops is
+ // understood.
+ for (Loop *SubLp : *Lp)
+ if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ return Result;
+}
+
+bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
+ // Store the result and return it at the end instead of exiting early, in case
+ // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
+ bool Result = true;
+
+ bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
+ // Check whether the loop-related control flow in the loop nest is expected by
+ // vectorizer.
+ if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ // We need to have a loop header.
+ DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
+ << '\n');
+
+ // Specific checks for outer loops. We skip the remaining legal checks at this
+ // point because they don't support outer loops.
+ if (!TheLoop->empty()) {
+ assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
+
+ if (!canVectorizeOuterLoop()) {
+ DEBUG(dbgs() << "LV: Not vectorizing: Unsupported outer loop.\n");
+ // TODO: Implement DoExtraAnalysis when subsequent legal checks support
+ // outer loops.
+ return false;
+ }
+
+ DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
+ return Result;
+ }
+
+ assert(TheLoop->empty() && "Inner loop expected.");
+ // Check if we can if-convert non-single-bb loops.
+ unsigned NumBlocks = TheLoop->getNumBlocks();
+ if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
+ DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ // Check if we can vectorize the instructions and CFG in this loop.
+ if (!canVectorizeInstrs()) {
+ DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ // Go over each instruction and look at memory deps.
+ if (!canVectorizeMemory()) {
+ DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ DEBUG(dbgs() << "LV: We can vectorize this loop"
+ << (LAI->getRuntimePointerChecking()->Need
+ ? " (with a runtime bound check)"
+ : "")
+ << "!\n");
+
+ unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
+ if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
+ SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
+
+ if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
+ ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks")
+ << "Too many SCEV assumptions need to be made and checked "
+ << "at runtime");
+ DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ }
+
+ // Okay! We've done all the tests. If any have failed, return false. Otherwise
+ // we can vectorize, and at this point we don't have any other mem analysis
+ // which may limit our maximum vectorization factor, so just return true with
+ // no restrictions.
+ return Result;
+}
+
+} // namespace llvm
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
STATISTIC(LoopsVectorized, "Number of loops vectorized");
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
-static cl::opt<bool>
- EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
- cl::desc("Enable if-conversion during vectorization."));
-
/// Loops with a known constant trip count below this number are vectorized only
/// if no scalar iteration overheads are incurred.
static cl::opt<unsigned> TinyTripCountVectorThreshold(
"force-target-num-vector-regs", cl::init(0), cl::Hidden,
cl::desc("A flag that overrides the target's number of vector registers."));
-/// Maximum vectorization interleave count.
-static const unsigned MaxInterleaveFactor = 16;
-
static cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor(
"force-target-max-scalar-interleave", cl::init(0), cl::Hidden,
cl::desc("A flag that overrides the target's max interleave factor for "
cl::desc("The maximum interleave count to use when interleaving a scalar "
"reduction in a nested loop."));
-static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
- "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
- cl::desc("The maximum allowed number of runtime memory checks with a "
- "vectorize(enable) pragma."));
-
-static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
- "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
- cl::desc("The maximum number of SCEV checks allowed."));
-
-static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
- "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
- cl::desc("The maximum number of SCEV checks allowed with a "
- "vectorize(enable) pragma"));
-
static cl::opt<bool> EnableVPlanNativePath(
"enable-vplan-native-path", cl::init(false), cl::Hidden,
cl::desc("Enable VPlan-native vectorization path with "
"support for outer loop vectorization."));
-/// Create an analysis remark that explains why vectorization failed
-///
-/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
-/// RemarkName is the identifier for the remark. If \p I is passed it is an
-/// instruction that prevents vectorization. Otherwise \p TheLoop is used for
-/// the location of the remark. \return the remark object that can be
-/// streamed to.
-static OptimizationRemarkAnalysis
-createMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop,
- Instruction *I = nullptr) {
- Value *CodeRegion = TheLoop->getHeader();
- DebugLoc DL = TheLoop->getStartLoc();
-
- if (I) {
- CodeRegion = I->getParent();
- // If there is no debug location attached to the instruction, revert back to
- // using the loop's.
- if (I->getDebugLoc())
- DL = I->getDebugLoc();
- }
-
- OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
- R << "loop not vectorized: ";
- return R;
-}
-
-namespace {
-
-class LoopVectorizationRequirements;
-
-} // end anonymous namespace
-
/// A helper function for converting Scalar types to vector types.
/// If the incoming type is void, we return void. If the VF is 1, we return
/// the scalar type.
}
};
-/// Utility class for getting and setting loop vectorizer hints in the form
-/// of loop metadata.
-/// This class keeps a number of loop annotations locally (as member variables)
-/// and can, upon request, write them back as metadata on the loop. It will
-/// initially scan the loop for existing metadata, and will update the local
-/// values based on information in the loop.
-/// We cannot write all values to metadata, as the mere presence of some info,
-/// for example 'force', means a decision has been made. So, we need to be
-/// careful NOT to add them if the user hasn't specifically asked so.
-class LoopVectorizeHints {
- enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED };
-
- /// Hint - associates name and validation with the hint value.
- struct Hint {
- const char *Name;
- unsigned Value; // This may have to change for non-numeric values.
- HintKind Kind;
-
- Hint(const char *Name, unsigned Value, HintKind Kind)
- : Name(Name), Value(Value), Kind(Kind) {}
-
- bool validate(unsigned Val) {
- switch (Kind) {
- case HK_WIDTH:
- return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
- case HK_UNROLL:
- return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
- case HK_FORCE:
- return (Val <= 1);
- case HK_ISVECTORIZED:
- return (Val==0 || Val==1);
- }
- return false;
- }
- };
-
- /// Vectorization width.
- Hint Width;
-
- /// Vectorization interleave factor.
- Hint Interleave;
-
- /// Vectorization forced
- Hint Force;
-
- /// Already Vectorized
- Hint IsVectorized;
-
- /// Return the loop metadata prefix.
- static StringRef Prefix() { return "llvm.loop."; }
-
- /// True if there is any unsafe math in the loop.
- bool PotentiallyUnsafe = false;
-
-public:
- enum ForceKind {
- FK_Undefined = -1, ///< Not selected.
- FK_Disabled = 0, ///< Forcing disabled.
- FK_Enabled = 1, ///< Forcing enabled.
- };
-
- LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
- OptimizationRemarkEmitter &ORE)
- : Width("vectorize.width", VectorizerParams::VectorizationFactor,
- HK_WIDTH),
- Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
- Force("vectorize.enable", FK_Undefined, HK_FORCE),
- IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
- // Populate values with existing loop metadata.
- getHintsFromMetadata();
-
- // force-vector-interleave overrides DisableInterleaving.
- if (VectorizerParams::isInterleaveForced())
- Interleave.Value = VectorizerParams::VectorizationInterleave;
-
- if (IsVectorized.Value != 1)
- // If the vectorization width and interleaving count are both 1 then
- // consider the loop to have been already vectorized because there's
- // nothing more that we can do.
- IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
- DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
- << "LV: Interleaving disabled by the pass manager\n");
- }
-
- /// Mark the loop L as already vectorized by setting the width to 1.
- void setAlreadyVectorized() {
- IsVectorized.Value = 1;
- Hint Hints[] = {IsVectorized};
- writeHintsToMetadata(Hints);
- }
-
- bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const {
- if (getForce() == LoopVectorizeHints::FK_Disabled) {
- DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
- emitRemarkWithHints();
- return false;
- }
-
- if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
- DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
- emitRemarkWithHints();
- return false;
- }
-
- if (getIsVectorized() == 1) {
- DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
- // FIXME: Add interleave.disable metadata. This will allow
- // vectorize.disable to be used without disabling the pass and errors
- // to differentiate between disabled vectorization and a width of 1.
- ORE.emit([&]() {
- return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
- "AllDisabled", L->getStartLoc(),
- L->getHeader())
- << "loop not vectorized: vectorization and interleaving are "
- "explicitly disabled, or the loop has already been "
- "vectorized";
- });
- return false;
- }
-
- return true;
- }
-
- /// Dumps all the hint information.
- void emitRemarkWithHints() const {
- using namespace ore;
-
- ORE.emit([&]() {
- if (Force.Value == LoopVectorizeHints::FK_Disabled)
- return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
- TheLoop->getStartLoc(),
- TheLoop->getHeader())
- << "loop not vectorized: vectorization is explicitly disabled";
- else {
- OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
- TheLoop->getStartLoc(),
- TheLoop->getHeader());
- R << "loop not vectorized";
- if (Force.Value == LoopVectorizeHints::FK_Enabled) {
- R << " (Force=" << NV("Force", true);
- if (Width.Value != 0)
- R << ", Vector Width=" << NV("VectorWidth", Width.Value);
- if (Interleave.Value != 0)
- R << ", Interleave Count="
- << NV("InterleaveCount", Interleave.Value);
- R << ")";
- }
- return R;
- }
- });
- }
-
- unsigned getWidth() const { return Width.Value; }
- unsigned getInterleave() const { return Interleave.Value; }
- unsigned getIsVectorized() const { return IsVectorized.Value; }
- enum ForceKind getForce() const { return (ForceKind)Force.Value; }
-
- /// \brief If hints are provided that force vectorization, use the AlwaysPrint
- /// pass name to force the frontend to print the diagnostic.
- const char *vectorizeAnalysisPassName() const {
- if (getWidth() == 1)
- return LV_NAME;
- if (getForce() == LoopVectorizeHints::FK_Disabled)
- return LV_NAME;
- if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
- return LV_NAME;
- return OptimizationRemarkAnalysis::AlwaysPrint;
- }
-
- bool allowReordering() const {
- // When enabling loop hints are provided we allow the vectorizer to change
- // the order of operations that is given by the scalar loop. This is not
- // enabled by default because can be unsafe or inefficient. For example,
- // reordering floating-point operations will change the way round-off
- // error accumulates in the loop.
- return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
- }
-
- bool isPotentiallyUnsafe() const {
- // Avoid FP vectorization if the target is unsure about proper support.
- // This may be related to the SIMD unit in the target not handling
- // IEEE 754 FP ops properly, or bad single-to-double promotions.
- // Otherwise, a sequence of vectorized loops, even without reduction,
- // could lead to different end results on the destination vectors.
- return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
- }
-
- void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
-
-private:
- /// Find hints specified in the loop metadata and update local values.
- void getHintsFromMetadata() {
- MDNode *LoopID = TheLoop->getLoopID();
- if (!LoopID)
- return;
-
- // First operand should refer to the loop id itself.
- assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
-
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- const MDString *S = nullptr;
- SmallVector<Metadata *, 4> Args;
-
- // The expected hint is either a MDString or a MDNode with the first
- // operand a MDString.
- if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
- if (!MD || MD->getNumOperands() == 0)
- continue;
- S = dyn_cast<MDString>(MD->getOperand(0));
- for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
- Args.push_back(MD->getOperand(i));
- } else {
- S = dyn_cast<MDString>(LoopID->getOperand(i));
- assert(Args.size() == 0 && "too many arguments for MDString");
- }
-
- if (!S)
- continue;
-
- // Check if the hint starts with the loop metadata prefix.
- StringRef Name = S->getString();
- if (Args.size() == 1)
- setHint(Name, Args[0]);
- }
- }
-
- /// Checks string hint with one operand and set value if valid.
- void setHint(StringRef Name, Metadata *Arg) {
- if (!Name.startswith(Prefix()))
- return;
- Name = Name.substr(Prefix().size(), StringRef::npos);
-
- const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
- if (!C)
- return;
- unsigned Val = C->getZExtValue();
-
- Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized};
- for (auto H : Hints) {
- if (Name == H->Name) {
- if (H->validate(Val))
- H->Value = Val;
- else
- DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
- break;
- }
- }
- }
-
- /// Create a new hint from name / value pair.
- MDNode *createHintMetadata(StringRef Name, unsigned V) const {
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- Metadata *MDs[] = {MDString::get(Context, Name),
- ConstantAsMetadata::get(
- ConstantInt::get(Type::getInt32Ty(Context), V))};
- return MDNode::get(Context, MDs);
- }
-
- /// Matches metadata with hint name.
- bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
- MDString *Name = dyn_cast<MDString>(Node->getOperand(0));
- if (!Name)
- return false;
-
- for (auto H : HintTypes)
- if (Name->getString().endswith(H.Name))
- return true;
- return false;
- }
-
- /// Sets current hints into loop metadata, keeping other values intact.
- void writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
- if (HintTypes.empty())
- return;
-
- // Reserve the first element to LoopID (see below).
- SmallVector<Metadata *, 4> MDs(1);
- // If the loop already has metadata, then ignore the existing operands.
- MDNode *LoopID = TheLoop->getLoopID();
- if (LoopID) {
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
- // If node in update list, ignore old value.
- if (!matchesHintMetadataName(Node, HintTypes))
- MDs.push_back(Node);
- }
- }
-
- // Now, add the missing hints.
- for (auto H : HintTypes)
- MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
-
- // Replace current metadata node with new one.
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- MDNode *NewLoopID = MDNode::get(Context, MDs);
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
-
- TheLoop->setLoopID(NewLoopID);
- }
-
- /// The loop these hints belong to.
- const Loop *TheLoop;
-
- /// Interface to emit optimization remarks.
- OptimizationRemarkEmitter &ORE;
-};
-
} // end anonymous namespace
static void emitMissedWarning(Function *F, Loop *L,
namespace llvm {
-/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
-/// to what vectorization factor.
-/// This class does not look at the profitability of vectorization, only the
-/// legality. This class has two main kinds of checks:
-/// * Memory checks - The code in canVectorizeMemory checks if vectorization
-/// will change the order of memory accesses in a way that will change the
-/// correctness of the program.
-/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
-/// checks for a number of different conditions, such as the availability of a
-/// single induction variable, that all types are supported and vectorize-able,
-/// etc. This code reflects the capabilities of InnerLoopVectorizer.
-/// This class is also used by InnerLoopVectorizer for identifying
-/// induction variable and the different reduction variables.
-class LoopVectorizationLegality {
-public:
- LoopVectorizationLegality(
- Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
- TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F,
- std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
- OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
- LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
- : TheLoop(L), LI(LI), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA),
- ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
-
- /// ReductionList contains the reduction descriptors for all
- /// of the reductions that were found in the loop.
- using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>;
-
- /// InductionList saves induction variables and maps them to the
- /// induction descriptor.
- using InductionList = MapVector<PHINode *, InductionDescriptor>;
-
- /// RecurrenceSet contains the phi nodes that are recurrences other than
- /// inductions and reductions.
- using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
-
- /// Returns true if it is legal to vectorize this loop.
- /// This does not mean that it is profitable to vectorize this
- /// loop, only that it is legal to do so.
- bool canVectorize();
-
- /// Returns the primary induction variable.
- PHINode *getPrimaryInduction() { return PrimaryInduction; }
-
- /// Returns the reduction variables found in the loop.
- ReductionList *getReductionVars() { return &Reductions; }
-
- /// Returns the induction variables found in the loop.
- InductionList *getInductionVars() { return &Inductions; }
-
- /// Return the first-order recurrences found in the loop.
- RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
-
- /// Return the set of instructions to sink to handle first-order recurrences.
- DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
-
- /// Returns the widest induction type.
- Type *getWidestInductionType() { return WidestIndTy; }
-
- /// Returns True if V is a Phi node of an induction variable in this loop.
- bool isInductionPhi(const Value *V);
-
- /// Returns True if V is a cast that is part of an induction def-use chain,
- /// and had been proven to be redundant under a runtime guard (in other
- /// words, the cast has the same SCEV expression as the induction phi).
- bool isCastedInductionVariable(const Value *V);
-
- /// Returns True if V can be considered as an induction variable in this
- /// loop. V can be the induction phi, or some redundant cast in the def-use
- /// chain of the inducion phi.
- bool isInductionVariable(const Value *V);
-
- /// Returns True if PN is a reduction variable in this loop.
- bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
-
- /// Returns True if Phi is a first-order recurrence in this loop.
- bool isFirstOrderRecurrence(const PHINode *Phi);
-
- /// Return true if the block BB needs to be predicated in order for the loop
- /// to be vectorized.
- bool blockNeedsPredication(BasicBlock *BB);
-
- /// Check if this pointer is consecutive when vectorizing. This happens
- /// when the last index of the GEP is the induction variable, or that the
- /// pointer itself is an induction variable.
- /// This check allows us to vectorize A[idx] into a wide load/store.
- /// Returns:
- /// 0 - Stride is unknown or non-consecutive.
- /// 1 - Address is consecutive.
- /// -1 - Address is consecutive, and decreasing.
- /// NOTE: This method must only be used before modifying the original scalar
- /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
- int isConsecutivePtr(Value *Ptr);
-
- /// Returns true if the value V is uniform within the loop.
- bool isUniform(Value *V);
-
- /// Returns the information that we collected about runtime memory check.
- const RuntimePointerChecking *getRuntimePointerChecking() const {
- return LAI->getRuntimePointerChecking();
- }
-
- const LoopAccessInfo *getLAI() const { return LAI; }
-
- unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
-
- uint64_t getMaxSafeRegisterWidth() const {
- return LAI->getDepChecker().getMaxSafeRegisterWidth();
- }
-
- bool hasStride(Value *V) { return LAI->hasStride(V); }
-
- /// Returns true if vector representation of the instruction \p I
- /// requires mask.
- bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
-
- unsigned getNumStores() const { return LAI->getNumStores(); }
- unsigned getNumLoads() const { return LAI->getNumLoads(); }
-
- // Returns true if the NoNaN attribute is set on the function.
- bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
-
-private:
- /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
- /// its nested loops are considered legal for vectorization. These legal
- /// checks are common for inner and outer loop vectorization.
- bool canVectorizeLoopNestCFG(Loop *Lp);
-
- /// Return true if the pre-header, exiting and latch blocks of \p Lp
- /// (non-recursive) are considered legal for vectorization.
- bool canVectorizeLoopCFG(Loop *Lp);
-
- /// Check if a single basic block loop is vectorizable.
- /// At this point we know that this is a loop with a constant trip count
- /// and we only need to check individual instructions.
- bool canVectorizeInstrs();
-
- /// When we vectorize loops we may change the order in which
- /// we read and write from memory. This method checks if it is
- /// legal to vectorize the code, considering only memory constrains.
- /// Returns true if the loop is vectorizable
- bool canVectorizeMemory();
-
- /// Return true if we can vectorize this loop using the IF-conversion
- /// transformation.
- bool canVectorizeWithIfConvert();
-
- /// Return true if we can vectorize this outer loop. The method performs
- /// specific checks for outer loop vectorization.
- bool canVectorizeOuterLoop();
-
- /// Return true if all of the instructions in the block can be speculatively
- /// executed. \p SafePtrs is a list of addresses that are known to be legal
- /// and we know that we can read from them without segfault.
- bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
-
- /// Updates the vectorization state by adding \p Phi to the inductions list.
- /// This can set \p Phi as the main induction of the loop if \p Phi is a
- /// better choice for the main induction than the existing one.
- void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
- SmallPtrSetImpl<Value *> &AllowedExit);
-
- /// Create an analysis remark that explains why vectorization failed
- ///
- /// \p RemarkName is the identifier for the remark. If \p I is passed it is
- /// an instruction that prevents vectorization. Otherwise the loop is used
- /// for the location of the remark. \return the remark object that can be
- /// streamed to.
- OptimizationRemarkAnalysis
- createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const {
- return ::createMissedAnalysis(Hints->vectorizeAnalysisPassName(),
- RemarkName, TheLoop, I);
- }
-
- /// \brief If an access has a symbolic strides, this maps the pointer value to
- /// the stride symbol.
- const ValueToValueMap *getSymbolicStrides() {
- // FIXME: Currently, the set of symbolic strides is sometimes queried before
- // it's collected. This happens from canVectorizeWithIfConvert, when the
- // pointer is checked to reference consecutive elements suitable for a
- // masked access.
- return LAI ? &LAI->getSymbolicStrides() : nullptr;
- }
-
- /// The loop that we evaluate.
- Loop *TheLoop;
-
- /// Loop Info analysis.
- LoopInfo *LI;
-
- /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
- /// Applies dynamic knowledge to simplify SCEV expressions in the context
- /// of existing SCEV assumptions. The analysis will also add a minimal set
- /// of new predicates if this is required to enable vectorization and
- /// unrolling.
- PredicatedScalarEvolution &PSE;
-
- /// Target Library Info.
- TargetLibraryInfo *TLI;
-
- /// Dominator Tree.
- DominatorTree *DT;
-
- // LoopAccess analysis.
- std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
-
- // And the loop-accesses info corresponding to this loop. This pointer is
- // null until canVectorizeMemory sets it up.
- const LoopAccessInfo *LAI = nullptr;
-
- /// Interface to emit optimization remarks.
- OptimizationRemarkEmitter *ORE;
-
- // --- vectorization state --- //
-
- /// Holds the primary induction variable. This is the counter of the
- /// loop.
- PHINode *PrimaryInduction = nullptr;
-
- /// Holds the reduction variables.
- ReductionList Reductions;
-
- /// Holds all of the induction variables that we found in the loop.
- /// Notice that inductions don't need to start at zero and that induction
- /// variables can be pointers.
- InductionList Inductions;
-
- /// Holds all the casts that participate in the update chain of the induction
- /// variables, and that have been proven to be redundant (possibly under a
- /// runtime guard). These casts can be ignored when creating the vectorized
- /// loop body.
- SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
-
- /// Holds the phi nodes that are first-order recurrences.
- RecurrenceSet FirstOrderRecurrences;
-
- /// Holds instructions that need to sink past other instructions to handle
- /// first-order recurrences.
- DenseMap<Instruction *, Instruction *> SinkAfter;
-
- /// Holds the widest induction type encountered.
- Type *WidestIndTy = nullptr;
-
- /// Allowed outside users. This holds the induction and reduction
- /// vars which can be accessed from outside the loop.
- SmallPtrSet<Value *, 4> AllowedExit;
-
- /// Can we assume the absence of NaNs.
- bool HasFunNoNaNAttr = false;
-
- /// Vectorization requirements that will go through late-evaluation.
- LoopVectorizationRequirements *Requirements;
-
- /// Used to emit an analysis of any legality issues.
- LoopVectorizeHints *Hints;
-
- /// The demanded bits analsyis is used to compute the minimum type size in
- /// which a reduction can be computed.
- DemandedBits *DB;
-
- /// The assumption cache analysis is used to compute the minimum type size in
- /// which a reduction can be computed.
- AssumptionCache *AC;
-
- /// While vectorizing these instructions we have to generate a
- /// call to the appropriate masked intrinsic
- SmallPtrSet<const Instruction *, 8> MaskedOp;
-};
-
/// LoopVectorizationCostModel - estimates the expected speedups due to
/// vectorization.
/// In many cases vectorization is not profitable. This can happen because of
/// \p RemarkName is the identifier for the remark. \return the remark object
/// that can be streamed to.
OptimizationRemarkAnalysis createMissedAnalysis(StringRef RemarkName) {
- return ::createMissedAnalysis(Hints->vectorizeAnalysisPassName(),
+ return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
RemarkName, TheLoop);
}
} // end namespace llvm
-namespace {
-
-/// \brief This holds vectorization requirements that must be verified late in
-/// the process. The requirements are set by legalize and costmodel. Once
-/// vectorization has been determined to be possible and profitable the
-/// requirements can be verified by looking for metadata or compiler options.
-/// For example, some loops require FP commutativity which is only allowed if
-/// vectorization is explicitly specified or if the fast-math compiler option
-/// has been provided.
-/// Late evaluation of these requirements allows helpful diagnostics to be
-/// composed that tells the user what need to be done to vectorize the loop. For
-/// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
-/// evaluation should be used only when diagnostics can generated that can be
-/// followed by a non-expert user.
-class LoopVectorizationRequirements {
-public:
- LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
-
- void addUnsafeAlgebraInst(Instruction *I) {
- // First unsafe algebra instruction.
- if (!UnsafeAlgebraInst)
- UnsafeAlgebraInst = I;
- }
-
- void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
-
- bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) {
- const char *PassName = Hints.vectorizeAnalysisPassName();
- bool Failed = false;
- if (UnsafeAlgebraInst && !Hints.allowReordering()) {
- ORE.emit([&]() {
- return OptimizationRemarkAnalysisFPCommute(
- PassName, "CantReorderFPOps",
- UnsafeAlgebraInst->getDebugLoc(),
- UnsafeAlgebraInst->getParent())
- << "loop not vectorized: cannot prove it is safe to reorder "
- "floating-point operations";
- });
- Failed = true;
- }
-
- // Test if runtime memcheck thresholds are exceeded.
- bool PragmaThresholdReached =
- NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
- bool ThresholdReached =
- NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
- if ((ThresholdReached && !Hints.allowReordering()) ||
- PragmaThresholdReached) {
- ORE.emit([&]() {
- return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
- L->getStartLoc(),
- L->getHeader())
- << "loop not vectorized: cannot prove it is safe to reorder "
- "memory operations";
- });
- DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
- Failed = true;
- }
-
- return Failed;
- }
-
-private:
- unsigned NumRuntimePointerChecks = 0;
- Instruction *UnsafeAlgebraInst = nullptr;
-
- /// Interface to emit optimization remarks.
- OptimizationRemarkEmitter &ORE;
-};
-
-} // end anonymous namespace
-
// Return true if \p OuterLp is an outer loop annotated with hints for explicit
// vectorization. The loop needs to be annotated with #pragma omp simd
// simdlen(#) or #pragma clang vectorize(enable) vectorize_width(#). If the
}
}
-int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
- const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() :
- ValueToValueMap();
-
- int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
- if (Stride == 1 || Stride == -1)
- return Stride;
- return 0;
-}
-
-bool LoopVectorizationLegality::isUniform(Value *V) {
- return LAI->isUniform(V);
-}
-
Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
assert(V != Induction && "The new induction variable should not be used.");
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
}
-/// \brief Check whether it is safe to if-convert this phi node.
-///
-/// Phi nodes with constant expressions that can trap are not safe to if
-/// convert.
-static bool canIfConvertPHINodes(BasicBlock *BB) {
- for (PHINode &Phi : BB->phis()) {
- for (Value *V : Phi.incoming_values())
- if (auto *C = dyn_cast<Constant>(V))
- if (C->canTrap())
- return false;
- }
- return true;
-}
-
-bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
- if (!EnableIfConversion) {
- ORE->emit(createMissedAnalysis("IfConversionDisabled")
- << "if-conversion is disabled");
- return false;
- }
-
- assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
-
- // A list of pointers that we can safely read and write to.
- SmallPtrSet<Value *, 8> SafePointes;
-
- // Collect safe addresses.
- for (BasicBlock *BB : TheLoop->blocks()) {
- if (blockNeedsPredication(BB))
- continue;
-
- for (Instruction &I : *BB)
- if (auto *Ptr = getLoadStorePointerOperand(&I))
- SafePointes.insert(Ptr);
- }
-
- // Collect the blocks that need predication.
- BasicBlock *Header = TheLoop->getHeader();
- for (BasicBlock *BB : TheLoop->blocks()) {
- // We don't support switch statements inside loops.
- if (!isa<BranchInst>(BB->getTerminator())) {
- ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator())
- << "loop contains a switch statement");
- return false;
- }
-
- // We must be able to predicate all blocks that need to be predicated.
- if (blockNeedsPredication(BB)) {
- if (!blockCanBePredicated(BB, SafePointes)) {
- ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
- << "control flow cannot be substituted for a select");
- return false;
- }
- } else if (BB != Header && !canIfConvertPHINodes(BB)) {
- ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
- << "control flow cannot be substituted for a select");
- return false;
- }
- }
-
- // We can if-convert this loop.
- return true;
-}
-
-// Helper function to canVectorizeLoopNestCFG.
-bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp) {
- assert((EnableVPlanNativePath || Lp->empty()) &&
- "VPlan-native path is not enabled.");
-
- // TODO: ORE should be improved to show more accurate information when an
- // outer loop can't be vectorized because a nested loop is not understood or
- // legal. Something like: "outer_loop_location: loop not vectorized:
- // (inner_loop_location) loop control flow is not understood by vectorizer".
-
- // Store the result and return it at the end instead of exiting early, in case
- // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
- bool Result = true;
- bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
-
- // We must have a loop in canonical form. Loops with indirectbr in them cannot
- // be canonicalized.
- if (!Lp->getLoopPreheader()) {
- DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- // We must have a single backedge.
- if (Lp->getNumBackEdges() != 1) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- // We must have a single exiting block.
- if (!Lp->getExitingBlock()) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- // We only handle bottom-tested loops, i.e. loop in which the condition is
- // checked at the end of each iteration. With that we can assume that all
- // instructions in the loop are executed the same number of times.
- if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- return Result;
-}
-
-bool LoopVectorizationLegality::canVectorizeLoopNestCFG(Loop *Lp) {
- // Store the result and return it at the end instead of exiting early, in case
- // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
- bool Result = true;
- bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
- if (!canVectorizeLoopCFG(Lp)) {
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- // Recursively check whether the loop control flow of nested loops is
- // understood.
- for (Loop *SubLp : *Lp)
- if (!canVectorizeLoopNestCFG(SubLp)) {
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- return Result;
-}
-
-bool LoopVectorizationLegality::canVectorize() {
- // Store the result and return it at the end instead of exiting early, in case
- // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
- bool Result = true;
-
- bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
- // Check whether the loop-related control flow in the loop nest is expected by
- // vectorizer.
- if (!canVectorizeLoopNestCFG(TheLoop)) {
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- // We need to have a loop header.
- DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
- << '\n');
-
- // Specific checks for outer loops. We skip the remaining legal checks at this
- // point because they don't support outer loops.
- if (!TheLoop->empty()) {
- assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
-
- if (!canVectorizeOuterLoop()) {
- DEBUG(dbgs() << "LV: Not vectorizing: Unsupported outer loop.\n");
- // TODO: Implement DoExtraAnalysis when subsequent legal checks support
- // outer loops.
- return false;
- }
-
- DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
- return Result;
- }
-
- assert(TheLoop->empty() && "Inner loop expected.");
- // Check if we can if-convert non-single-bb loops.
- unsigned NumBlocks = TheLoop->getNumBlocks();
- if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
- DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- // Check if we can vectorize the instructions and CFG in this loop.
- if (!canVectorizeInstrs()) {
- DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- // Go over each instruction and look at memory deps.
- if (!canVectorizeMemory()) {
- DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- DEBUG(dbgs() << "LV: We can vectorize this loop"
- << (LAI->getRuntimePointerChecking()->Need
- ? " (with a runtime bound check)"
- : "")
- << "!\n");
-
- unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
- if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
- SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
-
- if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
- ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks")
- << "Too many SCEV assumptions need to be made and checked "
- << "at runtime");
- DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- // Okay! We've done all the tests. If any have failed, return false. Otherwise
- // we can vectorize, and at this point we don't have any other mem analysis
- // which may limit our maximum vectorization factor, so just return true with
- // no restrictions.
- return Result;
-}
-
-// Return true if the inner loop \p Lp is uniform with regard to the outer loop
-// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
-// executing the inner loop will execute the same iterations). This check is
-// very constrained for now but it will be relaxed in the future. \p Lp is
-// considered uniform if it meets all the following conditions:
-// 1) it has a canonical IV (starting from 0 and with stride 1),
-// 2) its latch terminator is a conditional branch and,
-// 3) its latch condition is a compare instruction whose operands are the
-// canonical IV and an OuterLp invariant.
-// This check doesn't take into account the uniformity of other conditions not
-// related to the loop latch because they don't affect the loop uniformity.
-//
-// NOTE: We decided to keep all these checks and its associated documentation
-// together so that we can easily have a picture of the current supported loop
-// nests. However, some of the current checks don't depend on \p OuterLp and
-// would be redundantly executed for each \p Lp if we invoked this function for
-// different candidate outer loops. This is not the case for now because we
-// don't currently have the infrastructure to evaluate multiple candidate outer
-// loops and \p OuterLp will be a fixed parameter while we only support explicit
-// outer loop vectorization. It's also very likely that these checks go away
-// before introducing the aforementioned infrastructure. However, if this is not
-// the case, we should move the \p OuterLp independent checks to a separate
-// function that is only executed once for each \p Lp.
-static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
- assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
-
- // If Lp is the outer loop, it's uniform by definition.
- if (Lp == OuterLp)
- return true;
- assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
-
- // 1.
- PHINode *IV = Lp->getCanonicalInductionVariable();
- if (!IV) {
- DEBUG(dbgs() << "LV: Canonical IV not found.\n");
- return false;
- }
-
- // 2.
- BasicBlock *Latch = Lp->getLoopLatch();
- auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
- if (!LatchBr || LatchBr->isUnconditional()) {
- DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
- return false;
- }
-
- // 3.
- auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
- if (!LatchCmp) {
- DEBUG(dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
- return false;
- }
-
- Value *CondOp0 = LatchCmp->getOperand(0);
- Value *CondOp1 = LatchCmp->getOperand(1);
- Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
- if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
- !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
- DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
- return false;
- }
-
- return true;
-}
-
-// Return true if \p Lp and all its nested loops are uniform with regard to \p
-// OuterLp.
-static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
- if (!isUniformLoop(Lp, OuterLp))
- return false;
-
- // Check if nested loops are uniform.
- for (Loop *SubLp : *Lp)
- if (!isUniformLoopNest(SubLp, OuterLp))
- return false;
-
- return true;
-}
-
-bool LoopVectorizationLegality::canVectorizeOuterLoop() {
- assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
- // Store the result and return it at the end instead of exiting early, in case
- // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
- bool Result = true;
- bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
-
- for (BasicBlock *BB : TheLoop->blocks()) {
- // Check whether the BB terminator is a BranchInst. Any other terminator is
- // not supported yet.
- auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
- if (!Br) {
- DEBUG(dbgs() << "LV: Unsupported basic block terminator.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- // Check whether the BranchInst is a supported one. Only unconditional
- // branches, conditional branches with an outer loop invariant condition or
- // backedges are supported.
- if (Br && Br->isConditional() &&
- !TheLoop->isLoopInvariant(Br->getCondition()) &&
- !LI->isLoopHeader(Br->getSuccessor(0)) &&
- !LI->isLoopHeader(Br->getSuccessor(1))) {
- DEBUG(dbgs() << "LV: Unsupported conditional branch.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
- }
-
- // Check whether inner loops are uniform. At this point, we only support
- // simple outer loops scenarios with uniform nested loops.
- if (!isUniformLoopNest(TheLoop /*loop nest*/,
- TheLoop /*context outer loop*/)) {
- DEBUG(dbgs()
- << "LV: Not vectorizing: Outer loop contains divergent loops.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
-
- return Result;
-}
-
-static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
- if (Ty->isPointerTy())
- return DL.getIntPtrType(Ty);
-
- // It is possible that char's or short's overflow when we ask for the loop's
- // trip count, work around this by changing the type size.
- if (Ty->getScalarSizeInBits() < 32)
- return Type::getInt32Ty(Ty->getContext());
-
- return Ty;
-}
-
-static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
- Ty0 = convertPointerToIntegerType(DL, Ty0);
- Ty1 = convertPointerToIntegerType(DL, Ty1);
- if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
- return Ty0;
- return Ty1;
-}
-
-/// \brief Check that the instruction has outside loop users and is not an
-/// identified reduction variable.
-static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
- SmallPtrSetImpl<Value *> &AllowedExit) {
- // Reduction and Induction instructions are allowed to have exit users. All
- // other instructions must not have external users.
- if (!AllowedExit.count(Inst))
- // Check that all of the users of the loop are inside the BB.
- for (User *U : Inst->users()) {
- Instruction *UI = cast<Instruction>(U);
- // This user may be a reduction exit value.
- if (!TheLoop->contains(UI)) {
- DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
- return true;
- }
- }
- return false;
-}
-
-void LoopVectorizationLegality::addInductionPhi(
- PHINode *Phi, const InductionDescriptor &ID,
- SmallPtrSetImpl<Value *> &AllowedExit) {
- Inductions[Phi] = ID;
-
- // In case this induction also comes with casts that we know we can ignore
- // in the vectorized loop body, record them here. All casts could be recorded
- // here for ignoring, but suffices to record only the first (as it is the
- // only one that may bw used outside the cast sequence).
- const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
- if (!Casts.empty())
- InductionCastsToIgnore.insert(*Casts.begin());
-
- Type *PhiTy = Phi->getType();
- const DataLayout &DL = Phi->getModule()->getDataLayout();
-
- // Get the widest type.
- if (!PhiTy->isFloatingPointTy()) {
- if (!WidestIndTy)
- WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
- else
- WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
- }
-
- // Int inductions are special because we only allow one IV.
- if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
- ID.getConstIntStepValue() &&
- ID.getConstIntStepValue()->isOne() &&
- isa<Constant>(ID.getStartValue()) &&
- cast<Constant>(ID.getStartValue())->isNullValue()) {
-
- // Use the phi node with the widest type as induction. Use the last
- // one if there are multiple (no good reason for doing this other
- // than it is expedient). We've checked that it begins at zero and
- // steps by one, so this is a canonical induction variable.
- if (!PrimaryInduction || PhiTy == WidestIndTy)
- PrimaryInduction = Phi;
- }
-
- // Both the PHI node itself, and the "post-increment" value feeding
- // back into the PHI node may have external users.
- // We can allow those uses, except if the SCEVs we have for them rely
- // on predicates that only hold within the loop, since allowing the exit
- // currently means re-using this SCEV outside the loop.
- if (PSE.getUnionPredicate().isAlwaysTrue()) {
- AllowedExit.insert(Phi);
- AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
- }
-
- DEBUG(dbgs() << "LV: Found an induction variable.\n");
-}
-
-bool LoopVectorizationLegality::canVectorizeInstrs() {
- BasicBlock *Header = TheLoop->getHeader();
-
- // Look for the attribute signaling the absence of NaNs.
- Function &F = *Header->getParent();
- HasFunNoNaNAttr =
- F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
-
- // For each block in the loop.
- for (BasicBlock *BB : TheLoop->blocks()) {
- // Scan the instructions in the block and look for hazards.
- for (Instruction &I : *BB) {
- if (auto *Phi = dyn_cast<PHINode>(&I)) {
- Type *PhiTy = Phi->getType();
- // Check that this PHI type is allowed.
- if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
- !PhiTy->isPointerTy()) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
- << "loop control flow is not understood by vectorizer");
- DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
- return false;
- }
-
- // If this PHINode is not in the header block, then we know that we
- // can convert it to select during if-conversion. No need to check if
- // the PHIs in this block are induction or reduction variables.
- if (BB != Header) {
- // Check that this instruction has no outside users or is an
- // identified reduction value with an outside user.
- if (!hasOutsideLoopUser(TheLoop, Phi, AllowedExit))
- continue;
- ORE->emit(createMissedAnalysis("NeitherInductionNorReduction", Phi)
- << "value could not be identified as "
- "an induction or reduction variable");
- return false;
- }
-
- // We only allow if-converted PHIs with exactly two incoming values.
- if (Phi->getNumIncomingValues() != 2) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
- << "control flow not understood by vectorizer");
- DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
- return false;
- }
-
- RecurrenceDescriptor RedDes;
- if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
- DT)) {
- if (RedDes.hasUnsafeAlgebra())
- Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
- AllowedExit.insert(RedDes.getLoopExitInstr());
- Reductions[Phi] = RedDes;
- continue;
- }
-
- InductionDescriptor ID;
- if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
- addInductionPhi(Phi, ID, AllowedExit);
- if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
- Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
- continue;
- }
-
- if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
- SinkAfter, DT)) {
- FirstOrderRecurrences.insert(Phi);
- continue;
- }
-
- // As a last resort, coerce the PHI to a AddRec expression
- // and re-try classifying it a an induction PHI.
- if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
- addInductionPhi(Phi, ID, AllowedExit);
- continue;
- }
-
- ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi)
- << "value that could not be identified as "
- "reduction is used outside the loop");
- DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n");
- return false;
- } // end of PHI handling
-
- // We handle calls that:
- // * Are debug info intrinsics.
- // * Have a mapping to an IR intrinsic.
- // * Have a vector version available.
- auto *CI = dyn_cast<CallInst>(&I);
- if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
- !isa<DbgInfoIntrinsic>(CI) &&
- !(CI->getCalledFunction() && TLI &&
- TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
- ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
- << "call instruction cannot be vectorized");
- DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
- return false;
- }
-
- // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
- // second argument is the same (i.e. loop invariant)
- if (CI && hasVectorInstrinsicScalarOpd(
- getVectorIntrinsicIDForCall(CI, TLI), 1)) {
- auto *SE = PSE.getSE();
- if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
- ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI)
- << "intrinsic instruction cannot be vectorized");
- DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
- return false;
- }
- }
-
- // Check that the instruction return type is vectorizable.
- // Also, we can't vectorize extractelement instructions.
- if ((!VectorType::isValidElementType(I.getType()) &&
- !I.getType()->isVoidTy()) ||
- isa<ExtractElementInst>(I)) {
- ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I)
- << "instruction return type cannot be vectorized");
- DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
- return false;
- }
-
- // Check that the stored type is vectorizable.
- if (auto *ST = dyn_cast<StoreInst>(&I)) {
- Type *T = ST->getValueOperand()->getType();
- if (!VectorType::isValidElementType(T)) {
- ORE->emit(createMissedAnalysis("CantVectorizeStore", ST)
- << "store instruction cannot be vectorized");
- return false;
- }
-
- // FP instructions can allow unsafe algebra, thus vectorizable by
- // non-IEEE-754 compliant SIMD units.
- // This applies to floating-point math operations and calls, not memory
- // operations, shuffles, or casts, as they don't change precision or
- // semantics.
- } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
- !I.isFast()) {
- DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
- Hints->setPotentiallyUnsafe();
- }
-
- // Reduction instructions are allowed to have exit users.
- // All other instructions must not have external users.
- if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
- ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
- << "value cannot be used outside the loop");
- return false;
- }
- } // next instr.
- }
-
- if (!PrimaryInduction) {
- DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
- if (Inductions.empty()) {
- ORE->emit(createMissedAnalysis("NoInductionVariable")
- << "loop induction variable could not be identified");
- return false;
- }
- }
-
- // Now we know the widest induction type, check if our found induction
- // is the same size. If it's not, unset it here and InnerLoopVectorizer
- // will create another.
- if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
- PrimaryInduction = nullptr;
-
- return true;
-}
-
void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
// We should not collect Scalars more than once per VF. Right now, this
// function is called from collectUniformsAndScalars(), which already does
Uniforms[VF].insert(Worklist.begin(), Worklist.end());
}
-bool LoopVectorizationLegality::canVectorizeMemory() {
- LAI = &(*GetLAA)(*TheLoop);
- const OptimizationRemarkAnalysis *LAR = LAI->getReport();
- if (LAR) {
- ORE->emit([&]() {
- return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
- "loop not vectorized: ", *LAR);
- });
- }
- if (!LAI->canVectorizeMemory())
- return false;
-
- if (LAI->hasStoreToLoopInvariantAddress()) {
- ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
- << "write to a loop invariant address could not be vectorized");
- DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
- return false;
- }
-
- Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
- PSE.addPredicate(LAI->getPSE().getUnionPredicate());
-
- return true;
-}
-
-bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
- Value *In0 = const_cast<Value *>(V);
- PHINode *PN = dyn_cast_or_null<PHINode>(In0);
- if (!PN)
- return false;
-
- return Inductions.count(PN);
-}
-
-bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
- auto *Inst = dyn_cast<Instruction>(V);
- return (Inst && InductionCastsToIgnore.count(Inst));
-}
-
-bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
- return isInductionPhi(V) || isCastedInductionVariable(V);
-}
-
-bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
- return FirstOrderRecurrences.count(Phi);
-}
-
-bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
- return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
-}
-
-bool LoopVectorizationLegality::blockCanBePredicated(
- BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
- const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
-
- for (Instruction &I : *BB) {
- // Check that we don't have a constant expression that can trap as operand.
- for (Value *Operand : I.operands()) {
- if (auto *C = dyn_cast<Constant>(Operand))
- if (C->canTrap())
- return false;
- }
- // We might be able to hoist the load.
- if (I.mayReadFromMemory()) {
- auto *LI = dyn_cast<LoadInst>(&I);
- if (!LI)
- return false;
- if (!SafePtrs.count(LI->getPointerOperand())) {
- // !llvm.mem.parallel_loop_access implies if-conversion safety.
- // Otherwise, record that the load needs (real or emulated) masking
- // and let the cost model decide.
- if (!IsAnnotatedParallel)
- MaskedOp.insert(LI);
- continue;
- }
- }
-
- if (I.mayWriteToMemory()) {
- auto *SI = dyn_cast<StoreInst>(&I);
- if (!SI)
- return false;
- // Predicated store requires some form of masking:
- // 1) masked store HW instruction,
- // 2) emulation via load-blend-store (only if safe and legal to do so,
- // be aware on the race conditions), or
- // 3) element-by-element predicate check and scalar store.
- MaskedOp.insert(SI);
- continue;
- }
- if (I.mayThrow())
- return false;
- }
-
- return true;
-}
-
void InterleavedAccessInfo::collectConstStrideAccesses(
MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
const ValueToValueMap &Strides) {
LoopVectorizationRequirements Requirements(*ORE);
LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, GetLAA, LI, ORE,
&Requirements, &Hints, DB, AC);
- if (!LVL.canVectorize()) {
+ if (!LVL.canVectorize(EnableVPlanNativePath)) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints, ORE);
return false;
if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
"attribute is used.\n");
- ORE->emit(createMissedAnalysis(Hints.vectorizeAnalysisPassName(),
- "NoImplicitFloat", L)
+ ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(),
+ "NoImplicitFloat", L)
<< "loop not vectorized due to NoImplicitFloat attribute");
emitMissedWarning(F, L, Hints, ORE);
return false;
TTI->isFPVectorizationPotentiallyUnsafe()) {
DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n");
ORE->emit(
- createMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L)
+ createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L)
<< "loop not vectorized due to unsafe FP support.");
emitMissedWarning(F, L, Hints, ORE);
return false;