//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/SROA.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantFolder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TimeValue.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
#ifndef NDEBUG
// We only use this for a debug check.
#include <random>
cl::Hidden);
namespace {
-/// \brief A custom IRBuilder inserter which prefixes all names, but only in
+
+/// A custom IRBuilder inserter which prefixes all names, but only in
/// Assert builds.
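+///
+/// For example, with the prefix "foo.sroa.0.", a value that would be named
+/// "load" becomes "foo.sroa.0.load", which makes debug IR dumps much easier
+/// to trace. (Illustrative name; SROA sets the prefix per rewritten alloca.)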
class IRBuilderPrefixedInserter : public IRBuilderDefaultInserter {
std::string Prefix;
+
const Twine getNameWithPrefix(const Twine &Name) const {
return Name.isTriviallyEmpty() ? Name : Prefix + Name;
}
}
};
-/// \brief Provide a typedef for IRBuilder that drops names in release builds.
-using IRBuilderTy = llvm::IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
-}
+/// Provide a type alias for IRBuilder that drops names in release builds.
+using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
-namespace {
-/// \brief A used slice of an alloca.
+/// A used slice of an alloca.
///
/// This structure represents a slice of an alloca used by some instruction. It
/// stores both the begin and end offsets of this use, a pointer to the use
/// itself, and a flag indicating whether we can classify the use as splittable
/// or not when forming partitions of the alloca.
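+///
+/// For example, for an `alloca [8 x i8]`, an i32 store at byte offset 4 is
+/// recorded as the unsplittable slice [4, 8) holding the store's
+/// pointer-operand use. (Illustrative; offsets come from the DataLayout.)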
class Slice {
- /// \brief The beginning offset of the range.
- uint64_t BeginOffset;
+ /// The beginning offset of the range.
+ uint64_t BeginOffset = 0;
- /// \brief The ending offset, not included in the range.
- uint64_t EndOffset;
+ /// The ending offset, not included in the range.
+ uint64_t EndOffset = 0;
- /// \brief Storage for both the use of this slice and whether it can be
+ /// Storage for both the use of this slice and whether it can be
/// split.
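+ /// (PointerIntPair packs the bool into a spare low bit of the Use pointer,
+ /// so each Slice stays compact and cheap to copy and sort.)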
PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
public:
- Slice() : BeginOffset(), EndOffset() {}
+ Slice() = default;
+
Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
: BeginOffset(BeginOffset), EndOffset(EndOffset),
UseAndIsSplittable(U, IsSplittable) {}
bool isDead() const { return getUse() == nullptr; }
void kill() { UseAndIsSplittable.setPointer(nullptr); }
- /// \brief Support for ordering ranges.
+ /// Support for ordering ranges.
///
/// This provides an ordering over ranges such that start offsets are
/// always increasing, and within equal start offsets, the end offsets are
return false;
}
- /// \brief Support comparison with a single offset to allow binary searches.
+ /// Support comparison with a single offset to allow binary searches.
friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
uint64_t RHSOffset) {
return LHS.beginOffset() < RHSOffset;
}
bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
};
+
} // end anonymous namespace
namespace llvm {
+
template <typename T> struct isPodLike;
template <> struct isPodLike<Slice> { static const bool value = true; };
-}
-/// \brief Representation of the alloca slices.
+} // end namespace llvm
+
+/// Representation of the alloca slices.
///
/// This class represents the slices of an alloca which are formed by its
/// various uses. If a pointer escapes, we can't fully build a representation
/// starting at a particular offset before splittable slices.
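+///
+/// For example, an alloca with an i32 store at offset 0 and a memcpy of
+/// bytes [0, 16) yields an unsplittable [0, 4) slice and a splittable
+/// [0, 16) slice, with the unsplittable slice ordered first. (Illustrative
+/// sketch of the ordering described above.)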
class llvm::sroa::AllocaSlices {
public:
- /// \brief Construct the slices of a particular alloca.
+ /// Construct the slices of a particular alloca.
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
- /// \brief Test whether a pointer to the allocation escapes our analysis.
+ /// Test whether a pointer to the allocation escapes our analysis.
///
/// If this is true, the slices are never fully built and should be
/// ignored.
bool isEscaped() const { return PointerEscapingInstr; }
- /// \brief Support for iterating over the slices.
+ /// Support for iterating over the slices.
/// @{
- typedef SmallVectorImpl<Slice>::iterator iterator;
- typedef iterator_range<iterator> range;
+ using iterator = SmallVectorImpl<Slice>::iterator;
+ using range = iterator_range<iterator>;
+
iterator begin() { return Slices.begin(); }
iterator end() { return Slices.end(); }
- typedef SmallVectorImpl<Slice>::const_iterator const_iterator;
- typedef iterator_range<const_iterator> const_range;
+ using const_iterator = SmallVectorImpl<Slice>::const_iterator;
+ using const_range = iterator_range<const_iterator>;
+
const_iterator begin() const { return Slices.begin(); }
const_iterator end() const { return Slices.end(); }
/// @}
- /// \brief Erase a range of slices.
+ /// Erase a range of slices.
void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
- /// \brief Insert new slices for this alloca.
+ /// Insert new slices for this alloca.
///
/// This moves the slices into the alloca's slices collection, and re-sorts
/// everything so that the usual ordering properties of the alloca's slices
int OldSize = Slices.size();
Slices.append(NewSlices.begin(), NewSlices.end());
auto SliceI = Slices.begin() + OldSize;
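+ // Sort just the appended tail, then merge it into the already-sorted
+ // prefix; this avoids re-sorting the entire slice vector.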
- std::sort(SliceI, Slices.end());
+ llvm::sort(SliceI, Slices.end());
std::inplace_merge(Slices.begin(), SliceI, Slices.end());
}
class partition_iterator;
iterator_range<partition_iterator> partitions();
- /// \brief Access the dead users for this alloca.
+ /// Access the dead users for this alloca.
ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
- /// \brief Access the dead operands referring to this alloca.
+ /// Access the dead operands referring to this alloca.
///
/// These are operands which cannot actually be used to refer to the
/// alloca as they are outside its range and the user doesn't correct for
private:
template <typename DerivedT, typename RetT = void> class BuilderBase;
class SliceBuilder;
+
friend class AllocaSlices::SliceBuilder;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// \brief Handle to alloca instruction to simplify method interfaces.
+ /// Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
#endif
- /// \brief The instruction responsible for this alloca not having a known set
+ /// The instruction responsible for this alloca not having a known set
/// of slices.
///
/// When an instruction (potentially) escapes the pointer to the alloca, we
/// alloca. This will be null if the alloca slices are analyzed successfully.
Instruction *PointerEscapingInstr;
- /// \brief The slices of the alloca.
+ /// The slices of the alloca.
///
/// We store a vector of the slices formed by uses of the alloca here. This
/// vector is sorted by increasing begin offset, and then the unsplittable
/// details.
SmallVector<Slice, 8> Slices;
- /// \brief Instructions which will become dead if we rewrite the alloca.
+ /// Instructions which will become dead if we rewrite the alloca.
///
/// Note that these are not separated by slice. This is because we expect an
/// alloca to be completely rewritten or not rewritten at all. If rewritten,
/// they come from outside of the allocated space.
SmallVector<Instruction *, 8> DeadUsers;
- /// \brief Operands which will become dead if we rewrite the alloca.
+ /// Operands which will become dead if we rewrite the alloca.
///
/// These are operands that in their particular use can be replaced with
/// undef when we rewrite the alloca. These show up in out-of-bounds inputs
SmallVector<Use *, 8> DeadOperands;
};
-/// \brief A partition of the slices.
+/// A partition of the slices.
///
/// An ephemeral representation for a range of slices which can be viewed as
/// a partition of the alloca. This range represents a span of the alloca's
friend class AllocaSlices;
friend class AllocaSlices::partition_iterator;
- typedef AllocaSlices::iterator iterator;
+ using iterator = AllocaSlices::iterator;
- /// \brief The beginning and ending offsets of the alloca for this
+ /// The beginning and ending offsets of the alloca for this
/// partition.
uint64_t BeginOffset, EndOffset;
- /// \brief The start end end iterators of this partition.
+ /// The start and end iterators of this partition.
iterator SI, SJ;
- /// \brief A collection of split slice tails overlapping the partition.
+ /// A collection of split slice tails overlapping the partition.
SmallVector<Slice *, 4> SplitTails;
- /// \brief Raw constructor builds an empty partition starting and ending at
+ /// Raw constructor builds an empty partition starting and ending at
/// the given iterator.
Partition(iterator SI) : SI(SI), SJ(SI) {}
public:
- /// \brief The start offset of this partition.
+ /// The start offset of this partition.
///
/// All of the contained slices start at or after this offset.
uint64_t beginOffset() const { return BeginOffset; }
- /// \brief The end offset of this partition.
+ /// The end offset of this partition.
///
/// All of the contained slices end at or before this offset.
uint64_t endOffset() const { return EndOffset; }
- /// \brief The size of the partition.
+ /// The size of the partition.
///
/// Note that this can never be zero.
uint64_t size() const {
return EndOffset - BeginOffset;
}
- /// \brief Test whether this partition contains no slices, and merely spans
+ /// Test whether this partition contains no slices, and merely spans
/// a region occupied by split slices.
bool empty() const { return SI == SJ; }
iterator end() const { return SJ; }
/// @}
- /// \brief Get the sequence of split slice tails.
+ /// Get the sequence of split slice tails.
///
/// These tails are of slices which start before this partition but are
/// split and overlap into the partition. We accumulate these while forming
ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
};
-/// \brief An iterator over partitions of the alloca's slices.
+/// An iterator over partitions of the alloca's slices.
///
/// This iterator implements the core algorithm for partitioning the alloca's
/// slices. It is a forward iterator as we don't support backtracking for
Partition> {
friend class AllocaSlices;
- /// \brief Most of the state for walking the partitions is held in a class
+ /// Most of the state for walking the partitions is held in a class
/// with a nice interface for examining them.
Partition P;
- /// \brief We need to keep the end of the slices to know when to stop.
+ /// We need to keep the end of the slices to know when to stop.
AllocaSlices::iterator SE;
- /// \brief We also need to keep track of the maximum split end offset seen.
+ /// We also need to keep track of the maximum split end offset seen.
/// FIXME: Do we really?
- uint64_t MaxSplitSliceEndOffset;
+ uint64_t MaxSplitSliceEndOffset = 0;
- /// \brief Sets the partition to be empty at given iterator, and sets the
+ /// Sets the partition to be empty at given iterator, and sets the
/// end iterator.
partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
- : P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
+ : P(SI), SE(SE) {
// If not already at the end, advance our state to form the initial
// partition.
if (SI != SE)
advance();
}
- /// \brief Advance the iterator to the next partition.
+ /// Advance the iterator to the next partition.
///
/// Requires that the iterator not be at the end of the slices.
void advance() {
// Remove the uses which have ended in the prior partition. This
// cannot change the max split slice end because we just checked that
// the prior partition ended prior to that max.
- P.SplitTails.erase(
- std::remove_if(
- P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
- P.SplitTails.end());
- assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) {
- return S->endOffset() == MaxSplitSliceEndOffset;
- }) &&
+ P.SplitTails.erase(llvm::remove_if(P.SplitTails,
+ [&](Slice *S) {
+ return S->endOffset() <=
+ P.EndOffset;
+ }),
+ P.SplitTails.end());
+ assert(llvm::any_of(P.SplitTails,
+ [&](Slice *S) {
+ return S->endOffset() == MaxSplitSliceEndOffset;
+ }) &&
"Could not find the current max split slice offset!");
- assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) {
- return S->endOffset() <= MaxSplitSliceEndOffset;
- }) &&
+ assert(llvm::all_of(P.SplitTails,
+ [&](Slice *S) {
+ return S->endOffset() <= MaxSplitSliceEndOffset;
+ }) &&
"Max split slice end offset is not actually the max!");
}
}
Partition &operator*() { return P; }
};
-/// \brief A forward range over the partitions of the alloca's slices.
+/// A forward range over the partitions of the alloca's slices.
///
/// This accesses an iterator range over the partitions of the alloca's
/// slices. It computes these partitions on the fly based on the overlapping
return nullptr;
}
-/// \brief A helper that folds a PHI node or a select.
+/// A helper that folds a PHI node or a select.
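+///
+/// For example, `%p = phi i8* [ %a, %bb1 ], [ %a, %bb2 ]` folds to `%a`, and
+/// a select whose condition is constant folds to the selected operand.
+/// (Illustrative; foldSelectInst above handles the select cases.)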
static Value *foldPHINodeOrSelectInst(Instruction &I) {
if (PHINode *PN = dyn_cast<PHINode>(&I)) {
// If PN merges together the same value, return that value.
return foldSelectInst(cast<SelectInst>(I));
}
-/// \brief Builder for the alloca slices.
+/// Builder for the alloca slices.
///
/// This class builds a set of alloca slices by recursively visiting the uses
/// of an alloca and making a slice for each load and store at each offset.
class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
friend class PtrUseVisitor<SliceBuilder>;
friend class InstVisitor<SliceBuilder>;
- typedef PtrUseVisitor<SliceBuilder> Base;
+
+ using Base = PtrUseVisitor<SliceBuilder>;
const uint64_t AllocSize;
AllocaSlices &AS;
SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
- /// \brief Set to de-duplicate dead instructions found in the use walk.
+ /// Set to de-duplicate dead instructions found in the use walk.
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public:
// Completely skip uses which have a zero size or start either before or
// past the end of the allocation.
if (Size == 0 || Offset.uge(AllocSize)) {
- DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
- << " which has zero size or starts outside of the "
- << AllocSize << " byte alloca:\n"
- << " alloca: " << AS.AI << "\n"
- << " use: " << I << "\n");
+ LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
+ << Offset
+ << " which has zero size or starts outside of the "
+ << AllocSize << " byte alloca:\n"
+ << " alloca: " << AS.AI << "\n"
+ << " use: " << I << "\n");
return markAsDead(I);
}
// them, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
- DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
- << " to remain within the " << AllocSize << " byte alloca:\n"
- << " alloca: " << AS.AI << "\n"
- << " use: " << I << "\n");
+ LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
+ << Offset << " to remain within the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << AS.AI << "\n"
+ << " use: " << I << "\n");
EndOffset = AllocSize;
}
// langref in a very strict sense. If we ever want to enable
// SROAStrictInbounds, this code should be factored cleanly into
// PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
- // by writing out the code here where we have tho underlying allocation
+ // by writing out the code here where we have the underlying allocation
// size readily available.
APInt GEPOffset = Offset;
const DataLayout &DL = GEPI.getModule()->getDataLayout();
break;
// Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
GEPOffset +=
uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
// If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
+ // bounds of the allocation, its behavior is undefined, so simply
// ignore it. Note that this is more strict than the generic clamping
// behavior of insertUse. We also try to handle cases which might run the
// risk of overflow.
// FIXME: We should instead consider the pointer to have escaped if this
// function is being instrumented for addressing bugs or race conditions.
if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
- DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
- << " which extends past the end of the " << AllocSize
- << " byte alloca:\n"
- << " alloca: " << AS.AI << "\n"
- << " use: " << SI << "\n");
+ LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
+ << Offset << " which extends past the end of the "
+ << AllocSize << " byte alloca:\n"
+ << " alloca: " << AS.AI << "\n"
+ << " use: " << SI << "\n");
return markAsDead(SI);
}
void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
- /// \brief Disable SROA entirely if there are unhandled users of the alloca.
+ /// Disable SROA entirely if there are unhandled users of the alloca.
void visitInstruction(Instruction &I) { PI.setAborted(&I); }
};
return;
}
- Slices.erase(std::remove_if(Slices.begin(), Slices.end(),
- [](const Slice &S) {
- return S.isDead();
- }),
- Slices.end());
+ Slices.erase(
+ llvm::remove_if(Slices, [](const Slice &S) { return S.isDead(); }),
+ Slices.end());
#ifndef NDEBUG
if (SROARandomShuffleSlices) {
- std::mt19937 MT(static_cast<unsigned>(sys::TimeValue::now().msec()));
+ std::mt19937 MT(static_cast<unsigned>(
+ std::chrono::system_clock::now().time_since_epoch().count()));
std::shuffle(Slices.begin(), Slices.end(), MT);
}
#endif
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
- std::sort(Slices.begin(), Slices.end());
+ llvm::sort(Slices.begin(), Slices.end());
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
if (!HaveLoad)
return false;
+ const DataLayout &DL = PN.getModule()->getDataLayout();
+
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (isSafeToLoadUnconditionally(InVal, MaxAlign, TI))
+ if (isSafeToLoadUnconditionally(InVal, MaxAlign, DL, TI))
continue;
return false;
}
static void speculatePHINodeLoads(PHINode &PN) {
- DEBUG(dbgs() << " original: " << PN << "\n");
+ LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
IRBuilderTy PHIBuilder(&PN);
}
// Inject loads into all of the pred blocks.
+ DenseMap<BasicBlock *, Value *> InjectedLoads;
for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
BasicBlock *Pred = PN.getIncomingBlock(Idx);
- TerminatorInst *TI = Pred->getTerminator();
Value *InVal = PN.getIncomingValue(Idx);
+
+ // A PHI node is allowed to have multiple (duplicated) entries for the same
+ // basic block, as long as the value is the same. So if we already injected
+ // a load in the predecessor, then we should reuse the same load for all
+ // duplicated entries.
+ if (Value *V = InjectedLoads.lookup(Pred)) {
+ NewPN->addIncoming(V, Pred);
+ continue;
+ }
+
+ TerminatorInst *TI = Pred->getTerminator();
IRBuilderTy PredBuilder(TI);
LoadInst *Load = PredBuilder.CreateLoad(
if (AATags)
Load->setAAMetadata(AATags);
NewPN->addIncoming(Load, Pred);
+ InjectedLoads[Pred] = Load;
}
- DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
+ LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
PN.eraseFromParent();
}
static bool isSafeSelectToSpeculate(SelectInst &SI) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
+ const DataLayout &DL = SI.getModule()->getDataLayout();
for (User *U : SI.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
if (!LI || !LI->isSimple())
return false;
- // Both operands to the select need to be dereferencable, either
+ // Both operands to the select need to be dereferenceable, either
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
- if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), LI))
+ if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI))
return false;
- if (!isSafeToLoadUnconditionally(FValue, LI->getAlignment(), LI))
+ if (!isSafeToLoadUnconditionally(FValue, LI->getAlignment(), DL, LI))
return false;
}
}
static void speculateSelectInstLoads(SelectInst &SI) {
- DEBUG(dbgs() << " original: " << SI << "\n");
+ LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
IRBuilderTy IRB(&SI);
Value *TV = SI.getTrueValue();
Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
LI->getName() + ".sroa.speculated");
- DEBUG(dbgs() << " speculated to: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
LI->replaceAllUsesWith(V);
LI->eraseFromParent();
}
SI.eraseFromParent();
}
-/// \brief Build a GEP out of a base pointer and indices.
+/// Build a GEP out of a base pointer and indices.
///
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
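+///
+/// For example, indices {0, 2} over a struct-typed base produce
+/// `getelementptr inbounds %T, %T* %base, i32 0, i32 2`, named with the
+/// "sroa_idx" suffix used below. (Illustrative IR; %T is the pointee type.)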
NamePrefix + "sroa_idx");
}
-/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
+/// Get a natural GEP off of the BasePtr walking through Ty toward
/// TargetTy without changing the offset of the pointer.
///
/// This routine assumes we've already established a properly offset GEP with
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
}
-/// \brief Recursively compute indices for a natural GEP.
+/// Recursively compute indices for a natural GEP.
///
/// This is the recursive step for getNaturalGEPWithOffset that walks down the
/// element types adding appropriate indices for the GEP.
Indices, NamePrefix);
}
-/// \brief Get a natural GEP from a base pointer to a particular offset and
+/// Get a natural GEP from a base pointer to a particular offset and
/// resulting in a particular type.
///
/// The goal is to produce a "natural" looking GEP that works with the existing
Indices, NamePrefix);
}
-/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
+/// Compute an adjusted pointer from Ptr by Offset bytes where the
/// resulting pointer has PointerTy.
///
/// This tries very hard to compute a "natural" GEP which arrives at the offset
if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
Ptr = cast<Operator>(Ptr)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
- if (GA->mayBeOverridden())
+ if (GA->isInterposable())
break;
Ptr = GA->getAliasee();
} else {
return Ptr;
}
-/// \brief Compute the adjusted alignment for a load or store from an offset.
+/// Compute the adjusted alignment for a load or store from an offset.
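+///
+/// For example, an access that was 16-byte aligned but is rewritten at byte
+/// offset 4 can only be assumed 4-byte aligned: MinAlign(16, 4) == 4.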
static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset,
const DataLayout &DL) {
unsigned Alignment;
return MinAlign(Alignment, Offset);
}
-/// \brief Test whether we can convert a value from the old to the new type.
+/// Test whether we can convert a value from the old to the new type.
///
/// This predicate should be used to guard calls to convertValue in order to
/// ensure that we only try to convert viable values. The strategy is that we
OldTy = OldTy->getScalarType();
NewTy = NewTy->getScalarType();
if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
- if (NewTy->isPointerTy() && OldTy->isPointerTy())
- return true;
- if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
- return true;
+ if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
+ return cast<PointerType>(NewTy)->getPointerAddressSpace() ==
+ cast<PointerType>(OldTy)->getPointerAddressSpace();
+ }
+
+ // We can convert integers to integral pointers, but not to non-integral
+ // pointers.
+ if (OldTy->isIntegerTy())
+ return !DL.isNonIntegralPointerType(NewTy);
+
+ // We can convert integral pointers to integers, but non-integral pointers
+ // need to remain pointers.
+ if (!DL.isNonIntegralPointerType(OldTy))
+ return NewTy->isIntegerTy();
+
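+ // Otherwise OldTy is a non-integral pointer, and such pointers may only be
+ // converted to pointers in the same address space (handled above).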
return false;
}
return true;
}
-/// \brief Generic routine to convert an SSA value to a value of a different
+/// Generic routine to convert an SSA value to a value of a different
/// type.
///
/// This will try various different casting techniques, such as bitcasts,
// See if we need inttoptr for this type pair. A cast involving both scalars
// and vectors requires an additional bitcast.
- if (OldTy->getScalarType()->isIntegerTy() &&
- NewTy->getScalarType()->isPointerTy()) {
+ if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
// Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
if (OldTy->isVectorTy() && !NewTy->isVectorTy())
return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
// See if we need ptrtoint for this type pair. A cast involving both scalars
// and vectors requires an additional bitcast.
- if (OldTy->getScalarType()->isPointerTy() &&
- NewTy->getScalarType()->isIntegerTy()) {
+ if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
// Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
if (OldTy->isVectorTy() && !NewTy->isVectorTy())
return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
return IRB.CreateBitCast(V, NewTy);
}
-/// \brief Test whether the given slice use can be promoted to a vector.
+/// Test whether the given slice use can be promoted to a vector.
///
/// This function is called to test each entry in a partition which is slated
/// for a single slice.
return true;
}
-/// \brief Test whether the given alloca partitioning and range of slices can be
+/// Test whether the given alloca partitioning and range of slices can be
/// promoted to a vector.
///
/// This is a quick test to check whether we can rewrite a particular alloca
// do that until all the backends are known to produce good code for all
// integer vector types.
if (!HaveCommonEltTy) {
- CandidateTys.erase(std::remove_if(CandidateTys.begin(), CandidateTys.end(),
- [](VectorType *VTy) {
- return !VTy->getElementType()->isIntegerTy();
- }),
- CandidateTys.end());
+ CandidateTys.erase(
+ llvm::remove_if(CandidateTys,
+ [](VectorType *VTy) {
+ return !VTy->getElementType()->isIntegerTy();
+ }),
+ CandidateTys.end());
// If there were no integer vector types, give up.
if (CandidateTys.empty())
// Rank the remaining candidate vector types. This is easy because we know
// they're all integer vectors. We sort by ascending number of elements.
auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
+ (void)DL;
assert(DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) &&
"Cannot have vector types of different sizes!");
assert(RHSTy->getElementType()->isIntegerTy() &&
"All non-integer types eliminated!");
return RHSTy->getNumElements() < LHSTy->getNumElements();
};
- std::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes);
+ llvm::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes);
CandidateTys.erase(
std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
CandidateTys.end());
return nullptr;
}
-/// \brief Test whether a slice of an alloca is valid for integer widening.
+/// Test whether a slice of an alloca is valid for integer widening.
///
/// This implements the necessary checking for the \c isIntegerWideningViable
/// test below on a single slice of the alloca.
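+///
+/// For example, an i64 alloca used only by whole-width i64 loads and stores
+/// (plus any fully-contained splittable memset/memcpy) can have its
+/// partition widened to a single promotable i64. (Illustrative; the precise
+/// rules are the checks below.)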
// We can't handle loads that extend past the allocated memory.
if (DL.getTypeStoreSize(LI->getType()) > Size)
return false;
+ // So far, AllocaSliceRewriter does not support widening split slice tails
+ // in rewriteIntegerLoad.
+ if (S.beginOffset() < AllocBeginOffset)
+ return false;
// Note that we don't count vector loads or stores as whole-alloca
// operations which enable integer widening because we would prefer to use
// vector widening instead.
// We can't handle stores that extend past the allocated memory.
if (DL.getTypeStoreSize(ValueTy) > Size)
return false;
+ // So far, AllocaSliceRewriter does not support widening split slice tails
+ // in rewriteIntegerStore.
+ if (S.beginOffset() < AllocBeginOffset)
+ return false;
// Note that we don't count vector loads or stores as whole-alloca
// operations which enable integer widening because we would prefer to use
// vector widening instead.
return true;
}
-/// \brief Test whether the given alloca partition's integer operations can be
+/// Test whether the given alloca partition's integer operations can be
/// widened to promotable ones.
///
/// This is a quick test to check whether we can rewrite the integer loads and
static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
IntegerType *Ty, uint64_t Offset,
const Twine &Name) {
- DEBUG(dbgs() << " start: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
IntegerType *IntTy = cast<IntegerType>(V->getType());
assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
"Element extends past full value");
ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
if (ShAmt) {
V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
- DEBUG(dbgs() << " shifted: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
}
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot extract to a larger integer!");
if (Ty != IntTy) {
V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
- DEBUG(dbgs() << " trunced: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
}
return V;
}
IntegerType *Ty = cast<IntegerType>(V->getType());
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot insert a larger integer!");
- DEBUG(dbgs() << " start: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
if (Ty != IntTy) {
V = IRB.CreateZExt(V, IntTy, Name + ".ext");
- DEBUG(dbgs() << " extended: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
}
assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
"Element store outside of alloca store");
ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
if (ShAmt) {
V = IRB.CreateShl(V, ShAmt, Name + ".shift");
- DEBUG(dbgs() << " shifted: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
}
if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
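+ // Mask off the destination bit range in Old before OR'ing in the new
+ // value; every bit outside the inserted range is preserved.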
APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
- DEBUG(dbgs() << " masked: " << *Old << "\n");
+ LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
V = IRB.CreateOr(Old, V, Name + ".insert");
- DEBUG(dbgs() << " inserted: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
}
return V;
}
if (NumElements == 1) {
V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
Name + ".extract");
- DEBUG(dbgs() << " extract: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
return V;
}
Mask.push_back(IRB.getInt32(i));
V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
ConstantVector::get(Mask), Name + ".extract");
- DEBUG(dbgs() << " shuffle: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
return V;
}
// Single element to insert.
V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
Name + ".insert");
- DEBUG(dbgs() << " insert: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
return V;
}
Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
ConstantVector::get(Mask), Name + ".expand");
- DEBUG(dbgs() << " shuffle: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
Mask.clear();
for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + ".blend");
- DEBUG(dbgs() << " blend: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
}
-/// \brief Visitor to rewrite instructions using p particular slice of an alloca
+/// Visitor to rewrite instructions using a particular slice of an alloca
/// to use a new alloca.
///
/// Also implements the rewriting to vector-based accesses when the partition
class llvm::sroa::AllocaSliceRewriter
: public InstVisitor<AllocaSliceRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
- friend class llvm::InstVisitor<AllocaSliceRewriter, bool>;
- typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base;
+ friend class InstVisitor<AllocaSliceRewriter, bool>;
+
+ using Base = InstVisitor<AllocaSliceRewriter, bool>;
const DataLayout &DL;
AllocaSlices &AS;
// The original offset of the slice currently being rewritten relative to
// the original alloca.
- uint64_t BeginOffset, EndOffset;
+ uint64_t BeginOffset = 0;
+ uint64_t EndOffset = 0;
+
// The new offsets of the slice currently being rewritten relative to the
// original alloca.
uint64_t NewBeginOffset, NewEndOffset;
uint64_t SliceSize;
- bool IsSplittable;
- bool IsSplit;
- Use *OldUse;
- Instruction *OldPtr;
+ bool IsSplittable = false;
+ bool IsSplit = false;
+ Use *OldUse = nullptr;
+ Instruction *OldPtr = nullptr;
// Track post-rewrite users which are PHI nodes and Selects.
- SmallPtrSetImpl<PHINode *> &PHIUsers;
- SmallPtrSetImpl<SelectInst *> &SelectUsers;
+ SmallSetVector<PHINode *, 8> &PHIUsers;
+ SmallSetVector<SelectInst *, 8> &SelectUsers;
// Utility IR builder, whose name prefix is setup for each visited use, and
// the insertion point is set to point to the user.
uint64_t NewAllocaBeginOffset,
uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
VectorType *PromotableVecTy,
- SmallPtrSetImpl<PHINode *> &PHIUsers,
- SmallPtrSetImpl<SelectInst *> &SelectUsers)
+ SmallSetVector<PHINode *, 8> &PHIUsers,
+ SmallSetVector<SelectInst *, 8> &SelectUsers)
: DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
NewAllocaBeginOffset(NewAllocaBeginOffset),
NewAllocaEndOffset(NewAllocaEndOffset),
VecTy(PromotableVecTy),
ElementTy(VecTy ? VecTy->getElementType() : nullptr),
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
- BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
- OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
+ PHIUsers(PHIUsers), SelectUsers(SelectUsers),
IRB(NewAI.getContext(), ConstantFolder()) {
if (VecTy) {
assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
IsSplittable = I->isSplittable();
IsSplit =
BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
- DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
- DEBUG(AS.printSlice(dbgs(), I, ""));
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
+ LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
+ LLVM_DEBUG(dbgs() << "\n");
// Compute the intersecting offset range.
assert(BeginOffset < NewAllocaEndOffset);
// Every instruction which can end up as a user must have a rewrite rule.
bool visitInstruction(Instruction &I) {
- DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
+ LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
llvm_unreachable("No rewrite rule for this instruction!");
}
#endif
return getAdjustedPtr(IRB, DL, &NewAI,
- APInt(DL.getPointerSizeInBits(), Offset), PointerTy,
+ APInt(DL.getPointerTypeSizeInBits(PointerTy), Offset),
+ PointerTy,
#ifndef NDEBUG
Twine(OldName) + "."
#else
);
}
- /// \brief Compute suitable alignment to access this slice of the *new*
+ /// Compute suitable alignment to access this slice of the *new*
/// alloca.
///
/// You can optionally pass a type to this routine and if that type's ABI
}
bool visitLoadInst(LoadInst &LI) {
- DEBUG(dbgs() << " original: " << LI << "\n");
+ LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
+ AAMDNodes AATags;
+ LI.getAAMetadata(AATags);
+
+ unsigned AS = LI.getPointerAddressSpace();
+
Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
: LI.getType();
const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
TargetTy->isIntegerTy()))) {
LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
LI.isVolatile(), LI.getName());
+ if (AATags)
+ NewLI->setAAMetadata(AATags);
if (LI.isVolatile())
- NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
+
+ // Any !nonnull metadata or !range metadata on the old load is also valid
+ // on the new load. This is even true in some cases even when the loads
+ // are different types, for example by mapping !nonnull metadata to
+ // !range metadata by modeling the null pointer constant converted to the
+ // integer type.
+ // FIXME: Add support for range metadata here. Currently the utilities
+ // for this don't propagate range metadata in trivial cases from one
+ // integer load to another, don't handle non-addrspace-0 null pointers
+ // correctly, and don't have any support for mapping ranges as the
+ // integer type becomes wider or narrower.
+ if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
+ copyNonnullMetadata(LI, N, *NewLI);
+
V = NewLI;
// If this is an integer load past the end of the slice (which means the
"endian_shift");
}
} else {
- Type *LTy = TargetTy->getPointerTo();
+ Type *LTy = TargetTy->getPointerTo(AS);
LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(TargetTy),
LI.isVolatile(), LI.getName());
+ if (AATags)
+ NewLI->setAAMetadata(AATags);
if (LI.isVolatile())
- NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+ NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
V = NewLI;
IsPtrAdjusted = true;
// the computed value, and then replace the placeholder with LI, leaving
// LI only used for this computation.
Value *Placeholder =
- new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
+ new LoadInst(UndefValue::get(LI.getType()->getPointerTo(AS)));
V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
"insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
- delete Placeholder;
+ Placeholder->deleteValue();
} else {
LI.replaceAllUsesWith(V);
}
Pass.DeadInsts.insert(&LI);
deleteIfTriviallyDead(OldOp);
- DEBUG(dbgs() << " to: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
return !LI.isVolatile() && !IsPtrAdjusted;
}
- bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) {
+ bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
+ AAMDNodes AATags) {
if (V->getType() != VecTy) {
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
V = insertVector(IRB, Old, V, BeginIndex, "vec");
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
+ if (AATags)
+ Store->setAAMetadata(AATags);
Pass.DeadInsts.insert(&SI);
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
+ LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
- bool rewriteIntegerStore(Value *V, StoreInst &SI) {
+ bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
}
V = convertValue(DL, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
+ Store->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+ if (AATags)
+ Store->setAAMetadata(AATags);
Pass.DeadInsts.insert(&SI);
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
+ LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
bool visitStoreInst(StoreInst &SI) {
- DEBUG(dbgs() << " original: " << SI << "\n");
+ LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
+ AAMDNodes AATags;
+ SI.getAAMetadata(AATags);
+
Value *V = SI.getValueOperand();
// Strip all inbounds GEPs and pointer casts to try to dig out any root
}
if (VecTy)
- return rewriteVectorizedStoreInst(V, SI, OldOp);
+ return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
if (IntTy && V->getType()->isIntegerTy())
- return rewriteIntegerStore(V, SI);
+ return rewriteIntegerStore(V, SI, AATags);
const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize;
StoreInst *NewSI;
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
SI.isVolatile());
} else {
- Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo());
+ unsigned AS = SI.getPointerAddressSpace();
+ Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS));
NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
SI.isVolatile());
}
+ NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+ if (AATags)
+ NewSI->setAAMetadata(AATags);
if (SI.isVolatile())
- NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope());
+ NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
Pass.DeadInsts.insert(&SI);
deleteIfTriviallyDead(OldOp);
- DEBUG(dbgs() << " to: " << *NewSI << "\n");
+ LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
}
- /// \brief Compute an integer value from splatting an i8 across the given
+ /// Compute an integer value from splatting an i8 across the given
/// number of bytes.
///
/// Note that this routine assumes an i8 is a byte. If that isn't true, don't
return V;
}
- /// \brief Compute a vector splat for a given element value.
+ /// Compute a vector splat for a given element value.
Value *getVectorSplat(Value *V, unsigned NumElements) {
V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
- DEBUG(dbgs() << " splat: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
return V;
}
bool visitMemSetInst(MemSetInst &II) {
- DEBUG(dbgs() << " original: " << II << "\n");
+ LLVM_DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getRawDest() == OldPtr);
+ AAMDNodes AATags;
+ II.getAAMetadata(AATags);
+
// If the memset has a variable size, it cannot be split, just adjust the
// pointer to the new alloca.
if (!isa<Constant>(II.getLength())) {
assert(!IsSplit);
assert(NewBeginOffset == BeginOffset);
II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
- Type *CstTy = II.getAlignmentCst()->getType();
- II.setAlignment(ConstantInt::get(CstTy, getSliceAlign()));
+ II.setDestAlignment(getSliceAlign());
deleteIfTriviallyDead(OldPtr);
return false;
CallInst *New = IRB.CreateMemSet(
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
getSliceAlign(), II.isVolatile());
- (void)New;
- DEBUG(dbgs() << " to: " << *New << "\n");
+ if (AATags)
+ New->setAAMetadata(AATags);
+ LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
V = convertValue(DL, IRB, V, AllocaTy);
}
- Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
- II.isVolatile());
- (void)New;
- DEBUG(dbgs() << " to: " << *New << "\n");
+ StoreInst *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+ II.isVolatile());
+ if (AATags)
+ New->setAAMetadata(AATags);
+ LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return !II.isVolatile();
}
// Rewriting of memory transfer instructions can be a bit tricky. We break
// them into two categories: split intrinsics and unsplit intrinsics.
- DEBUG(dbgs() << " original: " << II << "\n");
+ LLVM_DEBUG(dbgs() << " original: " << II << "\n");
+
+ AAMDNodes AATags;
+ II.getAAMetadata(AATags);
bool IsDest = &II.getRawDestUse() == OldUse;
assert((IsDest && II.getRawDest() == OldPtr) ||
// update both source and dest of a single call.
if (!IsSplittable) {
Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
- if (IsDest)
+ if (IsDest) {
II.setDest(AdjustedPtr);
- else
+ II.setDestAlignment(SliceAlign);
+ } else {
II.setSource(AdjustedPtr);
-
- if (II.getAlignment() > SliceAlign) {
- Type *CstTy = II.getAlignmentCst()->getType();
- II.setAlignment(
- ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign)));
+ II.setSourceAlignment(SliceAlign);
}
- DEBUG(dbgs() << " to: " << II << "\n");
+ LLVM_DEBUG(dbgs() << " to: " << II << "\n");
deleteIfTriviallyDead(OldPtr);
return false;
}
// Compute the relative offset for the other pointer within the transfer.
unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
- unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1,
- OtherOffset.zextOrTrunc(64).getZExtValue());
+ unsigned OtherAlign =
+ IsDest ? II.getSourceAlignment() : II.getDestAlignment();
+ OtherAlign = MinAlign(OtherAlign ? OtherAlign : 1,
+ OtherOffset.zextOrTrunc(64).getZExtValue());
if (EmitMemCpy) {
// Compute the other pointer, folding as much as possible to produce
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
- CallInst *New = IRB.CreateMemCpy(
- IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size,
- MinAlign(SliceAlign, OtherAlign), II.isVolatile());
- (void)New;
- DEBUG(dbgs() << " to: " << *New << "\n");
+ Value *DestPtr, *SrcPtr;
+ unsigned DestAlign, SrcAlign;
+ // Note: IsDest is true iff we're copying into the new alloca slice
+ if (IsDest) {
+ DestPtr = OurPtr;
+ DestAlign = SliceAlign;
+ SrcPtr = OtherPtr;
+ SrcAlign = OtherAlign;
+ } else {
+ DestPtr = OtherPtr;
+ DestAlign = OtherAlign;
+ SrcPtr = OurPtr;
+ SrcAlign = SliceAlign;
+ }
+ CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
+ Size, II.isVolatile());
+ if (AATags)
+ New->setAAMetadata(AATags);
+ LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
} else {
- Src =
- IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(), "copyload");
+ LoadInst *Load = IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(),
+ "copyload");
+ if (AATags)
+ Load->setAAMetadata(AATags);
+ Src = Load;
}
if (VecTy && !IsWholeAlloca && IsDest) {
StoreInst *Store = cast<StoreInst>(
IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
+ if (AATags)
+ Store->setAAMetadata(AATags);
+ LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
}
bool visitIntrinsicInst(IntrinsicInst &II) {
assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end);
- DEBUG(dbgs() << " original: " << II << "\n");
+ LLVM_DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getArgOperand(1) == OldPtr);
// Record this instruction for deletion.
Pass.DeadInsts.insert(&II);
+ // Lifetime intrinsics are only promotable if they cover the whole alloca.
+ // Therefore, we drop lifetime intrinsics which don't cover the whole
+ // alloca.
+ // (In theory, intrinsics which partially cover an alloca could be
+ // promoted, but PromoteMemToReg doesn't handle that case.)
+ // FIXME: Check whether the alloca is promotable before dropping the
+ // lifetime intrinsics?
+ if (NewBeginOffset != NewAllocaBeginOffset ||
+ NewEndOffset != NewAllocaEndOffset)
+ return true;
+
ConstantInt *Size =
ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
NewEndOffset - NewBeginOffset);
New = IRB.CreateLifetimeEnd(Ptr, Size);
(void)New;
- DEBUG(dbgs() << " to: " << *New << "\n");
+ LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
+
return true;
}
+ void fixLoadStoreAlign(Instruction &Root) {
+ // This algorithm implements the same visitor loop as
+ // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
+ // or store found.
+ SmallPtrSet<Instruction *, 4> Visited;
+ SmallVector<Instruction *, 4> Uses;
+ Visited.insert(&Root);
+ Uses.push_back(&Root);
+ do {
+ Instruction *I = Uses.pop_back_val();
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ unsigned LoadAlign = LI->getAlignment();
+ if (!LoadAlign)
+ LoadAlign = DL.getABITypeAlignment(LI->getType());
+ LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
+ continue;
+ }
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ unsigned StoreAlign = SI->getAlignment();
+ if (!StoreAlign) {
+ Value *Op = SI->getOperand(0);
+ StoreAlign = DL.getABITypeAlignment(Op->getType());
+ }
+ SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
+ continue;
+ }
+
+ assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+ for (User *U : I->users())
+ if (Visited.insert(cast<Instruction>(U)).second)
+ Uses.push_back(cast<Instruction>(U));
+ } while (!Uses.empty());
+ }
+
bool visitPHINode(PHINode &PN) {
- DEBUG(dbgs() << " original: " << PN << "\n");
+ LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
// Replace the operands which were using the old pointer.
std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
- DEBUG(dbgs() << " to: " << PN << "\n");
+ LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
deleteIfTriviallyDead(OldPtr);
+ // Fix the alignment of any loads or stores using this PHI node.
+ fixLoadStoreAlign(PN);
+
// PHIs can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
}
bool visitSelectInst(SelectInst &SI) {
- DEBUG(dbgs() << " original: " << SI << "\n");
+ LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
"Pointer isn't an operand!");
assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
if (SI.getOperand(2) == OldPtr)
SI.setOperand(2, NewPtr);
- DEBUG(dbgs() << " to: " << SI << "\n");
+ LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
+ // Fix the alignment of any loads or stores using this select.
+ fixLoadStoreAlign(SI);
+
// Selects can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
};
namespace {
-/// \brief Visitor to rewrite aggregate loads and stores as scalar.
+
+/// Visitor to rewrite aggregate loads and stores as scalar.
///
/// This pass aggressively rewrites all aggregate loads and stores on
/// a particular pointer (or any pointer derived from it which we can identify)
/// with scalar loads and stores.
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
- friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
+ friend class InstVisitor<AggLoadStoreRewriter, bool>;
/// Queue of pointer uses to analyze and potentially rewrite.
SmallVector<Use *, 8> Queue;
/// Rewrite loads and stores through a pointer and all pointers derived from
/// it.
bool rewrite(Instruction &I) {
- DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
+ LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
enqueueUsers(I);
bool Changed = false;
while (!Queue.empty()) {
// Conservative default is to not rewrite anything.
bool visitInstruction(Instruction &I) { return false; }
- /// \brief Generic recursive split emission class.
+ /// Generic recursive split emission class.
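+ ///
+ /// For example, splitting a load of {i32, {float, float}} emits a scalar
+ /// load for the i32 and for each float, then reassembles the aggregate
+ /// with insertvalue instructions. (Illustrative; see LoadOpSplitter below.)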
template <typename Derived> class OpSplitter {
protected:
/// The builder used to form new instructions.
IRBuilderTy IRB;
+
/// The indices which to be used with insert- or extractvalue to select the
/// appropriate value within the aggregate.
SmallVector<unsigned, 4> Indices;
+
/// The indices to a GEP instruction which will move Ptr to the correct slot
/// within the aggregate.
SmallVector<Value *, 4> GEPIndices;
+
/// The base pointer of the original op, used as a base for GEPing the
/// split operations.
Value *Ptr;
: IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}
public:
- /// \brief Generic recursive split emission routine.
+ /// Generic recursive split emission routine.
///
/// This method recursively splits an aggregate op (load or store) into
/// scalar or vector ops. It splits recursively until it hits a single value
};
struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
- LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr)
- : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {}
+ AAMDNodes AATags;
+
+ LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags)
+ : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {}
/// Emit a leaf load of a single value. This is called at the leaves of the
/// recursive emission to actually load values.
// Load the single value and insert it using the indices.
Value *GEP =
IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
- Value *Load = IRB.CreateLoad(GEP, Name + ".load");
+ LoadInst *Load = IRB.CreateLoad(GEP, Name + ".load");
+ if (AATags)
+ Load->setAAMetadata(AATags);
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
- DEBUG(dbgs() << " to: " << *Load << "\n");
+ LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
}
};
return false;
// We have an aggregate being loaded, split it apart.
- DEBUG(dbgs() << " original: " << LI << "\n");
- LoadOpSplitter Splitter(&LI, *U);
+ LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
+ AAMDNodes AATags;
+ LI.getAAMetadata(AATags);
+ LoadOpSplitter Splitter(&LI, *U, AATags);
Value *V = UndefValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
LI.replaceAllUsesWith(V);
}
struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
- StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr)
- : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {}
+ StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags)
+ : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {}
+ AAMDNodes AATags;
/// Emit a leaf store of a single value. This is called at the leaves of the
/// recursive emission to actually produce stores.
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
// Extract the single value and store it using the indices.
- Value *Store = IRB.CreateStore(
- IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
- IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"));
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
+ //
+ // The gep and extractvalue values are factored out of the CreateStore
+ // call to make the output independent of the argument evaluation order.
+ Value *ExtractValue =
+ IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
+ Value *InBoundsGEP =
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
+ StoreInst *Store = IRB.CreateStore(ExtractValue, InBoundsGEP);
+ if (AATags)
+ Store->setAAMetadata(AATags);
+ LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
}
};
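  // The evaluation-order note above matters because C++ does not specify the
  // order in which function arguments are evaluated: in a call such as
  //   use(makeGEP(), makeExtract())   // hypothetical helpers
  // either helper may run first, so IRBuilder's auto-numbered value names
  // could come out differently under different host compilers. Naming each
  // sub-expression first pins down the emission order.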
return false;
// We have an aggregate being stored, split it apart.
- DEBUG(dbgs() << " original: " << SI << "\n");
- StoreOpSplitter Splitter(&SI, *U);
+ LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
+ AAMDNodes AATags;
+ SI.getAAMetadata(AATags);
+ StoreOpSplitter Splitter(&SI, *U, AATags);
Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
SI.eraseFromParent();
return true;
return false;
}
};
-}
-/// \brief Strip aggregate type wrapping.
+} // end anonymous namespace
+
+/// Strip aggregate type wrapping.
///
/// This removes no-op aggregate types wrapping an underlying type. It will
/// strip as many layers of types as it can without changing either the type
return stripAggregateTypeWrapping(DL, InnerTy);
}
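// As an example of the stripping above (illustrative types): {{ [1 x i32] }}
// strips all the way down to i32, since each wrapper holds a single element
// at offset zero covering the full allocation size, whereas { i32, i32 } is
// returned unchanged because neither element spans the whole struct.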
-/// \brief Try to find a partition of the aggregate type passed in for a given
+/// Try to find a partition of the aggregate type passed in for a given
/// offset and size.
///
/// This recurses through the aggregate type and tries to compute a subtype
return nullptr;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
- // We can't partition pointers...
- if (SeqTy->isPointerTy())
- return nullptr;
-
Type *ElementTy = SeqTy->getElementType();
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
- if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
- if (NumSkippedElements >= ArrTy->getNumElements())
- return nullptr;
- } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
- if (NumSkippedElements >= VecTy->getNumElements())
- return nullptr;
- }
+ if (NumSkippedElements >= SeqTy->getNumElements())
+ return nullptr;
Offset -= NumSkippedElements * ElementSize;
// First check if we need to recurse.
return SubTy;
}
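// As an example (illustrative types): partitioning { i32, [2 x i32] } with
// Offset == 4 and Size == 8 skips the leading i32 and yields the [2 x i32]
// element, while Offset == 4 and Size == 4 recurses into the array and
// yields a single i32.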
-/// \brief Pre-split loads and stores to simplify rewriting.
+/// Pre-split loads and stores to simplify rewriting.
///
/// We want to break up the splittable load+store pairs as much as
/// possible. This is important to do as a preprocessing step, as once we
///
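/// As an illustrative pattern (not a specific test case): when an i64 load
/// feeds an i64 store but other slices access the underlying alloca as two
/// i32 halves, both the load and the store can be pre-split at byte offset 4
/// into matching i32 load/store pairs, letting each half be promoted
/// independently.
///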
/// \returns true if any changes are made.
bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
- DEBUG(dbgs() << "Pre-splitting loads and stores\n");
+ LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
// Track the loads and stores which are candidates for pre-splitting here, in
// the order they first appear during the partition scan. These give stable
// maybe it would make it more principled?
SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
- DEBUG(dbgs() << " Searching for candidate loads and stores\n");
+ LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
for (auto &P : AS.partitions()) {
for (Slice &S : P) {
Instruction *I = cast<Instruction>(S.getUse()->getUser());
}
// Record the initial split.
- DEBUG(dbgs() << " Candidate: " << *I << "\n");
+ LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
auto &Offsets = SplitOffsetsMap[I];
assert(Offsets.Splits.empty() &&
"Should not have splits the first time we see an instruction!");
// match relative to their starting offset. We have to verify this prior to
// any rewriting.
Stores.erase(
- std::remove_if(Stores.begin(), Stores.end(),
- [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
- // Lookup the load we are storing in our map of split
- // offsets.
- auto *LI = cast<LoadInst>(SI->getValueOperand());
- // If it was completely unsplittable, then we're done,
- // and this store can't be pre-split.
- if (UnsplittableLoads.count(LI))
- return true;
-
- auto LoadOffsetsI = SplitOffsetsMap.find(LI);
- if (LoadOffsetsI == SplitOffsetsMap.end())
- return false; // Unrelated loads are definitely safe.
- auto &LoadOffsets = LoadOffsetsI->second;
-
- // Now lookup the store's offsets.
- auto &StoreOffsets = SplitOffsetsMap[SI];
-
- // If the relative offsets of each split in the load and
- // store match exactly, then we can split them and we
- // don't need to remove them here.
- if (LoadOffsets.Splits == StoreOffsets.Splits)
- return false;
-
- DEBUG(dbgs()
- << " Mismatched splits for load and store:\n"
- << " " << *LI << "\n"
- << " " << *SI << "\n");
-
- // We've found a store and load that we need to split
- // with mismatched relative splits. Just give up on them
- // and remove both instructions from our list of
- // candidates.
- UnsplittableLoads.insert(LI);
- return true;
- }),
+ llvm::remove_if(Stores,
+ [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
+ // Lookup the load we are storing in our map of split
+ // offsets.
+ auto *LI = cast<LoadInst>(SI->getValueOperand());
+ // If it was completely unsplittable, then we're done,
+ // and this store can't be pre-split.
+ if (UnsplittableLoads.count(LI))
+ return true;
+
+ auto LoadOffsetsI = SplitOffsetsMap.find(LI);
+ if (LoadOffsetsI == SplitOffsetsMap.end())
+ return false; // Unrelated loads are definitely safe.
+ auto &LoadOffsets = LoadOffsetsI->second;
+
+ // Now lookup the store's offsets.
+ auto &StoreOffsets = SplitOffsetsMap[SI];
+
+ // If the relative offsets of each split in the load and
+ // store match exactly, then we can split them and we
+ // don't need to remove them here.
+ if (LoadOffsets.Splits == StoreOffsets.Splits)
+ return false;
+
+ LLVM_DEBUG(
+ dbgs()
+ << " Mismatched splits for load and store:\n"
+ << " " << *LI << "\n"
+ << " " << *SI << "\n");
+
+ // We've found a store and load that we need to split
+ // with mismatched relative splits. Just give up on them
+ // and remove both instructions from our list of
+ // candidates.
+ UnsplittableLoads.insert(LI);
+ return true;
+ }),
Stores.end());
// Now we have to go *back* through all the stores, because a later store may
// have caused an earlier store's load to become unsplittable and if it is
// unsplittable for the later store, then we can't rely on it being split in
// the earlier store either.
- Stores.erase(std::remove_if(Stores.begin(), Stores.end(),
- [&UnsplittableLoads](StoreInst *SI) {
- auto *LI =
- cast<LoadInst>(SI->getValueOperand());
- return UnsplittableLoads.count(LI);
- }),
+ Stores.erase(llvm::remove_if(Stores,
+ [&UnsplittableLoads](StoreInst *SI) {
+ auto *LI =
+ cast<LoadInst>(SI->getValueOperand());
+ return UnsplittableLoads.count(LI);
+ }),
Stores.end());
// Once we've established all the loads that can't be split for some reason,
// filter any that made it into our list out.
- Loads.erase(std::remove_if(Loads.begin(), Loads.end(),
- [&UnsplittableLoads](LoadInst *LI) {
- return UnsplittableLoads.count(LI);
- }),
+ Loads.erase(llvm::remove_if(Loads,
+ [&UnsplittableLoads](LoadInst *LI) {
+ return UnsplittableLoads.count(LI);
+ }),
Loads.end());
-
// If no loads or stores are left, there is no pre-splitting to be done for
// this alloca.
if (Loads.empty() && Stores.empty())
Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
IRB.SetInsertPoint(LI);
- DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
+ LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
- auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
+ auto AS = LI->getPointerAddressSpace();
+ auto *PartPtrTy = PartTy->getPointerTo(AS);
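+      // Note: getAdjustedPtr below computes the GEP offset in the index
+      // width of address space AS; some targets define that to be narrower
+      // than the full pointer width, hence getIndexSizeInBits rather than
+      // getPointerSizeInBits.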
LoadInst *PLoad = IRB.CreateAlignedLoad(
getAdjustedPtr(IRB, DL, BasePtr,
- APInt(DL.getPointerSizeInBits(), PartOffset),
+ APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
+ PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
// Append this load onto the list of split loads so we can find it later
// to rewrite the stores.
Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
&PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
/*IsSplittable*/ false));
- DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
- << ", " << NewSlices.back().endOffset() << "): " << *PLoad
- << "\n");
+ LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
+ << ", " << NewSlices.back().endOffset()
+ << "): " << *PLoad << "\n");
// See if we've handled all the splits.
if (Idx >= Size)
StoreInst *SI = cast<StoreInst>(LU);
if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
DeferredStores = true;
- DEBUG(dbgs() << " Deferred splitting of store: " << *SI << "\n");
+ LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
+ << "\n");
continue;
}
Value *StoreBasePtr = SI->getPointerOperand();
IRB.SetInsertPoint(SI);
- DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
+ LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
LoadInst *PLoad = SplitLoads[Idx];
auto *PartPtrTy =
PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
+ auto AS = SI->getPointerAddressSpace();
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
- APInt(DL.getPointerSizeInBits(), PartOffset),
- PartPtrTy, StoreBasePtr->getName() + "."),
+ PLoad,
+ getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getIndexSizeInBits(AS), PartOffset),
+ PartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
- (void)PStore;
- DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
+ PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+ LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
// We want to immediately iterate on any allocas impacted by splitting
Value *LoadBasePtr = LI->getPointerOperand();
Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
- DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
+ LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
// Check whether we have an already split load.
auto SplitLoadsMapI = SplitLoadsMap.find(LI);
assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
"Too few split loads for the number of splits in the store!");
} else {
- DEBUG(dbgs() << " of load: " << *LI << "\n");
+ LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
}
uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
- auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
+ auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
+ auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
// Either lookup a split load or create one.
LoadInst *PLoad;
PLoad = (*SplitLoads)[Idx];
} else {
IRB.SetInsertPoint(LI);
+ auto AS = LI->getPointerAddressSpace();
PLoad = IRB.CreateAlignedLoad(
getAdjustedPtr(IRB, DL, LoadBasePtr,
- APInt(DL.getPointerSizeInBits(), PartOffset),
- PartPtrTy, LoadBasePtr->getName() + "."),
+ APInt(DL.getIndexSizeInBits(AS), PartOffset),
+ LoadPartPtrTy, LoadBasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
}
// And store this partition.
IRB.SetInsertPoint(SI);
+ auto AS = SI->getPointerAddressSpace();
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
- APInt(DL.getPointerSizeInBits(), PartOffset),
- PartPtrTy, StoreBasePtr->getName() + "."),
+ PLoad,
+ getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getIndexSizeInBits(AS), PartOffset),
+ StorePartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
// Now build a new slice for the alloca.
Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
&PStore->getOperandUse(PStore->getPointerOperandIndex()),
/*IsSplittable*/ false));
- DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
- << ", " << NewSlices.back().endOffset() << "): " << *PStore
- << "\n");
+ LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
+ << ", " << NewSlices.back().endOffset()
+ << "): " << *PStore << "\n");
if (!SplitLoads) {
- DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
+ LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
}
// See if we've finished all the splits.
}
  // Remove the killed slices that have been pre-split.
- AS.erase(std::remove_if(AS.begin(), AS.end(), [](const Slice &S) {
- return S.isDead();
- }), AS.end());
+ AS.erase(llvm::remove_if(AS, [](const Slice &S) { return S.isDead(); }),
+ AS.end());
// Insert our new slices. This will sort and merge them into the sorted
// sequence.
AS.insert(NewSlices);
- DEBUG(dbgs() << " Pre-split slices:\n");
+ LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
#ifndef NDEBUG
for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
- DEBUG(AS.print(dbgs(), I, " "));
+ LLVM_DEBUG(AS.print(dbgs(), I, " "));
#endif
  // Finally, don't try to promote any allocas that now require re-splitting.
// They have already been added to the worklist above.
PromotableAllocas.erase(
- std::remove_if(
- PromotableAllocas.begin(), PromotableAllocas.end(),
+ llvm::remove_if(
+ PromotableAllocas,
[&](AllocaInst *AI) { return ResplitPromotableAllocas.count(AI); }),
PromotableAllocas.end());
return true;
}
-/// \brief Rewrite an alloca partition's users.
+/// Rewrite an alloca partition's users.
///
/// This routine drives both of the rewriting goals of the SROA pass. It tries
/// to rewrite uses of an alloca partition to be conducive for SSA value
// exact same type as the original, and with the same access offsets. In that
// case, re-use the existing alloca, but still run through the rewriter to
// perform phi and select speculation.
+ // P.beginOffset() can be non-zero even with the same type in a case with
+ // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
AllocaInst *NewAI;
- if (SliceTy == AI.getAllocatedType()) {
- assert(P.beginOffset() == 0 &&
- "Non-zero begin offset but same alloca type");
+ if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
NewAI = &AI;
// FIXME: We should be able to bail at this point with "nothing changed".
// FIXME: We might want to defer PHI speculation until after here.
if (Alignment <= DL.getABITypeAlignment(SliceTy))
Alignment = 0;
NewAI = new AllocaInst(
- SliceTy, nullptr, Alignment,
+ SliceTy, AI.getType()->getAddressSpace(), nullptr, Alignment,
AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
+ // Copy the old AI debug location over to the new one.
+ NewAI->setDebugLoc(AI.getDebugLoc());
++NumNewAllocas;
}
- DEBUG(dbgs() << "Rewriting alloca partition "
- << "[" << P.beginOffset() << "," << P.endOffset()
- << ") to: " << *NewAI << "\n");
+ LLVM_DEBUG(dbgs() << "Rewriting alloca partition "
+ << "[" << P.beginOffset() << "," << P.endOffset()
+ << ") to: " << *NewAI << "\n");
// Track the high watermark on the worklist as it is only relevant for
// promoted allocas. We will reset it to this point if the alloca is not in
// fact scheduled for promotion.
unsigned PPWOldSize = PostPromotionWorklist.size();
unsigned NumUses = 0;
- SmallPtrSet<PHINode *, 8> PHIUsers;
- SmallPtrSet<SelectInst *, 8> SelectUsers;
+ SmallSetVector<PHINode *, 8> PHIUsers;
+ SmallSetVector<SelectInst *, 8> SelectUsers;
AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
P.endOffset(), IsIntegerPromotable, VecTy,
}
NumAllocaPartitionUses += NumUses;
- MaxUsesPerAllocaPartition =
- std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
+ MaxUsesPerAllocaPartition.updateMax(NumUses);
// Now that we've processed all the slices in the new partition, check if any
// PHIs or Selects would block promotion.
- for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
- E = PHIUsers.end();
- I != E; ++I)
- if (!isSafePHIToSpeculate(**I)) {
+ for (PHINode *PHI : PHIUsers)
+ if (!isSafePHIToSpeculate(*PHI)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
break;
}
- for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
- E = SelectUsers.end();
- I != E; ++I)
- if (!isSafeSelectToSpeculate(**I)) {
+
+ for (SelectInst *Sel : SelectUsers)
+ if (!isSafeSelectToSpeculate(*Sel)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
Worklist.insert(NewAI);
}
} else {
- // If we can't promote the alloca, iterate on it to check for new
- // refinements exposed by splitting the current alloca. Don't iterate on an
- // alloca which didn't actually change and didn't get promoted.
- if (NewAI != &AI)
- Worklist.insert(NewAI);
-
// Drop any post-promotion work items if promotion didn't happen.
while (PostPromotionWorklist.size() > PPWOldSize)
PostPromotionWorklist.pop_back();
+
+    // We couldn't promote the alloca and we didn't create a new partition;
+    // nothing happened.
+ if (NewAI == &AI)
+ return nullptr;
+
+ // If we can't promote the alloca, iterate on it to check for new
+ // refinements exposed by splitting the current alloca. Don't iterate on an
+ // alloca which didn't actually change and didn't get promoted.
+ Worklist.insert(NewAI);
}
return NewAI;
}
-/// \brief Walks the slices of an alloca and form partitions based on them,
+/// Walks the slices of an alloca and form partitions based on them,
/// rewriting each of their uses.
bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (AS.begin() == AS.end())
// First try to pre-split loads and stores.
Changed |= presplitLoadsAndStores(AI, AS);
- // Now that we have identified any pre-splitting opportunities, mark any
- // splittable (non-whole-alloca) loads and stores as unsplittable. If we fail
- // to split these during pre-splitting, we want to force them to be
- // rewritten into a partition.
+  // Now that we have identified any pre-splitting opportunities, mark loads
+  // and stores unsplittable except in the following case: a slice stays
+  // splittable if every other slice is either disjoint from it or fully
+  // contained within it (whole-alloca loads and stores are the common
+  // instance). If we fail to split these during pre-splitting, we want to
+  // force them to be rewritten into a partition.
bool IsSorted = true;
- for (Slice &S : AS) {
- if (!S.isSplittable())
- continue;
- // FIXME: We currently leave whole-alloca splittable loads and stores. This
- // used to be the only splittable loads and stores and we need to be
- // confident that the above handling of splittable loads and stores is
- // completely sufficient before we forcibly disable the remaining handling.
- if (S.beginOffset() == 0 &&
- S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType()))
- continue;
- if (isa<LoadInst>(S.getUse()->getUser()) ||
- isa<StoreInst>(S.getUse()->getUser())) {
- S.makeUnsplittable();
- IsSorted = false;
+
+ uint64_t AllocaSize = DL.getTypeAllocSize(AI.getAllocatedType());
+ const uint64_t MaxBitVectorSize = 1024;
+ if (AllocaSize <= MaxBitVectorSize) {
+    // If a byte offset lies strictly inside any load or store, a slice
+    // starting or ending at that offset is not splittable.
+ SmallBitVector SplittableOffset(AllocaSize + 1, true);
+ for (Slice &S : AS)
+ for (unsigned O = S.beginOffset() + 1;
+ O < S.endOffset() && O < AllocaSize; O++)
+ SplittableOffset.reset(O);
+
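+    // For example (assumed layout): an 8-byte alloca accessed as two
+    // disjoint halves [0,4) and [4,8) resets only the interior bytes 1-3
+    // and 5-7, leaving offsets 0, 4, and 8 splittable, so both halves (and
+    // any whole-alloca access) keep their splittable flag below.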
+ for (Slice &S : AS) {
+ if (!S.isSplittable())
+ continue;
+
+ if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
+ (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
+ continue;
+
+ if (isa<LoadInst>(S.getUse()->getUser()) ||
+ isa<StoreInst>(S.getUse()->getUser())) {
+ S.makeUnsplittable();
+ IsSorted = false;
+ }
+ }
+  } else {
+    // For a large alloca we only allow whole-alloca splittable loads and
+    // stores, to avoid building an excessively large SplittableOffset bit
+    // vector.
+ for (Slice &S : AS) {
+ if (!S.isSplittable())
+ continue;
+
+ if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
+ continue;
+
+ if (isa<LoadInst>(S.getUse()->getUser()) ||
+          isa<StoreInst>(S.getUse()->getUser())) {
+ S.makeUnsplittable();
+ IsSorted = false;
+ }
}
}
+
if (!IsSorted)
- std::sort(AS.begin(), AS.end());
+ llvm::sort(AS.begin(), AS.end());
- /// \brief Describes the allocas introduced by rewritePartition
- /// in order to migrate the debug info.
- struct Piece {
+ /// Describes the allocas introduced by rewritePartition in order to migrate
+ /// the debug info.
+ struct Fragment {
AllocaInst *Alloca;
uint64_t Offset;
uint64_t Size;
- Piece(AllocaInst *AI, uint64_t O, uint64_t S)
+ Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
: Alloca(AI), Offset(O), Size(S) {}
};
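  // For example (illustrative), an i64 alloca backing a 64-bit variable and
  // rewritten as two i32 partitions produces Fragments (Offset=0, Size=32)
  // and (Offset=32, Size=32), which become DW_OP_LLVM_fragment expressions
  // below.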
- SmallVector<Piece, 4> Pieces;
+ SmallVector<Fragment, 4> Fragments;
// Rewrite each partition.
for (auto &P : AS.partitions()) {
uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType());
// Don't include any padding.
uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
- Pieces.push_back(Piece(NewAI, P.beginOffset() * SizeOfByte, Size));
+ Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
}
}
++NumPartitions;
}
NumAllocaPartitions += NumPartitions;
- MaxPartitionsPerAlloca =
- std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
+ MaxPartitionsPerAlloca.updateMax(NumPartitions);
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
- if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
- auto *Var = DbgDecl->getVariable();
- auto *Expr = DbgDecl->getExpression();
+ TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
+ if (!DbgDeclares.empty()) {
+ auto *Var = DbgDeclares.front()->getVariable();
+ auto *Expr = DbgDeclares.front()->getExpression();
+ auto VarSize = Var->getSizeInBits();
DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
uint64_t AllocaSize = DL.getTypeSizeInBits(AI.getAllocatedType());
- for (auto Piece : Pieces) {
- // Create a piece expression describing the new partition or reuse AI's
+ for (auto Fragment : Fragments) {
+ // Create a fragment expression describing the new partition or reuse AI's
// expression if there is only one partition.
- auto *PieceExpr = Expr;
- if (Piece.Size < AllocaSize || Expr->isBitPiece()) {
+ auto *FragmentExpr = Expr;
+ if (Fragment.Size < AllocaSize || Expr->isFragment()) {
// If this alloca is already a scalar replacement of a larger aggregate,
- // Piece.Offset describes the offset inside the scalar.
- uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
- uint64_t Start = Offset + Piece.Offset;
- uint64_t Size = Piece.Size;
- if (Expr->isBitPiece()) {
- uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize();
+ // Fragment.Offset describes the offset inside the scalar.
+ auto ExprFragment = Expr->getFragmentInfo();
+ uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
+ uint64_t Start = Offset + Fragment.Offset;
+ uint64_t Size = Fragment.Size;
+ if (ExprFragment) {
+ uint64_t AbsEnd =
+ ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
if (Start >= AbsEnd)
// No need to describe a SROAed padding.
continue;
Size = std::min(Size, AbsEnd - Start);
}
- PieceExpr = DIB.createBitPieceExpression(Start, Size);
- } else {
- assert(Pieces.size() == 1 &&
- "partition is as large as original alloca");
+ // The new, smaller fragment is stenciled out from the old fragment.
+ if (auto OrigFragment = FragmentExpr->getFragmentInfo()) {
+ assert(Start >= OrigFragment->OffsetInBits &&
+ "new fragment is outside of original fragment");
+ Start -= OrigFragment->OffsetInBits;
+ }
+
+ // The alloca may be larger than the variable.
+ if (VarSize) {
+ if (Size > *VarSize)
+ Size = *VarSize;
+ if (Size == 0 || Start + Size > *VarSize)
+ continue;
+ }
+
+ // Avoid creating a fragment expression that covers the entire variable.
+ if (!VarSize || *VarSize != Size) {
+ if (auto E =
+ DIExpression::createFragmentExpression(Expr, Start, Size))
+ FragmentExpr = *E;
+ else
+ continue;
+ }
}
- // Remove any existing dbg.declare intrinsic describing the same alloca.
- if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca))
- OldDDI->eraseFromParent();
+ // Remove any existing intrinsics describing the same alloca.
+ for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca))
+ OldDII->eraseFromParent();
- DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(),
- &AI);
+ DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr,
+ DbgDeclares.front()->getDebugLoc(), &AI);
}
}
return Changed;
}
-/// \brief Clobber a use with undef, deleting the used value if it becomes dead.
+/// Clobber a use with undef, deleting the used value if it becomes dead.
void SROA::clobberUse(Use &U) {
Value *OldV = U;
// Replace the use with an undef value.
}
}
-/// \brief Analyze an alloca for SROA.
+/// Analyze an alloca for SROA.
///
/// This analyzes the alloca to ensure we can reason about it, builds
/// the slices of the alloca, and then hands it off to be split and
/// rewritten as needed.
bool SROA::runOnAlloca(AllocaInst &AI) {
- DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
+ LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
++NumAllocasAnalyzed;
// Special case dead allocas, as they're trivial.
// Build the slices using a recursive instruction-visiting builder.
AllocaSlices AS(DL, AI);
- DEBUG(AS.print(dbgs()));
+ LLVM_DEBUG(AS.print(dbgs()));
if (AS.isEscaped())
return Changed;
Changed |= splitAlloca(AI, AS);
- DEBUG(dbgs() << " Speculating PHIs\n");
+ LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
while (!SpeculatablePHIs.empty())
speculatePHINodeLoads(*SpeculatablePHIs.pop_back_val());
- DEBUG(dbgs() << " Speculating Selects\n");
+ LLVM_DEBUG(dbgs() << " Speculating Selects\n");
while (!SpeculatableSelects.empty())
speculateSelectInstLoads(*SpeculatableSelects.pop_back_val());
return Changed;
}
-/// \brief Delete the dead instructions accumulated in this run.
+/// Delete the dead instructions accumulated in this run.
///
/// Recursively deletes the dead instructions we've accumulated. This is done
/// at the very end to maximize locality of the recursive delete and to
///
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
-void SROA::deleteDeadInstructions(
+bool SROA::deleteDeadInstructions(
SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
+ bool Changed = false;
while (!DeadInsts.empty()) {
Instruction *I = DeadInsts.pop_back_val();
- DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
+ LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
+
+ // If the instruction is an alloca, find the possible dbg.declare connected
+ // to it, and remove it too. We must do this before calling RAUW or we will
+ // not be able to find it.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ DeletedAllocas.insert(AI);
+ for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(AI))
+ OldDII->eraseFromParent();
+ }
I->replaceAllUsesWith(UndefValue::get(I->getType()));
DeadInsts.insert(U);
}
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- DeletedAllocas.insert(AI);
- if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(AI))
- DbgDecl->eraseFromParent();
- }
-
++NumDeleted;
I->eraseFromParent();
+ Changed = true;
}
+ return Changed;
}
-/// \brief Promote the allocas, using the best available technique.
+/// Promote the allocas, using the best available technique.
///
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
NumPromoted += PromotableAllocas.size();
- DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
+ LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
+ PromoteMemToReg(PromotableAllocas, *DT, AC);
PromotableAllocas.clear();
return true;
}
PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
AssumptionCache &RunAC) {
- DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
DT = &RunDT;
AC = &RunAC;
do {
while (!Worklist.empty()) {
Changed |= runOnAlloca(*Worklist.pop_back_val());
- deleteDeadInstructions(DeletedAllocas);
+ Changed |= deleteDeadInstructions(DeletedAllocas);
// Remove the deleted allocas from various lists so that we don't try to
// continue processing them.
auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
Worklist.remove_if(IsInSet);
PostPromotionWorklist.remove_if(IsInSet);
- PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(),
- PromotableAllocas.end(),
- IsInSet),
+ PromotableAllocas.erase(llvm::remove_if(PromotableAllocas, IsInSet),
PromotableAllocas.end());
DeletedAllocas.clear();
}
PostPromotionWorklist.clear();
} while (!Worklist.empty());
- // FIXME: Even when promoting allocas we should preserve some abstract set of
- // CFG-specific analyses.
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+ if (!Changed)
+ return PreservedAnalyses::all();
+
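+  // SROA never alters the CFG (the legacy pass advertises this guarantee
+  // via setPreservesCFG), so CFG-only analyses remain valid; GlobalsAA is
+  // preserved because no global memory behavior changes.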
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<GlobalsAA>();
+ return PA;
}
-PreservedAnalyses SROA::run(Function &F, AnalysisManager<Function> &AM) {
+PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) {
return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F));
}
SROA Impl;
public:
+ static char ID;
+
SROALegacyPass() : FunctionPass(ID) {
initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
}
+
bool runOnFunction(Function &F) override {
- if (skipOptnoneFunction(F))
+ if (skipFunction(F))
return false;
auto PA = Impl.runImpl(
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
return !PA.areAllPreserved();
}
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
- const char *getPassName() const override { return "SROA"; }
- static char ID;
+ StringRef getPassName() const override { return "SROA"; }
};
char SROALegacyPass::ID = 0;