/// tracks what operations have a given value number (IE it also tracks the
/// reverse mapping from value number -> operations with that value number), so
/// that it only needs to reprocess the instructions that are affected when
-/// something's value number changes. The rest of the algorithm is devoted to
-/// performing symbolic evaluation, forward propagation, and simplification of
-/// operations based on the value numbers deduced so far.
+/// something's value number changes. The vast majority of complexity and code
+/// in this file is devoted to tracking what value numbers could change for what
+/// instructions when various things happen. The rest of the algorithm is
+/// devoted to performing symbolic evaluation, forward propagation, and
+/// simplification of operations based on the value numbers deduced so far.
+///
+/// In order to make the GVN mostly-complete, we use a technique derived from
+/// "Detection of Redundant Expressions: A Complete and Polynomial-time
+/// Algorithm in SSA" by R.R. Pai. The source of incompleteness in most SSA
+/// based GVN algorithms is related to their inability to detect equivalence
+/// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)).
+/// We resolve this issue by generating the equivalent "phi of ops" form for
+/// each op of phis we see, in a way that only takes polynomial time to resolve.
///
/// We also do not perform elimination by using any published algorithm. All
/// published algorithms are O(Instructions). Instead, we use a technique that
STATISTIC(NumGVNAvoidedSortedLeaderChanges,
"Number of avoided sorted leader changes");
STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated");
+STATISTIC(NumGVNPHIOfOpsCreated, "Number of PHI of ops created");
+STATISTIC(NumGVNPHIOfOpsEliminations,
+ "Number of things eliminated using PHI of ops");
DEBUG_COUNTER(VNCounter, "newgvn-vn",
"Controls which instructions are value numbered")
-
+DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi",
+ "Controls which instructions we create phi of ops for")
// Currently store defining access refinement is too slow due to basicaa being
// egregiously slow. This flag lets us keep it working while we work on this
// issue.
}
}
// See if we really were the root of a component, by seeing if we still have
- // our DFSNumber.
- // If we do, we are the root of the component, and we have completed a
- // component. If we do not,
- // we are not the root of a component, and belong on the component stack.
+ // our DFSNumber. If we do, we are the root of the component, and we have
+ // completed a component. If we do not, we are not the root of a component,
+ // and belong on the component stack.
if (Root.lookup(I) == OurDFS) {
unsigned ComponentID = Components.size();
Components.resize(Components.size() + 1);
int StoreCount = 0;
};
+struct HashedExpression;
namespace llvm {
template <> struct DenseMapInfo<const Expression *> {
static const Expression *getEmptyKey() {
Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
return reinterpret_cast<const Expression *>(Val);
}
- static unsigned getHashValue(const Expression *V) {
- return static_cast<unsigned>(V->getHashValue());
+ static unsigned getHashValue(const Expression *E) {
+ return static_cast<unsigned>(E->getHashValue());
}
+ static unsigned getHashValue(const HashedExpression &HE);
+ static bool isEqual(const HashedExpression &LHS, const Expression *RHS);
static bool isEqual(const Expression *LHS, const Expression *RHS) {
if (LHS == RHS)
return true;
};
} // end namespace llvm
+// This is just a wrapper around Expression that computes the hash value once at
+// creation time. Hash values for an Expression can't change once they are
+// inserted into the DenseMap (it breaks DenseMap), so they must be immutable at
+// that point anyway.
+struct HashedExpression {
+ const Expression *E;
+ unsigned HashVal;
+ HashedExpression(const Expression *E)
+ : E(E), HashVal(DenseMapInfo<const Expression *>::getHashValue(E)) {}
+};
+
+unsigned
+DenseMapInfo<const Expression *>::getHashValue(const HashedExpression &HE) {
+ return HE.HashVal;
+}
+bool DenseMapInfo<const Expression *>::isEqual(const HashedExpression &LHS,
+ const Expression *RHS) {
+ return isEqual(LHS.E, RHS);
+}
+
namespace {
class NewGVN {
Function &F;
// Value Mappings.
DenseMap<Value *, CongruenceClass *> ValueToClass;
DenseMap<Value *, const Expression *> ValueToExpression;
+ // Value PHI handling, used to make equivalence between phi(op, op) and
+ // op(phi, phi).
+ // These mappings just store various data that would normally be part of the
+ // IR.
+ DenseSet<const Instruction *> PHINodeUses;
+ // Map a temporary instruction we created to a parent block.
+ DenseMap<const Value *, BasicBlock *> TempToBlock;
+ // Map between the temporary phis we created and the real instructions they
+ // are known equivalent to.
+ DenseMap<const Value *, PHINode *> RealToTemp;
+ // In order to know when we should re-process instructions that have
+ // phi-of-ops, we track the set of expressions that they needed as
+ // leaders. When we discover new leaders for those expressions, we process the
+ // associated phi-of-op instructions again in case they have changed. The
+ // other way they may change is if they had leaders, and those leaders
+ // disappear. However, at the point they have leaders, there are uses of the
+ // relevant operands in the created phi node, and so they will get reprocessed
+ // through the normal user marking we perform.
+ mutable DenseMap<const Value *, SmallPtrSet<Value *, 2>> AdditionalUsers;
+ DenseMap<const Expression *, SmallPtrSet<Instruction *, 2>>
+ ExpressionToPhiOfOps;
+ // Map from basic block to the temporary operations we created.
+ DenseMap<const BasicBlock *, SmallVector<Instruction *, 8>> PHIOfOpsPHIs;
+ // Map from temporary operation to MemoryAccess.
+ DenseMap<const Instruction *, MemoryUseOrDef *> TempToMemory;
+ // Set of all temporary instructions we created.
+ DenseSet<Instruction *> AllTempInstructions;
// Mapping from predicate info we used to the instructions we used it with.
// In order to correctly ensure propagation, we must keep track of what
enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique };
DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
- enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
- mutable DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
+ enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle };
+ mutable DenseMap<const Instruction *, InstCycleState> InstCycleState;
// Expression to class mapping.
using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
ExpressionClassMap ExpressionToClass;
const Expression *createBinaryExpression(unsigned, Type *, Value *,
Value *) const;
PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge,
- bool &AllConstant) const;
+ bool &OriginalOpsConstant) const;
const VariableExpression *createVariableExpression(Value *) const;
const ConstantExpression *createConstantExpression(Constant *) const;
const Expression *createVariableOrConstant(Value *V) const;
return CClass;
}
void initializeCongruenceClasses(Function &F);
+ const Expression *makePossiblePhiOfOps(Instruction *, bool,
+ SmallPtrSetImpl<Value *> &);
+ void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue);
// Value number an Instruction or MemoryPhi.
void valueNumberMemoryPhi(MemoryPhi *);
// Symbolic evaluation.
const Expression *checkSimplificationResults(Expression *, Instruction *,
Value *) const;
- const Expression *performSymbolicEvaluation(Value *) const;
+ const Expression *performSymbolicEvaluation(Value *,
+ SmallPtrSetImpl<Value *> &) const;
const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
Instruction *,
MemoryAccess *) const;
bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To);
CongruenceClass *getMemoryClass(const MemoryAccess *MA) const;
const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const;
- bool isMemoryAccessTop(const MemoryAccess *) const;
+ bool isMemoryAccessTOP(const MemoryAccess *) const;
// Ranking
unsigned int getRank(const Value *) const;
void replaceInstruction(Instruction *, Value *);
void markInstructionForDeletion(Instruction *);
void deleteInstructionsInBlock(BasicBlock *);
+ Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const;
// New instruction creation.
void handleNewInstruction(Instruction *){};
void markPredicateUsersTouched(Instruction *);
void markValueLeaderChangeTouched(CongruenceClass *CC);
void markMemoryLeaderChangeTouched(CongruenceClass *CC);
+ void markPhiOfOpsChanged(const HashedExpression &HE);
void addPredicateUsers(const PredicateBase *, Instruction *) const;
void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const;
+ void addAdditionalUsers(Value *To, Value *User) const;
// Main loop of value numbering
void iterateTouchedInstructions();
// Utilities.
void cleanupTables();
std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
- void updateProcessedCount(Value *V);
+ void updateProcessedCount(const Value *V);
void verifyMemoryCongruency() const;
void verifyIterationSettled(Function &F);
void verifyStoreExpressions() const;
const MemoryAccess *, const MemoryAccess *) const;
BasicBlock *getBlockForValue(Value *V) const;
void deleteExpression(const Expression *E) const;
+ MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
+ MemoryAccess *getDefiningAccess(const MemoryAccess *) const;
+ MemoryPhi *getMemoryAccess(const BasicBlock *) const;
+ template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
unsigned InstrToDFSNum(const Value *V) const {
assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses");
return InstrDFS.lookup(V);
? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
: InstrDFS.lookup(MA);
}
- bool isCycleFree(const PHINode *PN) const;
- template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
+ bool isCycleFree(const Instruction *) const;
+ bool isBackedge(BasicBlock *From, BasicBlock *To) const;
// Debug counter info. When verifying, we have to reset the value numbering
// debug counter to the same state it started in to get the same results.
std::pair<int, int> StartingVNCounter;
return true;
}
+// Determine if the edge From->To is a backedge.
+bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const {
+ if (From == To)
+ return true;
+ auto *FromDTN = DT->getNode(From);
+ auto *ToDTN = DT->getNode(To);
+ return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN);
+}
+
#ifndef NDEBUG
static std::string getBlockName(const BasicBlock *B) {
return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr);
}
#endif
+// Get a MemoryAccess for an instruction, fake or real.
+MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const {
+ auto *Result = MSSA->getMemoryAccess(I);
+ return Result ? Result : TempToMemory.lookup(I);
+}
+
+// Get a MemoryPhi for a basic block. These are all real.
+MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const {
+ return MSSA->getMemoryAccess(BB);
+}
+
// Get the basic block from an instruction/memory value.
BasicBlock *NewGVN::getBlockForValue(Value *V) const {
- if (auto *I = dyn_cast<Instruction>(V))
- return I->getParent();
- else if (auto *MP = dyn_cast<MemoryPhi>(V))
- return MP->getBlock();
- llvm_unreachable("Should have been able to figure out a block for our value");
- return nullptr;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ auto *Parent = I->getParent();
+ if (Parent)
+ return Parent;
+ Parent = TempToBlock.lookup(V);
+ assert(Parent && "Every fake instruction should have a block");
+ return Parent;
+ }
+
+ auto *MP = dyn_cast<MemoryPhi>(V);
+ assert(MP && "Should have been an instruction or a MemoryPhi");
+ return MP->getBlock();
}
// Delete a definitely dead expression, so it can be reused by the expression
const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler);
ExpressionAllocator.Deallocate(E);
}
-
PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
- bool &AllConstant) const {
- BasicBlock *PHIBlock = I->getParent();
+ bool &OriginalOpsConstant) const {
+ BasicBlock *PHIBlock = getBlockForValue(I);
auto *PN = cast<PHINode>(I);
auto *E =
new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock);
E->setType(I->getType());
E->setOpcode(I->getOpcode());
- unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock));
-
// NewGVN assumes the operands of a PHI node are in a consistent order across
// PHIs. LLVM doesn't seem to always guarantee this. While we need to fix
// this in LLVM at some point we don't want GVN to find wrong congruences.
auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) {
return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock});
});
-
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use *U) -> Value * {
auto *BB = PN->getIncomingBlock(*U);
- auto *DTN = DT->getNode(BB);
- if (RPOOrdering.lookup(DTN) >= PHIRPO)
- HasBackedge = true;
- AllConstant &= isa<UndefValue>(*U) || isa<Constant>(*U);
+ HasBackedge = HasBackedge || isBackedge(BB, PHIBlock);
+ OriginalOpsConstant =
+ OriginalOpsConstant && isa<Constant>(*U);
// Don't try to transform self-defined phis.
if (*U == PN)
// whether all members are constant.
std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) {
auto Operand = lookupOperandLeader(O);
- AllConstant &= isa<Constant>(Operand);
+ AllConstant = AllConstant && isa<Constant>(Operand);
return Operand;
});
// Return true if the MemoryAccess is really equivalent to everything. This is
// equivalent to the lattice value "TOP" in most lattices. This is the initial
// state of all MemoryAccesses.
-bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const {
+bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const {
return getMemoryClass(MA) == TOPClass;
}
// Unlike loads, we never try to eliminate stores, so we do not check if they
// are simple and avoid value numbering them.
auto *SI = cast<StoreInst>(I);
- auto *StoreAccess = MSSA->getMemoryAccess(SI);
+ auto *StoreAccess = getMemoryAccess(SI);
// Get the expression, if any, for the RHS of the MemoryDef.
const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess();
if (EnableStoreRefinement)
dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) {
if ((lookupOperandLeader(LI->getPointerOperand()) ==
lookupOperandLeader(SI->getPointerOperand())) &&
- (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) ==
+ (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) ==
StoreRHS))
return createVariableExpression(LI);
}
// Load of undef is undef.
if (isa<UndefValue>(LoadAddressLeader))
return createConstantExpression(UndefValue::get(LI->getType()));
-
- MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+ MemoryAccess *OriginalAccess = getMemoryAccess(I);
+ MemoryAccess *DefiningAccess =
+ MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
// operands are equal, because assumes must always be true.
if (CmpInst::isTrueWhenEqual(Predicate)) {
addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createVariableOrConstant(FirstOp);
}
}
if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) ||
(!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) {
addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createVariableOrConstant(FirstOp);
}
// Handle the special case of floating point.
(!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) &&
isa<ConstantFP>(FirstOp) && !cast<ConstantFP>(FirstOp)->isZero()) {
addPredicateUsers(PI, I);
+ addAdditionalUsers(Cmp->getOperand(0), I);
return createConstantExpression(cast<Constant>(FirstOp));
}
}
return Changed;
}
-// Determine if a phi is cycle-free. That means the values in the phi don't
-// depend on any expressions that can change value as a result of the phi.
-// For example, a non-cycle free phi would be v = phi(0, v+1).
-bool NewGVN::isCycleFree(const PHINode *PN) const {
- // In order to compute cycle-freeness, we do SCC finding on the phi, and see
- // what kind of SCC it ends up in. If it is a singleton, it is cycle-free.
- // If it is not in a singleton, it is only cycle free if the other members are
- // all phi nodes (as they do not compute anything, they are copies). TODO:
- // There are likely a few other intrinsics or expressions that could be
- // included here, but this happens so infrequently already that it is not
- // likely to be worth it.
- auto PCS = PhiCycleState.lookup(PN);
- if (PCS == PCS_Unknown) {
- SCCFinder.Start(PN);
- auto &SCC = SCCFinder.getComponentFor(PN);
+// Determine if an instruction is cycle-free. That means the values in the
+// instruction don't depend on any expressions that can change value as a result
+// of the instruction. For example, a non-cycle free instruction would be v =
+// phi(0, v+1).
+bool NewGVN::isCycleFree(const Instruction *I) const {
+ // In order to compute cycle-freeness, we do SCC finding on the instruction,
+ // and see what kind of SCC it ends up in. If it is a singleton, it is
+ // cycle-free. If it is not in a singleton, it is only cycle free if the
+ // other members are all phi nodes (as they do not compute anything, they are
+ // copies).
+ auto ICS = InstCycleState.lookup(I);
+ if (ICS == ICS_Unknown) {
+ SCCFinder.Start(I);
+ auto &SCC = SCCFinder.getComponentFor(I);
// It's cycle free if it's size 1 or or the SCC is *only* phi nodes.
if (SCC.size() == 1)
- PhiCycleState.insert({PN, PCS_CycleFree});
+ InstCycleState.insert({I, ICS_CycleFree});
else {
bool AllPhis =
llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); });
- PCS = AllPhis ? PCS_CycleFree : PCS_Cycle;
+ ICS = AllPhis ? ICS_CycleFree : ICS_Cycle;
for (auto *Member : SCC)
if (auto *MemberPhi = dyn_cast<PHINode>(Member))
- PhiCycleState.insert({MemberPhi, PCS});
+ InstCycleState.insert({MemberPhi, ICS});
}
}
- if (PCS == PCS_Cycle)
+ if (ICS == ICS_Cycle)
return false;
return true;
}
// constants, or all operands are ignored but the undef, it also must be
// cycle free.
if (!AllConstant && HasBackedge && NumOps > 0 &&
- !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I)))
+ !isa<UndefValue>(AllSameValue) && !isCycleFree(I))
return E;
// Only have to check for instructions
return createExpression(I);
}
+// Return true if V is a value that will always be available (IE can
+// be placed anywhere) in the function. We don't do globals here
+// because they are often worse to put in place.
+// TODO: Separate cost from availability
+static bool alwaysAvailable(Value *V) {
+ return isa<Constant>(V) || isa<Argument>(V);
+}
+
// Substitute and symbolize the value before value numbering.
-const Expression *NewGVN::performSymbolicEvaluation(Value *V) const {
+const Expression *
+NewGVN::performSymbolicEvaluation(Value *V,
+ SmallPtrSetImpl<Value *> &Visited) const {
const Expression *E = nullptr;
if (auto *C = dyn_cast<Constant>(V))
E = createConstantExpression(C);
return E;
}
+void NewGVN::addAdditionalUsers(Value *To, Value *User) const {
+ AdditionalUsers[To].insert(User);
+}
+
void NewGVN::markUsersTouched(Value *V) {
// Now mark the users as touched.
for (auto *User : V->users()) {
assert(isa<Instruction>(User) && "Use of value not within an instruction?");
TouchedInstructions.set(InstrToDFSNum(User));
}
+ const auto Result = AdditionalUsers.find(V);
+ if (Result != AdditionalUsers.end()) {
+ for (auto *User : Result->second)
+ TouchedInstructions.set(InstrToDFSNum(User));
+ AdditionalUsers.erase(Result);
+ }
}
void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const {
// Add I to the set of users of a given predicate.
void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const {
+ // Don't add temporary instructions to the user lists.
+ if (AllTempInstructions.count(I))
+ return;
+
if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
PredicateToUsers[PBranch->Condition].insert(I);
else if (auto *PAssume = dyn_cast<PredicateBranch>(PB))
assert(!CC->definesNoMemory() && "Can't get next leader if there is none");
if (CC->getStoreCount() > 0) {
if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first))
- return MSSA->getMemoryAccess(NL);
+ return getMemoryAccess(NL);
// Find the store with the minimum DFS number.
auto *V = getMinDFSOfRange<Value>(make_filter_range(
*CC, [&](const Value *V) { return isa<StoreInst>(V); }));
- return MSSA->getMemoryAccess(cast<StoreInst>(V));
+ return getMemoryAccess(cast<StoreInst>(V));
}
assert(CC->getStoreCount() == 0);
// instructions before.
// If it's not a memory use, set the MemoryAccess equivalence
- auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I));
+ auto *InstMA = dyn_cast_or_null<MemoryDef>(getMemoryAccess(I));
if (InstMA)
moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass);
ValueToClass[I] = NewClass;
}
}
+// For a given expression, mark the phi of ops instructions that could have
+// changed as a result.
+void NewGVN::markPhiOfOpsChanged(const HashedExpression &HE) {
+ auto PhiOfOpsSet = ExpressionToPhiOfOps.find_as(HE);
+ if (PhiOfOpsSet != ExpressionToPhiOfOps.end()) {
+ for (auto I : PhiOfOpsSet->second)
+ TouchedInstructions.set(InstrToDFSNum(I));
+ ExpressionToPhiOfOps.erase(PhiOfOpsSet);
+ }
+}
// Perform congruence finding on a given value numbering expression.
void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
// This is guaranteed to return something, since it will at least find
// TOP.
-
CongruenceClass *IClass = ValueToClass[I];
assert(IClass && "Should have found a IClass");
// Dead classes should have been eliminated from the mapping.
assert(!IClass->isDead() && "Found a dead class");
- CongruenceClass *EClass;
+ CongruenceClass *EClass = nullptr;
+ HashedExpression HE(E);
if (const auto *VE = dyn_cast<VariableExpression>(E)) {
- EClass = ValueToClass[VE->getVariableValue()];
+ EClass = ValueToClass.lookup(VE->getVariableValue());
} else {
- auto lookupResult = ExpressionToClass.insert({E, nullptr});
+ auto lookupResult = ExpressionToClass.insert_as({E, nullptr}, HE);
// If it's not in the value table, create a new congruence class.
if (lookupResult.second) {
if (ClassChanged || LeaderChanged) {
DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E
<< "\n");
- if (ClassChanged)
+ if (ClassChanged) {
moveValueToNewCongruenceClass(I, E, IClass, EClass);
+ markPhiOfOpsChanged(HE);
+ }
+
markUsersTouched(I);
- if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
+ if (MemoryAccess *MA = getMemoryAccess(I))
markMemoryUsersTouched(MA);
if (auto *CI = dyn_cast<CmpInst>(I))
markPredicateUsersTouched(CI);
// impact predicates. Otherwise, only mark the phi nodes as touched, as
// they are the only thing that depend on new edges. Anything using their
// values will get propagated to if necessary.
- if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To))
+ if (MemoryAccess *MemPhi = getMemoryAccess(To))
TouchedInstructions.set(InstrToDFSNum(MemPhi));
auto BI = To->begin();
TouchedInstructions.set(InstrToDFSNum(&*BI));
++BI;
}
+ const auto PHIResult = PHIOfOpsPHIs.find(To);
+ if (PHIResult != PHIOfOpsPHIs.end()) {
+ const auto &PHIs = PHIResult->second;
+ for (auto I : PHIs)
+ TouchedInstructions.set(InstrToDFSNum(I));
+ }
}
}
}
// This also may be a memory defining terminator, in which case, set it
// equivalent only to itself.
//
- auto *MA = MSSA->getMemoryAccess(TI);
+ auto *MA = getMemoryAccess(TI);
if (MA && !isa<MemoryUse>(MA)) {
auto *CC = ensureLeaderOfMemoryClass(MA);
if (setMemoryClass(MA, CC))
}
}
+void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB,
+ Instruction *ExistingValue) {
+ InstrDFS[Op] = InstrToDFSNum(ExistingValue);
+ AllTempInstructions.insert(Op);
+ PHIOfOpsPHIs[BB].push_back(Op);
+ TempToBlock[Op] = BB;
+ if (ExistingValue)
+ RealToTemp[ExistingValue] = Op;
+}
+
+static bool okayForPHIOfOps(const Instruction *I) {
+ return isa<BinaryOperator>(I) || isa<SelectInst>(I) || isa<CmpInst>(I) ||
+ isa<LoadInst>(I);
+}
+
+// When we see an instruction that is an op of phis, generate the equivalent phi
+// of ops form.
+const Expression *
+NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
+ SmallPtrSetImpl<Value *> &Visited) {
+ if (!okayForPHIOfOps(I))
+ return nullptr;
+
+ if (!Visited.insert(I).second)
+ return nullptr;
+ // For now, we require the instruction be cycle free because we don't
+ // *always* create a phi of ops for instructions that could be done as phi
+ // of ops, we only do it if we think it is useful. If we did do it all the
+ // time, we could remove the cycle free check.
+ if (!isCycleFree(I))
+ return nullptr;
+
+ unsigned IDFSNum = InstrToDFSNum(I);
+ // Pretty much all of the instructions we can convert to phi of ops over a
+ // backedge that are adds, are really induction variables, and those are
+ // pretty much pointless to convert. This is very coarse-grained for a
+ // test, so if we do find some value, we can change it later.
+ // But otherwise, what can happen is we convert the induction variable from
+ //
+ // i = phi (0, tmp)
+ // tmp = i + 1
+ //
+ // to
+ // i = phi (0, tmpphi)
+ // tmpphi = phi(1, tmpphi+1)
+ //
+ // Which we don't want to happen. We could just avoid this for all non-cycle
+ // free phis, and we may go that route.
+ if (HasBackedge && I->getOpcode() == Instruction::Add)
+ return nullptr;
+
+ SmallPtrSet<const Value *, 8> ProcessedPHIs;
+ // TODO: We don't do phi translation on memory accesses because it's
+ // complicated. For a load, we'd need to be able to simulate a new memoryuse,
+ // which we don't have a good way of doing ATM.
+ auto *MemAccess = getMemoryAccess(I);
+ // If the memory operation is defined by a memory operation in this block that
+ // isn't a MemoryPhi, transforming the pointer backwards through a scalar phi
+ // can't help, as it would still be killed by that memory operation.
+ if (MemAccess && !isa<MemoryPhi>(MemAccess->getDefiningAccess()) &&
+ MemAccess->getDefiningAccess()->getBlock() == I->getParent())
+ return nullptr;
+
+ // Convert op of phis to phi of ops
+ for (auto &Op : I->operands()) {
+ if (!isa<PHINode>(Op))
+ continue;
+ auto *OpPHI = cast<PHINode>(Op);
+ // No point in doing this for one-operand phis.
+ if (OpPHI->getNumOperands() == 1)
+ continue;
+ if (!DebugCounter::shouldExecute(PHIOfOpsCounter))
+ return nullptr;
+ SmallVector<std::pair<Value *, BasicBlock *>, 4> Ops;
+ auto *PHIBlock = getBlockForValue(OpPHI);
+ for (auto PredBB : OpPHI->blocks()) {
+ Value *FoundVal = nullptr;
+ // We could just skip unreachable edges entirely but it's tricky to do
+ // with rewriting existing phi nodes.
+ if (ReachableEdges.count({PredBB, PHIBlock})) {
+ // Clone the instruction, create an expression from it, and see if we
+ // have a leader.
+ Instruction *ValueOp = I->clone();
+ auto Iter = TempToMemory.end();
+ if (MemAccess)
+ Iter = TempToMemory.insert({ValueOp, MemAccess}).first;
+
+ for (auto &Op : ValueOp->operands()) {
+ Op = Op->DoPHITranslation(PHIBlock, PredBB);
+ // When this operand changes, it could change whether there is a
+ // leader for us or not.
+ addAdditionalUsers(Op, I);
+ }
+ // Make sure it's marked as a temporary instruction.
+ AllTempInstructions.insert(ValueOp);
+ // and make sure anything that tries to add its DFS number is
+ // redirected to the instruction we are making a phi of ops
+ // for.
+ InstrDFS.insert({ValueOp, IDFSNum});
+ const Expression *E = performSymbolicEvaluation(ValueOp, Visited);
+ InstrDFS.erase(ValueOp);
+ AllTempInstructions.erase(ValueOp);
+ ValueOp->deleteValue();
+ if (MemAccess)
+ TempToMemory.erase(Iter);
+ if (!E)
+ return nullptr;
+ FoundVal = findPhiOfOpsLeader(E, PredBB);
+ if (!FoundVal) {
+ ExpressionToPhiOfOps[E].insert(I);
+ return nullptr;
+ }
+ if (auto *SI = dyn_cast<StoreInst>(FoundVal))
+ FoundVal = SI->getValueOperand();
+ } else {
+ DEBUG(dbgs() << "Skipping phi of ops operand for incoming block "
+ << getBlockName(PredBB)
+ << " because the block is unreachable\n");
+ FoundVal = UndefValue::get(I->getType());
+ }
+
+ Ops.push_back({FoundVal, PredBB});
+ DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in "
+ << getBlockName(PredBB) << "\n");
+ }
+ auto *ValuePHI = RealToTemp.lookup(I);
+ bool NewPHI = false;
+ if (!ValuePHI) {
+ ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands());
+ addPhiOfOps(ValuePHI, PHIBlock, I);
+ NewPHI = true;
+ NumGVNPHIOfOpsCreated++;
+ }
+ if (NewPHI) {
+ for (auto PHIOp : Ops)
+ ValuePHI->addIncoming(PHIOp.first, PHIOp.second);
+ } else {
+ unsigned int i = 0;
+ for (auto PHIOp : Ops) {
+ ValuePHI->setIncomingValue(i, PHIOp.first);
+ ValuePHI->setIncomingBlock(i, PHIOp.second);
+ ++i;
+ }
+ }
+
+ DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I
+ << "\n");
+ return performSymbolicEvaluation(ValuePHI, Visited);
+ }
+ return nullptr;
+}
+
// The algorithm initially places the values of the routine in the TOP
// congruence class. The leader of TOP is the undetermined value `undef`.
// When the algorithm has finished, values still in TOP are unreachable.
TOPClass->incStoreCount();
}
for (auto &I : *BB) {
+ // TODO: Move to helper
+ if (isa<PHINode>(&I))
+ for (auto *U : I.users())
+ if (auto *UInst = dyn_cast<Instruction>(U))
+ if (InstrToDFSNum(UInst) != 0 && okayForPHIOfOps(UInst))
+ PHINodeUses.insert(UInst);
// Don't insert void terminators into the class. We don't value number
// them, and they just end up sitting in TOP.
if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
CongruenceClasses[i] = nullptr;
}
+ // Destroy the temporary instructions we created.
+ SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(),
+ AllTempInstructions.end());
+ AllTempInstructions.clear();
+
+ // We have to drop all references for everything first, so there are no uses
+ // left as we delete them.
+ for (auto *I : TempInst) {
+ I->dropAllReferences();
+ }
+
+ while (!TempInst.empty()) {
+ auto *I = TempInst.back();
+ TempInst.pop_back();
+ I->deleteValue();
+ }
+
ValueToClass.clear();
ArgRecycler.clear(ExpressionAllocator);
ExpressionAllocator.Reset();
CongruenceClasses.clear();
ExpressionToClass.clear();
ValueToExpression.clear();
+ RealToTemp.clear();
+ AdditionalUsers.clear();
+ ExpressionToPhiOfOps.clear();
+ TempToBlock.clear();
+ TempToMemory.clear();
+ PHIOfOpsPHIs.clear();
ReachableBlocks.clear();
ReachableEdges.clear();
#ifndef NDEBUG
MemoryToUsers.clear();
}
+// Assign local DFS number mapping to instructions, and leave space for Value
+// PHI's.
std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
unsigned Start) {
unsigned End = Start;
- if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) {
+ if (MemoryAccess *MemPhi = getMemoryAccess(B)) {
InstrDFS[MemPhi] = End++;
DFSToInstr.emplace_back(MemPhi);
}
+ // Then the real block goes next.
for (auto &I : *B) {
// There's no need to call isInstructionTriviallyDead more than once on
// an instruction. Therefore, once we know that an instruction is dead
markInstructionForDeletion(&I);
continue;
}
-
InstrDFS[&I] = End++;
DFSToInstr.emplace_back(&I);
}
return std::make_pair(Start, End);
}
-void NewGVN::updateProcessedCount(Value *V) {
+void NewGVN::updateProcessedCount(const Value *V) {
#ifndef NDEBUG
if (ProcessedCount.count(V) == 0) {
ProcessedCount.insert({V, 1});
const BasicBlock *PHIBlock = MP->getBlock();
auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP &&
- !isMemoryAccessTop(cast<MemoryAccess>(U)) &&
+ !isMemoryAccessTOP(cast<MemoryAccess>(U)) &&
ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
});
// If all that is left is nothing, our memoryphi is undef. We keep it as
DEBUG(dbgs() << "Processing instruction " << *I << "\n");
if (!I->isTerminator()) {
const Expression *Symbolized = nullptr;
+ SmallPtrSet<Value *, 2> Visited;
if (DebugCounter::shouldExecute(VNCounter)) {
- Symbolized = performSymbolicEvaluation(I);
+ Symbolized = performSymbolicEvaluation(I, Visited);
+ // Make a phi of ops if necessary
+ if (Symbolized && !isa<ConstantExpression>(Symbolized) &&
+ !isa<VariableExpression>(Symbolized) && PHINodeUses.count(I)) {
+ // FIXME: Backedge argument
+ auto *PHIE = makePossiblePhiOfOps(I, false, Visited);
+ if (PHIE)
+ Symbolized = PHIE;
+ }
+
} else {
// Mark the instruction as unused so we don't value number it again.
InstrDFS[I] = 0;
}
// If we couldn't come up with a symbolic expression, use the unknown
// expression
- if (Symbolized == nullptr) {
+ if (Symbolized == nullptr)
Symbolized = createUnknownExpression(I);
- }
-
performCongruenceFinding(I, Symbolized);
} else {
// Handle terminators that return values. All of them produce values we
auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred);
for (auto KV : Filtered) {
- assert(KV.second != TOPClass &&
- "Memory not unreachable but ended up in TOP");
if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader());
if (FirstMUD && SecondMUD) {
// Nothing set, nothing to iterate, just return.
if (FirstInstr == -1)
return;
- BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
+ const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
while (TouchedInstructions.any()) {
++Iterations;
// Walk through all the instructions in all the blocks in RPO.
}
Value *V = InstrFromDFSNum(InstrNum);
- BasicBlock *CurrBlock = getBlockForValue(V);
+ const BasicBlock *CurrBlock = getBlockForValue(V);
// If we hit a new block, do reachability processing.
if (CurrBlock != LastBlock) {
BlockInstRange.insert({B, BlockRange});
ICount += BlockRange.second - BlockRange.first;
}
+ initializeCongruenceClasses(F);
TouchedInstructions.resize(ICount);
// Ensure we don't end up resizing the expressionToClass map, as
// Initialize the touched instructions to include the entry block.
const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock());
TouchedInstructions.set(InstRange.first, InstRange.second);
+ DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock())
+ << " marked reachable\n");
ReachableBlocks.insert(&F.getEntryBlock());
- initializeCongruenceClasses(F);
iterateTouchedInstructions();
verifyMemoryCongruency();
verifyIterationSettled(F);
if (!ToErase->use_empty())
ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType()));
- ToErase->eraseFromParent();
+ if (ToErase->getParent())
+ ToErase->eraseFromParent();
}
// Delete all unreachable blocks.
return Changed;
}
-// Return true if V is a value that will always be available (IE can
-// be placed anywhere) in the function. We don't do globals here
-// because they are often worse to put in place.
-// TODO: Separate cost from availability
-static bool alwaysAvailable(Value *V) {
- return isa<Constant>(V) || isa<Argument>(V);
-}
-
struct NewGVN::ValueDFS {
int DFSIn = 0;
int DFSOut = 0;
}
assert(isa<Instruction>(D) &&
"The dense set member should always be an instruction");
- VDDef.LocalNum = InstrToDFSNum(D);
- DFSOrderedSet.emplace_back(VDDef);
Instruction *Def = cast<Instruction>(D);
+ VDDef.LocalNum = InstrToDFSNum(D);
+ DFSOrderedSet.push_back(VDDef);
+ // If there is a phi node equivalent, add it
+ if (auto *PN = RealToTemp.lookup(Def)) {
+ auto *PHIE =
+ dyn_cast_or_null<PHIExpression>(ValueToExpression.lookup(Def));
+ if (PHIE) {
+ VDDef.Def.setInt(false);
+ VDDef.Def.setPointer(PN);
+ VDDef.LocalNum = 0;
+ DFSOrderedSet.push_back(VDDef);
+ }
+ }
+
unsigned int UseCount = 0;
// Now add the uses.
for (auto &U : Def->uses()) {
// they are from.
VDUse.LocalNum = InstrDFS.size() + 1;
} else {
- IBlock = I->getParent();
+ IBlock = getBlockForValue(I);
VDUse.LocalNum = InstrToDFSNum(I);
}
};
}
+// Given an expression and the basic block we want it to be available in,
+// return a value usable as its leader in that block, or nullptr if none exists.
+Value *NewGVN::findPhiOfOpsLeader(const Expression *E,
+ const BasicBlock *BB) const {
+ // Constant and variable expressions already name their value; return it as is.
+ if (auto *CE = dyn_cast<ConstantExpression>(E))
+ return CE->getConstantValue();
+ if (auto *VE = dyn_cast<VariableExpression>(E))
+ return VE->getVariableValue();
+
+ auto *CC = ExpressionToClass.lookup(E);
+ if (!CC)
+ return nullptr; // No congruence class is recorded for E.
+ if (alwaysAvailable(CC->getLeader()))
+ return CC->getLeader();
+
+ for (auto Member : *CC) {
+ auto *MemberInst = dyn_cast<Instruction>(Member);
+ // Anything that isn't an instruction is always available, so use it.
+ if (!MemberInst)
+ return Member;
+ // A member in BB itself is accepted because the result is used as a phi
+ // operand; otherwise the member's block has to dominate BB.
+ if (MemberInst->getParent() == BB ||
+ DT->dominates(MemberInst->getParent(), BB)) {
+ return Member;
+ }
+ }
+ return nullptr; // No member of the class is usable in BB.
+}
+
bool NewGVN::eliminateInstructions(Function &F) {
// This is a non-standard eliminator. The normal way to eliminate is
// to walk the dominator tree in order, keeping track of available
// DFS numbers are updated, we compute some ourselves.
DT->updateDFSNumbers();
- for (auto &B : F) {
- if (!ReachableBlocks.count(&B)) {
- for (const auto S : successors(&B)) {
- for (auto II = S->begin(); isa<PHINode>(II); ++II) {
- auto &Phi = cast<PHINode>(*II);
- DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block "
- << getBlockName(&B)
- << " with undef due to it being unreachable\n");
- for (auto &Operand : Phi.incoming_values())
- if (Phi.getIncomingBlock(Operand) == &B)
- Operand.set(UndefValue::get(Phi.getType()));
+ // Go through all of our phi nodes, and kill the arguments associated with unreachable edges.
+ auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) {
+ for (auto &Operand : PHI.incoming_values())
+ if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) {
+ DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block "
+ << getBlockName(PHI.getIncomingBlock(Operand))
+ << " with undef due to it being unreachable\n");
+ Operand.set(UndefValue::get(PHI.getType()));
+ }
+ };
+ SmallPtrSet<BasicBlock *, 8> BlocksWithPhis;
+ for (auto &B : F)
+ if ((!B.empty() && isa<PHINode>(*B.begin())) ||
+ (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end()))
+ BlocksWithPhis.insert(&B);
+ DenseMap<const BasicBlock *, unsigned> ReachablePredCount;
+ for (auto KV : ReachableEdges)
+ ReachablePredCount[KV.getEnd()]++;
+ for (auto *BB : BlocksWithPhis)
+ // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in
+ // the block and subtract the pred count, but it's more complicated.
+ if (ReachablePredCount.lookup(BB) !=
+ std::distance(pred_begin(BB), pred_end(BB))) {
+ for (auto II = BB->begin(); isa<PHINode>(II); ++II) {
+ auto &PHI = cast<PHINode>(*II);
+ ReplaceUnreachablePHIArgs(PHI, BB);
+ }
+ auto PHIResult = PHIOfOpsPHIs.find(BB);
+ if (PHIResult != PHIOfOpsPHIs.end()) {
+ auto &PHIs = PHIResult->second;
+ for (auto I : PHIs) {
+ auto *PHI = dyn_cast<PHINode>(I);
+ ReplaceUnreachablePHIArgs(*PHI, BB);
}
}
}
- }
// Map to store the use counts
DenseMap<const Value *, unsigned int> UseCounts;
- for (CongruenceClass *CC : reverse(CongruenceClasses)) {
+ for (auto *CC : reverse(CongruenceClasses)) {
// Track the equivalent store info so we can decide whether to try
// dead store elimination.
SmallVector<ValueDFS, 8> PossibleDeadStores;
DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID()
<< "\n");
// A singleton can be skipped unless its leader has a phi of ops equivalent.
- if (CC->size() != 1) {
+ if (CC->size() != 1 || RealToTemp.lookup(Leader)) {
// This is a stack because equality replacement/etc may place
// constants in the middle of the member list, and we want to use
// those constant values in preference to the current leader, over
// We ignore void things because we can't get a value from them.
if (Def && Def->getType()->isVoidTy())
continue;
+ auto *DefInst = dyn_cast_or_null<Instruction>(Def);
+ if (DefInst && AllTempInstructions.count(DefInst)) {
+ auto *PN = cast<PHINode>(DefInst);
+
+ // If this is a value phi and that's the expression we used, insert
+ // it into the program and remove it from the temp instruction
+ // list.
+ AllTempInstructions.erase(PN);
+ auto *DefBlock = getBlockForValue(Def);
+ DEBUG(dbgs() << "Inserting fully real phi of ops" << *Def
+ << " into block "
+ << getBlockName(getBlockForValue(Def)) << "\n");
+ PN->insertBefore(&DefBlock->front());
+ Def = PN;
+ NumGVNPHIOfOpsEliminations++;
+ }
if (EliminationStack.empty()) {
DEBUG(dbgs() << "Elimination Stack is empty\n");
}
}
}
-
return AnythingReplaced;
}
// we will simplify an operation with all constants so that it doesn't matter
// what order they appear in.
unsigned int NewGVN::getRank(const Value *V) const {
- // Prefer undef to anything else
+ // Lower ranks are preferred: plain constants, then undef, then constantexprs.
+ // Undef and ConstantExpr are also Constants, so they must be checked first.
+ // Prefer smaller constants to constantexprs
+ if (isa<ConstantExpr>(V))
+ return 2;
if (isa<UndefValue>(V))
- return 0;
- if (isa<Constant>(V))
return 1;
+ if (isa<Constant>(V))
+ return 0;
else if (auto *A = dyn_cast<Argument>(V))
- return 2 + A->getArgNo();
+ return 3 + A->getArgNo();
// Need to shift the instruction DFS by number of arguments + 4 to account for
// the constant and argument ranking above.
unsigned Result = InstrToDFSNum(V);
if (Result > 0)
- return 3 + NumFuncArgs + Result;
+ return 4 + NumFuncArgs + Result;
// Unreachable or something else, just return a really large number.
return ~0;
}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @test1(i32, i8**) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
+; CHECK: br label [[TMP6:%.*]]
+; CHECK: br label [[TMP6]]
+; CHECK: [[TMP7:%.*]] = phi i32 [ 75, [[TMP4]] ], [ 105, [[TMP5]] ]
+; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ]
+; CHECK-NEXT: ret i32 [[TMP7]]
+;
+ %3 = icmp ne i32 %0, 0
+ br i1 %3, label %4, label %5
+
+; <label>:4: ; preds = %2
+ br label %6
+
+; <label>:5: ; preds = %2
+ br label %6
+
+; <label>:6: ; preds = %5, %4
+ %.0 = phi i32 [ 5, %4 ], [ 7, %5 ]
+ %7 = mul nsw i32 %.0, 15
+ ret i32 %7
+}
+
+define i32 @test2(i32) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
+; CHECK: br label [[TMP5:%.*]]
+; CHECK: br label [[TMP5]]
+; CHECK: [[DOT01:%.*]] = phi i32 [ 3, [[TMP3]] ], [ 2, [[TMP4]] ]
+; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ 2, [[TMP3]] ], [ 3, [[TMP4]] ]
+; CHECK-NEXT: ret i32 5
+;
+ %2 = icmp ne i32 %0, 0
+ br i1 %2, label %3, label %4
+
+; <label>:3: ; preds = %1
+ br label %5
+
+; <label>:4: ; preds = %1
+ br label %5
+
+; <label>:5: ; preds = %4, %3
+ %.01 = phi i32 [ 3, %3 ], [ 2, %4 ]
+ %.0 = phi i32 [ 2, %3 ], [ 3, %4 ]
+ %6 = add nsw i32 %.01, %.0
+ ret i32 %6
+}
+define i32 @test3(i1 %which) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ -877, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ]
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1000, [[ENTRY]] ], [ 10, [[DELAY]] ]
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+ %value = sub i32 123, %A
+ ret i32 %value
+}
+
+define <2 x i32> @test3vec(i1 %which) {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 -877, i32 -877>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ]
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 1000, i32 1000>, [[ENTRY]] ], [ <i32 10, i32 10>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[TMP0]]
+;
+
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+ %value = sub <2 x i32> <i32 123, i32 123>, %A
+ ret <2 x i32> %value
+}
+
+define <2 x i32> @test3vec2(i1 %which) {
+; CHECK-LABEL: @test3vec2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 -877, i32 -2167>, [[ENTRY:%.*]] ], [ <i32 113, i32 303>, [[DELAY]] ]
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 1000, i32 2500>, [[ENTRY]] ], [ <i32 10, i32 30>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[TMP0]]
+;
+
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+ %value = sub <2 x i32> <i32 123, i32 333>, %A
+ ret <2 x i32> %value
+}
+
+;; This example is a bit contrived because we can't create fake memoryuses, so we use two loads in the if blocks
+define i32 @test4(i32, i8**, i32* noalias, i32* noalias) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: store i32 5, i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT: store i32 7, i32* [[TMP3:%.*]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
+; CHECK: br label [[TMP8:%.*]]
+; CHECK: br label [[TMP8]]
+; CHECK: [[DOT01:%.*]] = phi i32 [ 5, [[TMP6]] ], [ 7, [[TMP7]] ]
+; CHECK-NEXT: [[DOT0:%.*]] = phi i32* [ [[TMP2]], [[TMP6]] ], [ [[TMP3]], [[TMP7]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOT0]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP9]], 15
+; CHECK-NEXT: [[TMP11:%.*]] = mul nsw i32 [[TMP10]], [[DOT01]]
+; CHECK-NEXT: ret i32 [[TMP11]]
+;
+ store i32 5, i32* %2, align 4
+ store i32 7, i32* %3, align 4
+ %5 = icmp ne i32 %0, 0
+ br i1 %5, label %6, label %8
+
+; <label>:6: ; preds = %4
+ %7 = load i32, i32* %2, align 4
+ br label %10
+
+; <label>:8: ; preds = %4
+ %9 = load i32, i32* %3, align 4
+ br label %10
+
+; <label>:10: ; preds = %8, %6
+ %.01 = phi i32 [ %7, %6 ], [ %9, %8 ]
+ %.0 = phi i32* [ %2, %6 ], [ %3, %8 ]
+ %11 = load i32, i32* %.0, align 4
+ %12 = mul nsw i32 %11, 15
+ %13 = mul nsw i32 %12, %.01
+ ret i32 %13
+}
+
+@global = common global [100 x i64] zeroinitializer, align 16
+@global.1 = common global [100 x i64] zeroinitializer, align 16
+define i64 @test5(i64 %arg) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP:%.*]] = alloca i64, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB28:%.*]], label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB7:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: br label [[BB5:%.*]]
+; CHECK: bb5:
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP9:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[BB27:%.*]], label [[BB7]]
+; CHECK: bb7:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ [[ARG]], [[BB2]] ], [ [[TMP9]], [[BB5]] ]
+; CHECK-NEXT: [[TMP9]] = add nsw i64 [[TMP8]], -1
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global, i64 0, i64 0), align 16
+; CHECK-NEXT: [[TMP11:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global.1, i64 0, i64 0), align 16
+; CHECK-NEXT: [[TMP12:%.*]] = mul nsw i64 [[TMP11]], [[TMP10]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[TMP13]], label [[BB5]], label [[BB14:%.*]]
+; CHECK: bb14:
+; CHECK-NEXT: br label [[BB15:%.*]]
+; CHECK: bb15:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP25:%.*]], [[BB15]] ], [ [[TMP12]], [[BB14]] ]
+; CHECK-NEXT: [[TMP16:%.*]] = phi i64 [ [[TMP24:%.*]], [[BB15]] ], [ [[TMP11]], [[BB14]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = phi i64 [ [[TMP22:%.*]], [[BB15]] ], [ [[TMP10]], [[BB14]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = phi i64 [ [[TMP20:%.*]], [[BB15]] ], [ 0, [[BB14]] ]
+; CHECK-NEXT: store i64 [[TMP0]], i64* [[TMP]], align 8
+; CHECK-NEXT: [[TMP20]] = add nuw nsw i64 [[TMP18]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 [[TMP20]]
+; CHECK-NEXT: [[TMP22]] = load i64, i64* [[TMP21]], align 8
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global.1, i64 0, i64 [[TMP20]]
+; CHECK-NEXT: [[TMP24]] = load i64, i64* [[TMP23]], align 8
+; CHECK-NEXT: [[TMP25]] = mul nsw i64 [[TMP24]], [[TMP22]]
+; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP20]], [[TMP25]]
+; CHECK-NEXT: br i1 [[TMP26]], label [[BB4:%.*]], label [[BB15]]
+; CHECK: bb27:
+; CHECK-NEXT: br label [[BB28]]
+; CHECK: bb28:
+; CHECK-NEXT: ret i64 0
+;
+bb:
+ %tmp = alloca i64, align 8
+ %tmp1 = icmp eq i64 %arg, 0
+ br i1 %tmp1, label %bb28, label %bb2
+
+bb2: ; preds = %bb
+ %tmp3 = bitcast i64* %tmp to i8*
+ br label %bb7
+
+bb4: ; preds = %bb15
+ br label %bb5
+
+bb5: ; preds = %bb7, %bb4
+ %tmp6 = icmp eq i64 %tmp9, 0
+ br i1 %tmp6, label %bb27, label %bb7
+
+bb7: ; preds = %bb5, %bb2
+ %tmp8 = phi i64 [ %arg, %bb2 ], [ %tmp9, %bb5 ]
+ %tmp9 = add nsw i64 %tmp8, -1
+ %tmp10 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global, i64 0, i64 0), align 16
+ %tmp11 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global.1, i64 0, i64 0), align 16
+ %tmp12 = mul nsw i64 %tmp11, %tmp10
+ %tmp13 = icmp eq i64 %tmp12, 0
+ br i1 %tmp13, label %bb5, label %bb14
+
+bb14: ; preds = %bb7
+ br label %bb15
+
+bb15: ; preds = %bb15, %bb14
+ %tmp16 = phi i64 [ %tmp24, %bb15 ], [ %tmp11, %bb14 ]
+ %tmp17 = phi i64 [ %tmp22, %bb15 ], [ %tmp10, %bb14 ]
+ %tmp18 = phi i64 [ %tmp20, %bb15 ], [ 0, %bb14 ]
+;; This multiply is an op of phis which is really equivalent to phi(tmp25, tmp12)
+ %tmp19 = mul nsw i64 %tmp16, %tmp17
+ store i64 %tmp19, i64* %tmp, align 8
+ %tmp20 = add nuw nsw i64 %tmp18, 1
+ %tmp21 = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 %tmp20
+ %tmp22 = load i64, i64* %tmp21, align 8
+ %tmp23 = getelementptr inbounds [100 x i64], [100 x i64]* @global.1, i64 0, i64 %tmp20
+ %tmp24 = load i64, i64* %tmp23, align 8
+ %tmp25 = mul nsw i64 %tmp24, %tmp22
+ %tmp26 = icmp eq i64 %tmp20, %tmp25
+ br i1 %tmp26, label %bb4, label %bb15
+
+bb27: ; preds = %bb5
+ br label %bb28
+
+bb28: ; preds = %bb27, %bb
+ ret i64 0
+}
+
+;; These icmps are all equivalent to phis of constants
+define i8 @test6(i8* %addr) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: entry-block:
+; CHECK-NEXT: br label %main-loop
+; CHECK: main-loop:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ true, %entry-block ], [ false, [[CORE:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %entry-block ], [ true, [[CORE]] ]
+; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ 0, %entry-block ], [ 1, [[CORE]] ]
+; CHECK-NEXT: store volatile i8 0, i8* [[ADDR:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label %busy-wait-phi-0, label [[EXIT:%.*]]
+; CHECK: busy-wait-phi-0:
+; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, i8* [[ADDR]]
+; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0
+; CHECK-NEXT: br i1 [[ICMP]], label %busy-wait-phi-0, label [[CORE]]
+; CHECK: core:
+; CHECK-NEXT: br i1 [[TMP1]], label [[TRAP:%.*]], label %main-loop
+; CHECK: trap:
+; CHECK-NEXT: ret i8 1
+; CHECK: exit:
+; CHECK-NEXT: ret i8 0
+;
+entry-block:
+ br label %main-loop
+
+main-loop:
+ %phi = phi i8 [ 0, %entry-block ], [ 1, %core ]
+ %switch_0 = icmp eq i8 %phi, 0
+ store volatile i8 0, i8* %addr
+ br i1 %switch_0, label %busy-wait-phi-0, label %exit
+
+busy-wait-phi-0:
+ %load = load volatile i8, i8* %addr
+ %icmp = icmp eq i8 %load, 0
+ br i1 %icmp, label %busy-wait-phi-0, label %core
+
+core:
+ %switch_1 = icmp eq i8 %phi, 1
+ br i1 %switch_1, label %trap, label %main-loop
+
+trap:
+ ret i8 1
+
+exit:
+ ret i8 0
+}
+
+; Test that we don't infinite loop simplifying
+; an undefined value that can go both ways.
+define void @test7() {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB1]]
+;
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %tmp = phi i32 [ undef, %bb ], [ %tmp3, %bb1 ]
+ %tmp2 = icmp eq i32 %tmp, 0
+ %tmp3 = select i1 %tmp2, i32 1, i32 %tmp
+ br label %bb1
+}
+
+
+
+; Test that we get a consistent answer about what the
+; value of this undefined select is.
+define void @test8() {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB1]]
+;
+bb:
+ %tmp = select i1 undef, i8 0, i8 1
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %tmp2 = phi i8 [ %tmp4, %bb1 ], [ %tmp, %bb ]
+ %tmp3 = icmp eq i8 %tmp2, 0
+ %tmp4 = select i1 %tmp3, i8 1, i8 %tmp2
+ br label %bb1
+}
+
+
+;; Make sure we handle the case where we later come up with an expression that we need
+;; for a phi of ops.
+define void @test9() {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB6:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ -13, [[BB2]] ], [ [[TMP11:%.*]], [[BB6]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ 1, [[BB2]] ], [ [[TMP8:%.*]], [[BB6]] ]
+; CHECK-NEXT: [[TMP8]] = add nuw nsw i32 [[TMP7]], 1
+; CHECK-NEXT: [[TMP11]] = add i32 -14, [[TMP8]]
+; CHECK-NEXT: br label [[BB6]]
+;
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ br i1 undef, label %bb1, label %bb2
+
+bb2: ; preds = %bb1
+ %tmp = select i1 true, i32 -14, i32 -10
+ %tmp3 = add i32 %tmp, 0
+ %tmp4 = select i1 true, i32 -14, i32 -10
+ %tmp5 = add i32 %tmp4, 0
+ br label %bb6
+
+bb6: ; preds = %bb6, %bb2
+ %tmp7 = phi i32 [ 1, %bb2 ], [ %tmp13, %bb6 ]
+ %tmp8 = add nuw nsw i32 %tmp7, 1
+ %tmp9 = add i32 %tmp3, %tmp7
+ %tmp10 = select i1 false, i32 undef, i32 %tmp9
+ %tmp11 = add i32 %tmp5, %tmp8
+ %tmp12 = select i1 undef, i32 undef, i32 %tmp11
+ %tmp13 = add nuw nsw i32 %tmp7, 1
+ br label %bb6
+}
+
+;; Ensure that we revisit predicateinfo operands at the right points in time.
+define void @test10() {
+b:
+ %m = getelementptr i32, i32* null, i64 8
+ br label %g
+
+g: ; preds = %i, %b
+ %n = phi i32* [ %h, %i ], [ null, %b ]
+ %h = getelementptr i32, i32* %n, i64 1
+ %j = icmp eq i32* %h, %m
+ br i1 %j, label %c, label %i
+
+i: ; preds = %g
+ br i1 undef, label %k, label %g
+
+k: ; preds = %i
+ %l = icmp eq i32* %n, %m
+ br i1 %l, label %c, label %o
+
+o: ; preds = %k
+ br label %c
+
+c: ; preds = %o, %k, %g
+ %0 = phi i32* [ undef, %o ], [ %m, %k ], [ %m, %g ]
+ ret void
+}