#include "llvm/Pass.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
// Turn on use of LazyValueInfo.
static cl::opt<bool>
-EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden);
+EnableLVI("enable-jump-threading-lvi",
+ cl::desc("Use LVI for jump threading"),
+ cl::init(true),
+ cl::ReallyHidden);
#else
SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
#endif
+ DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
public:
static char ID; // Pass identification
- JumpThreading() : FunctionPass(&ID) {}
+ JumpThreading() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
- BasicBlock *PredBB);
+ const SmallVectorImpl<BasicBlock *> &PredBBs);
typedef SmallVectorImpl<std::pair<ConstantInt*,
BasicBlock*> > PredValueInfo;
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
- bool ProcessJumpOnPHI(PHINode *PN);
+ bool ProcessBranchOnPHI(PHINode *PN);
+ bool ProcessBranchOnXOR(BinaryOperator *BO);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
};
}
char JumpThreading::ID = 0;
-static RegisterPass<JumpThreading>
-X("jump-threading", "Jump Threading");
+INITIALIZE_PASS(JumpThreading, "jump-threading",
+ "Jump Threading", false, false);
// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
FindLoopHeaders(F);
- bool AnotherIteration = true, EverChanged = false;
- while (AnotherIteration) {
- AnotherIteration = false;
- bool Changed = false;
+ bool Changed, EverChanged = false;
+ do {
+ Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
BasicBlock *BB = I;
// Thread all of the branches we can over this block.
DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
<< "' with terminator: " << *BB->getTerminator() << '\n');
LoopHeaders.erase(BB);
+ if (LVI) LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
Changed = true;
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
BasicBlock *Succ = BI->getSuccessor(0);
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ if (LVI) LVI->eraseBlock(BB);
if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
Changed = true;
// If we deleted BB and BB was the header of a loop, then the
}
}
}
- AnotherIteration = Changed;
EverChanged |= Changed;
- }
+ } while (Changed);
LoopHeaders.clear();
return EverChanged;
if (isa<DbgInfoIntrinsic>(I)) continue;
// If this is a pointer->pointer bitcast, it is free.
- if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
+ if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
continue;
// All other instructions count for at least one unit.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
if (!isa<IntrinsicInst>(CI))
Size += 3;
- else if (!isa<VectorType>(CI->getType()))
+ else if (!CI->getType()->isVectorTy())
Size += 1;
}
}
///
bool JumpThreading::
ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+ return false;
+
// If V is a constantint, then it is known in all predecessors.
if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
ConstantInt *CI = dyn_cast<ConstantInt>(V);
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
Result.push_back(std::make_pair(CI, *PI));
+
+ RecursionSet.erase(std::make_pair(V, BB));
return true;
}
// Perhaps getConstantOnEdge should be smart enough to do this?
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
- Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB);
+ Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
if (PredCst == 0 ||
(!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
continue;
- Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI));
+ Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
}
+ RecursionSet.erase(std::make_pair(V, BB));
return !Result.empty();
}
+ RecursionSet.erase(std::make_pair(V, BB));
return false;
}
if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+ } else if (LVI) {
+ Constant *CI = LVI->getConstantOnEdge(InVal,
+ PN->getIncomingBlock(i), BB);
+ // LVI returns null is no value could be determined.
+ if (!CI) continue;
+ if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI))
+ Result.push_back(std::make_pair(CInt, PN->getIncomingBlock(i)));
+ else if (isa<UndefValue>(CI))
+ Result.push_back(std::make_pair((ConstantInt*)0,
+ PN->getIncomingBlock(i)));
}
}
+
+ RecursionSet.erase(std::make_pair(V, BB));
return !Result.empty();
}
ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals);
- if (LHSVals.empty() && RHSVals.empty())
+ if (LHSVals.empty() && RHSVals.empty()) {
+ RecursionSet.erase(std::make_pair(V, BB));
return false;
+ }
ConstantInt *InterestingVal;
if (I->getOpcode() == Instruction::Or)
else
InterestingVal = ConstantInt::getFalse(I->getContext());
- // Scan for the sentinel.
+ SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
+
+ // Scan for the sentinel. If we find an undef, force it to the
+ // interesting value: x|undef -> true and x&undef -> false.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
- if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0)
+ if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
Result.push_back(LHSVals[i]);
+ Result.back().first = InterestingVal;
+ LHSKnownBBs.insert(LHSVals[i].second);
+ }
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
- if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0)
- Result.push_back(RHSVals[i]);
+ if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
+ // If we already inferred a value for this block on the LHS, don't
+ // re-add it.
+ if (!LHSKnownBBs.count(RHSVals[i].second)) {
+ Result.push_back(RHSVals[i]);
+ Result.back().first = InterestingVal;
+ }
+ }
+
+ RecursionSet.erase(std::make_pair(V, BB));
return !Result.empty();
}
isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isOne()) {
ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result);
- if (Result.empty())
+ if (Result.empty()) {
+ RecursionSet.erase(std::make_pair(V, BB));
return false;
+ }
// Invert the known values.
for (unsigned i = 0, e = Result.size(); i != e; ++i)
if (Result[i].first)
Result[i].first =
cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+
+ RecursionSet.erase(std::make_pair(V, BB));
return true;
}
+
+ // Try to simplify some other binary operator values.
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1));
+ if (CI) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
+
+ // Try to use constant folding to simplify the binary operator.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *Folded = 0;
+ if (LHSVals[i].first == 0) {
+ Folded = ConstantExpr::get(BO->getOpcode(),
+ UndefValue::get(BO->getType()),
+ CI);
+ } else {
+ Folded = ConstantExpr::get(BO->getOpcode(), LHSVals[i].first, CI);
+ }
+
+ if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Folded))
+ Result.push_back(std::make_pair(FoldedCInt, LHSVals[i].second));
+ else if (isa<UndefValue>(Folded))
+ Result.push_back(std::make_pair((ConstantInt*)0, LHSVals[i].second));
+ }
+ }
+
+ RecursionSet.erase(std::make_pair(V, BB));
+ return !Result.empty();
}
// Handle compare with phi operand, where the PHI is defined in this block.
Result.push_back(std::make_pair(CI, PredBB));
}
+ RecursionSet.erase(std::make_pair(V, BB));
return !Result.empty();
}
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
- Cmp->getType()->isInteger() && // Not vector compare.
- (!isa<Instruction>(Cmp->getOperand(0)) ||
- cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
- Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
-
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- // If the value is known by LazyValueInfo to be a constant in a
- // predecessor, use that information to try to thread this block.
- LazyValueInfo::Tristate
- Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
- RHSCst, *PI, BB);
- if (Res == LazyValueInfo::Unknown)
- continue;
+ Cmp->getType()->isIntegerTy()) {
+ if (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate Res =
+ LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, P, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
- Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
- Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ }
+
+ RecursionSet.erase(std::make_pair(V, BB));
+ return !Result.empty();
}
- return !Result.empty();
+ // Try to find a constant value for the LHS of a comparison,
+ // and evaluate it statically if we can.
+ if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant * Folded = 0;
+ if (LHSVals[i].first == 0)
+ Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
+ UndefValue::get(CmpConst->getType()), CmpConst);
+ else
+ Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
+ LHSVals[i].first, CmpConst);
+
+ if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Folded))
+ Result.push_back(std::make_pair(FoldedCInt, LHSVals[i].second));
+ else if (isa<UndefValue>(Folded))
+ Result.push_back(std::make_pair((ConstantInt*)0,LHSVals[i].second));
+ }
+
+ RecursionSet.erase(std::make_pair(V, BB));
+ return !Result.empty();
+ }
+ }
+ }
+
+ if (LVI) {
+ // If all else fails, see if LVI can figure out a constant value for us.
+ Constant *CI = LVI->getConstant(V, BB);
+ ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI);
+ if (CInt) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CInt, *PI));
}
+
+ RecursionSet.erase(std::make_pair(V, BB));
+ return !Result.empty();
}
+
+ RecursionSet.erase(std::make_pair(V, BB));
return false;
}
/// ProcessBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
bool JumpThreading::ProcessBlock(BasicBlock *BB) {
+ // If the block is trivially dead, just return and let the caller nuke it.
+ // This simplifies other transformations.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock())
+ return false;
+
// If this block has a single predecessor, and if that pred has a single
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ if (LVI) LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
(CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
if (isa<BranchInst>(BB->getTerminator())) {
- for (; PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
if (PBI->isConditional() && PBI->getCondition() == Condition &&
- ProcessBranchOnDuplicateCond(*PI, BB))
+ ProcessBranchOnDuplicateCond(P, BB))
return true;
+ }
} else {
assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
- for (; PI != E; ++PI)
- if (SwitchInst *PSI = dyn_cast<SwitchInst>((*PI)->getTerminator()))
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator()))
if (PSI->getCondition() == Condition &&
- ProcessSwitchOnDuplicateCond(*PI, BB))
+ ProcessSwitchOnDuplicateCond(P, BB))
return true;
+ }
}
}
}
- // See if this is a phi node in the current block.
- if (PHINode *PN = dyn_cast<PHINode>(CondInst))
- if (PN->getParent() == BB)
- return ProcessJumpOnPHI(PN);
-
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
if (!LVI &&
(!isa<PHINode>(CondCmp->getOperand(0)) ||
// If we have a comparison, loop over the predecessors to see if there is
// a condition with a lexically identical value.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- for (; PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- if (PBI->isConditional() && *PI != BB) {
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
+ if (PBI->isConditional() && P != BB) {
if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
if (CI->getOperand(0) == CondCmp->getOperand(0) &&
CI->getOperand(1) == CondCmp->getOperand(1) &&
CI->getPredicate() == CondCmp->getPredicate()) {
// TODO: Could handle things like (x != 4) --> (x == 17)
- if (ProcessBranchOnDuplicateCond(*PI, BB))
+ if (ProcessBranchOnDuplicateCond(P, BB))
return true;
}
}
}
+ }
+ }
+
+ // For a comparison where the LHS is outside this block, it's possible
+ // that we've branched on it before. Used LVI to see if we can simplify
+ // the branch based on that.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
+ if (LVI && CondBr && CondConst && CondBr->isConditional() &&
+ (!isa<Instruction>(CondCmp->getOperand(0)) ||
+ cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // For predecessor edge, determine if the comparison is true or false
+ // on that edge. If they're all true or all false, we can simplify the
+ // branch.
+ // FIXME: We could handle mixed true/false by duplicating code.
+ unsigned Trues = 0, Falses = 0, predcount = 0;
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);PI != PE; ++PI){
+ ++predcount;
+ LazyValueInfo::Tristate Ret =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(),
+ CondCmp->getOperand(0), CondConst, *PI, BB);
+ if (Ret == LazyValueInfo::True)
+ ++Trues;
+ else if (Ret == LazyValueInfo::False)
+ ++Falses;
+ }
+
+ // If we can determine the branch direction statically, convert
+ // the conditional branch to an unconditional one.
+ if (Trues && Trues == predcount) {
+ RemovePredecessorAndSimplify(CondBr->getSuccessor(1), BB, TD);
+ BranchInst::Create(CondBr->getSuccessor(0), CondBr);
+ CondBr->eraseFromParent();
+ return true;
+ } else if (Falses && Falses == predcount) {
+ RemovePredecessorAndSimplify(CondBr->getSuccessor(0), BB, TD);
+ BranchInst::Create(CondBr->getSuccessor(1), CondBr);
+ CondBr->eraseFromParent();
+ return true;
+ }
}
}
// we see one, check to see if it's partially redundant. If so, insert a PHI
// which can then be used to thread the values.
//
- // This is particularly important because reg2mem inserts loads and stores all
- // over the place, and this blocks jump threading if we don't zap them.
Value *SimplifyValue = CondInst;
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
if (isa<Constant>(CondCmp->getOperand(1)))
if (ProcessThreadableEdges(CondInst, BB))
return true;
+ // If this is an otherwise-unfoldable branch on a phi node in the current
+ // block, see if we can simplify.
+ if (PHINode *PN = dyn_cast<PHINode>(CondInst))
+ if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnPHI(PN);
+
+
+ // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
+ if (CondInst->getOpcode() == Instruction::Xor &&
+ CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
+ return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+
// TODO: If we have: "br (X > 0)" and we have a predecessor where we know
- // "(X == 4)" thread through this block.
+ // "(X == 4)", thread through this block.
return false;
}
Value *OldCond = DestBI->getCondition();
DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
BranchDir));
- ConstantFoldTerminator(BB);
+ // Delete dead instructions before we fold the branch. Folding the branch
+ // can eliminate edges from the CFG which can end up deleting OldCond.
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ ConstantFoldTerminator(BB);
return true;
}
// Add all the unavailable predecessors to the PredsToSplit list.
for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PI != PE; ++PI)
- if (!AvailablePredSet.count(*PI))
- PredsToSplit.push_back(*PI);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ // If the predecessor is an indirect goto, we can't split the edge.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return false;
+
+ if (!AvailablePredSet.count(P))
+ PredsToSplit.push_back(P);
+ }
// Split them out to their own block.
UnavailablePred =
// have multiple entries here.
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
++PI) {
+ BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
- std::make_pair(*PI, (Value*)0));
+ std::make_pair(P, (Value*)0));
- assert(I != AvailablePreds.end() && I->first == *PI &&
+ assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
PN->addIncoming(I->second, I->first);
return false;
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
- if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues)) {
return false;
+ }
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
return ThreadEdge(BB, PredsToFactor, MostPopularDest);
}
-/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
-/// the current block. See if there are any simplifications we can do based on
-/// inputs to the phi node.
+/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
+/// a PHI node in the current block. See if there are any simplifications we
+/// can do based on inputs to the phi node.
///
-bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
+bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent();
- // If any of the predecessor blocks end in an unconditional branch, we can
- // *duplicate* the jump into that block in order to further encourage jump
- // threading and to eliminate cases where we have branch on a phi of an icmp
- // (branch on icmp is much better).
-
- // We don't want to do this tranformation for switches, because we don't
- // really want to duplicate a switch.
- if (isa<SwitchInst>(BB->getTerminator()))
- return false;
+ // TODO: We could make use of this to do it once for blocks with common PHI
+ // values.
+ SmallVector<BasicBlock*, 1> PredBBs;
+ PredBBs.resize(1);
- // Look for unconditional branch predecessors.
+ // If any of the predecessor blocks end in an unconditional branch, we can
+ // *duplicate* the conditional branch into that block in order to further
+ // encourage jump threading and to eliminate cases where we have branch on a
+ // phi of an icmp (branch on icmp is much better).
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PredBB = PN->getIncomingBlock(i);
if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
- if (PredBr->isUnconditional() &&
- // Try to duplicate BB into PredBB.
- DuplicateCondBranchOnPHIIntoPred(BB, PredBB))
- return true;
+ if (PredBr->isUnconditional()) {
+ PredBBs[0] = PredBB;
+ // Try to duplicate BB into PredBB.
+ if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
+ return true;
+ }
}
return false;
}
+/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
+/// a xor instruction in the current block. See if there are any
+/// simplifications we can do based on inputs to the xor.
+///
+bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
+ BasicBlock *BB = BO->getParent();
+
+ // If either the LHS or RHS of the xor is a constant, don't do this
+ // optimization.
+ if (isa<ConstantInt>(BO->getOperand(0)) ||
+ isa<ConstantInt>(BO->getOperand(1)))
+ return false;
+
+ // If the first instruction in BB isn't a phi, we won't be able to infer
+ // anything special about any particular predecessor.
+ if (!isa<PHINode>(BB->front()))
+ return false;
+
+ // If we have a xor as the branch input to this block, and we know that the
+ // LHS or RHS of the xor in any predecessor is true/false, then we can clone
+ // the condition into the predecessor and fix that value to true, saving some
+ // logical ops on that path and encouraging other paths to simplify.
+ //
+ // This copies something like this:
+ //
+ // BB:
+ // %X = phi i1 [1], [%X']
+ // %Y = icmp eq i32 %A, %B
+ // %Z = xor i1 %X, %Y
+ // br i1 %Z, ...
+ //
+ // Into:
+ // BB':
+ // %Y = icmp ne i32 %A, %B
+ // br i1 %Z, ...
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues;
+ bool isLHS = true;
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
+ assert(XorOpValues.empty());
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
+ return false;
+ isLHS = false;
+ }
+
+ assert(!XorOpValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ // Scan the information to see which is most popular: true or false. The
+ // predecessors can be of the set true, false, or undef.
+ unsigned NumTrue = 0, NumFalse = 0;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (!XorOpValues[i].first) continue; // Ignore undefs for the count.
+ if (XorOpValues[i].first->isZero())
+ ++NumFalse;
+ else
+ ++NumTrue;
+ }
+
+ // Determine which value to split on, true, false, or undef if neither.
+ ConstantInt *SplitVal = 0;
+ if (NumTrue > NumFalse)
+ SplitVal = ConstantInt::getTrue(BB->getContext());
+ else if (NumTrue != 0 || NumFalse != 0)
+ SplitVal = ConstantInt::getFalse(BB->getContext());
+
+ // Collect all of the blocks that this can be folded into so that we can
+ // factor this once and clone it once.
+ SmallVector<BasicBlock*, 8> BlocksToFoldInto;
+ for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
+ if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue;
+
+ BlocksToFoldInto.push_back(XorOpValues[i].second);
+ }
+
+ // If we inferred a value for all of the predecessors, then duplication won't
+ // help us. However, we can just replace the LHS or RHS with the constant.
+ if (BlocksToFoldInto.size() ==
+ cast<PHINode>(BB->front()).getNumIncomingValues()) {
+ if (SplitVal == 0) {
+ // If all preds provide undef, just nuke the xor, because it is undef too.
+ BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
+ BO->eraseFromParent();
+ } else if (SplitVal->isZero()) {
+ // If all preds provide 0, replace the xor with the other input.
+ BO->replaceAllUsesWith(BO->getOperand(isLHS));
+ BO->eraseFromParent();
+ } else {
+ // If all preds provide 1, set the computed value to 1.
+ BO->setOperand(!isLHS, SplitVal);
+ }
+
+ return true;
+ }
+
+ // Try to duplicate BB into PredBB.
+ return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
+}
+
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
<< ", across block:\n "
<< *BB << "\n");
+ if (LVI)
+ LVI->threadEdge(PredBB, BB, SuccBB);
+
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- BI = NewBB->begin();
- for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
- Instruction *Inst = BI++;
-
- if (Value *V = SimplifyInstruction(Inst, TD)) {
- WeakVH BIHandle(BI);
- ReplaceAndSimplifyAllUses(Inst, V, TD);
- if (BIHandle == 0)
- BI = NewBB->begin();
- continue;
- }
-
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
- }
+ SimplifyInstructionsInBlock(NewBB, TD);
// Threaded an edge!
++NumThreads;
/// improves the odds that the branch will be on an analyzable instruction like
/// a compare.
bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
- BasicBlock *PredBB) {
+ const SmallVectorImpl<BasicBlock *> &PredBBs) {
+ assert(!PredBBs.empty() && "Can't handle an empty set");
+
// If BB is a loop header, then duplicating this block outside the loop would
// cause us to transform this into an irreducible loop, don't do this.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
- << "' into predecessor block '" << PredBB->getName()
+ << "' into predecessor block '" << PredBBs[0]->getName()
<< "' - it might create an irreducible loop!\n");
return false;
}
return false;
}
+ // And finally, do it! Start by factoring the predecessors is needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(dbgs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
// Okay, we decided to do this! Clone all the instructions in BB onto the end
// of PredBB.
DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
<< PredBB->getName() << "' to eliminate branch on phi. Cost: "
<< DuplicationCost << " block is:" << *BB << "\n");
+ // Unless PredBB ends with an unconditional branch, split the edge so that we
+ // can just clone the bits from BB into the end of the new PredBB.
+ BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
+
+ if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
+ PredBB = SplitEdge(PredBB, BB, this);
+ OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+ }
+
// We are going to have to map operands from the original BB block into the
// PredBB block. Evaluate PHI nodes in BB.
DenseMap<Instruction*, Value*> ValueMapping;
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
- BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
-
// Clone the non-phi instructions of BB into PredBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) {
Instruction *New = BI->clone();
- New->setName(BI->getName());
- PredBB->getInstList().insert(OldPredBranch, New);
- ValueMapping[BI] = New;
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (I != ValueMapping.end())
New->setOperand(i, I->second);
}
+
+ // If this instruction can be simplified after the operands are updated,
+ // just use the simplified value instead. This frequently happens due to
+ // phi translation.
+ if (Value *IV = SimplifyInstruction(New, TD)) {
+ delete New;
+ ValueMapping[BI] = IV;
+ } else {
+ // Otherwise, insert the new instruction into the block.
+ New->setName(BI->getName());
+ PredBB->getInstList().insert(OldPredBranch, New);
+ ValueMapping[BI] = New;
+ }
}
// Check to see if the targets of the branch had PHI nodes. If so, we need to