X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FJumpThreading.cpp;h=afc1661bf8eb0d1b272da6f0b55cd109e9100765;hb=cb21190cbd8ae1aece5b833ebe3814ada4260627;hp=fadbec58d7af8a99c392e80d479e22f0bffa5657;hpb=fe7fe6603fea2ca121282cd329f5300ea7d9fd51;p=android-x86%2Fexternal-llvm.git diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index fadbec58d7a..afc1661bf8e 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -18,11 +18,13 @@ #include "llvm/Pass.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -44,7 +46,10 @@ Threshold("jump-threading-threshold", // Turn on use of LazyValueInfo. static cl::opt -EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden); +EnableLVI("enable-jump-threading-lvi", + cl::desc("Use LVI for jump threading"), + cl::init(true), + cl::ReallyHidden); @@ -73,9 +78,10 @@ namespace { #else SmallSet, 16> LoopHeaders; #endif + DenseSet > RecursionSet; public: static char ID; // Pass identification - JumpThreading() : FunctionPass(&ID) {} + JumpThreading() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -89,7 +95,7 @@ namespace { bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl &PredBBs, BasicBlock *SuccBB); bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, - BasicBlock *PredBB); + const SmallVectorImpl &PredBBs); typedef SmallVectorImpl > PredValueInfo; @@ -102,15 +108,16 @@ namespace { bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); - bool ProcessJumpOnPHI(PHINode *PN); + bool ProcessBranchOnPHI(PHINode *PN); + bool ProcessBranchOnXOR(BinaryOperator *BO); bool SimplifyPartiallyRedundantLoad(LoadInst *LI); }; } char JumpThreading::ID = 0; -static RegisterPass -X("jump-threading", "Jump Threading"); +INITIALIZE_PASS(JumpThreading, "jump-threading", + "Jump Threading", false, false); // Public interface to the Jump Threading pass FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } @@ -124,10 +131,9 @@ bool JumpThreading::runOnFunction(Function &F) { FindLoopHeaders(F); - bool AnotherIteration = true, EverChanged = false; - while (AnotherIteration) { - AnotherIteration = false; - bool Changed = false; + bool Changed, EverChanged = false; + do { + Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E;) { BasicBlock *BB = I; // Thread all of the branches we can over this block. @@ -143,6 +149,7 @@ bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName() << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); + if (LVI) LVI->eraseBlock(BB); DeleteDeadBlock(BB); Changed = true; } else if (BranchInst *BI = dyn_cast(BB->getTerminator())) { @@ -163,6 +170,11 @@ bool JumpThreading::runOnFunction(Function &F) { bool ErasedFromLoopHeaders = LoopHeaders.erase(BB); BasicBlock *Succ = BI->getSuccessor(0); + // FIXME: It is always conservatively correct to drop the info + // for a block even if it doesn't get erased. This isn't totally + // awesome, but it allows us to use AssertingVH to prevent nasty + // dangling pointer issues within LazyValueInfo. + if (LVI) LVI->eraseBlock(BB); if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) { Changed = true; // If we deleted BB and BB was the header of a loop, then the @@ -176,9 +188,8 @@ bool JumpThreading::runOnFunction(Function &F) { } } } - AnotherIteration = Changed; EverChanged |= Changed; - } + } while (Changed); LoopHeaders.clear(); return EverChanged; @@ -202,7 +213,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { if (isa(I)) continue; // If this is a pointer->pointer bitcast, it is free. - if (isa(I) && isa(I->getType())) + if (isa(I) && I->getType()->isPointerTy()) continue; // All other instructions count for at least one unit. @@ -215,7 +226,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { if (const CallInst *CI = dyn_cast(I)) { if (!isa(CI)) Size += 3; - else if (!isa(CI->getType())) + else if (!CI->getType()->isVectorTy()) Size += 1; } } @@ -260,12 +271,17 @@ void JumpThreading::FindLoopHeaders(Function &F) { /// bool JumpThreading:: ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ + if (!RecursionSet.insert(std::make_pair(V, BB)).second) + return false; + // If V is a constantint, then it is known in all predecessors. if (isa(V) || isa(V)) { ConstantInt *CI = dyn_cast(V); for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) Result.push_back(std::make_pair(CI, *PI)); + + RecursionSet.erase(std::make_pair(V, BB)); return true; } @@ -289,19 +305,22 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // Perhaps getConstantOnEdge should be smart enough to do this? for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. - Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB); + Constant *PredCst = LVI->getConstantOnEdge(V, P, BB); if (PredCst == 0 || (!isa(PredCst) && !isa(PredCst))) continue; - Result.push_back(std::make_pair(dyn_cast(PredCst), *PI)); + Result.push_back(std::make_pair(dyn_cast(PredCst), P)); } + RecursionSet.erase(std::make_pair(V, BB)); return !Result.empty(); } + RecursionSet.erase(std::make_pair(V, BB)); return false; } @@ -312,8 +331,20 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ if (isa(InVal) || isa(InVal)) { ConstantInt *CI = dyn_cast(InVal); Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i))); + } else if (LVI) { + Constant *CI = LVI->getConstantOnEdge(InVal, + PN->getIncomingBlock(i), BB); + // LVI returns null is no value could be determined. + if (!CI) continue; + if (ConstantInt *CInt = dyn_cast(CI)) + Result.push_back(std::make_pair(CInt, PN->getIncomingBlock(i))); + else if (isa(CI)) + Result.push_back(std::make_pair((ConstantInt*)0, + PN->getIncomingBlock(i))); } } + + RecursionSet.erase(std::make_pair(V, BB)); return !Result.empty(); } @@ -328,8 +359,10 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals); ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals); - if (LHSVals.empty() && RHSVals.empty()) + if (LHSVals.empty() && RHSVals.empty()) { + RecursionSet.erase(std::make_pair(V, BB)); return false; + } ConstantInt *InterestingVal; if (I->getOpcode() == Instruction::Or) @@ -337,13 +370,27 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ else InterestingVal = ConstantInt::getFalse(I->getContext()); - // Scan for the sentinel. + SmallPtrSet LHSKnownBBs; + + // Scan for the sentinel. If we find an undef, force it to the + // interesting value: x|undef -> true and x&undef -> false. for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) - if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) + if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) { Result.push_back(LHSVals[i]); + Result.back().first = InterestingVal; + LHSKnownBBs.insert(LHSVals[i].second); + } for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) - if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) - Result.push_back(RHSVals[i]); + if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) { + // If we already inferred a value for this block on the LHS, don't + // re-add it. + if (!LHSKnownBBs.count(RHSVals[i].second)) { + Result.push_back(RHSVals[i]); + Result.back().first = InterestingVal; + } + } + + RecursionSet.erase(std::make_pair(V, BB)); return !Result.empty(); } @@ -352,16 +399,48 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ isa(I->getOperand(1)) && cast(I->getOperand(1))->isOne()) { ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result); - if (Result.empty()) + if (Result.empty()) { + RecursionSet.erase(std::make_pair(V, BB)); return false; + } // Invert the known values. for (unsigned i = 0, e = Result.size(); i != e; ++i) if (Result[i].first) Result[i].first = cast(ConstantExpr::getNot(Result[i].first)); + + RecursionSet.erase(std::make_pair(V, BB)); return true; } + + // Try to simplify some other binary operator values. + } else if (BinaryOperator *BO = dyn_cast(I)) { + ConstantInt *CI = dyn_cast(BO->getOperand(1)); + if (CI) { + SmallVector, 8> LHSVals; + ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals); + + // Try to use constant folding to simplify the binary operator. + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant *Folded = 0; + if (LHSVals[i].first == 0) { + Folded = ConstantExpr::get(BO->getOpcode(), + UndefValue::get(BO->getType()), + CI); + } else { + Folded = ConstantExpr::get(BO->getOpcode(), LHSVals[i].first, CI); + } + + if (ConstantInt *FoldedCInt = dyn_cast(Folded)) + Result.push_back(std::make_pair(FoldedCInt, LHSVals[i].second)); + else if (isa(Folded)) + Result.push_back(std::make_pair((ConstantInt*)0, LHSVals[i].second)); + } + } + + RecursionSet.erase(std::make_pair(V, BB)); + return !Result.empty(); } // Handle compare with phi operand, where the PHI is defined in this block. @@ -394,6 +473,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ Result.push_back(std::make_pair(CI, PredBB)); } + RecursionSet.erase(std::make_pair(V, BB)); return !Result.empty(); } @@ -401,27 +481,70 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. if (LVI && isa(Cmp->getOperand(1)) && - Cmp->getType()->isInteger() && // Not vector compare. - (!isa(Cmp->getOperand(0)) || - cast(Cmp->getOperand(0))->getParent() != BB)) { - Constant *RHSCst = cast(Cmp->getOperand(1)); - - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - // If the value is known by LazyValueInfo to be a constant in a - // predecessor, use that information to try to thread this block. - LazyValueInfo::Tristate - Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), - RHSCst, *PI, BB); - if (Res == LazyValueInfo::Unknown) - continue; + Cmp->getType()->isIntegerTy()) { + if (!isa(Cmp->getOperand(0)) || + cast(Cmp->getOperand(0))->getParent() != BB) { + Constant *RHSCst = cast(Cmp->getOperand(1)); + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){ + BasicBlock *P = *PI; + // If the value is known by LazyValueInfo to be a constant in a + // predecessor, use that information to try to thread this block. + LazyValueInfo::Tristate Res = + LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), + RHSCst, P, BB); + if (Res == LazyValueInfo::Unknown) + continue; - Constant *ResC = ConstantInt::get(Cmp->getType(), Res); - Result.push_back(std::make_pair(cast(ResC), *PI)); + Constant *ResC = ConstantInt::get(Cmp->getType(), Res); + Result.push_back(std::make_pair(cast(ResC), P)); + } + + RecursionSet.erase(std::make_pair(V, BB)); + return !Result.empty(); } - return !Result.empty(); + // Try to find a constant value for the LHS of a comparison, + // and evaluate it statically if we can. + if (Constant *CmpConst = dyn_cast(Cmp->getOperand(1))) { + SmallVector, 8> LHSVals; + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals); + + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant * Folded = 0; + if (LHSVals[i].first == 0) + Folded = ConstantExpr::getCompare(Cmp->getPredicate(), + UndefValue::get(CmpConst->getType()), CmpConst); + else + Folded = ConstantExpr::getCompare(Cmp->getPredicate(), + LHSVals[i].first, CmpConst); + + if (ConstantInt *FoldedCInt = dyn_cast(Folded)) + Result.push_back(std::make_pair(FoldedCInt, LHSVals[i].second)); + else if (isa(Folded)) + Result.push_back(std::make_pair((ConstantInt*)0,LHSVals[i].second)); + } + + RecursionSet.erase(std::make_pair(V, BB)); + return !Result.empty(); + } + } + } + + if (LVI) { + // If all else fails, see if LVI can figure out a constant value for us. + Constant *CI = LVI->getConstant(V, BB); + ConstantInt *CInt = dyn_cast_or_null(CI); + if (CInt) { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Result.push_back(std::make_pair(CInt, *PI)); } + + RecursionSet.erase(std::make_pair(V, BB)); + return !Result.empty(); } + + RecursionSet.erase(std::make_pair(V, BB)); return false; } @@ -452,6 +575,12 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { /// ProcessBlock - If there are any predecessors whose control can be threaded /// through to a successor, transform them now. bool JumpThreading::ProcessBlock(BasicBlock *BB) { + // If the block is trivially dead, just return and let the caller nuke it. + // This simplifies other transformations. + if (pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) + return false; + // If this block has a single predecessor, and if that pred has a single // successor, merge the blocks. This encourages recursive jump threading // because now the condition in this block can be threaded through @@ -466,6 +595,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + if (LVI) LVI->eraseBlock(SinglePred); MergeBasicBlockIntoOnlyPred(BB); if (isEntry && BB != &BB->getParent()->getEntryBlock()) @@ -528,18 +658,22 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition. pred_iterator PI = pred_begin(BB), E = pred_end(BB); if (isa(BB->getTerminator())) { - for (; PI != E; ++PI) - if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) + for (; PI != E; ++PI) { + BasicBlock *P = *PI; + if (BranchInst *PBI = dyn_cast(P->getTerminator())) if (PBI->isConditional() && PBI->getCondition() == Condition && - ProcessBranchOnDuplicateCond(*PI, BB)) + ProcessBranchOnDuplicateCond(P, BB)) return true; + } } else { assert(isa(BB->getTerminator()) && "Unknown jump terminator"); - for (; PI != E; ++PI) - if (SwitchInst *PSI = dyn_cast((*PI)->getTerminator())) + for (; PI != E; ++PI) { + BasicBlock *P = *PI; + if (SwitchInst *PSI = dyn_cast(P->getTerminator())) if (PSI->getCondition() == Condition && - ProcessSwitchOnDuplicateCond(*PI, BB)) + ProcessSwitchOnDuplicateCond(P, BB)) return true; + } } } @@ -552,11 +686,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } - // See if this is a phi node in the current block. - if (PHINode *PN = dyn_cast(CondInst)) - if (PN->getParent() == BB) - return ProcessJumpOnPHI(PN); - if (CmpInst *CondCmp = dyn_cast(CondInst)) { if (!LVI && (!isa(CondCmp->getOperand(0)) || @@ -564,19 +693,60 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // If we have a comparison, loop over the predecessors to see if there is // a condition with a lexically identical value. pred_iterator PI = pred_begin(BB), E = pred_end(BB); - for (; PI != E; ++PI) - if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) - if (PBI->isConditional() && *PI != BB) { + for (; PI != E; ++PI) { + BasicBlock *P = *PI; + if (BranchInst *PBI = dyn_cast(P->getTerminator())) + if (PBI->isConditional() && P != BB) { if (CmpInst *CI = dyn_cast(PBI->getCondition())) { if (CI->getOperand(0) == CondCmp->getOperand(0) && CI->getOperand(1) == CondCmp->getOperand(1) && CI->getPredicate() == CondCmp->getPredicate()) { // TODO: Could handle things like (x != 4) --> (x == 17) - if (ProcessBranchOnDuplicateCond(*PI, BB)) + if (ProcessBranchOnDuplicateCond(P, BB)) return true; } } } + } + } + + // For a comparison where the LHS is outside this block, it's possible + // that we've branched on it before. Used LVI to see if we can simplify + // the branch based on that. + BranchInst *CondBr = dyn_cast(BB->getTerminator()); + Constant *CondConst = dyn_cast(CondCmp->getOperand(1)); + if (LVI && CondBr && CondConst && CondBr->isConditional() && + (!isa(CondCmp->getOperand(0)) || + cast(CondCmp->getOperand(0))->getParent() != BB)) { + // For predecessor edge, determine if the comparison is true or false + // on that edge. If they're all true or all false, we can simplify the + // branch. + // FIXME: We could handle mixed true/false by duplicating code. + unsigned Trues = 0, Falses = 0, predcount = 0; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);PI != PE; ++PI){ + ++predcount; + LazyValueInfo::Tristate Ret = + LVI->getPredicateOnEdge(CondCmp->getPredicate(), + CondCmp->getOperand(0), CondConst, *PI, BB); + if (Ret == LazyValueInfo::True) + ++Trues; + else if (Ret == LazyValueInfo::False) + ++Falses; + } + + // If we can determine the branch direction statically, convert + // the conditional branch to an unconditional one. + if (Trues && Trues == predcount) { + RemovePredecessorAndSimplify(CondBr->getSuccessor(1), BB, TD); + BranchInst::Create(CondBr->getSuccessor(0), CondBr); + CondBr->eraseFromParent(); + return true; + } else if (Falses && Falses == predcount) { + RemovePredecessorAndSimplify(CondBr->getSuccessor(0), BB, TD); + BranchInst::Create(CondBr->getSuccessor(1), CondBr); + CondBr->eraseFromParent(); + return true; + } } } @@ -585,8 +755,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // we see one, check to see if it's partially redundant. If so, insert a PHI // which can then be used to thread the values. // - // This is particularly important because reg2mem inserts loads and stores all - // over the place, and this blocks jump threading if we don't zap them. Value *SimplifyValue = CondInst; if (CmpInst *CondCmp = dyn_cast(SimplifyValue)) if (isa(CondCmp->getOperand(1))) @@ -606,9 +774,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { if (ProcessThreadableEdges(CondInst, BB)) return true; + // If this is an otherwise-unfoldable branch on a phi node in the current + // block, see if we can simplify. + if (PHINode *PN = dyn_cast(CondInst)) + if (PN->getParent() == BB && isa(BB->getTerminator())) + return ProcessBranchOnPHI(PN); + + + // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify. + if (CondInst->getOpcode() == Instruction::Xor && + CondInst->getParent() == BB && isa(BB->getTerminator())) + return ProcessBranchOnXOR(cast(CondInst)); + // TODO: If we have: "br (X > 0)" and we have a predecessor where we know - // "(X == 4)" thread through this block. + // "(X == 4)", thread through this block. return false; } @@ -655,8 +835,10 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, Value *OldCond = DestBI->getCondition(); DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), BranchDir)); - ConstantFoldTerminator(BB); + // Delete dead instructions before we fold the branch. Folding the branch + // can eliminate edges from the CFG which can end up deleting OldCond. RecursivelyDeleteTriviallyDeadInstructions(OldCond); + ConstantFoldTerminator(BB); return true; } @@ -852,9 +1034,15 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Add all the unavailable predecessors to the PredsToSplit list. for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); - PI != PE; ++PI) - if (!AvailablePredSet.count(*PI)) - PredsToSplit.push_back(*PI); + PI != PE; ++PI) { + BasicBlock *P = *PI; + // If the predecessor is an indirect goto, we can't split the edge. + if (isa(P->getTerminator())) + return false; + + if (!AvailablePredSet.count(P)) + PredsToSplit.push_back(P); + } // Split them out to their own block. UnavailablePred = @@ -886,11 +1074,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // have multiple entries here. for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { + BasicBlock *P = *PI; AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), - std::make_pair(*PI, (Value*)0)); + std::make_pair(P, (Value*)0)); - assert(I != AvailablePreds.end() && I->first == *PI && + assert(I != AvailablePreds.end() && I->first == P && "Didn't find entry for predecessor!"); PN->addIncoming(I->second, I->first); @@ -974,8 +1163,9 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { return false; SmallVector, 8> PredValues; - if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues)) + if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues)) { return false; + } assert(!PredValues.empty() && "ComputeValueKnownInPredecessors returned true with no values"); @@ -1070,36 +1260,135 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { return ThreadEdge(BB, PredsToFactor, MostPopularDest); } -/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in -/// the current block. See if there are any simplifications we can do based on -/// inputs to the phi node. +/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on +/// a PHI node in the current block. See if there are any simplifications we +/// can do based on inputs to the phi node. /// -bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { +bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) { BasicBlock *BB = PN->getParent(); - // If any of the predecessor blocks end in an unconditional branch, we can - // *duplicate* the jump into that block in order to further encourage jump - // threading and to eliminate cases where we have branch on a phi of an icmp - // (branch on icmp is much better). - - // We don't want to do this tranformation for switches, because we don't - // really want to duplicate a switch. - if (isa(BB->getTerminator())) - return false; + // TODO: We could make use of this to do it once for blocks with common PHI + // values. + SmallVector PredBBs; + PredBBs.resize(1); - // Look for unconditional branch predecessors. + // If any of the predecessor blocks end in an unconditional branch, we can + // *duplicate* the conditional branch into that block in order to further + // encourage jump threading and to eliminate cases where we have branch on a + // phi of an icmp (branch on icmp is much better). for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *PredBB = PN->getIncomingBlock(i); if (BranchInst *PredBr = dyn_cast(PredBB->getTerminator())) - if (PredBr->isUnconditional() && - // Try to duplicate BB into PredBB. - DuplicateCondBranchOnPHIIntoPred(BB, PredBB)) - return true; + if (PredBr->isUnconditional()) { + PredBBs[0] = PredBB; + // Try to duplicate BB into PredBB. + if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs)) + return true; + } } return false; } +/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on +/// a xor instruction in the current block. See if there are any +/// simplifications we can do based on inputs to the xor. +/// +bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { + BasicBlock *BB = BO->getParent(); + + // If either the LHS or RHS of the xor is a constant, don't do this + // optimization. + if (isa(BO->getOperand(0)) || + isa(BO->getOperand(1))) + return false; + + // If the first instruction in BB isn't a phi, we won't be able to infer + // anything special about any particular predecessor. + if (!isa(BB->front())) + return false; + + // If we have a xor as the branch input to this block, and we know that the + // LHS or RHS of the xor in any predecessor is true/false, then we can clone + // the condition into the predecessor and fix that value to true, saving some + // logical ops on that path and encouraging other paths to simplify. + // + // This copies something like this: + // + // BB: + // %X = phi i1 [1], [%X'] + // %Y = icmp eq i32 %A, %B + // %Z = xor i1 %X, %Y + // br i1 %Z, ... + // + // Into: + // BB': + // %Y = icmp ne i32 %A, %B + // br i1 %Z, ... + + SmallVector, 8> XorOpValues; + bool isLHS = true; + if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) { + assert(XorOpValues.empty()); + if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues)) + return false; + isLHS = false; + } + + assert(!XorOpValues.empty() && + "ComputeValueKnownInPredecessors returned true with no values"); + + // Scan the information to see which is most popular: true or false. The + // predecessors can be of the set true, false, or undef. + unsigned NumTrue = 0, NumFalse = 0; + for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { + if (!XorOpValues[i].first) continue; // Ignore undefs for the count. + if (XorOpValues[i].first->isZero()) + ++NumFalse; + else + ++NumTrue; + } + + // Determine which value to split on, true, false, or undef if neither. + ConstantInt *SplitVal = 0; + if (NumTrue > NumFalse) + SplitVal = ConstantInt::getTrue(BB->getContext()); + else if (NumTrue != 0 || NumFalse != 0) + SplitVal = ConstantInt::getFalse(BB->getContext()); + + // Collect all of the blocks that this can be folded into so that we can + // factor this once and clone it once. + SmallVector BlocksToFoldInto; + for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { + if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue; + + BlocksToFoldInto.push_back(XorOpValues[i].second); + } + + // If we inferred a value for all of the predecessors, then duplication won't + // help us. However, we can just replace the LHS or RHS with the constant. + if (BlocksToFoldInto.size() == + cast(BB->front()).getNumIncomingValues()) { + if (SplitVal == 0) { + // If all preds provide undef, just nuke the xor, because it is undef too. + BO->replaceAllUsesWith(UndefValue::get(BO->getType())); + BO->eraseFromParent(); + } else if (SplitVal->isZero()) { + // If all preds provide 0, replace the xor with the other input. + BO->replaceAllUsesWith(BO->getOperand(isLHS)); + BO->eraseFromParent(); + } else { + // If all preds provide 1, set the computed value to 1. + BO->setOperand(!isLHS, SplitVal); + } + + return true; + } + + // Try to duplicate BB into PredBB. + return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto); +} + /// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new /// predecessor to the PHIBB block. If it has PHI nodes, add entries for @@ -1171,6 +1460,9 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, << ", across block:\n " << *BB << "\n"); + if (LVI) + LVI->threadEdge(PredBB, BB, SuccBB); + // We are going to have to map operands from the original BB block to the new // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to // account for entry from PredBB. @@ -1263,20 +1555,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. - BI = NewBB->begin(); - for (BasicBlock::iterator E = NewBB->end(); BI != E; ) { - Instruction *Inst = BI++; - - if (Value *V = SimplifyInstruction(Inst, TD)) { - WeakVH BIHandle(BI); - ReplaceAndSimplifyAllUses(Inst, V, TD); - if (BIHandle == 0) - BI = NewBB->begin(); - continue; - } - - RecursivelyDeleteTriviallyDeadInstructions(Inst); - } + SimplifyInstructionsInBlock(NewBB, TD); // Threaded an edge! ++NumThreads; @@ -1289,13 +1568,15 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, /// improves the odds that the branch will be on an analyzable instruction like /// a compare. bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, - BasicBlock *PredBB) { + const SmallVectorImpl &PredBBs) { + assert(!PredBBs.empty() && "Can't handle an empty set"); + // If BB is a loop header, then duplicating this block outside the loop would // cause us to transform this into an irreducible loop, don't do this. // See the comments above FindLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB)) { DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName() - << "' into predecessor block '" << PredBB->getName() + << "' into predecessor block '" << PredBBs[0]->getName() << "' - it might create an irreducible loop!\n"); return false; } @@ -1307,12 +1588,32 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, return false; } + // And finally, do it! Start by factoring the predecessors is needed. + BasicBlock *PredBB; + if (PredBBs.size() == 1) + PredBB = PredBBs[0]; + else { + DEBUG(dbgs() << " Factoring out " << PredBBs.size() + << " common predecessors.\n"); + PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), + ".thr_comm", this); + } + // Okay, we decided to do this! Clone all the instructions in BB onto the end // of PredBB. DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '" << PredBB->getName() << "' to eliminate branch on phi. Cost: " << DuplicationCost << " block is:" << *BB << "\n"); + // Unless PredBB ends with an unconditional branch, split the edge so that we + // can just clone the bits from BB into the end of the new PredBB. + BranchInst *OldPredBranch = dyn_cast(PredBB->getTerminator()); + + if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) { + PredBB = SplitEdge(PredBB, BB, this); + OldPredBranch = cast(PredBB->getTerminator()); + } + // We are going to have to map operands from the original BB block into the // PredBB block. Evaluate PHI nodes in BB. DenseMap ValueMapping; @@ -1321,15 +1622,10 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, for (; PHINode *PN = dyn_cast(BI); ++BI) ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); - BranchInst *OldPredBranch = cast(PredBB->getTerminator()); - // Clone the non-phi instructions of BB into PredBB, keeping track of the // mapping and using it to remap operands in the cloned instructions. for (; BI != BB->end(); ++BI) { Instruction *New = BI->clone(); - New->setName(BI->getName()); - PredBB->getInstList().insert(OldPredBranch, New); - ValueMapping[BI] = New; // Remap operands to patch up intra-block references. for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) @@ -1338,6 +1634,19 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, if (I != ValueMapping.end()) New->setOperand(i, I->second); } + + // If this instruction can be simplified after the operands are updated, + // just use the simplified value instead. This frequently happens due to + // phi translation. + if (Value *IV = SimplifyInstruction(New, TD)) { + delete New; + ValueMapping[BI] = IV; + } else { + // Otherwise, insert the new instruction into the block. + New->setName(BI->getName()); + PredBB->getInstList().insert(OldPredBranch, New); + ValueMapping[BI] = New; + } } // Check to see if the targets of the branch had PHI nodes. If so, we need to