From 4b01fd29c09b135625a55b02c512665a1ebe2d4a Mon Sep 17 00:00:00 2001 From: Jun Bum Lim Date: Fri, 15 Dec 2017 18:12:49 +0000 Subject: [PATCH] Revert "Re-commit : [LICM] Allow sinking when foldable in loop" This reverts commit r320833. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320836 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/Utils/LoopUtils.h | 5 +- lib/Transforms/Scalar/LICM.cpp | 107 ++++++--------------- test/Transforms/LICM/sink-foldable.ll | 150 ------------------------------ 3 files changed, 31 insertions(+), 231 deletions(-) delete mode 100644 test/Transforms/LICM/sink-foldable.ll diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index 75066613650..90299c5e6a2 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -436,9 +436,8 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, /// instructions of the loop and loop safety information as /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, - TargetLibraryInfo *, TargetTransformInfo *, Loop *, - AliasSetTracker *, LoopSafetyInfo *, - OptimizationRemarkEmitter *ORE); + TargetLibraryInfo *, Loop *, AliasSetTracker *, + LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index ccb0af2d85a..f610aae2403 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -90,15 +90,14 @@ static cl::opt MaxNumUsesTraversed( "invariance in loop using invariant start (default = 8)")); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); -static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop); +static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, + const LoopSafetyInfo *SafetyInfo); static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, bool FreeInLoop); + OptimizationRemarkEmitter *ORE); static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, @@ -116,8 +115,7 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, - TargetLibraryInfo *TLI, TargetTransformInfo *TTI, - ScalarEvolution *SE, MemorySSA *MSSA, + TargetLibraryInfo *TLI, ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST); DenseMap &getLoopToAliasSetMap() { @@ -161,8 +159,6 @@ struct LegacyLICMPass : public LoopPass { &getAnalysis().getLoopInfo(), &getAnalysis().getDomTree(), &getAnalysis().getTLI(), - &getAnalysis().getTTI( - *L->getHeader()->getParent()), SE ? &SE->getSE() : nullptr, MSSA, &ORE, false); } @@ -174,7 +170,6 @@ struct LegacyLICMPass : public LoopPass { AU.addRequired(); if (EnableMSSALoopDependency) AU.addRequired(); - AU.addRequired(); getLoopAnalysisUsage(AU); } @@ -215,8 +210,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, "cached at a higher level"); LoopInvariantCodeMotion LICM; - if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, - AR.MSSA, ORE, true)) + if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, AR.MSSA, ORE, + true)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); @@ -229,7 +224,6 @@ INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) @@ -242,10 +236,12 @@ Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } /// We should delete AST for inner loops in the new pass manager to avoid /// memory leak. /// -bool LoopInvariantCodeMotion::runOnLoop( - Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, - TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, - MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) { +bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, + LoopInfo *LI, DominatorTree *DT, + TargetLibraryInfo *TLI, + ScalarEvolution *SE, MemorySSA *MSSA, + OptimizationRemarkEmitter *ORE, + bool DeleteAST) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -270,7 +266,7 @@ bool LoopInvariantCodeMotion::runOnLoop( // instructions, we perform another pass to hoist them out of the loop. // if (L->hasDedicatedExits()) - Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, + Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, CurAST, &SafetyInfo, ORE); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, @@ -363,8 +359,7 @@ bool LoopInvariantCodeMotion::runOnLoop( /// definitions, allowing us to sink a loop body in one pass without iteration. /// bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, - DominatorTree *DT, TargetLibraryInfo *TLI, - TargetTransformInfo *TTI, Loop *CurLoop, + DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { @@ -405,15 +400,12 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // outside of the loop. In this case, it doesn't even matter if the // operands of the instruction are loop invariant. // - bool FreeInLoop = false; - if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && + if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { - if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) { - if (!FreeInLoop) { - ++II; - CurAST->deleteValue(&I); - I.eraseFromParent(); - } + if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE)) { + ++II; + CurAST->deleteValue(&I); + I.eraseFromParent(); Changed = true; } } @@ -716,39 +708,13 @@ static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) { return true; } -/// Return true if the instruction is free in the loop. -static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, - const TargetTransformInfo *TTI) { - if (const GetElementPtrInst *GEP = dyn_cast(&I)) { - if (TTI->getUserCost(&I) != TargetTransformInfo::TCC_Free) - return false; - // For a GEP, we cannot simply use getUserCost because currently it - // optimistically assume that a GEP will fold into addressing mode - // regardless of its users. - const BasicBlock *BB = I.getParent(); - for (const User *U : I.users()) { - const Instruction *UI = cast(U); - if (CurLoop->contains(UI) && - (BB != UI->getParent() || - (!isa(UI) && !isa(UI)))) - return false; - } - return true; - } else - return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free; -} - /// Return true if the only users of this instruction are outside of /// the loop. If this is true, we can sink the instruction to the exit /// blocks of the loop. /// -/// We also return true if the instruction could be folded away in lowering. -/// (e.g., a GEP can be folded into a load as an addressing mode in the loop). -static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop) { +static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, + const LoopSafetyInfo *SafetyInfo) { const auto &BlockColors = SafetyInfo->BlockColors; - bool IsFree = isFreeInLoop(I, CurLoop, TTI); for (const User *U : I.users()) { const Instruction *UI = cast(U); if (const PHINode *PN = dyn_cast(UI)) { @@ -765,13 +731,8 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, return false; } - if (CurLoop->contains(UI)) { - if (IsFree) { - FreeInLoop = true; - continue; - } + if (CurLoop->contains(UI)) return false; - } } return true; } @@ -927,7 +888,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, /// static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, bool FreeInLoop) { + OptimizationRemarkEmitter *ORE) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) @@ -939,6 +900,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, else if (isa(I)) ++NumMovedCalls; ++NumSunk; + Changed = true; // Iterate over users to be ready for actual sinking. Replace users via // unrechable blocks with undef and make all user PHIs trivially replcable. @@ -948,12 +910,11 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, Use &U = UI.getUse(); ++UI; - if (VisitedUsers.count(User) || CurLoop->contains(User)) + if (VisitedUsers.count(User)) continue; if (!DT->isReachableFromEntry(User->getParent())) { U = UndefValue::get(I.getType()); - Changed = true; continue; } @@ -966,7 +927,6 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, BasicBlock *BB = PN->getIncomingBlock(U); if (!DT->isReachableFromEntry(BB)) { U = UndefValue::get(I.getType()); - Changed = true; continue; } @@ -975,7 +935,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, continue; if (!canSplitPredecessors(PN)) - return Changed; + return false; // Split predecessors of the PHI so that we can make users trivially // replacable. @@ -987,9 +947,6 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, UE = I.user_end(); } - if (VisitedUsers.empty()) - return Changed; - #ifndef NDEBUG SmallVector ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); @@ -1003,14 +960,9 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. - SmallSetVector Users(I.user_begin(), I.user_end()); - for (auto *UI : Users) { - auto *User = cast(UI); - - if (CurLoop->contains(User)) - continue; - - PHINode *PN = cast(User); + while (!I.use_empty()) { + Value::user_iterator UI = I.user_begin(); + PHINode *PN = cast(*UI); assert(ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!"); // The PHI must be trivially replacable. @@ -1018,7 +970,6 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, SafetyInfo, CurLoop); PN->replaceAllUsesWith(New); PN->eraseFromParent(); - Changed = true; } return Changed; } diff --git a/test/Transforms/LICM/sink-foldable.ll b/test/Transforms/LICM/sink-foldable.ll deleted file mode 100644 index 1d4a99081a1..00000000000 --- a/test/Transforms/LICM/sink-foldable.ll +++ /dev/null @@ -1,150 +0,0 @@ -; REQUIRES: aarch64-registered-target - -; RUN: opt < %s -licm -S | FileCheck %s - -target triple = "aarch64--linux-gnueabi" - -; CHECK-LABEL:@test1 -; CHECK-LABEL:loopexit1: -; CHECK: %[[PHI:.+]] = phi i8** [ %arrayidx0, %if.end ] -; CHECK: getelementptr inbounds i8*, i8** %[[PHI]], i64 1 -define i8** @test1(i32 %j, i8** readonly %P, i8* readnone %Q) { -entry: - %cmp0 = icmp slt i32 0, %j - br i1 %cmp0, label %for.body.lr.ph, label %return - -for.body.lr.ph: - br label %for.body - -for.body: - %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end ] - %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end] - - %i0.ext = sext i32 %i0 to i64 - %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext - %l0 = load i8*, i8** %arrayidx0, align 8 - %cmp1 = icmp ugt i8* %l0, %Q - br i1 %cmp1, label %loopexit0, label %if.end - -if.end: ; preds = %for.body - %arrayidx1 = getelementptr inbounds i8*, i8** %arrayidx0, i64 1 - %l1 = load i8*, i8** %arrayidx1, align 8 - %cmp4 = icmp ugt i8* %l1, %Q - %i.add = add nsw i32 %i0, 2 - br i1 %cmp4, label %loopexit1, label %for.body - -loopexit0: - %p1 = phi i8** [%arrayidx0, %for.body] - br label %return - -loopexit1: - %p2 = phi i8** [%arrayidx1, %if.end] - br label %return - -return: - %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ] - ret i8** %retval.0 -} - -; CHECK-LABEL: @test2 -; CHECK-LABEL: loopexit2: -; CHECK: %[[PHI:.*]] = phi i8** [ %add.ptr, %if.end ] -; CHECK: getelementptr inbounds i8*, i8** %[[PHI]] -define i8** @test2(i32 %j, i8** readonly %P, i8* readnone %Q) { - -entry: - br label %for.body - -for.cond: - %i.addr.0 = phi i32 [ %add, %if.end ] - %P.addr.0 = phi i8** [ %add.ptr, %if.end ] - %cmp = icmp slt i32 %i.addr.0, %j - br i1 %cmp, label %for.body, label %loopexit0 - -for.body: - %P.addr = phi i8** [ %P, %entry ], [ %P.addr.0, %for.cond ] - %i.addr = phi i32 [ 0, %entry ], [ %i.addr.0, %for.cond ] - - %idx.ext = sext i32 %i.addr to i64 - %add.ptr = getelementptr inbounds i8*, i8** %P.addr, i64 %idx.ext - %l0 = load i8*, i8** %add.ptr, align 8 - - %cmp1 = icmp ugt i8* %l0, %Q - br i1 %cmp1, label %loopexit1, label %if.end - -if.end: - %add.i = add i32 %i.addr, 1 - %idx2.ext = sext i32 %add.i to i64 - %arrayidx2 = getelementptr inbounds i8*, i8** %add.ptr, i64 %idx2.ext - %l1 = load i8*, i8** %arrayidx2, align 8 - %cmp2 = icmp ugt i8* %l1, %Q - %add = add nsw i32 %add.i, 1 - br i1 %cmp2, label %loopexit2, label %for.cond - -loopexit0: - %p0 = phi i8** [ null, %for.cond ] - br label %return - -loopexit1: - %p1 = phi i8** [ %add.ptr, %for.body ] - br label %return - -loopexit2: - %p2 = phi i8** [ %arrayidx2, %if.end ] - br label %return - -return: - %retval.0 = phi i8** [ %p1, %loopexit1 ], [ %p2, %loopexit2 ], [ %p0, %loopexit0 ] - ret i8** %retval.0 -} - - -; CHECK-LABEL: @test3 -; CHECK-LABEL: loopexit1: -; CHECK: %[[ADD:.*]] = phi i64 [ %add, %if.end ] -; CHECK: %[[ADDR:.*]] = phi i8** [ %P.addr, %if.end ] -; CHECK: %[[TRUNC:.*]] = trunc i64 %[[ADD]] to i32 -; CHECK: getelementptr inbounds i8*, i8** %[[ADDR]], i32 %[[TRUNC]] -; CHECK: call void @dummy(i32 %[[TRUNC]]) -define i8** @test3(i64 %j, i8** readonly %P, i8* readnone %Q) { -entry: - %cmp0 = icmp slt i64 0, %j - br i1 %cmp0, label %for.body.lr.ph, label %return - -for.body.lr.ph: - br label %for.body - -for.body: - %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end ] - %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end] - - %i0.ext = sext i32 %i0 to i64 - %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext - %l0 = load i8*, i8** %arrayidx0, align 8 - %cmp1 = icmp ugt i8* %l0, %Q - br i1 %cmp1, label %loopexit0, label %if.end - -if.end: ; preds = %for.body - %add = add i64 %i0.ext, 1 - %trunc = trunc i64 %add to i32 - %arrayidx1 = getelementptr inbounds i8*, i8** %P.addr, i32 %trunc - %l1 = load i8*, i8** %arrayidx1, align 8 - %cmp4 = icmp ugt i8* %l1, %Q - %i.add = add nsw i32 %i0, 2 - br i1 %cmp4, label %loopexit1, label %for.body - -loopexit0: - %p1 = phi i8** [%arrayidx0, %for.body] - br label %return - -loopexit1: - %p2 = phi i8** [%arrayidx1, %if.end] - call void @dummy(i32 %trunc) - br label %return - -return: - %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ] - ret i8** %retval.0 -} - -declare void @dummy(i32) -- 2.11.0