From 5c724a4fef84bb71490c839118e1a81d58888757 Mon Sep 17 00:00:00 2001 From: Geoff Berry Date: Mon, 6 Jun 2016 19:10:46 +0000 Subject: [PATCH] Reapply [LSR] Create fewer redundant instructions. Summary: Fix LSRInstance::HoistInsertPosition() to check the original insert position block first for a canonical insertion point that is dominated by all inputs. This leads to SCEV being able to reuse more instructions since it currently tracks the instructions it creates for reuse by keeping a table of (Value, insert point) pairs. Originally reviewed in http://reviews.llvm.org/D18001 Reviewers: atrick Subscribers: llvm-commits, mzolotukhin, mcrosier Differential Revision: http://reviews.llvm.org/D18480 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271929 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 42 ++++++++++--------- .../LoopStrengthReduce/AArch64/lsr-reuse.ll | 34 ++++++++++++++++ .../LoopStrengthReduce/scev-insertpt-bug.ll | 47 ++++++++++++++++++++++ 3 files changed, 103 insertions(+), 20 deletions(-) create mode 100644 test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll create mode 100644 test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 125f2cbc516..190fc5a2dc4 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4315,28 +4315,10 @@ BasicBlock::iterator LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, const SmallVectorImpl &Inputs) const { + Instruction *Tentative = &*IP; for (;;) { - const Loop *IPLoop = LI.getLoopFor(IP->getParent()); - unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; - - BasicBlock *IDom; - for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) { - if (!Rung) return IP; - Rung = Rung->getIDom(); - if (!Rung) return IP; - IDom = Rung->getBlock(); - - // Don't climb into a loop though. 
- const Loop *IDomLoop = LI.getLoopFor(IDom); - unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0; - if (IDomDepth <= IPLoopDepth && - (IDomDepth != IPLoopDepth || IDomLoop == IPLoop)) - break; - } - bool AllDominate = true; Instruction *BetterPos = nullptr; - Instruction *Tentative = IDom->getTerminator(); // Don't bother attempting to insert before a catchswitch, their basic block // cannot have other non-PHI instructions. if (isa(Tentative)) @@ -4349,7 +4331,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, } // Attempt to find an insert position in the middle of the block, // instead of at the end, so that it can be used for other expansions. - if (IDom == Inst->getParent() && + if (Tentative->getParent() == Inst->getParent() && (!BetterPos || !DT.dominates(Inst, BetterPos))) BetterPos = &*std::next(BasicBlock::iterator(Inst)); } @@ -4359,6 +4341,26 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, IP = BetterPos->getIterator(); else IP = Tentative->getIterator(); + + const Loop *IPLoop = LI.getLoopFor(IP->getParent()); + unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; + + BasicBlock *IDom; + for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) { + if (!Rung) return IP; + Rung = Rung->getIDom(); + if (!Rung) return IP; + IDom = Rung->getBlock(); + + // Don't climb into a loop though. + const Loop *IDomLoop = LI.getLoopFor(IDom); + unsigned IDomDepth = IDomLoop ? 
IDomLoop->getLoopDepth() : 0; + if (IDomDepth <= IPLoopDepth && + (IDomDepth != IPLoopDepth || IDomLoop == IPLoop)) + break; + } + + Tentative = IDom->getTerminator(); } return IP; diff --git a/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll new file mode 100644 index 00000000000..a2dfe81b108 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll @@ -0,0 +1,34 @@ +; RUN: llc -mtriple=arm64-unknown-unknown -print-lsr-output < %s 2>&1 | FileCheck %s + +declare void @foo(i64) + +; Verify that redundant adds aren't inserted by LSR. +; CHECK-LABEL: @bar( +define void @bar(double* %A) { +entry: + br label %while.cond + +while.cond: +; CHECK-LABEL: while.cond: +; CHECK: add i64 %lsr.iv, 1 +; CHECK-NOT: add i64 %lsr.iv, 1 +; CHECK-LABEL: land.rhs: + %indvars.iv28 = phi i64 [ %indvars.iv.next29, %land.rhs ], [ 50, %entry ] + %cmp = icmp sgt i64 %indvars.iv28, 0 + br i1 %cmp, label %land.rhs, label %while.end + +land.rhs: + %indvars.iv.next29 = add nsw i64 %indvars.iv28, -1 + %arrayidx = getelementptr inbounds double, double* %A, i64 %indvars.iv.next29 + %Aload = load double, double* %arrayidx, align 8 + %cmp1 = fcmp oeq double %Aload, 0.000000e+00 + br i1 %cmp1, label %while.cond, label %if.end + +while.end: + %indvars.iv28.lcssa = phi i64 [ %indvars.iv28, %while.cond ] + tail call void @foo(i64 %indvars.iv28.lcssa) + br label %if.end + +if.end: + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll b/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll new file mode 100644 index 00000000000..81a6b07fe95 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -loop-reduce -S + +; Test that SCEV insert points don't get corrupted and cause an +; invalid instruction to be inserted in a block other than its parent. +; See http://reviews.llvm.org/D20703 for context. 
+define void @test() { +entry: + %bf.load = load i32, i32* null, align 4 + %bf.clear = lshr i32 %bf.load, 1 + %div = and i32 %bf.clear, 134217727 + %sub = add nsw i32 %div, -1 + %0 = zext i32 %sub to i64 + br label %while.cond + +while.cond: ; preds = %cond.end, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %cond.end ], [ 0, %entry ] + %cmp = icmp eq i64 %indvars.iv, %0 + br i1 %cmp, label %cleanup16, label %while.body + +while.body: ; preds = %while.cond + %1 = trunc i64 %indvars.iv to i32 + %mul = shl i32 %1, 1 + %add = add nuw i32 %mul, 2 + %cmp3 = icmp ult i32 %add, 0 + br i1 %cmp3, label %if.end, label %if.then + +if.then: ; preds = %while.body + unreachable + +if.end: ; preds = %while.body + br i1 false, label %cond.end, label %cond.true + +cond.true: ; preds = %if.end + br label %cond.end + +cond.end: ; preds = %cond.true, %if.end + %add7 = add i32 %1, 1 + %cmp12 = icmp ugt i32 %add7, %sub + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br i1 %cmp12, label %if.then13, label %while.cond + +if.then13: ; preds = %cond.end + unreachable + +cleanup16: ; preds = %while.cond + ret void +} -- 2.11.0