bool currentLimitations();
+ const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
+ return OuterInnerReductions;
+ }
+
private:
bool tightlyNested(Loop *Outer, Loop *Inner);
bool containsUnsafeInstructions(BasicBlock *BB);
- bool findInductions(Loop *L, SmallVector<PHINode *, 8> &Inductions);
+
+ /// Discover induction and reduction PHIs in the header of \p L. Induction
+ /// PHIs are added to \p Inductions, reductions are added to
+ /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
+ /// to be passed as \p InnerLoop.
+ bool findInductionAndReductions(Loop *L,
+ SmallVector<PHINode *, 8> &Inductions,
+ Loop *InnerLoop);
Loop *OuterLoop;
Loop *InnerLoop;
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
+ /// Set of reduction PHIs taking part in a reduction across the inner and
+ /// outer loop.
+ SmallPtrSet<PHINode *, 4> OuterInnerReductions;
};
/// LoopInterchangeProfitability checks if it is profitable to interchange the
public:
LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
- BasicBlock *LoopNestExit)
+ BasicBlock *LoopNestExit,
+ const LoopInterchangeLegality &LIL)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
- LoopExit(LoopNestExit) {}
+ LoopExit(LoopNestExit), LIL(LIL) {}
/// Interchange OuterLoop and InnerLoop.
bool transform();
LoopInfo *LI;
DominatorTree *DT;
BasicBlock *LoopExit;
+
+ const LoopInterchangeLegality &LIL;
};
// Main LoopInterchange Pass.
<< "Loop interchanged with enclosing loop.";
});
- LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
- LoopNestExit);
+ LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit,
+ LIL);
LIT.transform();
LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
LoopsInterchanged++;
return true;
}
-bool LoopInterchangeLegality::findInductions(
- Loop *L, SmallVector<PHINode *, 8> &Inductions) {
+// Look through (chains of) PHI nodes with a single incoming value, such as
+// LCSSA PHIs, and return the underlying value; other values are returned as is.
+static Value *followLCSSA(Value *SV) {
+ PHINode *PHI = dyn_cast<PHINode>(SV);
+ if (!PHI)
+ return SV;
+
+ if (PHI->getNumIncomingValues() != 1)
+ return SV;
+ return followLCSSA(PHI->getIncomingValue(0));
+}
+
+// Return V's first multi-input PHI user if it is a reduction PHI in L, else null.
+static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
+ for (Value *User : V->users()) {
+ if (PHINode *PHI = dyn_cast<PHINode>(User)) {
+ if (PHI->getNumIncomingValues() == 1)
+ continue;
+ RecurrenceDescriptor RD;
+ if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
+ return PHI;
+ return nullptr;
+ }
+ }
+
+ return nullptr;
+}
+
+bool LoopInterchangeLegality::findInductionAndReductions(
+ Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
if (!L->getLoopLatch() || !L->getLoopPredecessor())
return false;
for (PHINode &PHI : L->getHeader()->phis()) {
if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
Inductions.push_back(&PHI);
else {
- LLVM_DEBUG(dbgs() << "Failed to recognize PHI as an induction.\n");
- return false;
+ // PHIs in inner loops need to be part of a reduction in the outer loop,
+ // discovered when checking the PHIs of the outer loop earlier.
+ if (!InnerLoop) {
+ if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) {
+ LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
+ "across the outer loop.\n");
+ return false;
+ }
+ } else {
+ assert(PHI.getNumIncomingValues() == 2 &&
+ "Phis in loop header should have exactly 2 incoming values");
+ // Check if we have a PHI node in the outer loop that has a reduction
+ // result from the inner loop as an incoming value.
+ Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
+ PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
+ if (!InnerRedPhi ||
+ !llvm::any_of(InnerRedPhi->incoming_values(),
+ [&PHI](Value *V) { return V == &PHI; })) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Failed to recognize PHI as an induction or reduction.\n");
+ return false;
+ }
+ OuterInnerReductions.insert(&PHI);
+ OuterInnerReductions.insert(InnerRedPhi);
+ }
}
}
return true;
PHINode *InnerInductionVar;
SmallVector<PHINode *, 8> Inductions;
- if (!findInductions(InnerLoop, Inductions)) {
+ if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
LLVM_DEBUG(
- dbgs() << "Only inner loops with induction or reduction PHI nodes "
+ dbgs() << "Only outer loops with induction or reduction PHI nodes "
<< "are supported currently.\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with induction or reduction PHI nodes can be"
- " interchange currently.";
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Only outer loops with induction or reduction PHI nodes can be"
+ " interchanged currently.";
});
return true;
}
// TODO: Currently we handle only loops with 1 induction variable.
if (Inductions.size() != 1) {
- LLVM_DEBUG(
- dbgs() << "We currently only support loops with 1 induction variable."
- << "Failed to interchange due to current limitation\n");
+ LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
+ << "supported currently.\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with 1 induction variable can be "
+ return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
+ OuterLoop->getStartLoc(),
+ OuterLoop->getHeader())
+ << "Only outer loops with 1 induction variable can be "
"interchanged currently.";
});
return true;
}
- InnerInductionVar = Inductions.pop_back_val();
- if (!findInductions(OuterLoop, Inductions)) {
+ Inductions.clear();
+ if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
LLVM_DEBUG(
- dbgs() << "Only outer loops with induction or reduction PHI nodes "
+ dbgs() << "Only inner loops with induction or reduction PHI nodes "
<< "are supported currently.\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Only outer loops with induction or reduction PHI nodes can be"
- " interchanged currently.";
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Only inner loops with induction or reduction PHI nodes can be"
+ " interchange currently.";
});
return true;
}
// TODO: Currently we handle only loops with 1 induction variable.
if (Inductions.size() != 1) {
- LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
- << "supported currently.\n");
+ LLVM_DEBUG(
+ dbgs() << "We currently only support loops with 1 induction variable."
+ << "Failed to interchange due to current limitation\n");
ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
- OuterLoop->getStartLoc(),
- OuterLoop->getHeader())
- << "Only outer loops with 1 induction variable can be "
+ return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Only inner loops with 1 induction variable can be "
"interchanged currently.";
});
return true;
}
+ InnerInductionVar = Inductions.pop_back_val();
// TODO: Triangular loops are not handled for now.
if (!isLoopStructureUnderstood(InnerInductionVar)) {
// replaced by Inners'.
updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
- // Make sure we have no other PHIs.
- auto InnerPhis = drop_begin(InnerLoopHeader->phis(), 1);
- auto OuterPhis = drop_begin(OuterLoopHeader->phis(), 1);
- (void) InnerPhis;
- (void) OuterPhis;
- assert(begin(InnerPhis) == end(InnerPhis) && "Unexpected PHIs in inner loop");
- assert(begin(OuterPhis) == end(OuterPhis) && "Unexpected PHis in outer loop");
+ // Now update the reduction PHIs in the inner and outer loop headers.
+ SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
+ for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1))
+ InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
+ for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1))
+ OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
+
+ auto &OuterInnerReductions = LIL.getOuterInnerReductions();
+ (void)OuterInnerReductions;
+
+ // Now move the remaining reduction PHIs from outer to inner loop header and
+ // vice versa. The PHI nodes must be part of a reduction across the inner and
+ // outer loop and all that remains to do is updating the incoming blocks.
+ for (PHINode *PHI : OuterLoopPHIs) {
+ PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
+ assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
+ "Expected a reduction PHI node");
+ }
+ for (PHINode *PHI : InnerLoopPHIs) {
+ PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
+ assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
+ "Expected a reduction PHI node");
+ }
// Update the incoming blocks for moved PHI nodes.
updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader);
; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
-; RUN: -verify-dom-info -verify-loop-info 2>&1 | FileCheck -check-prefix=IR %s
+; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 | FileCheck -check-prefix=IR %s
; RUN: FileCheck --input-file=%t %s
; Inner loop only reductions are not supported currently. See discussion at
-; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -pass-remarks-output=%t
-; RUN: cat %t | FileCheck --check-prefix REMARK %s
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -pass-remarks-output=%t -S
+; RUN: FileCheck --input-file %t --check-prefix REMARK %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
--- /dev/null
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 | FileCheck -check-prefix=IR %s
+; RUN: FileCheck --input-file=%t %s
+
+; Outer loop only reductions are not supported currently.
+
+@A = common global [500 x [500 x i32]] zeroinitializer
+
+;; global X
+
+;; for( int i=1;i<N;i++) {
+;; for( int j=1;j<N;j++)
+;; ;
+;; X+=A[j][i];
+;; }
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: UnsupportedPHI
+; CHECK-NEXT: Function: reduction_01
+
+; IR-LABEL: @reduction_01(
+; IR-NOT: split
+
+define i32 @reduction_01(i32 %N) {
+entry:
+ br label %outer.header
+
+outer.header: ; preds = %outer.inc, %entry
+ %indvars.iv18 = phi i64 [ %indvars.iv.next19, %outer.inc ], [ 1, %entry ]
+ %add15 = phi i32 [ 0, %entry ], [ %add, %outer.inc ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %outer.header
+ %indvars.iv = phi i64 [ 1, %outer.header ], [ %indvars.iv.next, %for.body3 ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %outer.inc, label %for.body3
+
+outer.inc: ; preds = %for.body3
+ %arrayidx5 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv18
+ %0 = load i32, i32* %arrayidx5
+ %add = add nsw i32 %add15, %0
+ %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
+ %lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32
+ %exitcond21 = icmp eq i32 %lftr.wideiv20, %N
+ br i1 %exitcond21, label %for.end8, label %outer.header
+
+for.end8: ; preds = %outer.inc
+ ret i32 %add
+}
--- /dev/null
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 | FileCheck %s
+; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; REMARKS: --- !Passed
+; REMARKS-NEXT: Pass: loop-interchange
+; REMARKS-NEXT: Name: Interchanged
+; REMARKS-NEXT: Function: test1
+
+define i64 @test1([100 x [100 x i64]]* %Arr) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR2_PREHEADER:%.*]]
+; CHECK: for1.header.preheader:
+; CHECK-NEXT: br label [[FOR1_HEADER:%.*]]
+; CHECK: for1.header:
+; CHECK-NEXT: [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR1_INC:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT: [[SUM_INNER:%.*]] = phi i64 [ [[SUM_INC:%.*]], [[FOR1_INC]] ], [ [[SUM_OUTER:%.*]], [[FOR1_HEADER_PREHEADER]] ]
+; CHECK-NEXT: br label [[FOR2_SPLIT1:%.*]]
+; CHECK: for2.preheader:
+; CHECK-NEXT: br label [[FOR2:%.*]]
+; CHECK: for2:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
+; CHECK-NEXT: [[SUM_OUTER]] = phi i64 [ [[SUM_INC_LCSSA:%.*]], [[FOR2_SPLIT]] ], [ 0, [[FOR2_PREHEADER]] ]
+; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]
+; CHECK: for2.split1:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* [[ARR:%.*]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
+; CHECK-NEXT: [[LV:%.*]] = load i64, i64* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[SUM_INC]] = add i64 [[SUM_INNER]], [[LV]]
+; CHECK-NEXT: br label [[FOR1_INC]]
+; CHECK: for2.split:
+; CHECK-NEXT: [[SUM_INC_LCSSA]] = phi i64 [ [[SUM_INC]], %for1.inc ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXIT1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], 100
+; CHECK-NEXT: br i1 [[EXIT1]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR2]]
+; CHECK: for1.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
+; CHECK-NEXT: [[EXIT2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT24]], 100
+; CHECK-NEXT: br i1 [[EXIT2]], label [[FOR2_SPLIT]], label [[FOR1_HEADER]]
+; CHECK: for1.loopexit:
+; CHECK-NEXT: [[SUM_INC_LCSSA2:%.*]] = phi i64 [ [[SUM_INC_LCSSA]], [[FOR2_SPLIT]] ]
+; CHECK-NEXT: ret i64 [[SUM_INC_LCSSA2]]
+;
+entry:
+ br label %for1.header
+
+for1.header: ; preds = %for1.inc, %entry
+ %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
+ %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
+ br label %for2
+
+for2: ; preds = %for2, %for1.header
+ %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
+ %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
+ %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+ %lv = load i64, i64* %arrayidx, align 4
+ %sum.inc = add i64 %sum.inner, %lv
+ %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
+ %exit1 = icmp eq i64 %indvars.iv.next.3, 100
+ br i1 %exit1, label %for1.inc, label %for2
+
+for1.inc: ; preds = %for2
+ %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
+ %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+ %exit2 = icmp eq i64 %indvars.iv.next24, 100
+ br i1 %exit2, label %for1.loopexit, label %for1.header
+
+for1.loopexit: ; preds = %for1.inc
+ %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
+ ret i64 %sum.inc.lcssa2
+}
+
+; In this test case, the inner reduction PHI %inner does not involve the outer
+; reduction PHI %sum.outer, so the loops must not be interchanged.
+; REMARKS: --- !Missed
+; REMARKS-NEXT: Pass: loop-interchange
+; REMARKS-NEXT: Name: UnsupportedPHIOuter
+; REMARKS-NEXT: Function: test2
+
+define i64 @test2([100 x [100 x i64]]* %Arr) {
+entry:
+ br label %for1.header
+
+for1.header: ; preds = %for1.inc, %entry
+ %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
+ %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
+ br label %for2
+
+for2: ; preds = %for2, %for1.header
+ %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
+ %inner = phi i64 [ %indvars.iv23, %for1.header ], [ %sum.inc, %for2 ]
+ %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+ %lv = load i64, i64* %arrayidx, align 4
+ %sum.inc = add i64 %inner, %lv
+ %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
+ %exit1 = icmp eq i64 %indvars.iv.next.3, 100
+ br i1 %exit1, label %for1.inc, label %for2
+
+for1.inc: ; preds = %for2
+ %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
+ %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+ %exit2 = icmp eq i64 %indvars.iv.next24, 100
+ br i1 %exit2, label %for1.loopexit, label %for1.header
+
+for1.loopexit: ; preds = %for1.inc
+ %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
+ ret i64 %sum.inc.lcssa2
+}
+
+; Check that we do not interchange if there is an additional instruction
+; between the outer and inner reduction PHIs.
+; REMARKS: --- !Missed
+; REMARKS-NEXT: Pass: loop-interchange
+; REMARKS-NEXT: Name: UnsupportedPHIOuter
+; REMARKS-NEXT: Function: test3
+
+define i64 @test3([100 x [100 x i64]]* %Arr) {
+entry:
+ br label %for1.header
+
+for1.header: ; preds = %for1.inc, %entry
+ %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
+ %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
+ %so = add i64 %sum.outer, 10
+ br label %for2
+
+for2: ; preds = %for2, %for1.header
+ %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
+ %sum.inner = phi i64 [ %so, %for1.header ], [ %sum.inc, %for2 ]
+ %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+ %lv = load i64, i64* %arrayidx, align 4
+ %sum.inc = add i64 %sum.inner, %lv
+ %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
+ %exit1 = icmp eq i64 %indvars.iv.next.3, 100
+ br i1 %exit1, label %for1.inc, label %for2
+
+for1.inc: ; preds = %for2
+ %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
+ %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+ %exit2 = icmp eq i64 %indvars.iv.next24, 100
+ br i1 %exit2, label %for1.loopexit, label %for1.header
+
+for1.loopexit: ; preds = %for1.inc
+ %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
+ ret i64 %sum.inc.lcssa2
+}