#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
SafetyInfo->BlockColors = colorEHFunclets(*Fn);
}
+/// Return true if we can prove that the given ExitBlock is not reached on the
+/// first iteration of the given loop. That is, the backedge of the loop must
+/// be executed before the ExitBlock is executed in any dynamic execution trace.
+static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock,
+ const DominatorTree *DT,
+ const Loop *CurLoop) {
+ auto *CondExitBlock = ExitBlock->getSinglePredecessor();
+ if (!CondExitBlock)
+ // expect unique exits
+ return false;
+ assert(CurLoop->contains(CondExitBlock) && "meaning of exit block");
+ auto *BI = dyn_cast<BranchInst>(CondExitBlock->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+ // todo: handle fcmp someday
+ // todo: this would be a lot more powerful if we used scev, but all the
+ // plumbing is currently missing to pass a pointer in from the pass
+ auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!ICI)
+ return false;
+ // Check for cmp (phi [x, preheader] ...), y where (pred x, y is known
+ auto *LHS = dyn_cast<PHINode>(ICI->getOperand(0));
+ auto *RHS = ICI->getOperand(1);
+ if (!LHS || LHS->getParent() != CurLoop->getHeader())
+ return false;
+ auto DL = ExitBlock->getModule()->getDataLayout();
+ auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader());
+ auto *SimpleValOrNull = SimplifyICmpInst(ICI->getPredicate(),
+ IVStart, RHS,
+ {DL, /*TLI*/ nullptr,
+ DT, /*AC*/ nullptr, BI});
+ auto *SimpleCst = dyn_cast_or_null<Constant>(SimpleValOrNull);
+ if (!SimpleCst)
+ return false;
+ if (ExitBlock == BI->getSuccessor(0))
+ return SimpleCst->isZeroValue();
+ assert(ExitBlock == BI->getSuccessor(1) && "implied by above");
+ return SimpleCst->isAllOnesValue();
+}
+
/// Returns true if the instruction in a loop is guaranteed to execute at least
/// once.
bool llvm::isGuaranteedToExecute(const Instruction &Inst,
if (SafetyInfo->MayThrow)
return false;
+ // Note: There are two styles of reasoning intermixed below for
+ // implementation efficiency reasons. They are:
+ // 1) If we can prove that the instruction dominates all exit blocks, then we
+ // know the instruction must have executed on *some* iteration before we
+ // exit. We do not prove *which* iteration the instruction must execute on.
+ // 2) If we can prove that the instruction dominates the latch and all exits
+ // which might be taken on the first iteration, we know the instruction must
+ // execute on the first iteration. This second style allows a conditional
+ // exit before the instruction of interest which is provably not taken on the
+ // first iteration. This is a quite common case for range check like
+ // patterns. TODO: support loops with multiple latches.
+
+ const bool InstDominatesLatch =
+ CurLoop->getLoopLatch() != nullptr &&
+ DT->dominates(Inst.getParent(), CurLoop->getLoopLatch());
+
// Get the exit blocks for the current loop.
SmallVector<BasicBlock *, 8> ExitBlocks;
CurLoop->getExitBlocks(ExitBlocks);
// Verify that the block dominates each of the exit blocks of the loop.
for (BasicBlock *ExitBlock : ExitBlocks)
if (!DT->dominates(Inst.getParent(), ExitBlock))
- return false;
+ if (!InstDominatesLatch ||
+ !CanProveNotTakenFirstIteration(ExitBlock, DT, CurLoop))
+ return false;
// As a degenerate case, if the loop is statically infinite then we haven't
// proven anything since there are no exit blocks.
--- /dev/null
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @f() nounwind
+
+; constant fold on first ieration
+define i32 @test1(i32* noalias nocapture readonly %a) nounwind uwtable {
+; CHECK-LABEL: @test1(
+entry:
+; CHECK: %i1 = load i32, i32* %a, align 4
+; CHECK-NEXT: br label %for.body
+ br label %for.body
+
+for.body:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %continue ]
+ %acc = phi i32 [ 0, %entry ], [ %add, %continue ]
+ %r.chk = icmp ult i32 %iv, 2000
+ br i1 %r.chk, label %continue, label %fail
+continue:
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %acc
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+
+fail:
+ call void @f()
+ ret i32 -1
+}
+
+; Count down from a.length w/entry guard
+; TODO: currently unable to prove the following:
+; ule i32 (add nsw i32 %len, -1), %len where len is [0, 512]
+define i32 @test2(i32* noalias nocapture readonly %a) nounwind uwtable {
+; CHECK-LABEL: @test2(
+entry:
+ %len = load i32, i32* %a, align 4, !range !{i32 0, i32 512}
+ %is.non.pos = icmp eq i32 %len, 0
+ br i1 %is.non.pos, label %fail, label %preheader
+preheader:
+ %lenminusone = add nsw i32 %len, -1
+ br label %for.body
+for.body:
+ %iv = phi i32 [ %lenminusone, %preheader ], [ %dec, %continue ]
+ %acc = phi i32 [ 0, %preheader ], [ %add, %continue ]
+ %r.chk = icmp ule i32 %iv, %len
+ br i1 %r.chk, label %continue, label %fail
+continue:
+; CHECK-LABEL: continue
+; CHECK: %i1 = load i32, i32* %a, align 4
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %acc
+ %dec = add nsw i32 %iv, -1
+ %exitcond = icmp eq i32 %dec, 0
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+
+fail:
+ call void @f()
+ ret i32 -1
+}
+
+; trivially true for zero
+define i32 @test3(i32* noalias nocapture readonly %a) nounwind uwtable {
+; CHECK-LABEL: @test3(
+entry:
+ %len = load i32, i32* %a, align 4, !range !{i32 0, i32 512}
+ %is.zero = icmp eq i32 %len, 0
+ br i1 %is.zero, label %fail, label %preheader
+preheader:
+; CHECK: %i1 = load i32, i32* %a, align 4
+; CHECK-NEXT: br label %for.body
+ br label %for.body
+for.body:
+ %iv = phi i32 [ 0, %preheader ], [ %inc, %continue ]
+ %acc = phi i32 [ 0, %preheader ], [ %add, %continue ]
+ %r.chk = icmp ule i32 %iv, %len
+ br i1 %r.chk, label %continue, label %fail
+continue:
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %acc
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+
+fail:
+ call void @f()
+ ret i32 -1
+}
+
+; requires fact length is non-zero
+; TODO: IsKnownNonNullFromDominatingConditions is currently only be done for
+; pointers; should handle integers too
+define i32 @test4(i32* noalias nocapture readonly %a) nounwind uwtable {
+; CHECK-LABEL: @test4(
+entry:
+ %len = load i32, i32* %a, align 4, !range !{i32 0, i32 512}
+ %is.zero = icmp eq i32 %len, 0
+ br i1 %is.zero, label %fail, label %preheader
+preheader:
+ br label %for.body
+for.body:
+ %iv = phi i32 [ 0, %preheader ], [ %inc, %continue ]
+ %acc = phi i32 [ 0, %preheader ], [ %add, %continue ]
+ %r.chk = icmp ult i32 %iv, %len
+ br i1 %r.chk, label %continue, label %fail
+continue:
+; CHECK-LABEL: continue
+; CHECK: %i1 = load i32, i32* %a, align 4
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %acc
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+
+fail:
+ call void @f()
+ ret i32 -1
+}
+
+; variation on test1 with branch swapped
+define i32 @test-brswap(i32* noalias nocapture readonly %a) nounwind uwtable {
+; CHECK-LABEL: @test-brswap(
+entry:
+; CHECK: %i1 = load i32, i32* %a, align 4
+; CHECK-NEXT: br label %for.body
+ br label %for.body
+
+for.body:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %continue ]
+ %acc = phi i32 [ 0, %entry ], [ %add, %continue ]
+ %r.chk = icmp ugt i32 %iv, 2000
+ br i1 %r.chk, label %fail, label %continue
+continue:
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %acc
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+
+fail:
+ call void @f()
+ ret i32 -1
+}
+
+define i32 @test-nonphi(i32* noalias nocapture readonly %a) nounwind uwtable {
+; CHECK-LABEL: @test-nonphi(
+entry:
+ br label %for.body
+
+for.body:
+; CHECK-LABEL: continue
+; CHECK: %i1 = load i32, i32* %a, align 4
+ %iv = phi i32 [ 0, %entry ], [ %inc, %continue ]
+ %acc = phi i32 [ 0, %entry ], [ %add, %continue ]
+ %xor = xor i32 %iv, 72
+ %r.chk = icmp ugt i32 %xor, 2000
+ br i1 %r.chk, label %fail, label %continue
+continue:
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %acc
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+
+fail:
+ call void @f()
+ ret i32 -1
+}
+
+define i32 @test-wrongphi(i32* noalias nocapture readonly %a) nounwind uwtable {
+; CHECK-LABEL: @test-wrongphi(
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %continue ]
+ %acc = phi i32 [ 0, %entry ], [ %add, %continue ]
+ br label %dummy_block
+dummy_block:
+ %wrongphi = phi i32 [12, %for.body]
+ %r.chk = icmp ugt i32 %wrongphi, 2000
+ br i1 %r.chk, label %fail, label %continue
+continue:
+; CHECK-LABEL: continue
+; CHECK: %i1 = load i32, i32* %a, align 4
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %acc
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+
+fail:
+ call void @f()
+ ret i32 -1
+}
+
+; This works because loop-simplify is run implicitly, but test for it anyways
+define i32 @test-multiple-latch(i32* noalias nocapture readonly %a) nounwind uwtable {
+; CHECK-LABEL: @test-multiple-latch(
+entry:
+; CHECK: %i1 = load i32, i32* %a, align 4
+; CHECK-NEXT: br label %for.body
+ br label %for.body
+
+for.body:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %continue1 ], [ %inc, %continue2 ]
+ %acc = phi i32 [ 0, %entry ], [ %add, %continue1 ], [ %add, %continue2 ]
+ %r.chk = icmp ult i32 %iv, 2000
+ br i1 %r.chk, label %continue1, label %fail
+continue1:
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %acc
+ %inc = add nuw nsw i32 %iv, 1
+ %cmp = icmp eq i32 %add, 0
+ br i1 %cmp, label %continue2, label %for.body
+continue2:
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+
+fail:
+ call void @f()
+ ret i32 -1
+}