#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
/// constant arguments.
DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
+ /// Model the elimination of repeated loads that is expected to happen
+ /// whenever we simplify away the stores that would otherwise cause them to be
+ /// loads.
+ bool EnableLoadElimination;
+ SmallPtrSet<Value *, 16> LoadAddrSet;
+ int LoadEliminationCost;
+
// Custom simplification helper routines.
bool isAllocaDerivedArg(Value *V);
bool lookupSROAArgAndCost(Value *V, Value *&Arg,
void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
int InstructionCost);
+ void disableLoadElimination();
bool isGEPFree(GetElementPtrInst &GEP);
bool canFoldInboundsGEP(GetElementPtrInst &I);
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
+ EnableLoadElimination(true), LoadEliminationCost(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
SROACostSavings -= CostIt->second;
SROACostSavingsLost += CostIt->second;
SROAArgCosts.erase(CostIt);
+ disableLoadElimination();
}
/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
SROACostSavings += InstructionCost;
}
+/// \brief Stop modeling repeated-load elimination and pay back any savings
+/// already banked against the inline cost.
+void CallAnalyzer::disableLoadElimination() {
+  if (EnableLoadElimination) {
+    // Loads previously treated as free (accumulated in LoadEliminationCost)
+    // must now be charged, since a potential clobber invalidates the model.
+    Cost += LoadEliminationCost;
+    EnableLoadElimination = false;
+  }
+}
+
/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
disableSROA(CostIt);
}
+ // If the data is already loaded from this address and hasn't been clobbered
+ // by any stores or calls, this load is likely to be redundant and can be
+ // eliminated.
+ if (EnableLoadElimination &&
+ !LoadAddrSet.insert(I.getPointerOperand()).second) {
+ LoadEliminationCost += InlineConstants::InstrCost;
+ return true;
+ }
+
return false;
}
disableSROA(CostIt);
}
+ // The store can potentially clobber loads and prevent repeated loads from
+ // being eliminated.
+ // FIXME:
+  // 1. We can probably keep an initial set of eliminatable loads subtracted
+ // from the cost even when we finally see a store. We just need to disable
+ // *further* accumulation of elimination savings.
+ // 2. We should probably at some point thread MemorySSA for the callee into
+ // this and then use that to actually compute *really* precise savings.
+ disableLoadElimination();
return false;
}
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
switch (II->getIntrinsicID()) {
default:
+ if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
+ disableLoadElimination();
return Base::visitCallSite(CS);
case Intrinsic::load_relative:
case Intrinsic::memset:
case Intrinsic::memcpy:
case Intrinsic::memmove:
+ disableLoadElimination();
// SROA can usually chew through these intrinsics, but they aren't free.
return false;
case Intrinsic::localescape:
Cost += InlineConstants::CallPenalty;
}
+ if (!CS.onlyReadsMemory())
+ disableLoadElimination();
return Base::visitCallSite(CS);
}
// Next, check if this happens to be an indirect function call to a known
// function in this inline context. If not, we've done all we can.
Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
- if (!F)
+ if (!F) {
+ if (!CS.onlyReadsMemory())
+ disableLoadElimination();
return Base::visitCallSite(CS);
+ }
// If we have a constant that we are calling as a function, we can peer
// through it and see the function target. This happens not infrequently
Cost -= std::max(0, CA.getThreshold() - CA.getCost());
}
+ if (!F->onlyReadsMemory())
+ disableLoadElimination();
return Base::visitCallSite(CS);
}
DEBUG_PRINT_STAT(NumInstructions);
DEBUG_PRINT_STAT(SROACostSavings);
DEBUG_PRINT_STAT(SROACostSavingsLost);
+ DEBUG_PRINT_STAT(LoadEliminationCost);
DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
DEBUG_PRINT_STAT(Cost);
DEBUG_PRINT_STAT(Threshold);
--- /dev/null
+; RUN: opt -inline < %s -S -o - -inline-threshold=3 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @pad() readnone
+
+; inner1 should be inlined: %b is an alloca in the caller, so the store to it
+; is expected to be absorbed by the SROA modeling and should not turn off load
+; elimination for the repeated loads of %a.
+define void @outer1(i32* %a) {
+; CHECK-LABEL: @outer1(
+; CHECK-NOT: call void @inner1
+  %b = alloca i32
+  call void @inner1(i32* %a, i32* %b)
+  ret void
+}
+
+define void @inner1(i32* %a, i32* %b) {
+  %1 = load i32, i32* %a
+  store i32 %1, i32 * %b ; This store does not clobber the first load.
+  %2 = load i32, i32* %a
+  call void @pad()
+  %3 = load i32, i32* %a
+  ret void
+}
+
+
+; inner2 must not be inlined: %b is an arbitrary pointer here, so the store
+; may alias %a; load elimination is disabled and the second load of %a keeps
+; the cost above the threshold.
+define void @outer2(i32* %a, i32* %b) {
+; CHECK-LABEL: @outer2(
+; CHECK: call void @inner2
+  call void @inner2(i32* %a, i32* %b)
+  ret void
+}
+
+define void @inner2(i32* %a, i32* %b) {
+  %1 = load i32, i32* %a
+  store i32 %1, i32 * %b ; This store clobbers the first load.
+  %2 = load i32, i32* %a
+  call void @pad()
+  ret void
+}
+
+
+; inner3 must not be inlined: @ext carries no memory attributes, so the call
+; may write memory and disables load elimination for the reload of %a.
+define void @outer3(i32* %a) {
+; CHECK-LABEL: @outer3(
+; CHECK: call void @inner3
+  call void @inner3(i32* %a)
+  ret void
+}
+
+declare void @ext()
+
+define void @inner3(i32* %a) {
+  %1 = load i32, i32* %a
+  call void @ext() ; This call clobbers the first load.
+  %2 = load i32, i32* %a
+  ret void
+}
+
+
+; inner4 should be inlined: %pred is constant false at this call site, so
+; cond_true (and its store) is a known-dead block that the analysis should
+; never visit; load elimination stays enabled for the reloads of %a.
+define void @outer4(i32* %a, i32* %b, i32* %c) {
+; CHECK-LABEL: @outer4(
+; CHECK-NOT: call void @inner4
+  call void @inner4(i32* %a, i32* %b, i1 false)
+  ret void
+}
+
+define void @inner4(i32* %a, i32* %b, i1 %pred) {
+  %1 = load i32, i32* %a
+  br i1 %pred, label %cond_true, label %cond_false
+
+cond_true:
+  store i32 %1, i32 * %b ; This store does not clobber the first load.
+  br label %cond_false
+
+cond_false:
+  %2 = load i32, i32* %a
+  call void @pad()
+  %3 = load i32, i32* %a
+  %4 = load i32, i32* %a
+  ret void
+}
+
+
+; inner5 should be inlined: @llvm.fabs.f64 is readnone, so the intrinsic call
+; site only-reads-memory and should not disable load elimination.
+define void @outer5(i32* %a, double %b) {
+; CHECK-LABEL: @outer5(
+; CHECK-NOT: call void @inner5
+  call void @inner5(i32* %a, double %b)
+  ret void
+}
+
+declare double @llvm.fabs.f64(double) nounwind readnone
+
+define void @inner5(i32* %a, double %b) {
+  %1 = load i32, i32* %a
+  %2 = call double @llvm.fabs.f64(double %b) ; This intrinsic does not clobber the first load.
+  %3 = load i32, i32* %a
+  call void @pad()
+  ret void
+}
+
+; inner6 should be inlined: lifetime.start is an assume-like intrinsic, which
+; the cost analysis exempts from clobbering even though it is not readonly
+; (see the isAssumeLikeIntrinsic check in the default intrinsic case).
+define void @outer6(i32* %a, i8* %ptr) {
+; CHECK-LABEL: @outer6(
+; CHECK-NOT: call void @inner6
+  call void @inner6(i32* %a, i8* %ptr)
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) argmemonly nounwind
+
+define void @inner6(i32* %a, i8* %ptr) {
+  %1 = load i32, i32* %a
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* %ptr) ; This intrinsic does not clobber the first load.
+  %2 = load i32, i32* %a
+  call void @pad()
+  %3 = load i32, i32* %a
+  ret void
+}
+
+; inner7 should be inlined: @ext2 is readnone, so the direct call cannot
+; write memory and load elimination stays enabled for the reload of %a.
+define void @outer7(i32* %a) {
+; CHECK-LABEL: @outer7(
+; CHECK-NOT: call void @inner7
+  call void @inner7(i32* %a)
+  ret void
+}
+
+declare void @ext2() readnone
+
+define void @inner7(i32* %a) {
+  %1 = load i32, i32* %a
+  call void @ext2() ; This call does not clobber the first load.
+  %2 = load i32, i32* %a
+  ret void
+}
+
+
+; inner8 should be inlined: the indirect callee %f resolves to the readnone
+; @ext2 at this call site, so load elimination survives; the run of @pad calls
+; presumably sizes the body so the decision hinges on that savings.
+define void @outer8(i32* %a) {
+; CHECK-LABEL: @outer8(
+; CHECK-NOT: call void @inner8
+  call void @inner8(i32* %a, void ()* @ext2)
+  ret void
+}
+
+define void @inner8(i32* %a, void ()* %f) {
+  %1 = load i32, i32* %a
+  call void %f() ; This indirect call does not clobber the first load.
+  %2 = load i32, i32* %a
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  ret void
+}
+
+
+; inner9 must not be inlined: same shape as inner8, but %f resolves to @ext,
+; which has no memory attributes — the indirect call may write memory, so
+; load elimination is disabled and the cost stays above the threshold.
+define void @outer9(i32* %a) {
+; CHECK-LABEL: @outer9(
+; CHECK: call void @inner9
+  call void @inner9(i32* %a, void ()* @ext)
+  ret void
+}
+
+define void @inner9(i32* %a, void ()* %f) {
+  %1 = load i32, i32* %a
+  call void %f() ; This indirect call clobbers the first load.
+  %2 = load i32, i32* %a
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  ret void
+}