void go();
void foo(int x) {
- // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0)
+ // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
if (x == 0)
- // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0)
+ // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1)
bar();
else
- // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0)
+ // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1)
go();
- // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0)
+ // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
}
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(0)));
}
+ ConstantInt *getIndex() const {
+ return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+ }
+
ConstantInt *getAttributes() const {
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(2)));
}
- ConstantInt *getIndex() const {
- return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+ ConstantInt *getFactor() const {
+ return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
}
};
// Like the sideeffect intrinsic defined above, this intrinsic is treated by the
// optimizer as having opaque side effects so that it won't be get rid of or moved
// out of the block it probes.
-def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
+def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
[IntrInaccessibleMemOnly, IntrWillReturn]>;
// Intrinsics to support half precision floating point format
#include "llvm/ADT/Optional.h"
#include <cassert>
#include <cstdint>
+#include <limits>
namespace llvm {
class Instruction;
+class BasicBlock;
constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc";
enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
+// The saturated distribution factor representing 100% for block probes.
+constexpr static uint64_t PseudoProbeFullDistributionFactor =
+ std::numeric_limits<uint64_t>::max();
+
struct PseudoProbeDwarfDiscriminator {
+public:
// The following APIs encodes/decodes per-probe information to/from a
// 32-bit integer which is organized as:
// [2:0] - 0x7, this is reserved for regular discriminator,
// see DWARF discriminator encoding rule
// [18:3] - probe id
- // [25:19] - reserved
+ // [25:19] - probe distribution factor
// [28:26] - probe type, see PseudoProbeType
// [31:29] - reserved for probe attributes
- static uint32_t packProbeData(uint32_t Index, uint32_t Type) {
+ // Packs a probe index, probe type, attribute flags and distribution factor
+ // (an integral percentage, at most 100) into a 32-bit pseudo-probe
+ // discriminator following the bit layout documented above. The low three
+ // bits are always set to 0x7.
+ static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags,
+ uint32_t Factor) {
assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16");
assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7");
- return (Index << 3) | (Type << 26) | 0x7;
+ assert(Flags <= 0x7 && "Probe flags too big to encode, exceeding 7");
+ assert(Factor <= 100 &&
+ "Probe distribution factor too big to encode, exceeding 100");
+ // Flags go into bits [31:29], the same bits extractProbeAttributes() reads,
+ // so attributes survive an extract/re-pack round trip.
+ return (Index << 3) | (Factor << 19) | (Type << 26) | (Flags << 29) | 0x7;
}
static uint32_t extractProbeIndex(uint32_t Value) {
static uint32_t extractProbeAttributes(uint32_t Value) {
return (Value >> 29) & 0x7;
}
+
+ static uint32_t extractProbeFactor(uint32_t Value) {
+ return (Value >> 19) & 0x7F;
+ }
+
+ // The saturated distribution factor representing 100% for callsites.
+ constexpr static uint8_t FullDistributionFactor = 100;
};
struct PseudoProbe {
uint32_t Id;
uint32_t Type;
uint32_t Attr;
+ float Factor;
};
Optional<PseudoProbe> extractProbe(const Instruction &Inst);
+void setProbeDistributionFactor(Instruction &Inst, float Factor);
+
} // end namespace llvm
#endif // LLVM_IR_PSEUDOPROBE_H
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include <string>
#include <utility>
OptBisectInstrumentation OptBisect;
PreservedCFGCheckerInstrumentation PreservedCFGChecker;
IRChangedPrinter PrintChangedIR;
+ PseudoProbeVerifier PseudoProbeVerification;
VerifyInstrumentation Verify;
bool VerifyEach;
return SortedTargets;
}
+ /// Prorate call targets by a distribution factor.
+ ///
+ /// Builds and returns a new map in which the count of every entry of
+ /// \p Targets is multiplied by \p DistributionFactor. \p Targets itself is
+ /// not modified. The result is returned by non-const value so that callers
+ /// can move from it instead of copying.
+ static CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
+ float DistributionFactor) {
+ CallTargetMap AdjustedTargets;
+ for (const auto &I : Targets) {
+ AdjustedTargets[I.first()] = I.second * DistributionFactor;
+ }
+ return AdjustedTargets;
+ }
+
/// Merge the samples in \p Other into this record.
/// Optionally scale sample counts by \p Weight.
sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {
#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/ProfileData/SampleProf.h"
using namespace sampleprof;
using BlockIdMap = std::unordered_map<BasicBlock *, uint32_t>;
using InstructionIdMap = std::unordered_map<Instruction *, uint32_t>;
+using ProbeFactorMap = std::unordered_map<uint64_t, float>;
+using FuncProbeFactorMap = StringMap<ProbeFactorMap>;
enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid };
uint64_t getFunctionHash() const { return FunctionHash; }
};
+// A pseudo probe verifier that can be run after each IR pass to detect the
+// violation of updating probe factors. In principle, the sum of distribution
+// factor for a probe should be identical before and after a pass. For a
+// function pass, the factor sum for a probe would be typically 100%.
+class PseudoProbeVerifier {
+public:
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+ // Implementation of pass instrumentation callbacks for new pass manager.
+ void runAfterPass(StringRef PassID, Any IR);
+
+private:
+ // Allow a little bias due to the rounding to integral factors.
+ constexpr static float DistributionFactorVariance = 0.02;
+ // Distribution factors from last pass.
+ FuncProbeFactorMap FunctionProbeFactors;
+
+ void collectProbeFactors(const BasicBlock *BB, ProbeFactorMap &ProbeFactors);
+ void runAfterPass(const Module *M);
+ void runAfterPass(const LazyCallGraph::SCC *C);
+ void runAfterPass(const Function *F);
+ void runAfterPass(const Loop *L);
+ bool shouldVerifyFunction(const Function *F);
+ void verifyProbeFactors(const Function *F,
+ const ProbeFactorMap &ProbeFactors);
+};
+
// This class serves sample counts correlation for SampleProfileLoader by
// analyzing pseudo probes and their function descriptors injected by
// SampleProfileProber.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
+class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> {
+ void runOnFunction(Function &F, FunctionAnalysisManager &FAM);
+
+public:
+ PseudoProbeUpdatePass() {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
Probe.Attr =
PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator);
+ Probe.Factor =
+ PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) /
+ (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor;
return Probe;
}
}
Probe.Id = II->getIndex()->getZExtValue();
Probe.Type = (uint32_t)PseudoProbeType::Block;
Probe.Attr = II->getAttributes()->getZExtValue();
+ Probe.Factor = II->getFactor()->getZExtValue() /
+ (float)PseudoProbeFullDistributionFactor;
return Probe;
}
return None;
}
+
+// Sets the distribution factor of the probe attached to \p Inst.
+//
+// \p Factor must be in [0, 1.0]. Two kinds of instructions are handled:
+//  - a PseudoProbeInst: the factor is scaled to
+//    PseudoProbeFullDistributionFactor and stored in the factor operand;
+//  - a non-intrinsic call whose debug location carries a pseudo-probe
+//    discriminator: the factor is scaled to
+//    PseudoProbeDwarfDiscriminator::FullDistributionFactor and re-packed
+//    into the discriminator together with the existing index, type and
+//    attributes.
+// Any other instruction is left untouched.
+void setProbeDistributionFactor(Instruction &Inst, float Factor) {
+  assert(Factor >= 0 && Factor <= 1 &&
+         "Distribution factor must be in [0, 1.0]");
+  if (auto *II = dyn_cast<PseudoProbeInst>(&Inst)) {
+    IRBuilder<> Builder(&Inst);
+    uint64_t IntFactor = PseudoProbeFullDistributionFactor;
+    if (Factor < 1)
+      IntFactor *= Factor;
+    auto OrigFactor = II->getFactor()->getZExtValue();
+    // Overwrite only the factor operand (argument 3, the one getFactor()
+    // reads). Replacing by value would also rewrite any other operand that
+    // happens to be the same constant as the old factor.
+    if (IntFactor != OrigFactor)
+      II->setArgOperand(3, Builder.getInt64(IntFactor));
+  } else if (isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst)) {
+    if (const DebugLoc &DLoc = Inst.getDebugLoc()) {
+      const DILocation *DIL = DLoc;
+      auto Discriminator = DIL->getDiscriminator();
+      if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
+        auto Index =
+            PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
+        auto Type =
+            PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
+        auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(
+            Discriminator);
+        // Round small factors to 0 to avoid over-counting.
+        uint32_t IntFactor =
+            PseudoProbeDwarfDiscriminator::FullDistributionFactor;
+        if (Factor < 1)
+          IntFactor *= Factor;
+        uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+            Index, Type, Attr, IntFactor);
+        DIL = DIL->cloneWithDiscriminator(V);
+        Inst.setDebugLoc(DIL);
+      }
+    }
+  }
+}
} // namespace llvm
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+ MPM.addPass(PseudoProbeUpdatePass());
+
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
if (PTO.Coroutines)
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+ MPM.addPass(PseudoProbeUpdatePass());
+
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
MODULE_PASS("memprof-module", ModuleMemProfilerPass())
MODULE_PASS("poison-checking", PoisonCheckingPass())
+MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
#undef MODULE_PASS
#ifndef CGSCC_ANALYSIS
OptBisect.registerCallbacks(PIC);
PreservedCFGChecker.registerCallbacks(PIC);
PrintChangedIR.registerCallbacks(PIC);
+ PseudoProbeVerification.registerCallbacks(PIC);
if (VerifyEach)
Verify.registerCallbacks(PIC);
}
STATISTIC(NumMismatchedProfile,
"Number of functions with CFG mismatched profile");
STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
+STATISTIC(NumDuplicatedInlinesite,
+ "Number of inlined callsites with a partial distribution factor");
STATISTIC(NumCSInlinedHitMinLimit,
"Number of functions with FDO inline stopped due to min size limit");
struct InlineCandidate {
CallBase *CallInstr;
const FunctionSamples *CalleeSamples;
+ // Prorated callsite count, which will be used to guide inlining. For example,
+ // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
+ // copies will get their own distribution factors and their prorated counts
+ // will be used to decide if they should be inlined independently.
uint64_t CallsiteCount;
+ // Call site distribution factor to prorate the profile samples for a
+ // duplicated callsite. Default value is 1.0.
+ float CallsiteDistribution;
};
// Inline candidate comparer using call site weight
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
// Attempt to promote indirect call and also inline the promoted call
bool tryPromoteAndInlineCandidate(
- Function &F, InlineCandidate &Candidate, uint64_t &Sum,
- DenseSet<Instruction *> &PromotedInsns,
+ Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
+ uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
if (R) {
- uint64_t Samples = R.get();
+ uint64_t Samples = R.get() * Probe->Factor;
bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
if (FirstMark) {
ORE->emit([&]() {
Remark << "Applied " << ore::NV("NumSamples", Samples);
Remark << " samples from profile (ProbeId=";
Remark << ore::NV("ProbeId", Probe->Id);
+ Remark << ", Factor=";
+ Remark << ore::NV("Factor", Probe->Factor);
+ Remark << ", OriginalSamples=";
+ Remark << ore::NV("OriginalSamples", R.get());
Remark << ")";
return Remark;
});
}
-
LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
- << " - weight: " << R.get() << ")\n");
+ << " - weight: " << R.get() << " - factor: "
+ << format("%0.2f", Probe->Factor) << ")\n");
return Samples;
}
return R;
/// \param InlinedCallSite Output vector for new call sites exposed after
/// inlining.
bool SampleProfileLoader::tryPromoteAndInlineCandidate(
- Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+ Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
DenseSet<Instruction *> &PromotedInsns,
SmallVector<CallBase *, 8> *InlinedCallSite) {
const char *Reason = "Callee function not available";
Candidate.CallsiteCount, Sum, false, ORE);
if (DI) {
Sum -= Candidate.CallsiteCount;
+ // Prorate the indirect callsite distribution.
+ // Do not update the promoted direct callsite distribution at this
+ // point since the original distribution combined with the callee
+ // profile will be used to prorate callsites from the callee if
+ // inlined. Once not inlined, the direct callsite distribution should
+ // be prorated so that it will reflect the real callsite counts.
+ setProbeDistributionFactor(*Candidate.CallInstr,
+ Candidate.CallsiteDistribution * Sum /
+ SumOrigin);
PromotedInsns.insert(Candidate.CallInstr);
Candidate.CallInstr = DI;
- if (isa<CallInst>(DI) || isa<InvokeInst>(DI))
- return tryInlineCandidate(Candidate, InlinedCallSite);
+ if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
+ bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
+ if (!Inlined) {
+ // Prorate the direct callsite distribution so that it reflects real
+ // callsite counts.
+ setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution *
+ Candidate.CallsiteCount /
+ SumOrigin);
+ }
+ return Inlined;
+ }
}
} else {
LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
}
for (CallBase *I : CIS) {
Function *CalledFunction = I->getCalledFunction();
- InlineCandidate Candidate = {I,
- LocalNotInlinedCallSites.count(I)
- ? LocalNotInlinedCallSites[I]
- : nullptr,
- 0 /* dummy count */};
+ InlineCandidate Candidate = {
+ I,
+ LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
+ : nullptr,
+ 0 /* dummy count */, 1.0 /* dummy distribution factor */};
// Do not inline recursive calls.
if (CalledFunction == &F)
continue;
continue;
uint64_t Sum;
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
+ uint64_t SumOrigin = Sum;
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
PSI->getOrCompHotCountThreshold());
if (!callsiteIsHot(FS, PSI))
continue;
- Candidate = {I, FS, FS->getEntrySamples()};
- if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) {
+ Candidate = {I, FS, FS->getEntrySamples(), 1.0};
+ if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+ PromotedInsns)) {
LocalNotInlinedCallSites.erase(I);
LocalChanged = true;
}
if (ProfileIsCS)
ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
++NumCSInlined;
+
+ // Prorate inlined probes for a duplicated inlining callsite which probably
+ // has a distribution less than 100%. Samples for an inlinee should be
+ // distributed among the copies of the original callsite based on each
+ // callsite's distribution factor for counts accuracy. Note that an inlined
+ // probe may come with its own distribution factor if it has been duplicated
+ // in the inlinee body. The two factors are multiplied to reflect the
+ // aggregation of duplication.
+ if (Candidate.CallsiteDistribution < 1) {
+ for (auto &I : IFI.InlinedCallSites) {
+ if (Optional<PseudoProbe> Probe = extractProbe(*I))
+ setProbeDistributionFactor(*I, Probe->Factor *
+ Candidate.CallsiteDistribution);
+ }
+ NumDuplicatedInlinesite++;
+ }
+
return true;
}
return false;
if (!CalleeSamples)
return false;
+ float Factor = 1.0;
+ if (Optional<PseudoProbe> Probe = extractProbe(*CB))
+ Factor = Probe->Factor;
+
uint64_t CallsiteCount = 0;
ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
if (Weight)
CallsiteCount = Weight.get();
if (CalleeSamples)
- CallsiteCount = std::max(CallsiteCount, CalleeSamples->getEntrySamples());
+ CallsiteCount = std::max(
+ CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
- *NewCandidate = {CB, CalleeSamples, CallsiteCount};
+ *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
return true;
}
uint64_t Sum;
auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
uint64_t SumOrigin = Sum;
+ Sum *= Candidate.CallsiteDistribution;
for (const auto *FS : CalleeSamples) {
// TODO: Consider disable pre-lTO ICP for MonoLTO as well
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
PSI->getOrCompHotCountThreshold());
continue;
}
- uint64_t EntryCountDistributed = FS->getEntrySamples();
+ uint64_t EntryCountDistributed =
+ FS->getEntrySamples() * Candidate.CallsiteDistribution;
// In addition to regular inline cost check, we also need to make sure
// ICP isn't introducing excessive speculative checks even if individual
// target looks beneficial to promote and inline. That means we should
SmallVector<CallBase *, 8> InlinedCallSites;
// Attach function profile for promoted indirect callee, and update
// call site count for the promoted inline candidate too.
- Candidate = {I, FS, EntryCountDistributed};
- if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns,
- &InlinedCallSites)) {
+ Candidate = {I, FS, EntryCountDistributed,
+ Candidate.CallsiteDistribution};
+ if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+ PromotedInsns, &InlinedCallSites)) {
for (auto *CB : InlinedCallSites) {
if (getInlineCandidate(&NewCandidate, CB))
CQueue.emplace(NewCandidate);
auto T = FS->findCallTargetMapAt(CallSite);
if (!T || T.get().empty())
continue;
+ // Prorate the callsite counts to reflect what is already done to the
+ // callsite, such as ICP or callsite cloning.
+ if (FunctionSamples::ProfileIsProbeBased) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+ if (Probe->Factor < 1)
+ T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
+ }
+ }
SmallVector<InstrProfValueData, 2> SortedCallTargets =
GetSortedValueDataFromCallTargets(T.get());
uint64_t Sum;
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CRC.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <unordered_set>
#include <vector>
using namespace llvm;
STATISTIC(ArtificialDbgLine,
"Number of probes that have an artificial debug line");
+static cl::opt<bool>
+ VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
+ cl::desc("Do pseudo probe verification"));
+
+static cl::list<std::string> VerifyPseudoProbeFuncList(
+ "verify-pseudo-probe-funcs", cl::Hidden,
+ cl::desc("The option to specify the name of the functions to verify."));
+
+static cl::opt<bool>
+ UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
+ cl::desc("Update pseudo probe distribution factor"));
+
+bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
+ // Skip function declaration.
+ if (F->isDeclaration())
+ return false;
+ // Skip function that will not be emitted into object file. The prevailing
+ // definition will be verified instead.
+ if (F->hasAvailableExternallyLinkage())
+ return false;
+ // Do a name matching.
+ static std::unordered_set<std::string> VerifyFuncNames(
+ VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
+ return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
+}
+
+void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
+ if (VerifyPseudoProbe) {
+ PIC.registerAfterPassCallback(
+ [this](StringRef P, Any IR, const PreservedAnalyses &) {
+ this->runAfterPass(P, IR);
+ });
+ }
+}
+
+// Callback to run after each transformation for the new pass manager.
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
+ std::string Banner =
+ "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
+ dbgs() << Banner;
+ if (any_isa<const Module *>(IR))
+ runAfterPass(any_cast<const Module *>(IR));
+ else if (any_isa<const Function *>(IR))
+ runAfterPass(any_cast<const Function *>(IR));
+ else if (any_isa<const LazyCallGraph::SCC *>(IR))
+ runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
+ else if (any_isa<const Loop *>(IR))
+ runAfterPass(any_cast<const Loop *>(IR));
+ else
+ llvm_unreachable("Unknown IR unit");
+}
+
+void PseudoProbeVerifier::runAfterPass(const Module *M) {
+ for (const Function &F : *M)
+ runAfterPass(&F);
+}
+
+void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
+ for (const LazyCallGraph::Node &N : *C)
+ runAfterPass(&N.getFunction());
+}
+
+void PseudoProbeVerifier::runAfterPass(const Function *F) {
+ if (!shouldVerifyFunction(F))
+ return;
+ ProbeFactorMap ProbeFactors;
+ for (const auto &BB : *F)
+ collectProbeFactors(&BB, ProbeFactors);
+ verifyProbeFactors(F, ProbeFactors);
+}
+
+void PseudoProbeVerifier::runAfterPass(const Loop *L) {
+ const Function *F = L->getHeader()->getParent();
+ runAfterPass(F);
+}
+
+void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
+ ProbeFactorMap &ProbeFactors) {
+ for (const auto &I : *Block) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I))
+ ProbeFactors[Probe->Id] += Probe->Factor;
+ }
+}
+
+// Compares the per-probe factor sums in \p ProbeFactors against the sums
+// recorded for \p F the last time it was verified, and reports to dbgs()
+// every probe whose sum moved by more than DistributionFactorVariance.
+// The recorded sums are then refreshed with the current values.
+void PseudoProbeVerifier::verifyProbeFactors(
+    const Function *F, const ProbeFactorMap &ProbeFactors) {
+  ProbeFactorMap &LastFactors = FunctionProbeFactors[F->getName()];
+  bool HeaderShown = false;
+  for (const auto &Entry : ProbeFactors) {
+    const auto ProbeId = Entry.first;
+    const float Current = Entry.second;
+
+    // Only probes that were seen before can be compared.
+    auto Prev = LastFactors.find(ProbeId);
+    if (Prev != LastFactors.end()) {
+      const float Previous = Prev->second;
+      if (std::abs(Current - Previous) > DistributionFactorVariance) {
+        // Print the function name once, ahead of its first mismatch.
+        if (!HeaderShown) {
+          dbgs() << "Function " << F->getName() << ":\n";
+          HeaderShown = true;
+        }
+        dbgs() << "Probe " << ProbeId << "\tprevious factor "
+               << format("%0.2f", Previous) << "\tcurrent factor "
+               << format("%0.2f", Current) << "\n";
+      }
+    }
+
+    // Remember the current sum for the next verification round.
+    LastFactors[ProbeId] = Current;
+  }
+}
+
PseudoProbeManager::PseudoProbeManager(const Module &M) {
if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
for (const auto *Operand : FuncInfo->operands()) {
Function *ProbeFn =
llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
- Builder.getInt32(0)};
+ Builder.getInt32(0),
+ Builder.getInt64(PseudoProbeFullDistributionFactor)};
auto *Probe = Builder.CreateCall(ProbeFn, Args);
AssignDebugLoc(Probe);
}
// Levarge the 32-bit discriminator field of debug data to store the ID and
// type of a callsite probe. This gets rid of the dependency on plumbing a
// customized metadata through the codegen pipeline.
- uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type);
+ uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+ Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
if (auto DIL = Call->getDebugLoc()) {
DIL = DIL->cloneWithDiscriminator(V);
Call->setDebugLoc(DIL);
return PreservedAnalyses::none();
}
+
+// Redistributes probe factors within \p F according to block profile counts.
+//
+// For every probe id found in the function, the profile counts of all blocks
+// holding a copy of that probe are summed. Each copy then gets the factor
+// (count of its block) / (sum for its probe id). Probes whose sum is zero
+// are left unchanged.
+void PseudoProbeUpdatePass::runOnFunction(Function &F,
+                                          FunctionAnalysisManager &FAM) {
+  BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+  // Profile count of a block, or 0 when BFI has none. BFI is queried once.
+  auto BBProfileCount = [&BFI](BasicBlock *BB) -> uint64_t {
+    if (auto Count = BFI.getBlockProfileCount(BB))
+      return *Count;
+    return 0;
+  };
+
+  // Collect the sum of execution weight for each probe.
+  ProbeFactorMap ProbeFactors;
+  for (auto &Block : F) {
+    // The count is a property of the block, so compute it once per block.
+    const uint64_t BlockCount = BBProfileCount(&Block);
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I))
+        ProbeFactors[Probe->Id] += BlockCount;
+    }
+  }
+
+  // Fix up over-counted probes.
+  for (auto &Block : F) {
+    const uint64_t BlockCount = BBProfileCount(&Block);
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+        float Sum = ProbeFactors[Probe->Id];
+        if (Sum != 0)
+          setProbeDistributionFactor(I, BlockCount / Sum);
+      }
+    }
+  }
+}
+
+// Module-level entry point: when -update-pseudo-probe is enabled, runs
+// runOnFunction() on every function definition in \p M. Always reports that
+// no analyses are preserved.
+PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
+                                             ModuleAnalysisManager &AM) {
+  if (UpdatePseudoProbe) {
+    // The function analysis manager is the same for every function of the
+    // module, so look it up once instead of once per function.
+    FunctionAnalysisManager &FAM =
+        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+    for (auto &F : M) {
+      if (F.isDeclaration())
+        continue;
+      runOnFunction(F, FAM);
+    }
+  }
+  return PreservedAnalyses::none();
+}
--- /dev/null
+foo:3200:13
+ 1: 13
+ 2: 7
+ 3: 6
+ 4: 13
+ 5: 7
+ 6: 6
+ !CFGChecksum: 844530426352218
; RUN: llvm-objdump --section-headers %t4 | FileCheck %s --check-prefix=CHECK-OBJ
define dso_local void @foo2() !dbg !7 {
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0), !dbg ![[#]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
; CHECK-ASM: .pseudoprobe [[#GUID1:]] 1 0 0
ret void, !dbg !10
}
define dso_local void @foo() #0 !dbg !11 {
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0), !dbg ![[#]]
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL1:]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL1:]]
; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0
; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID2]]:2
call void @foo2(), !dbg !12
}
define dso_local i32 @entry() !dbg !14 {
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0), !dbg ![[#]]
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0), !dbg ![[#DL2:]]
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL3:]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1), !dbg ![[#DL2:]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL3:]]
; CHECK-ASM: .pseudoprobe [[#GUID3:]] 1 0 0
; CHECK-ASM: .pseudoprobe [[#GUID2]] 1 0 0 @ [[#GUID3]]:2
; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2
; CHECK-IL: ![[#SCOPE2:]] = distinct !DISubprogram(name: "foo"
; CHECK-IL: ![[#DL1]] = !DILocation(line: 3, column: 1, scope: ![[#SCOPE1]], inlinedAt: ![[#INL1:]])
; CHECK-IL: ![[#INL1]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1:]])
-;; A discriminator of 134217751 which is 0x8000017 in hexdecimal, stands for a direct call probe
-;; with an index of 2.
-; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 134217751)
+;; A discriminator of 186646551 which is 0xb200017 in hexadecimal, stands for a direct call probe
+;; with an index of 2 and a scale of 100%.
+; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 186646551)
; CHECK-IL: ![[#SCOPE3:]] = distinct !DISubprogram(name: "entry"
; CHECK-IL: ![[#DL2]] = !DILocation(line: 7, column: 3, scope: ![[#SCOPE2]], inlinedAt: ![[#INL2:]])
; CHECK-IL: ![[#INL2]] = distinct !DILocation(line: 11, column: 3, scope: ![[#BL2:]])
-; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 134217751)
+; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 186646551)
; CHECK-IL: ![[#DL3]] = !DILocation(line: 3, column: 1, scope: ![[#SCOPE1]], inlinedAt: ![[#INL3:]])
; CHECK-IL: ![[#INL3]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1]], inlinedAt: ![[#INL2]])
;; Check the generation of pseudoprobe intrinsic call.
+@a = dso_local global i32 0, align 4
+
define void @foo(i32 %x) !dbg !3 {
bb0:
%cmp = icmp eq i32 %x, 0
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0), !dbg ![[#FAKELINE:]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]]
; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID:]] 1 0 0
br i1 %cmp, label %bb1, label %bb2
bb1:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0), !dbg ![[#FAKELINE]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]]
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 3 0 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0
+ store i32 6, i32* @a, align 4
br label %bb3
bb2:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0), !dbg ![[#FAKELINE]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]]
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0
; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 2 0 0
; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0
+ store i32 8, i32* @a, align 4
br label %bb3
bb3:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0), !dbg ![[#REALLINE:]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1), !dbg ![[#REALLINE:]]
ret void, !dbg !12
}
define internal void @foo2(void (i32)* %f) !dbg !4 {
entry:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0
; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0
; Check pseudo_probe metadata attached to the indirect call instruction.
; CHECK-IL: ![[#FAKELINE]] = !DILocation(line: 0, scope: ![[#FOO]])
; CHECK-IL: ![[#REALLINE]] = !DILocation(line: 2, scope: ![[#FOO]])
; CHECK-IL: ![[#PROBE0]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE0:]])
-;; A discriminator of 67108887 which is 0x4000017 in hexdecimal, stands for a direct call probe
+;; A discriminator of 119537687 which is 0x7200017 in hexadecimal, stands for an indirect call probe
;; with an index of 2.
-; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108887)
+; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537687)
; CHECK-IL: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
-;; A discriminator of 134217759 which is 0x800001f in hexdecimal, stands for a direct call probe
+;; A discriminator of 186646559 which is 0xb20001f in hexadecimal, stands for a direct call probe
;; with an index of 3.
-; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 134217759)
+; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646559)
; Check the generation of .pseudo_probe_desc section
; CHECK-ASM: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo,comdat
define dso_local i32 @foo(i32 %x) #0 !dbg !12 {
entry:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1)
%add = add nsw i32 %x, 100000, !dbg !19
;; Check zen is fully inlined so there's no call to zen anymore.
;; Check code from the inlining of zen is properly annotated here.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]]
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]]
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
; CHECK-NOT: call i32 @zen
%call = call i32 @zen(i32 %add), !dbg !20
ret i32 %call, !dbg !21
; CHECK: define dso_local i32 @zen
define dso_local i32 @zen(i32 %x) #0 !dbg !22 {
entry:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1)
%cmp = icmp sgt i32 %x, 0, !dbg !26
br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28
while.cond:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
%x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ]
%cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29
br i1 %cmp1, label %while.body, label %if.end, !dbg !31
while.body:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
%0 = load volatile i32, i32* @factor, align 4, !dbg !32
%sub = sub nsw i32 %x.addr.0, %0, !dbg !39
br label %while.cond, !dbg !31
while.cond2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
%x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ]
%cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42
br i1 %cmp3, label %while.body4, label %if.end, !dbg !44
while.body4:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
%1 = load volatile i32, i32* @factor, align 4, !dbg !45
%add = add nsw i32 %x.addr.1, %1, !dbg !48
br label %while.cond2, !dbg !44
if.end:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
%x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ]
ret i32 %x.addr.2, !dbg !51
}
;YAML-NEXT: - NumSamples: '23'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '1'
+;YAML-NEXT: - String: ', Factor='
+;YAML-NEXT: - Factor: '1.000000e+00'
+;YAML-NEXT: - String: ', OriginalSamples='
+;YAML-NEXT: - OriginalSamples: '23'
;YAML-NEXT: - String: ')'
;YAML-NEXT: ...
;YAML: --- !Analysis
;YAML-NEXT: - NumSamples: '23'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '1'
+;YAML-NEXT: - String: ', Factor='
+;YAML-NEXT: - Factor: '1.000000e+00'
+;YAML-NEXT: - String: ', OriginalSamples='
+;YAML-NEXT: - OriginalSamples: '23'
;YAML-NEXT: - String: ')'
;YAML-NEXT: ...
;YAML: --- !Analysis
;YAML-NEXT: - NumSamples: '382920'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '2'
+;YAML-NEXT: - String: ', Factor='
+;YAML-NEXT: - Factor: '1.000000e+00'
+;YAML-NEXT: - String: ', OriginalSamples='
+;YAML-NEXT: - OriginalSamples: '382920'
;YAML-NEXT: - String: ')'
;YAML-NEXT: ...
store i32 %x, i32* %x.addr, align 4
%0 = load i32, i32* %x.addr, align 4
%cmp = icmp eq i32 %0, 0
- ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0)
+ ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
br i1 %cmp, label %if.then, label %if.else
; CHECK: br i1 %cmp, label %if.then, label %if.else, !prof ![[PD1:[0-9]+]]
if.then:
; CHECK: call {{.*}}, !dbg ![[#PROBE1:]], !prof ![[PROF1:[0-9]+]]
call void %f(i32 1)
- ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0)
+ ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
store i32 1, i32* %retval, align 4
br label %return
if.else:
; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]]
call void %f(i32 2)
- ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0)
+ ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
store i32 2, i32* %retval, align 4
br label %return
return:
- ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0)
+ ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
%1 = load i32, i32* %retval, align 4
ret i32 %1
}
; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7}
; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
-;; A discriminator of 119537711 which is 0x400002f in hexdecimal, stands for an indirect call probe
+;; A discriminator of 119537711 which is 0x720002f in hexadecimal, stands for an indirect call probe
;; with an index of 5.
-; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108911)
+; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
-;; A discriminator of 119537719 which is 0x4000037 in hexdecimal, stands for an indirect call probe
+;; A discriminator of 119537719 which is 0x7200037 in hexadecimal, stands for an indirect call probe
;; with an index of 6.
-; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108919)
+; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
+; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719)
; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2}
!llvm.module.flags = !{!9, !10}
;YAML-NEXT: - NumSamples: '13'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '1'
+;YAML-NEXT: - String: ', Factor='
+;YAML-NEXT: - Factor: '1.000000e+00'
+;YAML-NEXT: - String: ', OriginalSamples='
+;YAML-NEXT: - OriginalSamples: '13'
;YAML-NEXT: - String: ')'
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
;YAML-NEXT: - NumSamples: '7'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '5'
+;YAML-NEXT: - String: ', Factor='
+;YAML-NEXT: - Factor: '1.000000e+00'
+;YAML-NEXT: - String: ', OriginalSamples='
+;YAML-NEXT: - OriginalSamples: '7'
;YAML-NEXT: - String: ')'
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
;YAML-NEXT: - NumSamples: '7'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '2'
+;YAML-NEXT: - String: ', Factor='
+;YAML-NEXT: - Factor: '1.000000e+00'
+;YAML-NEXT: - String: ', OriginalSamples='
+;YAML-NEXT: - OriginalSamples: '7'
;YAML-NEXT: - String: ')'
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
;YAML-NEXT: - NumSamples: '6'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '6'
+;YAML-NEXT: - String: ', Factor='
+;YAML-NEXT: - Factor: '1.000000e+00'
+;YAML-NEXT: - String: ', OriginalSamples='
+;YAML-NEXT: - OriginalSamples: '6'
;YAML-NEXT: - String: ')'
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
;YAML-NEXT: - NumSamples: '6'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '3'
+;YAML-NEXT: - String: ', Factor='
+;YAML-NEXT: - Factor: '1.000000e+00'
+;YAML-NEXT: - String: ', OriginalSamples='
+;YAML-NEXT: - OriginalSamples: '6'
;YAML-NEXT: - String: ')'
;YAML: --- !Analysis
;YAML-NEXT: Pass: sample-profile
;YAML-NEXT: - NumSamples: '13'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
;YAML-NEXT: - ProbeId: '4'
+;YAML-NEXT: - String: ', Factor='
+;YAML-NEXT: - Factor: '1.000000e+00'
+;YAML-NEXT: - String: ', OriginalSamples='
+;YAML-NEXT: - OriginalSamples: '13'
;YAML-NEXT: - String: ')'
--- /dev/null
+; RUN: opt < %s -passes='pseudo-probe,sample-profile,jump-threading,pseudo-probe-update' -sample-profile-file=%S/Inputs/pseudo-probe-update.prof -S | FileCheck %s
+
+declare i32 @f1()
+declare i32 @f2()
+declare void @f3()
+
+
+;; This tests that the branch in 'merge' can be cloned up into T1.
+define i32 @foo(i1 %cond, i1 %cond2) #0 {
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
+ br i1 %cond, label %T1, label %F1
+T1:
+; CHECK: %v1 = call i32 @f1(), !prof ![[#PROF1:]]
+ %v1 = call i32 @f1()
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
+;; The distribution factor -8513881372706734080 stands for 53.85%, which is from 7/(6+7).
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
+ %cond3 = icmp eq i32 %v1, 412
+ br label %Merge
+F1:
+; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]]
+ %v2 = call i32 @f2()
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+;; The distribution factor 8513881922462547968 stands for 46.15%, which is from 6/(6+7).
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968)
+ br label %Merge
+Merge:
+
+ %A = phi i1 [%cond3, %T1], [%cond2, %F1]
+ %B = phi i32 [%v1, %T1], [%v2, %F1]
+ br i1 %A, label %T2, label %F2
+T2:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+ call void @f3()
+ ret i32 %B
+F2:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
+ ret i32 %B
+}
+
+; CHECK: ![[#PROF1]] = !{!"branch_weights", i32 7}
+; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6}
+
+attributes #0 = {"use-sample-profile"}
+
--- /dev/null
+; REQUIRES: x86_64-linux
+; RUN: opt < %s -passes='pseudo-probe,loop-unroll-full' -verify-pseudo-probe -S -o %t 2>&1 | FileCheck %s --check-prefix=VERIFY
+; RUN: FileCheck %s < %t
+
+; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass ***
+; VERIFY: Function foo:
+; VERIFY-DAG: Probe 6 previous factor 1.00 current factor 5.00
+; VERIFY-DAG: Probe 4 previous factor 1.00 current factor 5.00
+
+declare void @foo2() nounwind
+
+define void @foo(i32 %x) {
+bb:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
+ %tmp = alloca [5 x i32*], align 16
+ br label %bb7.preheader
+
+bb3.loopexit:
+ %spec.select.lcssa = phi i32 [ %spec.select, %bb10 ]
+ %tmp5.not = icmp eq i32 %spec.select.lcssa, 0
+ br i1 %tmp5.not, label %bb24, label %bb7.preheader
+
+bb7.preheader:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+ %tmp1.06 = phi i32 [ 5, %bb ], [ %spec.select.lcssa, %bb3.loopexit ]
+ br label %bb10
+
+bb10:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
+ %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ]
+ %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ]
+ %tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %indvars.iv
+ %tmp14 = load i32*, i32** %tmp13, align 8
+ %tmp15.not = icmp ne i32* %tmp14, null
+ %tmp18 = sext i1 %tmp15.not to i32
+ %spec.select = add nsw i32 %tmp1.14, %tmp18
+ call void @foo2(), !dbg !12
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 5
+ br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13
+
+bb24:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+ ret void
+}
+
+;; A discriminator of 186646583 which is 0xb200037 in hexadecimal, stands for a direct call probe
+;; with an index of 6 and a scale of 100%.
+; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]])
+; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, retainedNodes: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.9.0"}
+!12 = !DILocation(line: 2, column: 20, scope: !4)
+!13 = distinct !{!13, !14}
+!14 = !{!"llvm.loop.unroll.full"}