1 //===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// Measures execution properties (latencies/uops) of an instruction.
13 //===----------------------------------------------------------------------===//
15 #include "lib/Analysis.h"
16 #include "lib/BenchmarkResult.h"
17 #include "lib/BenchmarkRunner.h"
18 #include "lib/Clustering.h"
19 #include "lib/Latency.h"
20 #include "lib/LlvmState.h"
21 #include "lib/PerfHelper.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/MC/MCInstBuilder.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCSubtargetInfo.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Format.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/TargetRegistry.h"
33 #include "llvm/Support/TargetSelect.h"
37 #include <unordered_map>
// Command-line option "opcode-index": selects the opcode to measure by its
// index. GetOpcodeOrDie() reports a fatal error when this is 0 and
// "opcode-name" is empty.
// NOTE(review): the remaining arguments of this declaration are on lines not
// visible in this listing.
39 static llvm::cl::opt<unsigned>
40 OpcodeIndex("opcode-index", llvm::cl::desc("opcode to measure, by index"),
// Command-line option "opcode-name": selects the opcode to measure by name.
// GetOpcodeOrDie() compares this string against MCInstrInfo::getName() for
// every opcode index.
// NOTE(review): the remaining arguments of this declaration are on lines not
// visible in this listing.
43 static llvm::cl::opt<std::string>
44 OpcodeName("opcode-name", llvm::cl::desc("opcode to measure, by name"),
47 static llvm::cl::opt<std::string>
48 BenchmarkFile("benchmarks-file", llvm::cl::desc(""), llvm::cl::init("-"));
// Modes the tool can run in. main() calls analysisMain() for Analysis;
// benchmarkMain() builds a runner for Latency and Uops and treats Analysis as
// unreachable.
50 enum class BenchmarkModeE { Latency, Uops, Analysis };
// Command-line option "mode": selects a BenchmarkModeE value through the
// spellings "latency", "uops" and "analysis".
// NOTE(review): the line between the description and the first enumerator is
// not visible in this listing.
51 static llvm::cl::opt<BenchmarkModeE> BenchmarkMode(
52 "mode", llvm::cl::desc("the mode to run"),
54 clEnumValN(BenchmarkModeE::Latency, "latency", "Instruction Latency"),
55 clEnumValN(BenchmarkModeE::Uops, "uops", "Uop Decomposition"),
56 clEnumValN(BenchmarkModeE::Analysis, "analysis", "Analysis")));
58 static llvm::cl::opt<unsigned>
59 NumRepetitions("num-repetitions",
60 llvm::cl::desc("number of time to repeat the asm snippet"),
61 llvm::cl::init(10000));
// Command-line option giving the minimum number of points in an analysis
// cluster. analysisMain() forwards it to
// InstructionBenchmarkClustering::create().
// NOTE(review): the option's name string and its remaining arguments are on
// lines not visible in this listing.
63 static llvm::cl::opt<unsigned> AnalysisNumPoints(
65 llvm::cl::desc("minimum number of points in an analysis cluster"),
// Command-line option "analysis-epsilon": the dbscan epsilon used for analysis
// clustering. analysisMain() forwards it to
// InstructionBenchmarkClustering::create().
// NOTE(review): the remaining arguments of this declaration are on lines not
// visible in this listing.
68 static llvm::cl::opt<float>
69 AnalysisEpsilon("analysis-epsilon",
70 llvm::cl::desc("dbscan epsilon for analysis clustering"),
73 static llvm::cl::opt<std::string>
74 AnalysisClustersOutputFile("analysis-clusters-output-file",
75 llvm::cl::desc(""), llvm::cl::init("-"));
76 static llvm::cl::opt<std::string>
77 AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
78 llvm::cl::desc(""), llvm::cl::init("-"));
// Resolves the opcode selected on the command line against `MCInstrInfo`.
// Reports a fatal error when "opcode-name" is empty and "opcode-index" is 0,
// and reports "unknown opcode <name>" after the name scan.
// NOTE(review): several lines of this function are not visible in this listing
// (whatever handles a non-zero "opcode-index", the statement controlled by the
// name-match `if`, and the closing braces), so the return paths cannot be
// confirmed from here.
82 static unsigned GetOpcodeOrDie(const llvm::MCInstrInfo &MCInstrInfo) {
// NOTE(review): the message says "one and only one", but the visible condition
// only rejects the case where neither option was given; confirm whether
// passing both is meant to be an error as well.
83 if (OpcodeName.empty() && (OpcodeIndex == 0))
84 llvm::report_fatal_error(
85 "please provide one and only one of 'opcode-index' or 'opcode-name'");
88 // Resolve opcode name -> opcode.
// Linear scan over every opcode index in [0, getNumOpcodes()).
89 for (unsigned I = 0, E = MCInstrInfo.getNumOpcodes(); I < E; ++I)
90 if (MCInstrInfo.getName(I) == OpcodeName)
// Presumably reached only when no opcode name matched (the body of the match
// branch is not visible here).
92 llvm::report_fatal_error(llvm::Twine("unknown opcode ").concat(OpcodeName));
// Runs a latency or uops benchmark for the opcode chosen on the command line
// and writes the result as YAML to `BenchmarkFile`.
// NOTE(review): several lines of this function are not visible in this listing
// (among them the declaration of `Filter`, whatever ends each switch case, and
// the closing braces).
95 void benchmarkMain() {
// A truthy return value from pfmInitialize() is treated as failure.
96 if (exegesis::pfm::pfmInitialize())
97 llvm::report_fatal_error("cannot initialize libpfm");
99 llvm::InitializeNativeTarget();
100 llvm::InitializeNativeTargetAsmPrinter();
102 // FIXME: Target-specific filter.
105 const LLVMState State;
107 // FIXME: Do not require SchedModel for latency.
108 if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
109 llvm::report_fatal_error("sched model is missing extra processor info!");
// Pick the runner implementation that matches the requested mode.
111 std::unique_ptr<BenchmarkRunner> Runner;
112 switch (BenchmarkMode) {
113 case BenchmarkModeE::Latency:
114 Runner = llvm::make_unique<LatencyBenchmarkRunner>(State);
116 case BenchmarkModeE::Uops:
117 Runner = llvm::make_unique<UopsBenchmarkRunner>(State);
// main() sends analysis mode to analysisMain(), so it must never reach here.
119 case BenchmarkModeE::Analysis:
120 llvm_unreachable("not a benchmark");
123 if (NumRepetitions == 0)
124 llvm::report_fatal_error("--num-repetitions must be greater than zero");
// `Filter` is declared on a line that is not visible in this listing.
126 Runner->run(GetOpcodeOrDie(State.getInstrInfo()), Filter, NumRepetitions)
127 .writeYamlOrDie(BenchmarkFile);
128 exegesis::pfm::pfmTerminate();
131 // Prints the results of running analysis pass `Pass` to file `OutputFilename`
132 // if OutputFilename is non-empty.
// In the visible code `Name` is used only in the progress message written to
// llvm::errs(). Failure to open the file and an error returned by
// Analyzer.run<Pass>() are both reported with llvm::report_fatal_error.
// NOTE(review): some lines are not visible in this listing (the statement under
// the empty-name check, the condition guarding the "cannot open out file"
// error, and the closing braces).
133 template <typename Pass>
134 static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
135 const std::string &OutputFilename) {
136 if (OutputFilename.empty())
// The progress message is skipped for "-"; that name is handed to
// raw_fd_ostream unchanged (per LLVM's documentation it denotes stdout).
138 if (OutputFilename != "-") {
139 llvm::errs() << "Printing " << Name << " results to file '"
140 << OutputFilename << "'\n";
142 std::error_code ErrorCode;
// The stream is named ClustersOS but is used for whichever `Pass` is requested.
143 llvm::raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
144 llvm::sys::fs::F_RW);
146 llvm::report_fatal_error("cannot open out file: " + OutputFilename);
147 if (auto Err = Analyzer.run<Pass>(ClustersOS))
148 llvm::report_fatal_error(std::move(Err));
// Reads benchmark points from `BenchmarkFile`, clusters them using
// `AnalysisNumPoints` and `AnalysisEpsilon`, and runs the PrintClusters and
// PrintSchedClassInconsistencies passes through maybeRunAnalysis().
// NOTE(review): several lines of this function are not visible in this listing
// (among them the declaration of `Error`, what follows each diagnostic, the
// condition guarding the "unknown target" message, and the closing braces).
151 static void analysisMain() {
153 const std::vector<InstructionBenchmark> Points =
154 InstructionBenchmark::readYamlsOrDie(BenchmarkFile);
155 llvm::outs() << "Parsed " << Points.size() << " benchmark points\n";
156 if (Points.empty()) {
157 llvm::errs() << "no benchmarks to analyze\n";
160 // FIXME: Check that all points have the same triple/cpu.
161 // FIXME: Merge points from several runs (latency and uops).
163 llvm::InitializeNativeTarget();
164 llvm::InitializeNativeTargetAsmPrinter();
// Only the triple recorded in the first point is used for the target lookup.
167 const auto *TheTarget =
168 llvm::TargetRegistry::lookupTarget(Points[0].LLVMTriple, Error);
170 llvm::errs() << "unknown target '" << Points[0].LLVMTriple << "'\n";
// The result of create() is unwrapped with llvm::cantFail, so a clustering
// error is not reported gracefully here.
173 const auto Clustering = llvm::cantFail(InstructionBenchmarkClustering::create(
174 Points, AnalysisNumPoints, AnalysisEpsilon));
176 const Analysis Analyzer(*TheTarget, Clustering);
// Each pass writes to the file named by its own command-line option.
178 maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
179 AnalysisClustersOutputFile);
180 maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>(
181 Analyzer, "sched class consistency analysis",
182 AnalysisInconsistenciesOutputFile);
185 } // namespace exegesis
// Program entry point: parses the command line, then calls
// exegesis::analysisMain() when the mode is Analysis; exegesis::benchmarkMain()
// is the other entry the visible code calls.
// NOTE(review): the lines between the two calls and the end of the function are
// not visible in this listing; presumably benchmarkMain() sits in an else
// branch. Confirm against the full file.
187 int main(int Argc, char **Argv) {
188 llvm::cl::ParseCommandLineOptions(Argc, Argv, "");
190 if (BenchmarkMode == BenchmarkModeE::Analysis) {
191 exegesis::analysisMain();
193 exegesis::benchmarkMain();