OSDN Git Service

[llvm-exegesis] Improve documentation.
[android-x86/external-llvm.git] / tools / llvm-exegesis / llvm-exegesis.cpp
1 //===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// Measures execution properties (latencies/uops) of an instruction.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "lib/Analysis.h"
16 #include "lib/BenchmarkResult.h"
17 #include "lib/BenchmarkRunner.h"
18 #include "lib/Clustering.h"
19 #include "lib/Latency.h"
20 #include "lib/LlvmState.h"
21 #include "lib/PerfHelper.h"
22 #include "lib/Uops.h"
23 #include "lib/X86.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/MC/MCInstBuilder.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCSubtargetInfo.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Format.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/TargetRegistry.h"
33 #include "llvm/Support/TargetSelect.h"
34 #include <algorithm>
35 #include <random>
36 #include <string>
37 #include <unordered_map>
38
39 static llvm::cl::opt<unsigned>
40     OpcodeIndex("opcode-index", llvm::cl::desc("opcode to measure, by index"),
41                 llvm::cl::init(0));
42
43 static llvm::cl::opt<std::string>
44     OpcodeName("opcode-name", llvm::cl::desc("opcode to measure, by name"),
45                llvm::cl::init(""));
46
47 static llvm::cl::opt<std::string>
48     BenchmarkFile("benchmarks-file", llvm::cl::desc(""), llvm::cl::init("-"));
49
50 enum class BenchmarkModeE { Latency, Uops, Analysis };
51 static llvm::cl::opt<BenchmarkModeE> BenchmarkMode(
52     "mode", llvm::cl::desc("the mode to run"),
53     llvm::cl::values(
54         clEnumValN(BenchmarkModeE::Latency, "latency", "Instruction Latency"),
55         clEnumValN(BenchmarkModeE::Uops, "uops", "Uop Decomposition"),
56         clEnumValN(BenchmarkModeE::Analysis, "analysis", "Analysis")));
57
58 static llvm::cl::opt<unsigned>
59     NumRepetitions("num-repetitions",
60                    llvm::cl::desc("number of time to repeat the asm snippet"),
61                    llvm::cl::init(10000));
62
63 static llvm::cl::opt<unsigned> AnalysisNumPoints(
64     "analysis-numpoints",
65     llvm::cl::desc("minimum number of points in an analysis cluster"),
66     llvm::cl::init(3));
67
68 static llvm::cl::opt<float>
69     AnalysisEpsilon("analysis-epsilon",
70                     llvm::cl::desc("dbscan epsilon for analysis clustering"),
71                     llvm::cl::init(0.1));
72
73 static llvm::cl::opt<std::string>
74     AnalysisClustersOutputFile("analysis-clusters-output-file",
75                                llvm::cl::desc(""), llvm::cl::init("-"));
76 static llvm::cl::opt<std::string>
77     AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
78                                       llvm::cl::desc(""), llvm::cl::init("-"));
79
80 namespace exegesis {
81
82 static unsigned GetOpcodeOrDie(const llvm::MCInstrInfo &MCInstrInfo) {
83   if (OpcodeName.empty() && (OpcodeIndex == 0))
84     llvm::report_fatal_error(
85         "please provide one and only one of 'opcode-index' or 'opcode-name'");
86   if (OpcodeIndex > 0)
87     return OpcodeIndex;
88   // Resolve opcode name -> opcode.
89   for (unsigned I = 0, E = MCInstrInfo.getNumOpcodes(); I < E; ++I)
90     if (MCInstrInfo.getName(I) == OpcodeName)
91       return I;
92   llvm::report_fatal_error(llvm::Twine("unknown opcode ").concat(OpcodeName));
93 }
94
95 void benchmarkMain() {
96   if (exegesis::pfm::pfmInitialize())
97     llvm::report_fatal_error("cannot initialize libpfm");
98
99   llvm::InitializeNativeTarget();
100   llvm::InitializeNativeTargetAsmPrinter();
101
102   // FIXME: Target-specific filter.
103   X86Filter Filter;
104
105   const LLVMState State;
106
107   // FIXME: Do not require SchedModel for latency.
108   if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
109     llvm::report_fatal_error("sched model is missing extra processor info!");
110
111   std::unique_ptr<BenchmarkRunner> Runner;
112   switch (BenchmarkMode) {
113   case BenchmarkModeE::Latency:
114     Runner = llvm::make_unique<LatencyBenchmarkRunner>(State);
115     break;
116   case BenchmarkModeE::Uops:
117     Runner = llvm::make_unique<UopsBenchmarkRunner>(State);
118     break;
119   case BenchmarkModeE::Analysis:
120     llvm_unreachable("not a benchmark");
121   }
122
123   if (NumRepetitions == 0)
124     llvm::report_fatal_error("--num-repetitions must be greater than zero");
125
126   Runner->run(GetOpcodeOrDie(State.getInstrInfo()), Filter, NumRepetitions)
127       .writeYamlOrDie(BenchmarkFile);
128   exegesis::pfm::pfmTerminate();
129 }
130
131 // Prints the results of running analysis pass `Pass` to file `OutputFilename`
132 // if OutputFilename is non-empty.
133 template <typename Pass>
134 static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
135                       const std::string &OutputFilename) {
136   if (OutputFilename.empty())
137     return;
138   if (OutputFilename != "-") {
139     llvm::errs() << "Printing " << Name << " results to file '"
140                  << OutputFilename << "'\n";
141   }
142   std::error_code ErrorCode;
143   llvm::raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
144                                   llvm::sys::fs::F_RW);
145   if (ErrorCode)
146     llvm::report_fatal_error("cannot open out file: " + OutputFilename);
147   if (auto Err = Analyzer.run<Pass>(ClustersOS))
148     llvm::report_fatal_error(std::move(Err));
149 }
150
151 static void analysisMain() {
152   // Read benchmarks.
153   const std::vector<InstructionBenchmark> Points =
154       InstructionBenchmark::readYamlsOrDie(BenchmarkFile);
155   llvm::outs() << "Parsed " << Points.size() << " benchmark points\n";
156   if (Points.empty()) {
157     llvm::errs() << "no benchmarks to analyze\n";
158     return;
159   }
160   // FIXME: Check that all points have the same triple/cpu.
161   // FIXME: Merge points from several runs (latency and uops).
162
163   llvm::InitializeNativeTarget();
164   llvm::InitializeNativeTargetAsmPrinter();
165
166   std::string Error;
167   const auto *TheTarget =
168       llvm::TargetRegistry::lookupTarget(Points[0].LLVMTriple, Error);
169   if (!TheTarget) {
170     llvm::errs() << "unknown target '" << Points[0].LLVMTriple << "'\n";
171     return;
172   }
173   const auto Clustering = llvm::cantFail(InstructionBenchmarkClustering::create(
174       Points, AnalysisNumPoints, AnalysisEpsilon));
175
176   const Analysis Analyzer(*TheTarget, Clustering);
177
178   maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
179                                             AnalysisClustersOutputFile);
180   maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>(
181       Analyzer, "sched class consistency analysis",
182       AnalysisInconsistenciesOutputFile);
183 }
184
185 } // namespace exegesis
186
187 int main(int Argc, char **Argv) {
188   llvm::cl::ParseCommandLineOptions(Argc, Argv, "");
189
190   if (BenchmarkMode == BenchmarkModeE::Analysis) {
191     exegesis::analysisMain();
192   } else {
193     exegesis::benchmarkMain();
194   }
195   return EXIT_SUCCESS;
196 }