1 //===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
12 #include "Assembler.h"
13 #include "BenchmarkRunner.h"
14 #include "MCInstrDescView.h"
15 #include "PerfHelper.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCInstBuilder.h"
20 #include "llvm/Support/FormatVariadic.h"
25 struct ExecutionClass {
27 const char *Description;
28 } static const kExecutionClasses[] = {
29 {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS |
30 ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
31 "Repeating a single implicitly serial instruction"},
32 {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
33 "Repeating a single explicitly serial instruction"},
34 {ExecutionMode::SERIAL_VIA_MEMORY_INSTR |
35 ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
36 "Repeating two instructions"},
39 static constexpr size_t kMaxAliasingInstructions = 10;
41 static std::vector<Instruction>
42 computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
43 size_t MaxAliasingInstructions) {
44 // Randomly iterate the set of instructions.
45 std::vector<unsigned> Opcodes;
46 Opcodes.resize(State.getInstrInfo().getNumOpcodes());
47 std::iota(Opcodes.begin(), Opcodes.end(), 0U);
48 std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());
50 std::vector<Instruction> AliasingInstructions;
51 for (const unsigned OtherOpcode : Opcodes) {
52 if (OtherOpcode == Instr.Description->getOpcode())
54 const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
55 if (OtherInstr.hasMemoryOperands())
57 if (Instr.hasAliasingRegistersThrough(OtherInstr))
58 AliasingInstructions.push_back(std::move(OtherInstr));
59 if (AliasingInstructions.size() >= MaxAliasingInstructions)
62 return AliasingInstructions;
65 static ExecutionMode getExecutionModes(const Instruction &Instr) {
66 ExecutionMode EM = ExecutionMode::UNKNOWN;
67 if (Instr.hasAliasingImplicitRegisters())
68 EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
69 if (Instr.hasTiedRegisters())
70 EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS;
71 if (Instr.hasMemoryOperands())
72 EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
74 if (Instr.hasAliasingRegisters())
75 EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
76 if (Instr.hasOneUseOrOneDef())
77 EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
82 static void appendCodeTemplates(const LLVMState &State,
83 const Instruction &Instr,
84 ExecutionMode ExecutionModeBit,
85 llvm::StringRef ExecutionClassDescription,
86 std::vector<CodeTemplate> &CodeTemplates) {
87 assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two");
88 switch (ExecutionModeBit) {
89 case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
90 // Nothing to do, the instruction is always serial.
92 case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: {
93 // Picking whatever value for the tied variable will make the instruction
96 CT.Execution = ExecutionModeBit;
97 CT.Info = ExecutionClassDescription;
98 CT.Instructions.push_back(Instr);
99 CodeTemplates.push_back(std::move(CT));
102 case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: {
103 // Select back-to-back memory instruction.
104 // TODO: Implement me.
107 case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: {
108 // Making the execution of this instruction serial by selecting one def
109 // register to alias with one use register.
110 const AliasingConfigurations SelfAliasing(Instr, Instr);
111 assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() &&
112 "Instr must alias itself explicitly");
113 InstructionTemplate IT(Instr);
114 // This is a self aliasing instruction so defs and uses are from the same
115 // instance, hence twice IT in the following call.
116 setRandomAliasing(SelfAliasing, IT, IT);
118 CT.Execution = ExecutionModeBit;
119 CT.Info = ExecutionClassDescription;
120 CT.Instructions.push_back(std::move(IT));
121 CodeTemplates.push_back(std::move(CT));
124 case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
125 // Select back-to-back non-memory instruction.
126 for (const auto OtherInstr :
127 computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
128 const AliasingConfigurations Forward(Instr, OtherInstr);
129 const AliasingConfigurations Back(OtherInstr, Instr);
130 InstructionTemplate ThisIT(Instr);
131 InstructionTemplate OtherIT(OtherInstr);
132 if (!Forward.hasImplicitAliasing())
133 setRandomAliasing(Forward, ThisIT, OtherIT);
134 if (!Back.hasImplicitAliasing())
135 setRandomAliasing(Back, OtherIT, ThisIT);
137 CT.Execution = ExecutionModeBit;
138 CT.Info = ExecutionClassDescription;
139 CT.Instructions.push_back(std::move(ThisIT));
140 CT.Instructions.push_back(std::move(OtherIT));
141 CodeTemplates.push_back(std::move(CT));
146 llvm_unreachable("Unhandled enum value");
150 LatencySnippetGenerator::~LatencySnippetGenerator() = default;
152 llvm::Expected<std::vector<CodeTemplate>>
153 LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
154 std::vector<CodeTemplate> Results;
155 const ExecutionMode EM = getExecutionModes(Instr);
156 for (const auto EC : kExecutionClasses) {
157 for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
158 appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
160 if (!Results.empty())
164 return llvm::make_error<BenchmarkFailure>(
165 "No strategy found to make the execution serial");
166 return std::move(Results);
169 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
171 llvm::Expected<std::vector<BenchmarkMeasure>>
172 LatencyBenchmarkRunner::runMeasurements(
173 const FunctionExecutor &Executor) const {
174 // Cycle measurements include some overhead from the kernel. Repeat the
175 // measure several times and take the minimum value.
176 constexpr const int NumMeasurements = 30;
177 int64_t MinValue = std::numeric_limits<int64_t>::max();
178 const char *CounterName = State.getPfmCounters().CycleCounter;
180 llvm::report_fatal_error("sched model does not define a cycle counter");
181 for (size_t I = 0; I < NumMeasurements; ++I) {
182 auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
183 if (!ExpectedCounterValue)
184 return ExpectedCounterValue.takeError();
185 if (*ExpectedCounterValue < MinValue)
186 MinValue = *ExpectedCounterValue;
188 std::vector<BenchmarkMeasure> Result = {
189 BenchmarkMeasure::Create("latency", MinValue)};
190 return std::move(Result);
193 } // namespace exegesis