From 6bd5429ddf517c689be12c979127d6a7a4a068b0 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 29 Mar 2019 14:24:27 +0000 Subject: [PATCH] [NFC][llvm-exegesis] Refactor ResolvedSchedClass & friends Summary: `ResolvedSchedClass` will need to be used outside of `Analysis` (before `InstructionBenchmarkClustering` even), therefore promote it into a non-private top-level class, and while there also move all of the functions that are only called by `ResolvedSchedClass` into that same new file. Reviewers: courbet, gchatelet Reviewed By: courbet Subscribers: mgorny, tschuett, mgrang, jdoerfert, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59993 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357259 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-exegesis/lib/Analysis.cpp | 224 +------------------ tools/llvm-exegesis/lib/Analysis.h | 21 +- tools/llvm-exegesis/lib/CMakeLists.txt | 3 +- tools/llvm-exegesis/lib/SchedClassResolution.cpp | 243 +++++++++++++++++++++ tools/llvm-exegesis/lib/SchedClassResolution.h | 58 +++++ unittests/tools/llvm-exegesis/X86/CMakeLists.txt | 4 +- ...alysisTest.cpp => SchedClassResolutionTest.cpp} | 17 +- 7 files changed, 323 insertions(+), 247 deletions(-) create mode 100644 tools/llvm-exegesis/lib/SchedClassResolution.cpp create mode 100644 tools/llvm-exegesis/lib/SchedClassResolution.h rename unittests/tools/llvm-exegesis/X86/{AnalysisTest.cpp => SchedClassResolutionTest.cpp} (85%) diff --git a/tools/llvm-exegesis/lib/Analysis.cpp b/tools/llvm-exegesis/lib/Analysis.cpp index 632ba81b508..4e711bc2b91 100644 --- a/tools/llvm-exegesis/lib/Analysis.cpp +++ b/tools/llvm-exegesis/lib/Analysis.cpp @@ -20,16 +20,6 @@ namespace exegesis { static const char kCsvSep = ','; -static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI, - unsigned SchedClassId, - const llvm::MCInst &MCI) { - const auto &SM = STI.getSchedModel(); - while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant()) - 
SchedClassId = - STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID()); - return SchedClassId; -} - namespace { enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString }; @@ -150,9 +140,9 @@ void Analysis::printInstructionRowCsv(const size_t PointId, OS << kCsvSep; assert(!Point.Key.Instructions.empty()); const llvm::MCInst &MCI = Point.keyInstruction(); - const unsigned SchedClassId = resolveSchedClassId( - *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI); - + unsigned SchedClassId; + std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId( + *SubtargetInfo_, *InstrInfo_, MCI); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) const llvm::MCSchedClassDesc *const SCDesc = SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId); @@ -239,11 +229,11 @@ Analysis::makePointsPerSchedClass() const { // FIXME: we should be using the tuple of classes for instructions in the // snippet as key. const llvm::MCInst &MCI = Point.keyInstruction(); - unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass(); - const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel() - .getSchedClassDesc(SchedClassId) - ->isVariant(); - SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI); + unsigned SchedClassId; + bool WasVariant; + std::tie(SchedClassId, WasVariant) = + ResolvedSchedClass::resolveSchedClassId(*SubtargetInfo_, *InstrInfo_, + MCI); const auto IndexIt = SchedClassIdToIndex.find(SchedClassId); if (IndexIt == SchedClassIdToIndex.end()) { // Create a new entry. @@ -347,92 +337,6 @@ void Analysis::printSchedClassClustersHtml( OS << ""; } -// Return the non-redundant list of WriteProcRes used by the given sched class. -// The scheduling model for LLVM is such that each instruction has a certain -// number of uops which consume resources which are described by WriteProcRes -// entries. Each entry describe how many cycles are spent on a specific ProcRes -// kind. 
-// For example, an instruction might have 3 uOps, one dispatching on P0 -// (ProcResIdx=1) and two on P06 (ProcResIdx = 7). -// Note that LLVM additionally denormalizes resource consumption to include -// usage of super resources by subresources. So in practice if there exists a -// P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by -// P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed -// by P06 are also consumed by P016. In the figure below, parenthesized cycles -// denote implied usage of superresources by subresources: -// P0 P06 P016 -// uOp1 1 (1) (1) -// uOp2 1 (1) -// uOp3 1 (1) -// ============================= -// 1 3 3 -// Eventually we end up with three entries for the WriteProcRes of the -// instruction: -// {ProcResIdx=1, Cycles=1} // P0 -// {ProcResIdx=7, Cycles=3} // P06 -// {ProcResIdx=10, Cycles=3} // P016 -// -// Note that in this case, P016 does not contribute any cycles, so it would -// be removed by this function. -// FIXME: Move this to MCSubtargetInfo and use it in llvm-mca. -static llvm::SmallVector -getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc, - const llvm::MCSubtargetInfo &STI) { - llvm::SmallVector Result; - const auto &SM = STI.getSchedModel(); - const unsigned NumProcRes = SM.getNumProcResourceKinds(); - - // This assumes that the ProcResDescs are sorted in topological order, which - // is guaranteed by the tablegen backend. - llvm::SmallVector ProcResUnitUsage(NumProcRes); - for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc), - *const WPREnd = STI.getWriteProcResEnd(&SCDesc); - WPR != WPREnd; ++WPR) { - const llvm::MCProcResourceDesc *const ProcResDesc = - SM.getProcResource(WPR->ProcResourceIdx); - if (ProcResDesc->SubUnitsIdxBegin == nullptr) { - // This is a ProcResUnit. - Result.push_back({WPR->ProcResourceIdx, WPR->Cycles}); - ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles; - } else { - // This is a ProcResGroup. 
First see if it contributes any cycles or if - // it has cycles just from subunits. - float RemainingCycles = WPR->Cycles; - for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin; - SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits; - ++SubResIdx) { - RemainingCycles -= ProcResUnitUsage[*SubResIdx]; - } - if (RemainingCycles < 0.01f) { - // The ProcResGroup contributes no cycles of its own. - continue; - } - // The ProcResGroup contributes `RemainingCycles` cycles of its own. - Result.push_back({WPR->ProcResourceIdx, - static_cast(std::round(RemainingCycles))}); - // Spread the remaining cycles over all subunits. - for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin; - SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits; - ++SubResIdx) { - ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits; - } - } - } - return Result; -} - -Analysis::ResolvedSchedClass::ResolvedSchedClass( - const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId, - bool WasVariant) - : SchedClassId(ResolvedSchedClassId), SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)), - WasVariant(WasVariant), - NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)), - IdealizedProcResPressure(computeIdealizedProcResPressure( - STI.getSchedModel(), NonRedundantWriteProcRes)) { - assert((SCDesc == nullptr || !SCDesc->isVariant()) && - "ResolvedSchedClass should never be variant"); -} - void Analysis::SchedClassCluster::addPoint( size_t PointId, const InstructionBenchmarkClustering &Clustering) { PointIds.push_back(PointId); @@ -737,117 +641,5 @@ llvm::Error Analysis::run( return llvm::Error::success(); } -// Distributes a pressure budget as evenly as possible on the provided subunits -// given the already existing port pressure distribution. 
-// -// The algorithm is as follows: while there is remaining pressure to -// distribute, find the subunits with minimal pressure, and distribute -// remaining pressure equally up to the pressure of the unit with -// second-to-minimal pressure. -// For example, let's assume we want to distribute 2*P1256 -// (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is: -// DensePressure = P0 P1 P2 P3 P4 P5 P6 P7 -// 0.1 0.3 0.2 0.0 0.0 0.5 0.5 0.5 -// RemainingPressure = 2.0 -// We sort the subunits by pressure: -// Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)] -// We'll first start by the subunits with minimal pressure, which are at -// the beginning of the sorted array. In this example there is one (P2). -// The subunit with second-to-minimal pressure is the next one in the -// array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles -// from the budget. -// Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)] -// RemainingPressure = 1.9 -// We repeat this process: distribute 0.2 pressure on each of the minimal -// P2 and P1, decrease budget by 2*0.2: -// Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)] -// RemainingPressure = 1.5 -// There are no second-to-minimal subunits so we just share the remaining -// budget (1.5 cycles) equally: -// Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)] -// RemainingPressure = 0.0 -// We stop as there is no remaining budget to distribute. -void distributePressure(float RemainingPressure, - llvm::SmallVector Subunits, - llvm::SmallVector &DensePressure) { - // Find the number of subunits with minimal pressure (they are at the - // front). 
- llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) { - return DensePressure[A] < DensePressure[B]; - }); - const auto getPressureForSubunit = [&DensePressure, - &Subunits](size_t I) -> float & { - return DensePressure[Subunits[I]]; - }; - size_t NumMinimalSU = 1; - while (NumMinimalSU < Subunits.size() && - getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) { - ++NumMinimalSU; - } - while (RemainingPressure > 0.0f) { - if (NumMinimalSU == Subunits.size()) { - // All units are minimal, just distribute evenly and be done. - for (size_t I = 0; I < NumMinimalSU; ++I) { - getPressureForSubunit(I) += RemainingPressure / NumMinimalSU; - } - return; - } - // Distribute the remaining pressure equally. - const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1); - const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU); - assert(MinimalPressure < SecondToMinimalPressure); - const float Increment = SecondToMinimalPressure - MinimalPressure; - if (RemainingPressure <= NumMinimalSU * Increment) { - // There is not enough remaining pressure. - for (size_t I = 0; I < NumMinimalSU; ++I) { - getPressureForSubunit(I) += RemainingPressure / NumMinimalSU; - } - return; - } - // Bump all minimal pressure subunits to `SecondToMinimalPressure`. - for (size_t I = 0; I < NumMinimalSU; ++I) { - getPressureForSubunit(I) = SecondToMinimalPressure; - RemainingPressure -= SecondToMinimalPressure; - } - while (NumMinimalSU < Subunits.size() && - getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) { - ++NumMinimalSU; - } - } -} - -std::vector> computeIdealizedProcResPressure( - const llvm::MCSchedModel &SM, - llvm::SmallVector WPRS) { - // DensePressure[I] is the port pressure for Proc Resource I. 
- llvm::SmallVector DensePressure(SM.getNumProcResourceKinds()); - llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A, - const llvm::MCWriteProcResEntry &B) { - return A.ProcResourceIdx < B.ProcResourceIdx; - }); - for (const llvm::MCWriteProcResEntry &WPR : WPRS) { - // Get units for the entry. - const llvm::MCProcResourceDesc *const ProcResDesc = - SM.getProcResource(WPR.ProcResourceIdx); - if (ProcResDesc->SubUnitsIdxBegin == nullptr) { - // This is a ProcResUnit. - DensePressure[WPR.ProcResourceIdx] += WPR.Cycles; - } else { - // This is a ProcResGroup. - llvm::SmallVector Subunits(ProcResDesc->SubUnitsIdxBegin, - ProcResDesc->SubUnitsIdxBegin + - ProcResDesc->NumUnits); - distributePressure(WPR.Cycles, Subunits, DensePressure); - } - } - // Turn dense pressure into sparse pressure by removing zero entries. - std::vector> Pressure; - for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { - if (DensePressure[I] > 0.0f) - Pressure.emplace_back(I, DensePressure[I]); - } - return Pressure; -} - } // namespace exegesis } // namespace llvm diff --git a/tools/llvm-exegesis/lib/Analysis.h b/tools/llvm-exegesis/lib/Analysis.h index 36a3479b6df..15bd832af0a 100644 --- a/tools/llvm-exegesis/lib/Analysis.h +++ b/tools/llvm-exegesis/lib/Analysis.h @@ -15,6 +15,7 @@ #define LLVM_TOOLS_LLVM_EXEGESIS_ANALYSIS_H #include "Clustering.h" +#include "SchedClassResolution.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInstPrinter.h" @@ -51,19 +52,6 @@ public: private: using ClusterId = InstructionBenchmarkClustering::ClusterId; - // An llvm::MCSchedClassDesc augmented with some additional data. - struct ResolvedSchedClass { - ResolvedSchedClass(const llvm::MCSubtargetInfo &STI, - unsigned ResolvedSchedClassId, bool WasVariant); - - const unsigned SchedClassId; - const llvm::MCSchedClassDesc *const SCDesc; - const bool WasVariant; // Whether the original class was variant. 
- const llvm::SmallVector - NonRedundantWriteProcRes; - const std::vector> IdealizedProcResPressure; - }; - // Represents the intersection of a sched class and a cluster. class SchedClassCluster { public: @@ -137,13 +125,6 @@ private: const bool AnalysisDisplayUnstableOpcodes_; }; -// Computes the idealized ProcRes Unit pressure. This is the expected -// distribution if the CPU scheduler can distribute the load as evenly as -// possible. -std::vector> computeIdealizedProcResPressure( - const llvm::MCSchedModel &SM, - llvm::SmallVector WPRS); - } // namespace exegesis } // namespace llvm diff --git a/tools/llvm-exegesis/lib/CMakeLists.txt b/tools/llvm-exegesis/lib/CMakeLists.txt index ef85056db0a..ef1a411d9ef 100644 --- a/tools/llvm-exegesis/lib/CMakeLists.txt +++ b/tools/llvm-exegesis/lib/CMakeLists.txt @@ -28,8 +28,9 @@ add_library(LLVMExegesis MCInstrDescView.cpp PerfHelper.cpp RegisterAliasing.cpp - SnippetGenerator.cpp RegisterValue.cpp + SchedClassResolution.cpp + SnippetGenerator.cpp Target.cpp Uops.cpp ) diff --git a/tools/llvm-exegesis/lib/SchedClassResolution.cpp b/tools/llvm-exegesis/lib/SchedClassResolution.cpp new file mode 100644 index 00000000000..c122ae91306 --- /dev/null +++ b/tools/llvm-exegesis/lib/SchedClassResolution.cpp @@ -0,0 +1,243 @@ +//===-- SchedClassResolution.cpp --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SchedClassResolution.h" +#include "BenchmarkResult.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/FormatVariadic.h" +#include +#include +#include + +namespace llvm { +namespace exegesis { + +// Return the non-redundant list of WriteProcRes used by the given sched class. 
+// The scheduling model for LLVM is such that each instruction has a certain +// number of uops which consume resources which are described by WriteProcRes +// entries. Each entry describes how many cycles are spent on a specific ProcRes +// kind. +// For example, an instruction might have 3 uOps, one dispatching on P0 +// (ProcResIdx=1) and two on P06 (ProcResIdx = 7). +// Note that LLVM additionally denormalizes resource consumption to include +// usage of super resources by subresources. So in practice if there exists a +// P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by +// P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed +// by P06 are also consumed by P016. In the figure below, parenthesized cycles +// denote implied usage of superresources by subresources: +// P0 P06 P016 +// uOp1 1 (1) (1) +// uOp2 1 (1) +// uOp3 1 (1) +// ============================= +// 1 3 3 +// Eventually we end up with three entries for the WriteProcRes of the +// instruction: +// {ProcResIdx=1, Cycles=1} // P0 +// {ProcResIdx=7, Cycles=3} // P06 +// {ProcResIdx=10, Cycles=3} // P016 +// +// Note that in this case, P016 does not contribute any cycles, so it would +// be removed by this function. +// FIXME: Move this to MCSubtargetInfo and use it in llvm-mca. +static llvm::SmallVector +getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc, + const llvm::MCSubtargetInfo &STI) { + llvm::SmallVector Result; + const auto &SM = STI.getSchedModel(); + const unsigned NumProcRes = SM.getNumProcResourceKinds(); + + // This assumes that the ProcResDescs are sorted in topological order, which + // is guaranteed by the tablegen backend. 
+ llvm::SmallVector ProcResUnitUsage(NumProcRes); + for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc), + *const WPREnd = STI.getWriteProcResEnd(&SCDesc); + WPR != WPREnd; ++WPR) { + const llvm::MCProcResourceDesc *const ProcResDesc = + SM.getProcResource(WPR->ProcResourceIdx); + if (ProcResDesc->SubUnitsIdxBegin == nullptr) { + // This is a ProcResUnit. + Result.push_back({WPR->ProcResourceIdx, WPR->Cycles}); + ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles; + } else { + // This is a ProcResGroup. First see if it contributes any cycles or if + // it has cycles just from subunits. + float RemainingCycles = WPR->Cycles; + for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin; + SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits; + ++SubResIdx) { + RemainingCycles -= ProcResUnitUsage[*SubResIdx]; + } + if (RemainingCycles < 0.01f) { + // The ProcResGroup contributes no cycles of its own. + continue; + } + // The ProcResGroup contributes `RemainingCycles` cycles of its own. + Result.push_back({WPR->ProcResourceIdx, + static_cast(std::round(RemainingCycles))}); + // Spread the remaining cycles over all subunits. + for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin; + SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits; + ++SubResIdx) { + ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits; + } + } + } + return Result; +} + +// Distributes a pressure budget as evenly as possible on the provided subunits +// given the already existing port pressure distribution. +// +// The algorithm is as follows: while there is remaining pressure to +// distribute, find the subunits with minimal pressure, and distribute +// remaining pressure equally up to the pressure of the unit with +// second-to-minimal pressure. 
+// For example, let's assume we want to distribute 2*P1256 +// (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is: +// DensePressure = P0 P1 P2 P3 P4 P5 P6 P7 +// 0.1 0.3 0.2 0.0 0.0 0.5 0.5 0.5 +// RemainingPressure = 2.0 +// We sort the subunits by pressure: +// Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)] +// We'll first start by the subunits with minimal pressure, which are at +// the beginning of the sorted array. In this example there is one (P2). +// The subunit with second-to-minimal pressure is the next one in the +// array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles +// from the budget. +// Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P6,p=0.5)] +// RemainingPressure = 1.9 +// We repeat this process: distribute 0.2 pressure on each of the minimal +// P2 and P1, decrease budget by 2*0.2: +// Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P6,p=0.5)] +// RemainingPressure = 1.5 +// There are no second-to-minimal subunits so we just share the remaining +// budget (1.5 cycles) equally: +// Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P6,p=0.875)] +// RemainingPressure = 0.0 +// We stop as there is no remaining budget to distribute. +static void distributePressure(float RemainingPressure, + llvm::SmallVector Subunits, + llvm::SmallVector &DensePressure) { + // Find the number of subunits with minimal pressure (they are at the + // front). + llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) { + return DensePressure[A] < DensePressure[B]; + }); + const auto getPressureForSubunit = [&DensePressure, + &Subunits](size_t I) -> float & { + return DensePressure[Subunits[I]]; + }; + size_t NumMinimalSU = 1; + while (NumMinimalSU < Subunits.size() && + getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) { + ++NumMinimalSU; + } + while (RemainingPressure > 0.0f) { + if (NumMinimalSU == Subunits.size()) { + // All units are minimal, just distribute evenly and be done. 
+ for (size_t I = 0; I < NumMinimalSU; ++I) { + getPressureForSubunit(I) += RemainingPressure / NumMinimalSU; + } + return; + } + // Distribute the remaining pressure equally. + const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1); + const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU); + assert(MinimalPressure < SecondToMinimalPressure); + const float Increment = SecondToMinimalPressure - MinimalPressure; + if (RemainingPressure <= NumMinimalSU * Increment) { + // There is not enough remaining pressure. + for (size_t I = 0; I < NumMinimalSU; ++I) { + getPressureForSubunit(I) += RemainingPressure / NumMinimalSU; + } + return; + } + // Bump all minimal pressure subunits to `SecondToMinimalPressure`. + for (size_t I = 0; I < NumMinimalSU; ++I) { + getPressureForSubunit(I) = SecondToMinimalPressure; + RemainingPressure -= SecondToMinimalPressure; + } + while (NumMinimalSU < Subunits.size() && + getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) { + ++NumMinimalSU; + } + } +} + +std::vector> computeIdealizedProcResPressure( + const llvm::MCSchedModel &SM, + llvm::SmallVector WPRS) { + // DensePressure[I] is the port pressure for Proc Resource I. + llvm::SmallVector DensePressure(SM.getNumProcResourceKinds()); + llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A, + const llvm::MCWriteProcResEntry &B) { + return A.ProcResourceIdx < B.ProcResourceIdx; + }); + for (const llvm::MCWriteProcResEntry &WPR : WPRS) { + // Get units for the entry. + const llvm::MCProcResourceDesc *const ProcResDesc = + SM.getProcResource(WPR.ProcResourceIdx); + if (ProcResDesc->SubUnitsIdxBegin == nullptr) { + // This is a ProcResUnit. + DensePressure[WPR.ProcResourceIdx] += WPR.Cycles; + } else { + // This is a ProcResGroup. 
+ llvm::SmallVector Subunits(ProcResDesc->SubUnitsIdxBegin, + ProcResDesc->SubUnitsIdxBegin + + ProcResDesc->NumUnits); + distributePressure(WPR.Cycles, Subunits, DensePressure); + } + } + // Turn dense pressure into sparse pressure by removing zero entries. + std::vector> Pressure; + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + if (DensePressure[I] > 0.0f) + Pressure.emplace_back(I, DensePressure[I]); + } + return Pressure; +} + +ResolvedSchedClass::ResolvedSchedClass(const llvm::MCSubtargetInfo &STI, + unsigned ResolvedSchedClassId, + bool WasVariant) + : SchedClassId(ResolvedSchedClassId), + SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)), + WasVariant(WasVariant), + NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)), + IdealizedProcResPressure(computeIdealizedProcResPressure( + STI.getSchedModel(), NonRedundantWriteProcRes)) { + assert((SCDesc == nullptr || !SCDesc->isVariant()) && + "ResolvedSchedClass should never be variant"); +} + +static unsigned ResolveVariantSchedClassId(const llvm::MCSubtargetInfo &STI, + unsigned SchedClassId, + const llvm::MCInst &MCI) { + const auto &SM = STI.getSchedModel(); + while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant()) + SchedClassId = + STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID()); + return SchedClassId; +} + +std::pair +ResolvedSchedClass::resolveSchedClassId( + const llvm::MCSubtargetInfo &SubtargetInfo, + const llvm::MCInstrInfo &InstrInfo, const llvm::MCInst &MCI) { + unsigned SchedClassId = InstrInfo.get(MCI.getOpcode()).getSchedClass(); + const bool WasVariant = SchedClassId && SubtargetInfo.getSchedModel() + .getSchedClassDesc(SchedClassId) + ->isVariant(); + SchedClassId = ResolveVariantSchedClassId(SubtargetInfo, SchedClassId, MCI); + return std::make_pair(SchedClassId, WasVariant); +} + +} // namespace exegesis +} // namespace llvm diff --git a/tools/llvm-exegesis/lib/SchedClassResolution.h 
b/tools/llvm-exegesis/lib/SchedClassResolution.h new file mode 100644 index 00000000000..00f65ee2edf --- /dev/null +++ b/tools/llvm-exegesis/lib/SchedClassResolution.h @@ -0,0 +1,58 @@ +//===-- SchedClassResolution.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Resolution of an MCInst's sched class, augmented with derived ProcRes data. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_SCHEDCLASSRESOLUTION_H +#define LLVM_TOOLS_LLVM_EXEGESIS_SCHEDCLASSRESOLUTION_H + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace exegesis { + +// Computes the idealized ProcRes Unit pressure. This is the expected +// distribution if the CPU scheduler can distribute the load as evenly as +// possible. +std::vector> computeIdealizedProcResPressure( + const llvm::MCSchedModel &SM, + llvm::SmallVector WPRS); + +// An llvm::MCSchedClassDesc augmented with some additional data. +struct ResolvedSchedClass { + ResolvedSchedClass(const llvm::MCSubtargetInfo &STI, + unsigned ResolvedSchedClassId, bool WasVariant); + + static std::pair + resolveSchedClassId(const llvm::MCSubtargetInfo &SubtargetInfo, + const llvm::MCInstrInfo &InstrInfo, + const llvm::MCInst &MCI); + + const unsigned SchedClassId; + const llvm::MCSchedClassDesc *const SCDesc; + const bool WasVariant; // Whether the original class was variant. 
+ const llvm::SmallVector + NonRedundantWriteProcRes; + const std::vector> IdealizedProcResPressure; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_SCHEDCLASSRESOLUTION_H diff --git a/unittests/tools/llvm-exegesis/X86/CMakeLists.txt b/unittests/tools/llvm-exegesis/X86/CMakeLists.txt index 5078e7ba76c..88c38d0d7e8 100644 --- a/unittests/tools/llvm-exegesis/X86/CMakeLists.txt +++ b/unittests/tools/llvm-exegesis/X86/CMakeLists.txt @@ -15,10 +15,10 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(LLVMExegesisX86Tests AssemblerTest.cpp - AnalysisTest.cpp BenchmarkResultTest.cpp - SnippetGeneratorTest.cpp RegisterAliasingTest.cpp + SchedClassResolutionTest.cpp + SnippetGeneratorTest.cpp TargetTest.cpp ) target_link_libraries(LLVMExegesisX86Tests PRIVATE diff --git a/unittests/tools/llvm-exegesis/X86/AnalysisTest.cpp b/unittests/tools/llvm-exegesis/X86/SchedClassResolutionTest.cpp similarity index 85% rename from unittests/tools/llvm-exegesis/X86/AnalysisTest.cpp rename to unittests/tools/llvm-exegesis/X86/SchedClassResolutionTest.cpp index 76ca39138c4..9e745461142 100644 --- a/unittests/tools/llvm-exegesis/X86/AnalysisTest.cpp +++ b/unittests/tools/llvm-exegesis/X86/SchedClassResolutionTest.cpp @@ -1,4 +1,4 @@ -//===-- AnalysisTest.cpp ---------------------------------------*- C++ -*-===// +//===-- SchedClassResolutionTest.cpp ----------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "Analysis.h" +#include "SchedClassResolution.h" #include #include @@ -23,9 +23,9 @@ namespace { using testing::Pair; using testing::UnorderedElementsAre; -class AnalysisTest : public ::testing::Test { +class SchedClassResolutionTest : public ::testing::Test { protected: - AnalysisTest() { + SchedClassResolutionTest() { const std::string TT = "x86_64-unknown-linux"; std::string error; const llvm::Target *const TheTarget = @@ -78,20 +78,20 @@ protected: uint16_t P0156Idx = 0; }; -TEST_F(AnalysisTest, ComputeIdealizedProcResPressure_2P0) { +TEST_F(SchedClassResolutionTest, ComputeIdealizedProcResPressure_2P0) { const auto Pressure = computeIdealizedProcResPressure(STI->getSchedModel(), {{P0Idx, 2}}); EXPECT_THAT(Pressure, UnorderedElementsAre(Pair(P0Idx, 2.0))); } -TEST_F(AnalysisTest, ComputeIdealizedProcResPressure_2P05) { +TEST_F(SchedClassResolutionTest, ComputeIdealizedProcResPressure_2P05) { const auto Pressure = computeIdealizedProcResPressure(STI->getSchedModel(), {{P05Idx, 2}}); EXPECT_THAT(Pressure, UnorderedElementsAre(Pair(P0Idx, 1.0), Pair(P5Idx, 1.0))); } -TEST_F(AnalysisTest, ComputeIdealizedProcResPressure_2P05_2P0156) { +TEST_F(SchedClassResolutionTest, ComputeIdealizedProcResPressure_2P05_2P0156) { const auto Pressure = computeIdealizedProcResPressure( STI->getSchedModel(), {{P05Idx, 2}, {P0156Idx, 2}}); EXPECT_THAT(Pressure, @@ -99,7 +99,8 @@ TEST_F(AnalysisTest, ComputeIdealizedProcResPressure_2P05_2P0156) { Pair(P5Idx, 1.0), Pair(P6Idx, 1.0))); } -TEST_F(AnalysisTest, ComputeIdealizedProcResPressure_1P1_1P05_2P0156) { +TEST_F(SchedClassResolutionTest, + ComputeIdealizedProcResPressure_1P1_1P05_2P0156) { const auto Pressure = computeIdealizedProcResPressure( STI->getSchedModel(), {{P1Idx, 1}, {P05Idx, 1}, {P0156Idx, 2}}); EXPECT_THAT(Pressure, -- 2.11.0