[llvm-mca] Move the logic that computes the block throughput into Support.h. NFC

author Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>

Fri, 1 Jun 2018 14:35:21 +0000 (14:35 +0000)

committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>

Fri, 1 Jun 2018 14:35:21 +0000 (14:35 +0000)
author Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Fri, 1 Jun 2018 14:35:21 +0000 (14:35 +0000)
committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Fri, 1 Jun 2018 14:35:21 +0000 (14:35 +0000)
diff --git a/tools/llvm-mca/SummaryView.cpp b/tools/llvm-mca/SummaryView.cpp

index c4746c6..5cb5c23 100644 (file)
--- a/tools/llvm-mca/SummaryView.cpp
+++ b/tools/llvm-mca/SummaryView.cpp
@@ -24,6 +24,14 @@ namespace mca {
  
  using namespace llvm;
  
+SummaryView::SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
+                         unsigned Width)
+    : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0),
+      NumMicroOps(0), ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
+      ProcResourceMasks(Model.getNumProcResourceKinds(), 0) {
+  computeProcResourceMasks(SM, ProcResourceMasks); // Fill the zeroed masks.
+}
+
  void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) {
    // We are only interested in the "instruction dispatched" events generated by
    // the dispatch stage for instructions that are part of iteration #0.
@@ -41,48 +49,14 @@ void SummaryView::onInstructionEvent(const HWInstructionEvent &Event) {
    const InstrDesc &Desc = Inst.getDesc();
    NumMicroOps += Desc.NumMicroOps;
    for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
-    if (!RU.second.size())
-      continue;
-
-    assert(RU.second.NumUnits && "Expected more than one unit used!");
-    if (ProcResourceUsage.find(RU.first) == ProcResourceUsage.end()) {
-      ProcResourceUsage[RU.first] = RU.second.size();
-      continue;
-    }
-
-    ProcResourceUsage[RU.first] += RU.second.size();
-  }
-}
-
-double SummaryView::getBlockRThroughput() const {
-  assert(NumMicroOps && "Expected at least one micro opcode!");
-
-  SmallVector<uint64_t, 8> Masks(SM.getNumProcResourceKinds());
-  computeProcResourceMasks(SM, Masks);
-
-  // The block throughput is bounded from above by the hardware dispatch
-  // throughput. That is because the DispatchWidth is an upper bound on the
-  // number of opcodes that can be part of a single dispatch group.
-  double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
-
-  // The block throughput is also limited by the amount of hardware parallelism.
-  // The number of available resource units affects the resource pressure
-  // distributed, as well as how many blocks can be executed every cycle.
-  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
-    uint64_t Mask = Masks[I];
-    const auto It = ProcResourceUsage.find_as(Mask);
-    if (It != ProcResourceUsage.end()) {
-      const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
-      unsigned NumUnits = MCDesc.NumUnits;
-      double Throughput = static_cast<double>(It->second) / NumUnits;
-      Max = std::max(Max, Throughput);
+    if (RU.second.size()) {
+      const auto It = find(ProcResourceMasks, RU.first);
+      assert(It != ProcResourceMasks.end() &&
+             "Invalid processor resource mask!");
+      ProcResourceUsage[std::distance(ProcResourceMasks.begin(), It)] +=
+          RU.second.size();
      }
    }
-
-  // The block reciprocal throughput is computed as the MAX of:
-  //  -  (#uOps / DispatchWidth)
-  //  -  (#units / resource cycles) for every consumed processor resource.
-  return Max;
  }
  
  void SummaryView::printView(raw_ostream &OS) const {
@@ -90,7 +64,8 @@ void SummaryView::printView(raw_ostream &OS) const {
    unsigned Instructions = Source.size();
    unsigned TotalInstructions = Instructions * Iterations;
    double IPC = (double)TotalInstructions / TotalCycles;
-  double BlockRThroughput = getBlockRThroughput();
+  double BlockRThroughput = computeBlockRThroughput(
+      SM, DispatchWidth, NumMicroOps, ProcResourceUsage);
  
    std::string Buffer;
    raw_string_ostream TempStream(Buffer);
diff --git a/tools/llvm-mca/SummaryView.h b/tools/llvm-mca/SummaryView.h

index fe8a5e2..04f4a87 100644 (file)
--- a/tools/llvm-mca/SummaryView.h
+++ b/tools/llvm-mca/SummaryView.h
@@ -45,10 +45,15 @@ class SummaryView : public View {
    unsigned TotalCycles;
    // The total number of micro opcodes contributed by a block of instructions.
    unsigned NumMicroOps;
-  // For each processor resource, this map stores the cumulative number of
-  // resource cycles consumed by a block of instructions. The resource mask ID
-  // is used as the key value to access elements of this map.
-  llvm::DenseMap<uint64_t, unsigned> ProcResourceUsage;
+  // For each processor resource, this vector stores the cumulative number of
+  // resource cycles consumed by the analyzed code block.
+  llvm::SmallVector<unsigned, 8> ProcResourceUsage;
+
+  // Each processor resource is associated with a so-called processor resource
+  // mask. This vector correlates processor resource IDs with processor
+  // resource masks. There is exactly one element for each processor resource
+  // declared by the scheduling model.
+  llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
  
    // Compute the reciprocal throughput for the analyzed code block.
    // The reciprocal block throughput is computed as the MAX between:
@@ -58,9 +63,7 @@ class SummaryView : public View {
  
  public:
    SummaryView(const llvm::MCSchedModel &Model, const SourceMgr &S,
-              unsigned Width)
-      : SM(Model), Source(S), DispatchWidth(Width), TotalCycles(0),
-        NumMicroOps(0) {}
+              unsigned Width);
  
    void onCycleEnd() override { ++TotalCycles; }
  
diff --git a/tools/llvm-mca/Support.cpp b/tools/llvm-mca/Support.cpp

index e97cc51..fac237f 100644 (file)
--- a/tools/llvm-mca/Support.cpp
+++ b/tools/llvm-mca/Support.cpp
@@ -48,4 +48,32 @@ void computeProcResourceMasks(const MCSchedModel &SM,
      ProcResourceID++;
    }
  }
+
+double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
+                               unsigned NumMicroOps,
+                               ArrayRef<unsigned> ProcResourceUsage) {
+  // The block throughput is bounded from above by the hardware dispatch
+  // throughput, because the DispatchWidth is an upper bound on the number
+  // of micro opcodes that can be part of a single dispatch group.
+  double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
+
+  // The block throughput is also limited by the amount of hardware parallelism.
+  // The number of available resource units affects the resource pressure
+  // distribution, as well as how many blocks can be executed every cycle.
+  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    unsigned ResourceCycles = ProcResourceUsage[I];
+    if (!ResourceCycles)
+      continue; // No cycles recorded for this resource.
+
+    const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
+    double Throughput = static_cast<double>(ResourceCycles) / MCDesc.NumUnits;
+    Max = std::max(Max, Throughput);
+  }
+
+  // The block reciprocal throughput is computed as the MAX of:
+  //  - (NumMicroOps / DispatchWidth)
+  //  - (ResourceCycles / NumUnits)   for every consumed processor resource.
+  return Max;
+}
+
  } // namespace mca
diff --git a/tools/llvm-mca/Support.h b/tools/llvm-mca/Support.h

index a29ef8b..98302e7 100644 (file)
--- a/tools/llvm-mca/Support.h
+++ b/tools/llvm-mca/Support.h
@@ -15,6 +15,7 @@
  #ifndef LLVM_TOOLS_LLVM_MCA_SUPPORT_H
  #define LLVM_TOOLS_LLVM_MCA_SUPPORT_H
  
+#include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/MC/MCSchedule.h"
  
@@ -44,6 +45,14 @@ namespace mca {
  /// problems with simple bit manipulation operations.
  void computeProcResourceMasks(const llvm::MCSchedModel &SM,
                                llvm::SmallVectorImpl<uint64_t> &Masks);
+
+/// Compute the reciprocal block throughput from a set of processor resource
+/// cycles. The reciprocal block throughput is computed as the MAX between:
+///  - NumMicroOps / DispatchWidth
+///  - ProcResourceCycles / #ProcResourceUnits  (for every consumed resource).
+double computeBlockRThroughput(const llvm::MCSchedModel &SM,
+                               unsigned DispatchWidth, unsigned NumMicroOps,
+                               llvm::ArrayRef<unsigned> ProcResourceUsage);
  } // namespace mca
  
  #endif
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
	Fri, 1 Jun 2018 14:35:21 +0000 (14:35 +0000)
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
	Fri, 1 Jun 2018 14:35:21 +0000 (14:35 +0000)
tools/llvm-mca/SummaryView.cpp		patch \| blob \| history
tools/llvm-mca/SummaryView.h		patch \| blob \| history
tools/llvm-mca/Support.cpp		patch \| blob \| history
tools/llvm-mca/Support.h		patch \| blob \| history