.. code-block:: none
- # LLVM-MCA-BEGIN My Code Region
+ # LLVM-MCA-BEGIN
...
# LLVM-MCA-END
-Multiple regions can be specified provided that they do not overlap. A code
-region can have an optional description. If no user-defined region is specified,
-then :program:`llvm-mca` assumes a default region which contains every
-instruction in the input file. Every region is analyzed in isolation, and the
-final performance report is the union of all the reports generated for every
-code region.
+If no user-defined region is specified, then :program:`llvm-mca` assumes a
+default region which contains every instruction in the input file. Every region
+is analyzed in isolation, and the final performance report is the union of all
+the reports generated for every code region.
+
+Code regions can have names. For example:
+
+.. code-block:: none
+
+ # LLVM-MCA-BEGIN A simple example
+ add %eax, %eax
+ # LLVM-MCA-END
+
+The code from the example above defines a region named "A simple example" with a
+single instruction in it. Note how the region name doesn't have to be repeated
+in the ``LLVM-MCA-END`` directive. In the absence of overlapping regions,
+an anonymous ``LLVM-MCA-END`` directive always ends the currently active
+user-defined region.
+
+Example of nesting regions:
+
+.. code-block:: none
+
+ # LLVM-MCA-BEGIN foo
+ add %eax, %edx
+ # LLVM-MCA-BEGIN bar
+ sub %eax, %edx
+ # LLVM-MCA-END bar
+ # LLVM-MCA-END foo
+
+Example of overlapping regions:
+
+.. code-block:: none
+
+ # LLVM-MCA-BEGIN foo
+ add %eax, %edx
+ # LLVM-MCA-BEGIN bar
+ sub %eax, %edx
+ # LLVM-MCA-END foo
+ add %eax, %edx
+ # LLVM-MCA-END bar
+
+Note that multiple anonymous regions cannot overlap. Also, overlapping regions
+cannot have the same name.
Inline assembly directives may be used from source code to annotate the
assembly text:
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s | FileCheck %s
+
+testloop:
+# LLVM-MCA-BEGIN upper
+ leal 42(%rdi), %eax
+# LLVM-MCA-BEGIN lower
+ imull %esi, %eax
+# LLVM-MCA-END upper
+ leal 42(%rdi), %eax
+# LLVM-MCA-END lower
+ imull %esi, %eax
+
+# CHECK: [0] Code Region - upper
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 205
+# CHECK-NEXT: Total uOps: 300
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.46
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax
+# CHECK-NEXT: 2 3 1.00 imull %esi, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: 0.99 1.01 - - - - - - 1.00 - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: 0.99 0.01 - - - - - - - - - - - - leal 42(%rdi), %eax
+# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %esi, %eax
+
+# CHECK: [1] Code Region - lower
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 204
+# CHECK-NEXT: Total uOps: 300
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.47
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 3 1.00 imull %esi, %eax
+# CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: 1.00 1.00 - - - - - - 1.00 - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %esi, %eax
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - leal 42(%rdi), %eax
--- /dev/null
+# RUN: not llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 %s 2>&1 | FileCheck %s
+
+# LLVM-MCA-BEGIN foo
+add %eax, %eax
+# LLVM-MCA-BEGIN foo
+add %eax, %eax
+
+# CHECK: llvm-mca-markers-11.s:5:2: error: overlapping regions cannot have the same name
+# CHECK-NEXT: # LLVM-MCA-BEGIN foo
+# CHECK-NEXT: ^
+# CHECK-NEXT: llvm-mca-markers-11.s:3:2: note: region foo was previously defined here
+# CHECK-NEXT: # LLVM-MCA-BEGIN foo
+# CHECK-NEXT: ^
--- /dev/null
+# RUN: not llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 %s 2>&1 | FileCheck %s
+
+# LLVM-MCA-BEGIN
+add %eax, %eax
+# LLVM-MCA-BEGIN
+add %eax, %eax
+
+# CHECK: llvm-mca-markers-12.s:5:2: error: found multiple overlapping anonymous regions
+# CHECK-NEXT: # LLVM-MCA-BEGIN
+# CHECK-NEXT: ^
+# CHECK-NEXT: llvm-mca-markers-12.s:3:2: note: Previous anonymous region was defined here
+# CHECK-NEXT: # LLVM-MCA-BEGIN
+# CHECK-NEXT: ^
# LLVM-MCA-END
-# CHECK: llvm-mca-markers-6.s:5:2: warning: Ignoring invalid region start
-# CHECK-NEXT: # LLVM-MCA-BEGIN bar
+# CHECK: llvm-mca-markers-6.s:7:2: error: found an invalid region end directive
+# CHECK-NEXT: # LLVM-MCA-END
+# CHECK-NEXT: ^
+# CHECK-NEXT: llvm-mca-markers-6.s:7:2: note: unable to find an active anonymous region
+# CHECK-NEXT: # LLVM-MCA-END
# CHECK-NEXT: ^
-# CHECK-NEXT: error: no assembly instructions found.
# LLVM-MCA-END
-# CHECK: llvm-mca-markers-7.s:7:2: warning: Ignoring invalid region end
+# CHECK: llvm-mca-markers-7.s:7:2: error: found an invalid region end directive
+# CHECK-NEXT: # LLVM-MCA-END
+# CHECK-NEXT: ^
+# CHECK-NEXT: llvm-mca-markers-7.s:7:2: note: unable to find an active anonymous region
# CHECK-NEXT: # LLVM-MCA-END
# CHECK-NEXT: ^
--- /dev/null
+# RUN: not llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 %s 2>&1 | FileCheck %s
+
+# LLVM-MCA-END foo
+
+# CHECK: llvm-mca-markers-8.s:3:2: error: found an invalid region end directive
+# CHECK-NEXT: # LLVM-MCA-END foo
+# CHECK-NEXT: ^
+# CHECK-NEXT: llvm-mca-markers-8.s:3:2: note: unable to find an active region named foo
+# CHECK-NEXT: # LLVM-MCA-END foo
+# CHECK-NEXT: ^
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s | FileCheck %s
+
+testloop:
+# LLVM-MCA-BEGIN outer
+ leal 42(%rdi), %eax
+# LLVM-MCA-BEGIN inner
+ imull %esi, %eax
+# LLVM-MCA-END inner
+ leal 42(%rdi), %eax
+# LLVM-MCA-END outer
+ imull %esi, %eax
+
+# CHECK: [0] Code Region - outer
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 300
+# CHECK-NEXT: Total Cycles: 205
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 1.95
+# CHECK-NEXT: IPC: 1.46
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax
+# CHECK-NEXT: 2 3 1.00 imull %esi, %eax
+# CHECK-NEXT: 1 1 0.50 leal 42(%rdi), %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: 1.00 2.00 - - - - - - 1.00 - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - - - - - - - leal 42(%rdi), %eax
+# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %esi, %eax
+# CHECK-NEXT: 1.00 - - - - - - - - - - - - - leal 42(%rdi), %eax
+
+# CHECK: [1] Code Region - inner
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 100
+# CHECK-NEXT: Total Cycles: 303
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 2
+# CHECK-NEXT: uOps Per Cycle: 0.66
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 3 1.00 imull %esi, %eax
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - JALU0
+# CHECK-NEXT: [1] - JALU1
+# CHECK-NEXT: [2] - JDiv
+# CHECK-NEXT: [3] - JFPA
+# CHECK-NEXT: [4] - JFPM
+# CHECK-NEXT: [5] - JFPU0
+# CHECK-NEXT: [6] - JFPU1
+# CHECK-NEXT: [7] - JLAGU
+# CHECK-NEXT: [8] - JMul
+# CHECK-NEXT: [9] - JSAGU
+# CHECK-NEXT: [10] - JSTC
+# CHECK-NEXT: [11] - JVALU0
+# CHECK-NEXT: [12] - JVALU1
+# CHECK-NEXT: [13] - JVIMUL
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %esi, %eax
namespace llvm {
namespace mca {
-CodeRegions::CodeRegions(llvm::SourceMgr &S) : SM(S) {
+CodeRegions::CodeRegions(llvm::SourceMgr &S) : SM(S), FoundErrors(false) { // No region errors recorded yet.
// Create a default region for the input code sequence.
Regions.emplace_back(make_unique<CodeRegion>("", SMLoc()));
}
}
void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) {
- assert(!Regions.empty() && "Missing Default region");
- const CodeRegion &CurrentRegion = *Regions.back();
- if (CurrentRegion.startLoc().isValid() && !CurrentRegion.endLoc().isValid()) {
- SM.PrintMessage(Loc, SourceMgr::DK_Warning,
- "Ignoring invalid region start");
- return;
+ if (ActiveRegions.empty()) { // No region is currently open.
+ // Replace the default region in place with the first user-defined region.
+ // By construction, only the default region has an invalid start location.
+ if (Regions.size() == 1 && !Regions[0]->startLoc().isValid() &&
+ !Regions[0]->endLoc().isValid()) {
+ ActiveRegions[Description] = 0; // The new region takes over slot 0.
+ Regions[0] = make_unique<CodeRegion>(Description, Loc);
+ return;
+ }
+ } else {
+ auto It = ActiveRegions.find(Description); // Is a region with this name still open?
+ if (It != ActiveRegions.end()) {
+ const CodeRegion &R = *Regions[It->second];
+ if (Description.empty()) { // Both the open region and the new one are unnamed.
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ "found multiple overlapping anonymous regions");
+ SM.PrintMessage(R.startLoc(), SourceMgr::DK_Note,
+ "Previous anonymous region was defined here");
+ FoundErrors = true; // Makes isValid() return false.
+ return;
+ }
+
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ "overlapping regions cannot have the same name");
+ SM.PrintMessage(R.startLoc(), SourceMgr::DK_Note,
+ "region " + Description + " was previously defined here");
+ FoundErrors = true; // Makes isValid() return false.
+ return;
+ }
}
- // Remove the default region if there are user defined regions.
- if (!CurrentRegion.startLoc().isValid())
- Regions.erase(Regions.begin());
+ ActiveRegions[Description] = Regions.size(); // Index the region gets once appended below.
Regions.emplace_back(make_unique<CodeRegion>(Description, Loc));
+ return;
}
-void CodeRegions::endRegion(SMLoc Loc) {
- assert(!Regions.empty() && "Missing Default region");
- CodeRegion &CurrentRegion = *Regions.back();
- if (CurrentRegion.endLoc().isValid()) {
- SM.PrintMessage(Loc, SourceMgr::DK_Warning,
- "Ignoring invalid region end");
+void CodeRegions::endRegion(StringRef Description, SMLoc Loc) { // Ends an active region at Loc, or reports an error.
+ if (Description.empty()) {
+ // Special case where there is only one active user-defined region,
+ // and this LLVM-MCA-END directive doesn't provide a region name.
+ // In this case, we assume that the user simply wanted to terminate
+ // the only active region, whether or not that region has a name.
+ if (ActiveRegions.size() == 1) {
+ auto It = ActiveRegions.begin();
+ Regions[It->second]->setEndLocation(Loc);
+ ActiveRegions.erase(It);
+ return;
+ }
+
+ // Special case where the region end marker applies to the default region.
+ if (ActiveRegions.empty() && Regions.size() == 1 &&
+ !Regions[0]->startLoc().isValid() && !Regions[0]->endLoc().isValid()) {
+ Regions[0]->setEndLocation(Loc);
+ return;
+ }
+ }
+
+ auto It = ActiveRegions.find(Description); // Look up an active region with this name (possibly empty).
+ if (It != ActiveRegions.end()) {
+ Regions[It->second]->setEndLocation(Loc);
+ ActiveRegions.erase(It); // The region is no longer active.
return;
}
- CurrentRegion.setEndLocation(Loc);
+ FoundErrors = true; // No matching active region; makes isValid() return false.
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ "found an invalid region end directive");
+ if (!Description.empty()) {
+ SM.PrintMessage(Loc, SourceMgr::DK_Note,
+ "unable to find an active region named " + Description);
+ } else {
+ SM.PrintMessage(Loc, SourceMgr::DK_Note,
+ "unable to find an active anonymous region");
+ }
}
void CodeRegions::addInstruction(const MCInst &Instruction) {
- const SMLoc &Loc = Instruction.getLoc();
- const auto It =
- std::find_if(Regions.rbegin(), Regions.rend(),
- [Loc](const UniqueCodeRegion &Region) {
- return Region->isLocInRange(Loc);
- });
- if (It != Regions.rend())
- (*It)->addInstruction(Instruction);
+ SMLoc Loc = Instruction.getLoc();
+ for (UniqueCodeRegion &Region : Regions) // Check every region, not just the first match.
+ if (Region->isLocInRange(Loc))
+ Region->addInstruction(Instruction); // Added to each region whose range contains Loc.
}
} // namespace mca
llvm::StringRef getDescription() const { return Description; }
};
+class CodeRegionParseError final : public Error {}; // NOTE(review): no use of this type is visible in this change; confirm it is needed.
+
class CodeRegions {
// A source manager. Used by the tool to generate meaningful warnings.
llvm::SourceMgr &SM;
using UniqueCodeRegion = std::unique_ptr<CodeRegion>;
std::vector<UniqueCodeRegion> Regions;
+ llvm::StringMap<unsigned> ActiveRegions;
+ bool FoundErrors;
CodeRegions(const CodeRegions &) = delete;
CodeRegions &operator=(const CodeRegions &) = delete;
const_iterator end() const { return Regions.cend(); }
void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc);
- void endRegion(llvm::SMLoc Loc);
+ void endRegion(llvm::StringRef Description, llvm::SMLoc Loc);
void addInstruction(const llvm::MCInst &Instruction);
llvm::SourceMgr &getSourceMgr() const { return SM; }
return Region->empty();
});
}
+
+ bool isValid() const { return !FoundErrors; } // True while no region begin/end error has been recorded.
};
} // namespace mca
Comment = Comment.drop_front(Position);
if (Comment.consume_front("LLVM-MCA-END")) {
- Regions.endRegion(Loc);
+ // Skip spaces and tabs.
+ Position = Comment.find_first_not_of(" \t");
+ if (Position < Comment.size())
+ Comment = Comment.drop_front(Position);
+ Regions.endRegion(Comment, Loc);
return;
}
return 1;
}
const mca::CodeRegions &Regions = *RegionsOrErr;
+
+ // Early exit if errors were found by the code region parsing logic.
+ if (!Regions.isValid())
+ return 1;
+
if (Regions.empty()) {
WithColor::error() << "no assembly instructions found.\n";
return 1;