From 0f51d6d4e975f86e455e1059d97b67d2baed27cd Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Mon, 18 Jun 2018 14:00:30 +0000 Subject: [PATCH] [llvm-mca] Add tests for XOP and AVX512 instructions that implicitly clear the upper portion of a super-register. When the destination register of an XOP instruction is an XMM register, bits [255:128] of the corresponding YMM register are cleared. When the destination register of an EVEX-encoded instruction is an XMM/YMM register, the upper bits of the corresponding ZMM register are cleared. On processors that feature AVX512, a write to an XMM register always clears the upper portion of the corresponding ZMM register if the instruction is VEX- or EVEX-encoded. These new tests show some interesting cases which aren't correctly analyzed by llvm-mca. The lack of knowledge related to the implicit update on the super-registers is addressed by D48225. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@334945 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/Generic/avx512-super-registers-1.s | 86 ++++++++++++++++++++++ .../X86/Generic/avx512-super-registers-2.s | 86 ++++++++++++++++++++++ .../X86/Generic/avx512-super-registers-3.s | 86 ++++++++++++++++++++++ .../llvm-mca/X86/Generic/xop-super-registers-1.s | 86 ++++++++++++++++++++++ .../llvm-mca/X86/Generic/xop-super-registers-2.s | 86 ++++++++++++++++++++++ 5 files changed, 430 insertions(+) create mode 100644 test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s create mode 100644 test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s create mode 100644 test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s create mode 100644 test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s create mode 100644 test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s diff --git a/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s b/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s new file mode 100644 index 00000000000..52e6fc4f302 --- /dev/null +++ 
b/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=2 < %s | FileCheck %s + + vmulps %zmm0, %zmm1, %zmm2 + vaddps %xmm1, %xmm1, %xmm2 + vmulps %ymm2, %ymm3, %ymm4 + vaddps %xmm4, %xmm5, %xmm6 + vmulps %xmm6, %xmm3, %xmm4 + vaddps %xmm4, %xmm5, %xmm0 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 600 +# CHECK-NEXT: Total Cycles: 2103 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 0.29 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 1.00 vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: 1 5 1.00 vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm4, %xmm5, %xmm6 +# CHECK-NEXT: 1 5 1.00 vmulps %xmm6, %xmm3, %xmm4 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm4, %xmm5, %xmm0 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SBDivider +# CHECK-NEXT: [1] - SBFPDivider +# CHECK-NEXT: [2] - SBPort0 +# CHECK-NEXT: [3] - SBPort1 +# CHECK-NEXT: [4] - SBPort4 +# CHECK-NEXT: [5] - SBPort5 +# CHECK-NEXT: [6.0] - SBPort23 +# CHECK-NEXT: [6.1] - SBPort23 + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] +# CHECK-NEXT: - - 3.00 3.00 - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm4, %xmm5, %xmm6 +# CHECK-NEXT: - - 1.00 - - - - - 
vmulps %xmm6, %xmm3, %xmm4 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm4, %xmm5, %xmm0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 01234 + +# CHECK: [0,0] DeeeeeER . . . . . . . . vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: [0,1] DeeeE--R . . . . . . . . vaddps %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . . . vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [0,3] D==========eeeER . . . . . . vaddps %xmm4, %xmm5, %xmm6 +# CHECK-NEXT: [0,4] .D============eeeeeER . . . . . vmulps %xmm6, %xmm3, %xmm4 +# CHECK-NEXT: [0,5] .D=================eeeER . . . . . vaddps %xmm4, %xmm5, %xmm0 +# CHECK-NEXT: [1,0] .D====================eeeeeER . . . . vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: [1,1] .DeeeE----------------------R . . . . vaddps %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . D========================eeeeeER . . . vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [1,3] . D=============================eeeER . . vaddps %xmm4, %xmm5, %xmm6 +# CHECK-NEXT: [1,4] . D================================eeeeeER . vmulps %xmm6, %xmm3, %xmm4 +# CHECK-NEXT: [1,5] . D=====================================eeeER vaddps %xmm4, %xmm5, %xmm0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 2 11.0 0.5 0.0 vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: 1. 2 1.0 1.0 12.0 vaddps %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: 2. 2 15.5 0.0 0.0 vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: 3. 2 20.5 0.0 0.0 vaddps %xmm4, %xmm5, %xmm6 +# CHECK-NEXT: 4. 2 23.0 0.0 0.0 vmulps %xmm6, %xmm3, %xmm4 +# CHECK-NEXT: 5. 
2 28.0 0.0 0.0 vaddps %xmm4, %xmm5, %xmm0 diff --git a/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s b/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s new file mode 100644 index 00000000000..b63c1c80e9b --- /dev/null +++ b/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=2 < %s | FileCheck %s + + vmulps %zmm0, %zmm1, %zmm2 + vaddps %ymm1, %ymm1, %ymm2 + vmulps %zmm2, %zmm3, %zmm4 + vaddps %xmm4, %xmm5, %xmm6 + vmulps %xmm6, %xmm3, %xmm4 + vaddps %xmm4, %xmm5, %xmm0 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 600 +# CHECK-NEXT: Total Cycles: 2103 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 0.29 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 1.00 vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %ymm1, %ymm1, %ymm2 +# CHECK-NEXT: 1 5 1.00 vmulps %zmm2, %zmm3, %zmm4 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm4, %xmm5, %xmm6 +# CHECK-NEXT: 1 5 1.00 vmulps %xmm6, %xmm3, %xmm4 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm4, %xmm5, %xmm0 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SBDivider +# CHECK-NEXT: [1] - SBFPDivider +# CHECK-NEXT: [2] - SBPort0 +# CHECK-NEXT: [3] - SBPort1 +# CHECK-NEXT: [4] - SBPort4 +# CHECK-NEXT: [5] - SBPort5 +# CHECK-NEXT: [6.0] - SBPort23 +# CHECK-NEXT: [6.1] - SBPort23 + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] +# CHECK-NEXT: - - 3.00 3.00 - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - 
vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm1, %ymm1, %ymm2 +# CHECK-NEXT: - - 1.00 - - - - - vmulps %zmm2, %zmm3, %zmm4 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm4, %xmm5, %xmm6 +# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm6, %xmm3, %xmm4 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm4, %xmm5, %xmm0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 01234 + +# CHECK: [0,0] DeeeeeER . . . . . . . . vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: [0,1] DeeeE--R . . . . . . . . vaddps %ymm1, %ymm1, %ymm2 +# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . . . vmulps %zmm2, %zmm3, %zmm4 +# CHECK-NEXT: [0,3] D==========eeeER . . . . . . vaddps %xmm4, %xmm5, %xmm6 +# CHECK-NEXT: [0,4] .D============eeeeeER . . . . . vmulps %xmm6, %xmm3, %xmm4 +# CHECK-NEXT: [0,5] .D=================eeeER . . . . . vaddps %xmm4, %xmm5, %xmm0 +# CHECK-NEXT: [1,0] .D====================eeeeeER . . . . vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: [1,1] .DeeeE----------------------R . . . . vaddps %ymm1, %ymm1, %ymm2 +# CHECK-NEXT: [1,2] . D========================eeeeeER . . . vmulps %zmm2, %zmm3, %zmm4 +# CHECK-NEXT: [1,3] . D=============================eeeER . . vaddps %xmm4, %xmm5, %xmm6 +# CHECK-NEXT: [1,4] . D================================eeeeeER . vmulps %xmm6, %xmm3, %xmm4 +# CHECK-NEXT: [1,5] . D=====================================eeeER vaddps %xmm4, %xmm5, %xmm0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 2 11.0 0.5 0.0 vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: 1. 2 1.0 1.0 12.0 vaddps %ymm1, %ymm1, %ymm2 +# CHECK-NEXT: 2. 2 15.5 0.0 0.0 vmulps %zmm2, %zmm3, %zmm4 +# CHECK-NEXT: 3. 
2 20.5 0.0 0.0 vaddps %xmm4, %xmm5, %xmm6 +# CHECK-NEXT: 4. 2 23.0 0.0 0.0 vmulps %xmm6, %xmm3, %xmm4 +# CHECK-NEXT: 5. 2 28.0 0.0 0.0 vaddps %xmm4, %xmm5, %xmm0 diff --git a/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s b/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s new file mode 100644 index 00000000000..9dde249c5d3 --- /dev/null +++ b/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=2 < %s | FileCheck %s + + vmulps %zmm0, %zmm1, %zmm2 + vaddps %xmm16, %xmm17, %xmm2 + vmulps %ymm2, %ymm3, %ymm4 + vaddps %xmm4, %xmm18, %xmm6 + vmulps %xmm6, %xmm19, %xmm4 + vaddps %xmm4, %xmm20, %xmm0 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 600 +# CHECK-NEXT: Total Cycles: 2103 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 0.29 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 1.00 vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm16, %xmm17, %xmm2 +# CHECK-NEXT: 1 5 1.00 vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm4, %xmm18, %xmm6 +# CHECK-NEXT: 1 5 1.00 vmulps %xmm6, %xmm19, %xmm4 +# CHECK-NEXT: 1 3 1.00 vaddps %xmm4, %xmm20, %xmm0 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SBDivider +# CHECK-NEXT: [1] - SBFPDivider +# CHECK-NEXT: [2] - SBPort0 +# CHECK-NEXT: [3] - SBPort1 +# CHECK-NEXT: [4] - SBPort4 +# CHECK-NEXT: [5] - SBPort5 +# CHECK-NEXT: [6.0] - SBPort23 +# CHECK-NEXT: [6.1] - SBPort23 + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] +# CHECK-NEXT: - - 3.00 3.00 - - - - + +# CHECK: 
Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm16, %xmm17, %xmm2 +# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm4, %xmm18, %xmm6 +# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm6, %xmm19, %xmm4 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm4, %xmm20, %xmm0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 01234 + +# CHECK: [0,0] DeeeeeER . . . . . . . . vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: [0,1] DeeeE--R . . . . . . . . vaddps %xmm16, %xmm17, %xmm2 +# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . . . vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [0,3] D==========eeeER . . . . . . vaddps %xmm4, %xmm18, %xmm6 +# CHECK-NEXT: [0,4] .D============eeeeeER . . . . . vmulps %xmm6, %xmm19, %xmm4 +# CHECK-NEXT: [0,5] .D=================eeeER . . . . . vaddps %xmm4, %xmm20, %xmm0 +# CHECK-NEXT: [1,0] .D====================eeeeeER . . . . vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: [1,1] .DeeeE----------------------R . . . . vaddps %xmm16, %xmm17, %xmm2 +# CHECK-NEXT: [1,2] . D========================eeeeeER . . . vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [1,3] . D=============================eeeER . . vaddps %xmm4, %xmm18, %xmm6 +# CHECK-NEXT: [1,4] . D================================eeeeeER . vmulps %xmm6, %xmm19, %xmm4 +# CHECK-NEXT: [1,5] . D=====================================eeeER vaddps %xmm4, %xmm20, %xmm0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 
2 11.0 0.5 0.0 vmulps %zmm0, %zmm1, %zmm2 +# CHECK-NEXT: 1. 2 1.0 1.0 12.0 vaddps %xmm16, %xmm17, %xmm2 +# CHECK-NEXT: 2. 2 15.5 0.0 0.0 vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: 3. 2 20.5 0.0 0.0 vaddps %xmm4, %xmm18, %xmm6 +# CHECK-NEXT: 4. 2 23.0 0.0 0.0 vmulps %xmm6, %xmm19, %xmm4 +# CHECK-NEXT: 5. 2 28.0 0.0 0.0 vaddps %xmm4, %xmm20, %xmm0 diff --git a/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s b/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s new file mode 100644 index 00000000000..94cf5fcc4f6 --- /dev/null +++ b/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=2 < %s | FileCheck %s + + vmulps %ymm0, %ymm1, %ymm2 + vfrczpd %xmm1, %xmm2 + vmulps %ymm2, %ymm3, %ymm4 + vaddps %ymm4, %ymm5, %ymm6 + vmulps %ymm6, %ymm3, %ymm4 + vaddps %ymm4, %ymm5, %ymm0 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 600 +# CHECK-NEXT: Total Cycles: 2103 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 0.29 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 1.00 vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 3 1.00 vfrczpd %xmm1, %xmm2 +# CHECK-NEXT: 1 5 1.00 vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: 1 3 1.00 vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: 1 5 1.00 vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: 1 3 1.00 vaddps %ymm4, %ymm5, %ymm0 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SBDivider +# CHECK-NEXT: [1] - SBFPDivider +# CHECK-NEXT: [2] - SBPort0 +# CHECK-NEXT: [3] - SBPort1 +# CHECK-NEXT: [4] - SBPort4 +# CHECK-NEXT: [5] - SBPort5 +# CHECK-NEXT: [6.0] - SBPort23 +# CHECK-NEXT: [6.1] - 
SBPort23 + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] +# CHECK-NEXT: - - 3.00 3.00 - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - 1.00 - - - - vfrczpd %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm4, %ymm5, %ymm0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 01234 + +# CHECK: [0,0] DeeeeeER . . . . . . . . vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [0,1] DeeeE--R . . . . . . . . vfrczpd %xmm1, %xmm2 +# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . . . vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [0,3] D==========eeeER . . . . . . vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: [0,4] .D============eeeeeER . . . . . vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: [0,5] .D=================eeeER . . . . . vaddps %ymm4, %ymm5, %ymm0 +# CHECK-NEXT: [1,0] .D====================eeeeeER . . . . vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [1,1] .DeeeE----------------------R . . . . vfrczpd %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . D========================eeeeeER . . . vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [1,3] . D=============================eeeER . . vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: [1,4] . D================================eeeeeER . vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: [1,5] . 
D=====================================eeeER vaddps %ymm4, %ymm5, %ymm0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 2 11.0 0.5 0.0 vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1. 2 1.0 1.0 12.0 vfrczpd %xmm1, %xmm2 +# CHECK-NEXT: 2. 2 15.5 0.0 0.0 vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: 3. 2 20.5 0.0 0.0 vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: 4. 2 23.0 0.0 0.0 vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: 5. 2 28.0 0.0 0.0 vaddps %ymm4, %ymm5, %ymm0 diff --git a/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s b/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s new file mode 100644 index 00000000000..5d656b7bc50 --- /dev/null +++ b/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=2 < %s | FileCheck %s + + vmulps %ymm0, %ymm1, %ymm2 + vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2 + vmulps %ymm2, %ymm3, %ymm4 + vaddps %ymm4, %ymm5, %ymm6 + vmulps %ymm6, %ymm3, %ymm4 + vaddps %ymm4, %ymm5, %ymm0 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 600 +# CHECK-NEXT: Total Cycles: 2103 +# CHECK-NEXT: Dispatch Width: 4 +# CHECK-NEXT: IPC: 0.29 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 1.00 vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2 +# 
CHECK-NEXT: 1 5 1.00 vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: 1 3 1.00 vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: 1 5 1.00 vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: 1 3 1.00 vaddps %ymm4, %ymm5, %ymm0 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SBDivider +# CHECK-NEXT: [1] - SBFPDivider +# CHECK-NEXT: [2] - SBPort0 +# CHECK-NEXT: [3] - SBPort1 +# CHECK-NEXT: [4] - SBPort4 +# CHECK-NEXT: [5] - SBPort5 +# CHECK-NEXT: [6.0] - SBPort23 +# CHECK-NEXT: [6.1] - SBPort23 + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] +# CHECK-NEXT: - - 3.00 2.00 - 1.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - 1.00 - - vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: - - 1.00 - - - - - vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm4, %ymm5, %ymm0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 01234 + +# CHECK: [0,0] DeeeeeER . . . . . . . . vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [0,1] DeE----R . . . . . . . . vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2 +# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . . . vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [0,3] D==========eeeER . . . . . . vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: [0,4] .D============eeeeeER . . . . . vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: [0,5] .D=================eeeER . . . . . vaddps %ymm4, %ymm5, %ymm0 +# CHECK-NEXT: [1,0] .D====================eeeeeER . . . . vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [1,1] .DeE------------------------R . . . . vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . D========================eeeeeER . . . vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [1,3] . 
D=============================eeeER . . vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: [1,4] . D================================eeeeeER . vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: [1,5] . D=====================================eeeER vaddps %ymm4, %ymm5, %ymm0 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 2 11.0 0.5 0.0 vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1. 2 1.0 1.0 14.0 vpermil2pd $16, %xmm3, %xmm5, %xmm1, %xmm2 +# CHECK-NEXT: 2. 2 15.5 0.0 0.0 vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: 3. 2 20.5 0.0 0.0 vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: 4. 2 23.0 0.0 0.0 vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: 5. 2 28.0 0.0 0.0 vaddps %ymm4, %ymm5, %ymm0 -- 2.11.0