From 259800c8fc5c1f7d3be7dd875023363c894d153f Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Tue, 10 Apr 2018 12:50:03 +0000 Subject: [PATCH] [llvm-mca] Increase the default number of iterations to 100. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@329694 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/CommandGuide/llvm-mca.rst | 2 +- test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s | 42 +++++------ test/tools/llvm-mca/X86/BtVer2/resources-f16c.s | 33 ++++++++- test/tools/llvm-mca/X86/cpus.s | 92 ++++++++++++++++++------- test/tools/llvm-mca/X86/default-iterations.s | 32 +++++++-- tools/llvm-mca/README.txt | 2 +- tools/llvm-mca/SourceMgr.h | 2 +- 7 files changed, 147 insertions(+), 58 deletions(-) diff --git a/docs/CommandGuide/llvm-mca.rst b/docs/CommandGuide/llvm-mca.rst index d669b37013d..3a1b544f6b8 100644 --- a/docs/CommandGuide/llvm-mca.rst +++ b/docs/CommandGuide/llvm-mca.rst @@ -95,7 +95,7 @@ option specifies "``-``", then the output will also be sent to standard output. .. option:: -iterations= Specify the number of iterations to run. If this flag is set to 0, then the - tool sets the number of iterations to a default value (i.e. 70). + tool sets the number of iterations to a default value (i.e. 100). .. option:: -noalias= diff --git a/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s b/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s index e60933b54c6..8d99bb7a977 100644 --- a/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s +++ b/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s @@ -16,14 +16,12 @@ vsqrtps %xmm0, %xmm2 vaddps %ymm0, %ymm1, %ymm2 vsqrtps %ymm0, %ymm2 - -# CHECK: Iterations: 70 -# CHECK-NEXT: Instructions: 560 -# CHECK-NEXT: Total Cycles: 4416 +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 800 +# CHECK-NEXT: Total Cycles: 6306 # CHECK-NEXT: Dispatch Width: 2 # CHECK-NEXT: IPC: 0.13 - # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency @@ -42,7 +40,6 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 42 42.00 vsqrtps %ymm0, %ymm2 - # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -59,14 +56,14 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL - # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: - - - 3.00 63.00 6.01 5.99 - - - 1.00 1.00 1.00 3.00 +# CHECK-NEXT: - - - 3.00 63.00 6.01 5.99 - - - 1.00 1.00 1.00 3.00 + # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: -# CHECK-NEXT: - - - - - 2.00 1.00 - - - - 0.03 0.97 2.00 vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - 0.01 0.99 - - - - 0.97 0.03 - vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - 2.00 1.00 - - - - 0.02 0.98 2.00 vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - 0.01 0.99 - - - - 0.98 0.02 - vpand %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 vpclmulqdq $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2 @@ -74,11 +71,10 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - 42.00 - 2.00 - - - - - - - vsqrtps %ymm0, %ymm2 - - # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 0 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 + # CHECK: [0,0] DeeeeER . . . . . . . . . . . . . vpmulld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [0,1] .DeE--R . . . . . . . . . . . . . vpand %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [0,2] . DeeeER . . . . . . . . . . . . . vcvttps2dq %xmm0, %xmm2 @@ -93,19 +89,19 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: [1,3] . . DeeE----------------------------------------------------------R vpclmulqdq $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: [1,4] . . DeeeE--------------------------------------------------------R vaddps %xmm0, %xmm1, %xmm2 - # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage -# CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 1.0 1.0 28.0 vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1. 2 1.0 1.0 30.5 vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 2. 2 1.0 1.0 28.0 vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 3. 2 1.0 1.0 29.5 vpclmulqdq $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 4. 2 1.0 1.0 28.0 vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 5. 1 1.0 1.0 0.0 vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 6. 1 1.0 1.0 17.0 vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 7. 1 20.0 20.0 0.0 vsqrtps %ymm0, %ymm2 +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 2 1.0 1.0 28.0 vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1. 2 1.0 1.0 30.5 vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2. 2 1.0 1.0 28.0 vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 3. 2 1.0 1.0 29.5 vpclmulqdq $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4. 2 1.0 1.0 28.0 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 5. 1 1.0 1.0 0.0 vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 6. 1 1.0 1.0 17.0 vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 7. 1 20.0 20.0 0.0 vsqrtps %ymm0, %ymm2 + diff --git a/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s b/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s index e2768a576cd..814a8b374fe 100644 --- a/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s +++ b/test/tools/llvm-mca/X86/BtVer2/resources-f16c.s @@ -12,6 +12,30 @@ vcvtps2ph $0, %xmm0, (%rax) vcvtps2ph $0, %ymm0, %xmm2 vcvtps2ph $0, %ymm0, (%rax) +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 800 +# CHECK-NEXT: Total Cycles: 1503 +# CHECK-NEXT: Dispatch Width: 2 +# CHECK-NEXT: IPC: 0.53 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: 2 3 2.00 vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: 2 8 2.00 * vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 * vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: 3 6 2.00 vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: 3 11 2.00 * vcvtps2ph $0, %ymm0, (%rax) + # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 # CHECK-NEXT: [1] - JALU1 @@ -28,6 +52,10 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK-NEXT: [12] - JVALU1 # CHECK-NEXT: [13] - JVIMUL +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - 2.00 2.00 - 12.00 2.00 - 2.00 12.00 - - - + # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtph2ps %xmm0, %xmm2 @@ -36,5 +64,6 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK-NEXT: - - - - - - 2.00 1.00 - - 2.00 - - - vcvtph2ps (%rax), %ymm2 # CHECK-NEXT: - - - - - - 1.00 - - - 1.00 - - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: - - - 1.80 0.20 - 2.00 - - - 2.00 - - - vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: - - - 0.20 1.80 - 2.00 - - 1.00 2.00 - - - vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: - - - 1.86 0.14 - 2.00 - - - 2.00 - - - vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: - - - 0.14 1.86 - 2.00 - - 1.00 2.00 - - - vcvtps2ph $0, %ymm0, (%rax) + diff --git a/test/tools/llvm-mca/X86/cpus.s b/test/tools/llvm-mca/X86/cpus.s index 8ef71d50b9a..85d58810205 100644 --- a/test/tools/llvm-mca/X86/cpus.s +++ b/test/tools/llvm-mca/X86/cpus.s @@ -1,27 +1,73 @@ -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s | FileCheck --check-prefix=ALL --check-prefix=BTVER2 %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 < %s | FileCheck --check-prefix=ALL --check-prefix=ZNVER1 %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge < %s | FileCheck --check-prefix=ALL --check-prefix=SANDYBRIDGE %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge < %s | FileCheck --check-prefix=ALL --check-prefix=IVYBRIDGE %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell < %s | FileCheck --check-prefix=ALL --check-prefix=HASWELL %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell < %s | FileCheck --check-prefix=ALL --check-prefix=BROADWELL %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl < %s | FileCheck --check-prefix=ALL --check-prefix=KNL %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake < %s | FileCheck --check-prefix=ALL --check-prefix=SKX %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 < %s | FileCheck --check-prefix=ALL --check-prefix=SKX-AVX512 %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=slm < %s | FileCheck --check-prefix=ALL --check-prefix=SLM %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BTVER2 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=ZNVER1 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SANDYBRIDGE %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=IVYBRIDGE %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=HASWELL %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BROADWELL %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=KNL %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SKX %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SKX-AVX512 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=slm -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SLM %s add %edi, %eax -# ALL: Iterations: 70 -# ALL-NEXT: Instructions: 70 - -# BTVER2: Dispatch Width: 2 -# ZNVER1: Dispatch Width: 4 -# SANDYBRIDGE: Dispatch Width: 4 -# IVYBRIDGE: Dispatch Width: 4 -# HASWELL: Dispatch Width: 4 -# BROADWELL: Dispatch Width: 4 -# KNL: Dispatch Width: 4 -# SKX: Dispatch Width: 6 -# SKX-AVX512: Dispatch Width: 6 -# SLM: Dispatch Width: 2 +# BTVER2: Iterations: 100 +# BTVER2-NEXT: Instructions: 100 +# BTVER2-NEXT: Total Cycles: 103 +# BTVER2-NEXT: Dispatch Width: 2 +# BTVER2-NEXT: IPC: 0.97 + +# SLM: Iterations: 100 +# SLM-NEXT: Instructions: 100 +# SLM-NEXT: Total Cycles: 103 +# SLM-NEXT: Dispatch Width: 2 +# SLM-NEXT: IPC: 0.97 + +# BROADWELL: Iterations: 100 +# BROADWELL-NEXT: Instructions: 100 +# BROADWELL-NEXT: Total Cycles: 103 +# BROADWELL-NEXT: Dispatch Width: 4 +# BROADWELL-NEXT: IPC: 0.97 + +# HASWELL: Iterations: 100 +# HASWELL-NEXT: Instructions: 100 +# HASWELL-NEXT: Total Cycles: 103 +# HASWELL-NEXT: Dispatch Width: 4 +# HASWELL-NEXT: IPC: 0.97 + +# IVYBRIDGE: Iterations: 100 +# IVYBRIDGE-NEXT: Instructions: 100 +# IVYBRIDGE-NEXT: Total Cycles: 103 +# IVYBRIDGE-NEXT: Dispatch Width: 4 +# IVYBRIDGE-NEXT: IPC: 0.97 + +# KNL: Iterations: 100 +# KNL-NEXT: Instructions: 100 +# KNL-NEXT: Total Cycles: 103 +# KNL-NEXT: Dispatch Width: 4 +# KNL-NEXT: IPC: 0.97 + +# SANDYBRIDGE: Iterations: 100 +# SANDYBRIDGE-NEXT: Instructions: 100 +# SANDYBRIDGE-NEXT: Total Cycles: 103 +# SANDYBRIDGE-NEXT: Dispatch Width: 4 +# SANDYBRIDGE-NEXT: IPC: 0.97 + +# ZNVER1: Iterations: 100 +# ZNVER1-NEXT: Instructions: 100 +# ZNVER1-NEXT: Total Cycles: 103 +# ZNVER1-NEXT: Dispatch Width: 4 +# ZNVER1-NEXT: IPC: 0.97 + +# SKX: Iterations: 100 +# SKX-NEXT: Instructions: 100 +# SKX-NEXT: Total Cycles: 103 +# SKX-NEXT: Dispatch Width: 6 +# SKX-NEXT: IPC: 0.97 + +# SKX-AVX512: Iterations: 100 +# SKX-AVX512-NEXT: Instructions: 100 +# SKX-AVX512-NEXT: Total Cycles: 103 +# SKX-AVX512-NEXT: Dispatch Width: 6 +# SKX-AVX512-NEXT: IPC: 0.97 diff --git a/test/tools/llvm-mca/X86/default-iterations.s b/test/tools/llvm-mca/X86/default-iterations.s index 336c5297a7a..4440a2ea2b8 100644 --- a/test/tools/llvm-mca/X86/default-iterations.s +++ b/test/tools/llvm-mca/X86/default-iterations.s @@ -1,11 +1,29 @@ -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s 2>&1 | FileCheck --check-prefix=DEFAULT %s -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=0 < %s 2>&1 | FileCheck --check-prefix=DEFAULT %s -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 < %s 2>&1 | FileCheck --check-prefix=CUSTOM %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false < %s | FileCheck --check-prefix=ALL --check-prefix=DEFAULT %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=0 -resource-pressure=false < %s | FileCheck --check-prefix=ALL --check-prefix=DEFAULT %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false < %s | FileCheck --check-prefix=ALL --check-prefix=CUSTOM %s add %eax, %eax -# DEFAULT: Iterations: 70 -# DEFAULT-NEXT: Instructions: 70 +# CUSTOM: Iterations: 1 +# CUSTOM-NEXT: Instructions: 1 +# CUSTOM-NEXT: Total Cycles: 4 +# CUSTOM-NEXT: Dispatch Width: 2 +# CUSTOM-NEXT: IPC: 0.25 + +# DEFAULT: Iterations: 100 +# DEFAULT-NEXT: Instructions: 100 +# DEFAULT-NEXT: Total Cycles: 103 +# DEFAULT-NEXT: Dispatch Width: 2 +# DEFAULT-NEXT: IPC: 0.97 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 1 0.50 addl %eax, %eax -# CUSTOM: Iterations: 1 -# CUSTOM-NEXT: Instructions: 1 diff --git a/tools/llvm-mca/README.txt b/tools/llvm-mca/README.txt index 99d67dee1a8..eedbd87e8e8 100644 --- a/tools/llvm-mca/README.txt +++ b/tools/llvm-mca/README.txt @@ -59,7 +59,7 @@ sequence of MCInst is then analyzed by a 'Backend' module to generate a performance report. The Backend module internally emulates the execution of the machine code -sequence in a loop of iterations (which by default is 70). At the end of this +sequence in a loop of iterations (which by default is 100). At the end of this process, the backend collects a number of statistics which are then printed out in the form of a report. diff --git a/tools/llvm-mca/SourceMgr.h b/tools/llvm-mca/SourceMgr.h index d7695705107..4d4781f3b3a 100644 --- a/tools/llvm-mca/SourceMgr.h +++ b/tools/llvm-mca/SourceMgr.h @@ -28,7 +28,7 @@ class SourceMgr { const InstVec &Sequence; unsigned Current; unsigned Iterations; - static const unsigned DefaultIterations = 70; + static const unsigned DefaultIterations = 100; public: SourceMgr(const InstVec &MCInstSequence, unsigned NumIterations) -- 2.11.0