OSDN Git Service

perf annotate: Add fusion logic for AMD microarchs
author Ravi Bangoria <ravi.bangoria@amd.com>
Sat, 11 Sep 2021 04:38:54 +0000 (10:08 +0530)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 15 Sep 2021 20:54:52 +0000 (17:54 -0300)
AMD family 15h and above microarchs fuse a subset of cmp/test/ALU
instructions with branch instructions[1][2]. Add perf annotate
fused instruction support for these microarchs.

Before:
         │       testb  $0x80,0x51(%rax)
         │    ┌──jne    5b3
    0.78 │    │  mov    %r13,%rdi
         │    │→ callq  mark_page_accessed
    1.08 │5b3:└─→mov    0x8(%r13),%rax

After:
         │    ┌──testb  $0x80,0x51(%rax)
         │    ├──jne    5b3
    0.78 │    │  mov    %r13,%rdi
         │    │→ callq  mark_page_accessed
    1.08 │5b3:└─→mov    0x8(%r13),%rax

[1] https://bugzilla.kernel.org/attachment.cgi?id=298553
[2] https://bugzilla.kernel.org/attachment.cgi?id=298555

Committer testing:

On a:

  $ grep -m1 "model name" /proc/cpuinfo
  model name : AMD Ryzen 9 3900X 12-Core Processor
  $

  Samples: 44K of event 'cycles', 4000 Hz, Event count (approx.): 7533249650
  _int_malloc  /usr/lib64/libc-2.33.so [Percent: local period]
  Percent│    ┌──test   %eax,%eax
         │    ├──jne    884
         │    │↓ jmpq   943
         │    │  nop
         │878:│  add    $0x10,%rdx
    0.64 │    │  add    %eax,%eax
    0.57 │    │↓ je     cc9
    0.77 │884:└─→test   %esi,%eax
         │     ↑ je     878
         │       mov    0x18(%rdx),%r15

Reported-by: Kim Phillips <kim.phillips@amd.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20210911043854.8373-2-ravi.bangoria@amd.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/arch/x86/annotate/instructions.c
tools/perf/util/annotate.c

index 24ea12e..3058726 100644 (file)
@@ -144,9 +144,32 @@ static struct ins x86__instructions[] = {
        { .name = "xorps",      .ops = &mov_ops, },
 };
 
-static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
+static bool amd__ins_is_fused(struct arch *arch, const char *ins1,
                              const char *ins2)
 {
+       if (strstr(ins2, "jmp"))
+               return false;
+
+       /* Family >= 15h supports cmp/test + branch fusion */
+       if (arch->family >= 0x15 && (strstarts(ins1, "test") ||
+           (strstarts(ins1, "cmp") && !strstr(ins1, "xchg")))) {
+               return true;
+       }
+
+       /* Family >= 19h supports some ALU + branch fusion */
+       if (arch->family >= 0x19 && (strstarts(ins1, "add") ||
+           strstarts(ins1, "sub") || strstarts(ins1, "and") ||
+           strstarts(ins1, "inc") || strstarts(ins1, "dec") ||
+           strstarts(ins1, "or") || strstarts(ins1, "xor"))) {
+               return true;
+       }
+
+       return false;
+}
+
+static bool intel__ins_is_fused(struct arch *arch, const char *ins1,
+                               const char *ins2)
+{
        if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp"))
                return false;
 
@@ -184,6 +207,9 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid)
        if (ret == 3) {
                arch->family = family;
                arch->model = model;
+               arch->ins_is_fused = strstarts(cpuid, "AuthenticAMD") ?
+                                       amd__ins_is_fused :
+                                       intel__ins_is_fused;
                return 0;
        }
 
index 0bae061..b55f354 100644 (file)
@@ -183,7 +183,6 @@ static struct arch architectures[] = {
                .init = x86__annotate_init,
                .instructions = x86__instructions,
                .nr_instructions = ARRAY_SIZE(x86__instructions),
-               .ins_is_fused = x86__ins_is_fused,
                .objdump =  {
                        .comment_char = '#',
                },