Re: [PATCH 2/2] perf annotate: Add fusion logic for AMD microarchs
From: Arnaldo Carvalho de Melo <acme@kernel.org>
Date: 2021-09-09 20:33:17
Also in:
lkml
Em Mon, Sep 06, 2021 at 04:26:40PM +0530, Ravi Bangoria escreveu:
quoted hunk ↗ jump to hunk
AMD family 15h and above microarchs fuse a subset of cmp/test/ALU instructions with branch instructions[1][2]. Add perf annotate fused instruction support for these microarchs. Before: │ testb $0x80,0x51(%rax) │ ┌──jne 5b3 0.78 │ │ mov %r13,%rdi │ │→ callq mark_page_accessed 1.08 │5b3:└─→mov 0x8(%r13),%rax After: │ ┌──testb $0x80,0x51(%rax) │ ├──jne 5b3 0.78 │ │ mov %r13,%rdi │ │→ callq mark_page_accessed 1.08 │5b3:└─→mov 0x8(%r13),%rax [1] https://bugzilla.kernel.org/attachment.cgi?id=298553 [2] https://bugzilla.kernel.org/attachment.cgi?id=298555 Reported-by: Kim Phillips <redacted> Signed-off-by: Ravi Bangoria <redacted> --- tools/perf/arch/x86/annotate/instructions.c | 37 ++++++++++++++++++++- tools/perf/util/annotate.c | 1 + 2 files changed, 37 insertions(+), 1 deletion(-)diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 24ea12ec7e02..46d7124cc4e1 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c@@ -144,8 +144,31 @@ static struct ins x86__instructions[] = { { .name = "xorps", .ops = &mov_ops, }, }; -static bool x86__ins_is_fused(struct arch *arch, const char *ins1, +static bool amd__ins_is_fused(struct arch *arch, const char *ins1, const char *ins2) +{ + if (strstr(ins2, "jmp")) + return false; + + /* Family >= 15h supports cmp/test + branch fusion */ + if (arch->family >= 0x15 && (strstarts(ins1, "test") || + (strstarts(ins1, "cmp") && !strstr(ins1, "xchg")))) { + return true; + } + + /* Family >= 19h supports some ALU + branch fusion */ + if (arch->family >= 0x19 && (strstarts(ins1, "add") || + strstarts(ins1, "sub") || strstarts(ins1, "and") || + strstarts(ins1, "inc") || strstarts(ins1, "dec") || + strstarts(ins1, "or") || strstarts(ins1, "xor"))) { + return true; + } + + return false; +} + +static bool intel__ins_is_fused(struct arch *arch, const char *ins1, + const char *ins2) { if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, 
"jmp")) return false;@@ -172,6 +195,15 @@ static bool x86__ins_is_fused(struct arch *arch, const char *ins1, return false; } +static bool x86__ins_is_fused(struct arch *arch, const char *ins1, + const char *ins2) +{ + if (strstarts(arch->vendor, "AuthenticAMD")) + return amd__ins_is_fused(arch, ins1, ins2); + + return intel__ins_is_fused(arch, ins1, ins2); +} +
Can we instead make x86__ins_is_fused a function pointer and, rather than storing arch->vendor, set it to either amd__ins_is_fused() or intel__ins_is_fused()? I.e. here:
quoted hunk ↗ jump to hunk
static int x86__cpuid_parse(struct arch *arch, char *cpuid) { unsigned int family, model, stepping;@@ -184,6 +216,9 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid) if (ret == 3) { arch->family = family; arch->model = model; + arch->vendor = strndup(cpuid, 12);
x86__ins_is_fused = strstarts(cpuid, "AuthenticAMD") ? amd__ins_is_fused : intel__ins_is_fused; ?
quoted hunk ↗ jump to hunk
+ if (!arch->vendor) + return -1; return 0; }diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0bae061b2d6d..88326bb990b5 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c@@ -77,6 +77,7 @@ struct arch { bool sorted_instructions; bool initialized; void *priv; + char *vendor; unsigned int model; unsigned int family; int (*init)(struct arch *arch, char *cpuid);-- 2.27.0
-- - Arnaldo