Thread (4 messages) 4 messages, 4 authors, 2021-12-18

Re: [PATCH v3] perf arm-spe: Synthesize SPE instruction events

From: Namhyung Kim <namhyung@kernel.org>
Date: 2021-12-17 00:54:00
Also in: linux-arm-kernel, lkml

On Thu, Dec 16, 2021 at 7:24 AM German Gomez [off-list ref] wrote:
Synthesize instruction events for every ARM SPE record.

Arm SPE implements a hardware-based sample period, and perf implements a
software-based one. Add a warning message to inform the user of this.

Signed-off-by: German Gomez <redacted>
Acked-by: Namhyung Kim <namhyung@kernel.org>

Thanks,
Namhyung

quoted hunk ↗ jump to hunk
---
Changes since v2
  - Rebase patch on top of https://lore.kernel.org/r/20211201220855.1260688-1-namhyung@kernel.org (local)
  - Don't error out when using unsupported sample period type.
  - Store instructions_sample_period into samples.
Changes since v1 [https://lore.kernel.org/all/20211117142833.226629-1-german.gomez@arm.com (local)]
  - Generate events with "--itrace=i" instead of "--itrace=o".
  - Generate events with virt_addr, phys_addr, and data_src values.
---
 tools/perf/util/arm-spe.c | 62 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 8a3828f86901..d2b64e3f588b 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -58,6 +58,8 @@ struct arm_spe {
        u8                              sample_branch;
        u8                              sample_remote_access;
        u8                              sample_memory;
+       u8                              sample_instructions;
+       u64                             instructions_sample_period;

        u64                             l1d_miss_id;
        u64                             l1d_access_id;
@@ -68,6 +70,7 @@ struct arm_spe {
        u64                             branch_miss_id;
        u64                             remote_access_id;
        u64                             memory_id;
+       u64                             instructions_id;

        u64                             kernel_start;
@@ -90,6 +93,7 @@ struct arm_spe_queue {
        u64                             time;
        u64                             timestamp;
        struct thread                   *thread;
+       u64                             period_instructions;
 };

 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
        speq->pid = -1;
        speq->tid = -1;
        speq->cpu = -1;
+       speq->period_instructions = 0;

        /* params set */
        params.get_trace = arm_spe_get_trace;
@@ -353,6 +358,35 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
        return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 }

+static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
+                                            u64 spe_events_id, u64 data_src)
+{
+       struct arm_spe *spe = speq->spe;
+       struct arm_spe_record *record = &speq->decoder->record;
+       union perf_event *event = speq->event_buf;
+       struct perf_sample sample = { .ip = 0, };
+
+       /*
+        * Handles perf instruction sampling period.
+        */
+       speq->period_instructions++;
+       if (speq->period_instructions < spe->instructions_sample_period)
+               return 0;
+       speq->period_instructions = 0;
+
+       arm_spe_prep_sample(spe, speq, event, &sample);
+
+       sample.id = spe_events_id;
+       sample.stream_id = spe_events_id;
+       sample.addr = record->virt_addr;
+       sample.phys_addr = record->phys_addr;
+       sample.data_src = data_src;
+       sample.period = spe->instructions_sample_period;
+       sample.weight = record->latency;
+
+       return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
 #define SPE_MEM_TYPE   (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
                         ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
                         ARM_SPE_REMOTE_ACCESS)
@@ -482,6 +516,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
                        return err;
        }

+       if (spe->sample_instructions) {
+               err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
+               if (err)
+                       return err;
+       }
+
        return 0;
 }
@@ -1110,7 +1150,29 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
                        return err;
                spe->memory_id = id;
                arm_spe_set_event_name(evlist, id, "memory");
+               id += 1;
+       }
+
+       if (spe->synth_opts.instructions) {
+               if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
+                       pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
+                       goto synth_instructions_out;
+               }
+               if (spe->synth_opts.period > 1)
+                       pr_warning("Arm SPE has a hardware-based sample period.\n"
+                                  "Additional instruction events will be discarded by --itrace\n");
+
+               spe->sample_instructions = true;
+               attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+               attr.sample_period = spe->synth_opts.period;
+               spe->instructions_sample_period = attr.sample_period;
+               err = arm_spe_synth_event(session, &attr, id);
+               if (err)
+                       return err;
+               spe->instructions_id = id;
+               arm_spe_set_event_name(evlist, id, "instructions");
        }
+synth_instructions_out:

        return 0;
 }
--
2.25.1
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help