Re: [PATCH v3 3/4] perf: xgene: Add APM X-Gene SoC Performance Monitoring Unit driver
From: Mark Rutland <mark.rutland@arm.com>
Date: 2016-06-16 15:40:13
Also in:
linux-arm-kernel, lkml
Hi, On Thu, Jun 09, 2016 at 06:24:51PM -0700, Tai Nguyen wrote:
+#include <linux/acpi.h> +#include <linux/efi.h> +#include <linux/module.h> +#include <linux/io.h> +#include <linux/clk.h> +#include <linux/of_platform.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> +#include <linux/of_irq.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/perf_event.h> +#include <linux/module.h> +#include <linux/cpumask.h> +#include <linux/slab.h> +#include <linux/mfd/syscon.h> +#include <linux/regmap.h>
Minor nit: please sort these alphabetically (it makes scanning them by-eye easier and can help when there are conflicts). [...]
+static struct dev_ext_attribute l3c_pmu_format_attrs[] = {
+ PMU_FORMAT_EXT_ATTR(l3c_eventid, "config:0-7"),
+ PMU_FORMAT_EXT_ATTR(l3c_agentid, "config1:0-9"),
+};
+
+static struct dev_ext_attribute iob_pmu_format_attrs[] = {
+ PMU_FORMAT_EXT_ATTR(iob_eventid, "config:0-7"),
+ PMU_FORMAT_EXT_ATTR(iob_agentid, "config1:0-63"),
+};
+
+static struct dev_ext_attribute mcb_pmu_format_attrs[] = {
+ PMU_FORMAT_EXT_ATTR(mcb_eventid, "config:0-5"),
+ PMU_FORMAT_EXT_ATTR(mcb_agentid, "config1:0-9"),
+};
+
+static struct dev_ext_attribute mc_pmu_format_attrs[] = {
+ PMU_FORMAT_EXT_ATTR(mc_eventid, "config:0-28"),
+};Please make these structures const.
+static struct attribute_group pmu_format_attr_group = {
+ .name = "format",
+ .attrs = NULL, /* Filled in xgene_pmu_alloc_attrs */
+};
I think you should just have a (static const) format_attr_group for each
class, which you reuse and share across instances, rather than bothering
with dynamic allocation (which I think is broken -- more on that below).
To save some pain, I think you can use a compound literal to initialise
each of those in one go:
static const struct attribute_group l3c_pmu_format_attr_group = {
.name = "format",
.attrs = (const dev_ext_attribute[]) {
PMU_FORMAT_EXT_ATTR(l3c_eventid, "config:0-7"),
PMU_FORMAT_EXT_ATTR(l3c_agentid, "config1:0-9"),
},
};
You'll need a full pmu attr group for each class also, but you don't
lose much by doing so.
+
+/*
+ * sysfs event attributes
+ */
+static ssize_t _xgene_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dev_ext_attribute *eattr;
+
+ eattr = container_of(attr, struct dev_ext_attribute, attr);
+ return sprintf(buf, "config=0x%lx\n", (unsigned long)eattr->var);
+}
+
+#define PMU_EVENT_EXT_ATTR(_name, _config) \
+ PMU_EXT_ATTR(_name, _xgene_pmu_event_show, (unsigned long)_config)
+
+static struct dev_ext_attribute l3c_pmu_events_attrs[] = {
+ PMU_EVENT_EXT_ATTR(cycle-count, 0x00),
+ PMU_EVENT_EXT_ATTR(cycle-count-div-64, 0x01),
+ PMU_EVENT_EXT_ATTR(read-hit, 0x02),
+ PMU_EVENT_EXT_ATTR(read-miss, 0x03),
+ PMU_EVENT_EXT_ATTR(write-need-replacement, 0x06),
+ PMU_EVENT_EXT_ATTR(write-not-need-replacement, 0x07),
+ PMU_EVENT_EXT_ATTR(tq-full, 0x08),
+ PMU_EVENT_EXT_ATTR(ackq-full, 0x09),
+ PMU_EVENT_EXT_ATTR(wdb-full, 0x0a),
+ PMU_EVENT_EXT_ATTR(bank-fifo-full, 0x0b),
+ PMU_EVENT_EXT_ATTR(odb-full, 0x0c),
+ PMU_EVENT_EXT_ATTR(wbq-full, 0x0d),
+ PMU_EVENT_EXT_ATTR(bank-conflict-fifo-issue, 0x0e),
+ PMU_EVENT_EXT_ATTR(bank-fifo-issue, 0x0f),
+};
+
+static struct dev_ext_attribute iob_pmu_events_attrs[] = {
+ PMU_EVENT_EXT_ATTR(cycle-count, 0x00),
+ PMU_EVENT_EXT_ATTR(cycle-count-div-64, 0x01),
+ PMU_EVENT_EXT_ATTR(axi0-read, 0x02),
+ PMU_EVENT_EXT_ATTR(axi0-read-partial, 0x03),
+ PMU_EVENT_EXT_ATTR(axi1-read, 0x04),
+ PMU_EVENT_EXT_ATTR(axi1-read-partial, 0x05),
+ PMU_EVENT_EXT_ATTR(csw-read-block, 0x06),
+ PMU_EVENT_EXT_ATTR(csw-read-partial, 0x07),
+ PMU_EVENT_EXT_ATTR(axi0-write, 0x10),
+ PMU_EVENT_EXT_ATTR(axi0-write-partial, 0x11),
+ PMU_EVENT_EXT_ATTR(axi1-write, 0x13),
+ PMU_EVENT_EXT_ATTR(axi1-write-partial, 0x14),
+ PMU_EVENT_EXT_ATTR(csw-inbound-dirty, 0x16),
+};
+
+static struct dev_ext_attribute mcb_pmu_events_attrs[] = {
+ PMU_EVENT_EXT_ATTR(cycle-count, 0x00),
+ PMU_EVENT_EXT_ATTR(cycle-count-div-64, 0x01),
+ PMU_EVENT_EXT_ATTR(csw-read, 0x02),
+ PMU_EVENT_EXT_ATTR(csw-write-request, 0x03),
+ PMU_EVENT_EXT_ATTR(mcb-csw-stall, 0x04),
+ PMU_EVENT_EXT_ATTR(cancel-read-gack, 0x05),
+};
+
+static struct dev_ext_attribute mc_pmu_events_attrs[] = {
+ PMU_EVENT_EXT_ATTR(cycle-count, 0x00),
+ PMU_EVENT_EXT_ATTR(cycle-count-div-64, 0x01),
+ PMU_EVENT_EXT_ATTR(act-cmd-sent, 0x02),
+ PMU_EVENT_EXT_ATTR(pre-cmd-sent, 0x03),
+ PMU_EVENT_EXT_ATTR(rd-cmd-sent, 0x04),
+ PMU_EVENT_EXT_ATTR(rda-cmd-sent, 0x05),
+ PMU_EVENT_EXT_ATTR(wr-cmd-sent, 0x06),
+ PMU_EVENT_EXT_ATTR(wra-cmd-sent, 0x07),
+ PMU_EVENT_EXT_ATTR(pde-cmd-sent, 0x08),
+ PMU_EVENT_EXT_ATTR(sre-cmd-sent, 0x09),
+ PMU_EVENT_EXT_ATTR(prea-cmd-sent, 0x0a),
+ PMU_EVENT_EXT_ATTR(ref-cmd-sent, 0x0b),
+ PMU_EVENT_EXT_ATTR(rd-rda-cmd-sent, 0x0c),
+ PMU_EVENT_EXT_ATTR(wr-wra-cmd-sent, 0x0d),
+ PMU_EVENT_EXT_ATTR(in-rd-collision, 0x0e),
+ PMU_EVENT_EXT_ATTR(in-wr-collision, 0x0f),
+ PMU_EVENT_EXT_ATTR(collision-queue-not-empty, 0x10),
+ PMU_EVENT_EXT_ATTR(collision-queue-full, 0x11),
+ PMU_EVENT_EXT_ATTR(mcu-request, 0x12),
+ PMU_EVENT_EXT_ATTR(mcu-rd-request, 0x13),
+ PMU_EVENT_EXT_ATTR(mcu-hp-rd-request, 0x14),
+ PMU_EVENT_EXT_ATTR(mcu-wr-request, 0x15),
+ PMU_EVENT_EXT_ATTR(mcu-rd-proceed-all, 0x16),
+ PMU_EVENT_EXT_ATTR(mcu-rd-proceed-cancel, 0x17),
+ PMU_EVENT_EXT_ATTR(mcu-rd-response, 0x18),
+ PMU_EVENT_EXT_ATTR(mcu-rd-proceed-speculative-all, 0x19),
+ PMU_EVENT_EXT_ATTR(mcu-rd-proceed-speculative-cancel, 0x1a),
+ PMU_EVENT_EXT_ATTR(mcu-wr-proceed-all, 0x1b),
+ PMU_EVENT_EXT_ATTR(mcu-wr-proceed-cancel, 0x1c),
+};
+
+static struct attribute_group pmu_events_attr_group = {
+ .name = "events",
+ .attrs = NULL, /* Filled in xgene_pmu_alloc_attrs */
+};My comments for the format groups apply here too.
+
+/*
+ * sysfs cpumask attributes
+ */
+static ssize_t xgene_pmu_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct xgene_pmu_dev *pmu_dev = to_pmu_dev(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, &pmu_dev->parent->cpu);
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, xgene_pmu_cpumask_show, NULL);
+
+static struct attribute *xgene_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group pmu_cpumask_attr_group = {
+ .attrs = xgene_pmu_cpumask_attrs,
+};Please make these const.
+
+static const struct attribute_group *pmu_attr_groups[] = {
+ &pmu_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &pmu_events_attr_group,
+ NULL
+};As mentioned earlier, you'll need one of these per class. [...]
+static int xgene_perf_event_init(struct perf_event *event)
+{
+ struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 config, config1;
+
+ /* Test the event attr type check for PMU enumeration */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * SOC PMU counters are shared across all cores.
+ * Therefore, it does not support per-process mode.
+ * Also, it does not support event sampling mode.
+ */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ /* SOC counters do not have usr/os/guest/host bits */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+ /*
+ * Many perf core operations (eg. events rotation) operate on a
+ * single CPU context. This is obvious for CPU PMUs, where one
+ * expects the same sets of events being observed on all CPUs,
+ * but can lead to issues for off-core PMUs, where each
+ * event could be theoretically assigned to a different CPU. To
+ * mitigate this, we enforce CPU assignment to one, selected
+ * processor (the one described in the "cpumask" attribute).
+ */
+ event->cpu = cpumask_first(&pmu_dev->parent->cpu);I didn't spot a perf_pmu_migrate_context call anywhere. Do you not plan to handle hotplug?
+ + config = event->attr.config; + config1 = event->attr.config1; + + hwc->config = config; + /* + * Each bit of the config1 field represents an agent from which the + * request of the event come. The event is counted only if it's caused + * by a request of an agent has the bit set. + * By default, the event is counted for all agents. + */ + if (config1) + hwc->extra_reg.config = config1; + else + hwc->extra_reg.config = 0xFFFFFFFFFFFFFFFFULL; +
If you treated the bits as having the opposite meaning (i.e. a bit set means ignore an agent), then the zero default would not have to be special-cased here. Currently 0 and 0xFFFFFFFFFFFFFFFFULL mean the same thing, which is a little odd. You also need to verify the event grouping, e.g. avoiding cross-pmu groups. See what we do in the arm-ccn driver. [...]
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, count) + != prev_raw_count) + return;
Please leave the '!=' dangling on the first line. It makes it clearer that there is more to the expression. [...]
+static struct attribute **alloc_attrs(struct device *dev, u32 n,
+ struct dev_ext_attribute *dev_attr)
+{
+ struct attribute **attrs;
+ int i;
+
+ /* Alloc n + 1 (for terminating NULL) */
+ attrs = devm_kcalloc(dev, n + 1, sizeof(struct attribute *),
+ GFP_KERNEL);
+ if (!attrs)
+ return attrs;
+
+ for (i = 0; i < n; i++)
+ attrs[i] = &dev_attr[i].attr.attr;
+ return attrs;
+}
+
+static int
+xgene_pmu_alloc_attrs(struct device *dev, struct xgene_pmu_dev *pmu_dev)
+{
+ struct attribute **attrs;
+
+ if (pmu_dev->nformat_attrs) {
+ attrs = alloc_attrs(dev, pmu_dev->nformat_attrs,
+ pmu_dev->format_attr);
+ if (!attrs)
+ return -ENOMEM;
+ pmu_format_attr_group.attrs = attrs;
+ }
+
+ if (pmu_dev->nevents_attrs) {
+ attrs = alloc_attrs(dev, pmu_dev->nevents_attrs,
+ pmu_dev->events_attr);
+ if (!attrs)
+ return -ENOMEM;
+ pmu_events_attr_group.attrs = attrs;
+ }
+
+ pmu_dev->attr_groups = pmu_attr_groups;
+
+ return 0;
+}I don't see why we need to dynamically allocate anything, given we make a full copy of each of the existing arrays without modification. We should just be able to use them as-is. As I mentioned above, please just statically allocate the structures you need and share them. [...]
+static int
+xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
+{
+ struct device *dev = xgene_pmu->dev;
+ struct xgene_pmu_dev *pmu;
+ int rc;
+
+ pmu = devm_kzalloc(dev, sizeof(*pmu), GFP_KERNEL);
+ if (!pmu)
+ return -ENOMEM;
+ pmu->parent = xgene_pmu;
+ pmu->inf = &ctx->inf;
+ ctx->pmu_dev = pmu;
+
+ switch (pmu->inf->type) {
+ case PMU_TYPE_L3C:
+ pmu->format_attr = l3c_pmu_format_attrs;
+ pmu->nformat_attrs = ARRAY_SIZE(l3c_pmu_format_attrs);
+ pmu->events_attr = l3c_pmu_events_attrs;
+ pmu->nevents_attrs = ARRAY_SIZE(l3c_pmu_events_attrs);
+ break;
+ case PMU_TYPE_IOB:
+ pmu->format_attr = iob_pmu_format_attrs;
+ pmu->nformat_attrs = ARRAY_SIZE(iob_pmu_format_attrs);
+ pmu->events_attr = iob_pmu_events_attrs;
+ pmu->nevents_attrs = ARRAY_SIZE(iob_pmu_events_attrs);
+ break;
+ case PMU_TYPE_MCB:
+ if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask))
+ goto dev_err;
+ pmu->format_attr = mcb_pmu_format_attrs;
+ pmu->nformat_attrs = ARRAY_SIZE(mcb_pmu_format_attrs);
+ pmu->events_attr = mcb_pmu_events_attrs;
+ pmu->nevents_attrs = ARRAY_SIZE(mcb_pmu_events_attrs);
+ break;
+ case PMU_TYPE_MC:
+ if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask))
+ goto dev_err;
+ pmu->format_attr = mc_pmu_format_attrs;
+ pmu->nformat_attrs = ARRAY_SIZE(mc_pmu_format_attrs);
+ pmu->events_attr = mc_pmu_events_attrs;
+ pmu->nevents_attrs = ARRAY_SIZE(mc_pmu_events_attrs);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rc = xgene_pmu_alloc_attrs(dev, pmu);
+ if (rc) {
+ dev_err(dev, "%s PMU: Failed to alloc attributes\n", ctx->name);
+ goto dev_err;
+ }
+
+ rc = xgene_init_perf(pmu, ctx->name);
+ if (rc) {
+ dev_err(dev, "%s PMU: Failed to init perf driver\n", ctx->name);
+ goto dev_err;
+ }
+
+ dev_info(dev, "%s PMU registered\n", ctx->name);
+
+ /* All attribute allocations can be free'd after perf_register_pmu */
+ deallocate_attrs(dev, pmu);As far as I can see, that is not true. The core perf code doesn't make a copy of the attrs, it just continues to use them as-is. So this looks to be broken and very unsafe. Please get rid of the dynamic allocation and copying. [...]
+static irqreturn_t _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
+{
+ struct perf_event *event = NULL;
+ struct perf_sample_data data;You never sample, so don't need perf_sample_data.
+ struct xgene_pmu *xgene_pmu; + struct hw_perf_event *hwc; + int idx; + u32 val; + + /* Get interrupt counter source */ + val = readl(pmu_dev->inf->csr + PMU_PMOVSR); + idx = ffs(val) - 1;
bit 0 is never set? What if multiple counters overflow? You should iterate over all the counters which have overflown (as we do in the arm-ccn driver).
+ if (!(val & PMU_OVERFLOW_MASK)) + goto out; + event = pmu_dev->pmu_counter_event[idx]; + + /* Ignore if we don't have an event. */ + if (!event) + goto out; + + hwc = &event->hw; + + xgene_perf_event_update(event, hwc, idx); + perf_sample_data_init(&data, 0, hwc->last_period);
You aren't sampling, so don't need this.
+ if (!xgene_perf_event_set_period(event, hwc, idx)) + goto out; + +out: + /* Clear interrupt flag */ + xgene_pmu = pmu_dev->parent; + if (xgene_pmu->version == 1)
It would be better to consistently use the PCP_PMU_V{1,2} defines.
[...]
+ version = (dev_id == PCP_PMU_V1) ? 1 : 2;
As above, why not set version to the dev_id, and use the PCP_PMU_V* defines consistently when comparing xgene_pmu::version?
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "No IRQ resource\n");
+ rc = -EINVAL;
+ goto err;
+ }
+ rc = devm_request_irq(&pdev->dev, irq, xgene_pmu_isr, IRQF_SHARED,
+ dev_name(&pdev->dev), xgene_pmu);
+ if (rc) {
+ dev_err(&pdev->dev, "Could not request IRQ %d\n", irq);
+ goto err;
+ }+ /* Pick one core to use for cpumask attributes */ + cpumask_set_cpu(smp_processor_id(), &xgene_pmu->cpu);
You also need to ensure that the IRQ occurs on this CPU. Thanks, Mark.