Thread (23 messages) 23 messages, 4 authors, 2022-02-24

Re: [PATCH v4 2/8] hwtracing: Add trace function support for HiSilicon PCIe Tune and Trace device

From: Yicong Yang <hidden>
Date: 2022-02-21 13:13:54
Also in: linux-iommu, linux-pci, linux-perf-users, lkml

Hi Jonathan,

On 2022/2/21 19:18, Jonathan Cameron wrote:
On Mon, 21 Feb 2022 16:43:01 +0800
Yicong Yang [off-list ref] wrote:
quoted
HiSilicon PCIe tune and trace device(PTT) is a PCIe Root Complex
integrated Endpoint(RCiEP) device, providing the capability
to dynamically monitor and tune the PCIe traffic, and trace
the TLP headers.

Add the driver for the device to enable the trace function.
This patch adds basic function of trace, including the device's
probe and initialization, functions for trace buffer allocation
and trace enable/disable, register an interrupt handler to
simply response to the DMA events. The user interface of trace
will be added in the following patch.

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Hi Yicong,

A few really minor things inline, particularly one place
where you can improve the error handling.
It's always fiddly to handle errors in a pci_walk_bus() but
in this case it's not too difficult as you just need to store
the retval somewhere in the private data then retrieve it
after the pci_walk_bus() call.
Thanks for the quick reply!

The pci_walk_bus() in this patch will fail only if the memory allocation
of filter struct fails. We won't allocate memory in the pci_bus_walk()
after Patch 4 so it will never fail. Maybe I can add some comments
mentioning this.

I also expressed this inline.
Thanks,

Jonathan


quoted
---
 drivers/Makefile                 |   1 +
 drivers/hwtracing/Kconfig        |   2 +
 drivers/hwtracing/ptt/Kconfig    |  11 +
 drivers/hwtracing/ptt/Makefile   |   2 +
 drivers/hwtracing/ptt/hisi_ptt.c | 370 +++++++++++++++++++++++++++++++
 drivers/hwtracing/ptt/hisi_ptt.h | 149 +++++++++++++
 6 files changed, 535 insertions(+)
 create mode 100644 drivers/hwtracing/ptt/Kconfig
 create mode 100644 drivers/hwtracing/ptt/Makefile
 create mode 100644 drivers/hwtracing/ptt/hisi_ptt.c
 create mode 100644 drivers/hwtracing/ptt/hisi_ptt.h
diff --git a/drivers/Makefile b/drivers/Makefile
index a110338c860c..ab3411e4eba5 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -175,6 +175,7 @@ obj-$(CONFIG_USB4)		+= thunderbolt/
 obj-$(CONFIG_CORESIGHT)		+= hwtracing/coresight/
 obj-y				+= hwtracing/intel_th/
 obj-$(CONFIG_STM)		+= hwtracing/stm/
+obj-$(CONFIG_HISI_PTT)		+= hwtracing/ptt/
 obj-$(CONFIG_ANDROID)		+= android/
 obj-$(CONFIG_NVMEM)		+= nvmem/
 obj-$(CONFIG_FPGA)		+= fpga/
diff --git a/drivers/hwtracing/Kconfig b/drivers/hwtracing/Kconfig
index 13085835a636..911ee977103c 100644
--- a/drivers/hwtracing/Kconfig
+++ b/drivers/hwtracing/Kconfig
@@ -5,4 +5,6 @@ source "drivers/hwtracing/stm/Kconfig"
 
 source "drivers/hwtracing/intel_th/Kconfig"
 
+source "drivers/hwtracing/ptt/Kconfig"
+
 endmenu
diff --git a/drivers/hwtracing/ptt/Kconfig b/drivers/hwtracing/ptt/Kconfig
new file mode 100644
index 000000000000..41fa83921a07
--- /dev/null
+++ b/drivers/hwtracing/ptt/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config HISI_PTT
+	tristate "HiSilicon PCIe Tune and Trace Device"
+	depends on ARM64 && PCI && HAS_DMA && HAS_IOMEM
+	help
+	  HiSilicon PCIe Tune and Trace Device exists as a PCIe RCiEP
+	  device, and it provides support for PCIe traffic tuning and
+	  tracing TLP headers to the memory.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called hisi_ptt.
diff --git a/drivers/hwtracing/ptt/Makefile b/drivers/hwtracing/ptt/Makefile
new file mode 100644
index 000000000000..908c09a98161
--- /dev/null
+++ b/drivers/hwtracing/ptt/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_HISI_PTT) += hisi_ptt.o
diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c
new file mode 100644
index 000000000000..a5b4f09ccd1e
--- /dev/null
+++ b/drivers/hwtracing/ptt/hisi_ptt.c
@@ -0,0 +1,370 @@
...
quoted
+static void hisi_ptt_free_trace_buf(struct hisi_ptt *hisi_ptt)
+{
+	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
+	struct device *dev = &hisi_ptt->pdev->dev;
+	int i;
+
+	if (!ctrl->trace_buf)
+		return;
+
+	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
+		if (ctrl->trace_buf[i].addr)
+			dma_free_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
+					  ctrl->trace_buf[i].addr,
+					  ctrl->trace_buf[i].dma);
+
+	kfree(ctrl->trace_buf);
+	ctrl->trace_buf = NULL;
+}
+
+static int hisi_ptt_alloc_trace_buf(struct hisi_ptt *hisi_ptt)
+{
+	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
+	struct device *dev = &hisi_ptt->pdev->dev;
+	int i;
+
+	hisi_ptt->trace_ctrl.buf_index = 0;
+
+	/* If the trace buffer has already been allocated, zero it. */
+	if (ctrl->trace_buf) {
+		for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
+			memset(ctrl->trace_buf[i].addr, 0, HISI_PTT_TRACE_BUF_SIZE);
+		return 0;
+	}
+
+	ctrl->trace_buf = kcalloc(HISI_PTT_TRACE_BUF_CNT, sizeof(struct hisi_ptt_dma_buffer),
Slight preference for sizeof(*ctrl->trace_buf) as it saves a reviewer from scrolling down
to check the type is correct.
will change to that.
quoted
+				  GFP_KERNEL);
+	if (!ctrl->trace_buf)
+		return -ENOMEM;
+
+	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; ++i) {
+		ctrl->trace_buf[i].addr = dma_alloc_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
+							     &ctrl->trace_buf[i].dma,
+							     GFP_KERNEL);
+		if (!ctrl->trace_buf[i].addr) {
+			hisi_ptt_free_trace_buf(hisi_ptt);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
...
quoted
+
+static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
+{
+	struct hisi_ptt_filter_desc *filter;
+	struct hisi_ptt *hisi_ptt = data;
+	struct list_head *target_list;
+
+	target_list = pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT ?
+		      &hisi_ptt->port_filters : &hisi_ptt->req_filters;
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
As below, if this happens we'll be left in an odd intermediate state
where the error is not communicated up to the probe function but the
filters are only partly set up.
The memory allocation will be moved to workqueue function after patch 4
and the function won't fail anymore. I'd like to comment here to mention
that the allocation here is temporary and in the following patch this
function will always succeed.

Thanks,
Yicong
quoted
+		return -ENOMEM;
+
+	filter->pdev = pdev;
+	list_add_tail(&filter->list, target_list);
+
+	/* Update the available port mask */
+	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)
+		hisi_ptt->port_mask |= hisi_ptt_get_filter_val(pdev);
+
+	return 0;
+}
+
+static void hisi_ptt_release_filters(struct hisi_ptt *hisi_ptt)
+{
+	struct hisi_ptt_filter_desc *filter, *tfilter;
+
+	list_for_each_entry_safe(filter, tfilter, &hisi_ptt->req_filters, list) {
+		list_del(&filter->list);
+		kfree(filter);
+	}
+
+	list_for_each_entry_safe(filter, tfilter, &hisi_ptt->port_filters, list) {
+		list_del(&filter->list);
+		kfree(filter);
+	}
+}
+
+static void hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
+{
+	struct pci_dev *pdev = hisi_ptt->pdev;
+	struct pci_bus *bus;
+	u32 reg;
+
+	INIT_LIST_HEAD(&hisi_ptt->port_filters);
+	INIT_LIST_HEAD(&hisi_ptt->req_filters);
+
+	/*
+	 * The device range register provides the information about the
+	 * root ports which the RCiEP can control and trace. The RCiEP
+	 * and the root ports it support are on the same PCIe core, with
+	 * same domain number but maybe different bus number. The device
+	 * range register will tell us which root ports we can support,
+	 * Bit[31:16] indicates the upper BDF numbers of the root port,
+	 * while Bit[15:0] indicates the lower.
+	 */
+	reg = readl(hisi_ptt->iobase + HISI_PTT_DEVICE_RANGE);
+	hisi_ptt->upper = FIELD_GET(HISI_PTT_DEVICE_RANGE_UPPER, reg);
+	hisi_ptt->lower = FIELD_GET(HISI_PTT_DEVICE_RANGE_LOWER, reg);
+
+	bus = pci_find_bus(pci_domain_nr(pdev->bus), PCI_BUS_NUM(hisi_ptt->upper));
+	if (bus)
+		pci_walk_bus(bus, hisi_ptt_init_filters, hisi_ptt);
Error handling needed for the hisi_ptt_init_filters call though that will require
placing a retval somewhere in hisi_ptt so we can know there was an error.
quoted
+
+	hisi_ptt->trace_ctrl.default_cpu = cpumask_first(cpumask_of_node(dev_to_node(&pdev->dev)));
+}
+
+/*
+ * The DMA of PTT trace can only use direct mapping, due to some
+ * hardware restriction. Check whether there is an IOMMU or the
+ * policy of the IOMMU domain is passthrough, otherwise the trace
+ * cannot work.
+ *
+ * The PTT device is supposed to behind the ARM SMMUv3, which
+ * should have passthrough the device by a quirk.
Trivial but perhaps a clearer wording is:

The PTT Device is behind an ARM SMMUv3 which should be set to
passthrough for this device using a quirk.

quoted
+ */
+static int hisi_ptt_check_iommu_mapping(struct pci_dev *pdev)
+{
+	struct iommu_domain *iommu_domain;
+
+	iommu_domain = iommu_get_domain_for_dev(&pdev->dev);
+	if (!iommu_domain || iommu_domain->type == IOMMU_DOMAIN_IDENTITY)
+		return 0;
+
+	return -EOPNOTSUPP;
+}
+
+static int hisi_ptt_probe(struct pci_dev *pdev,
+			  const struct pci_device_id *id)
+{
+	struct hisi_ptt *hisi_ptt;
+	int ret;
+
+	ret = hisi_ptt_check_iommu_mapping(pdev);
+	if (ret) {
+		pci_err(pdev, "cannot work with non-direct DMA mapping.\n");
+		return ret;
+	}
+
+	hisi_ptt = devm_kzalloc(&pdev->dev, sizeof(*hisi_ptt), GFP_KERNEL);
+	if (!hisi_ptt)
+		return -ENOMEM;
+
+	mutex_init(&hisi_ptt->mutex);
+	hisi_ptt->pdev = pdev;
+	pci_set_drvdata(pdev, hisi_ptt);
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		pci_err(pdev, "failed to enable device, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
+	if (ret) {
+		pci_err(pdev, "failed to remap io memory, ret = %d.\n", ret);
+		return ret;
+	}
+
+	hisi_ptt->iobase = pcim_iomap_table(pdev)[2];
+
+	ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+	if (ret) {
+		pci_err(pdev, "failed to set 64 bit dma mask, ret = %d.\n", ret);
+		return ret;
+	}
+	pci_set_master(pdev);
+
+	ret = hisi_ptt_register_irq(hisi_ptt);
+	if (ret)
+		return ret;
+
+	hisi_ptt_init_ctrls(hisi_ptt);
There are some elements of this call that can fail so probably should return an
error code an have appropriate cleanup in here.
quoted
+
+	return 0;
+}
...
.
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help