Re: [PATCH v3 2/3] powerpc/pseries: Export hardware trace macro dump via debugfs
From: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Date: 2024-09-20 16:49:20
Madhavan Srinivasan [off-list ref] writes:
quoted hunk ↗ jump to hunk
This patch adds debugfs interface to export Hardware Trace Macro (HTM)
function data in a LPAR. New hypervisor call "H_HTM" has been
defined to setup, configure, control and dump the HTM data.
This patch supports only dumping of HTM data in a LPAR.
New debugfs folder called "htmdump" has been added under
/sys/kernel/debug/arch path which contains files need to
pass required parameters for the H_HTM dump function. New Kconfig
option called "CONFIG_HTMDUMP" has been in platform/pseries for the same.

With this module loaded, list of files in debugfs path

/sys/kernel/debug/powerpc/htmdump
coreindexonchip htmtype nodalchipindex nodeindex trace

Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
---
Changelog v2:
- Made driver as modules based on review comments
Changelog v1:
- Changed from tristate to bool with dependency flags
- Trimmed the include headers

 arch/powerpc/platforms/pseries/Kconfig | 9 ++
 arch/powerpc/platforms/pseries/Makefile | 1 +
 arch/powerpc/platforms/pseries/htmdump.c | 130 +++++++++++++++++++++++
 3 files changed, 140 insertions(+)
 create mode 100644 arch/powerpc/platforms/pseries/htmdump.c

diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index afc0f6a61337..a66be66d690e 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -128,6 +128,15 @@ config CMM
 will be reused for other LPARs. The interface allows firmware to
 balance memory across many LPARs.
+config HTMDUMP
+ tristate "PHYP HTM data dumper"
+ depends on PPC_PSERIES && DEBUG_FS
+ default m
+ help
+ Select this option, if you want to enable the kernel debugfs
+ interface to dump the Hardware Trace Macro (HTM) function data
+ in the LPAR.
+
 config HV_PERF_CTRS
 bool "Hypervisor supplied PMU events (24x7 & GPCI)"
 default y
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 7bf506f6b8c8..3f3e3492e436 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
 obj-$(CONFIG_HVCS) += hvcserver.o
 obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
 obj-$(CONFIG_CMM) += cmm.o
+obj-$(CONFIG_HTMDUMP) += htmdump.o
 obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
 obj-$(CONFIG_LPARCFG) += lparcfg.o
 obj-$(CONFIG_IBMVIO) += vio.o
diff --git a/arch/powerpc/platforms/pseries/htmdump.c b/arch/powerpc/platforms/pseries/htmdump.c
new file mode 100644
index 000000000000..54c28525c4a7
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/htmdump.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) IBM Corporation, 2024
+ */
+
+#define pr_fmt(fmt) "htmdump: " fmt
+
+#include <linux/debugfs.h>
+
+#include <asm/machdep.h>
+#include <asm/plpar_wrappers.h>
+
+/* This enables us to keep track of the memory removed from each node. */
+struct htmdump_entry {
+ void *buf;
+ struct dentry *dir;
+ char name[16];
+};
How do dir and name get used? It isn't that obvious, so a comment would be great.
+static u32 nodeindex;
+static u32 nodalchipindex;
+static u32 coreindexonchip;
+static u32 htmtype;
+static struct dentry *htmdump_debugfs_dir;
+static struct htmdump_entry *ent;
+
+#define BUFFER_SIZE PAGE_SIZE
+
+static ssize_t htmdump_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct htmdump_entry *ent = filp->private_data;
+ unsigned long page, read_size, available;
+ loff_t offset;
+ long rc;
+
+ page = ALIGN_DOWN(*ppos, BUFFER_SIZE);
+ offset = (*ppos) % BUFFER_SIZE;
+
+ rc = htm_get_dump_hardware(nodeindex, nodalchipindex, coreindexonchip,
+ htmtype, virt_to_phys(ent->buf), BUFFER_SIZE, page);
+
+ switch (rc) {
+ case H_SUCCESS:
+ case H_PARTIAL:
+ break;
+ case H_NOT_AVAILABLE:
+ return 0;
Minor nits on the error returns here...
Is returning 0 correct here? Maybe it is (since 0 means no data was read), but I wanted to confirm whether we should return -ENODATA or -ENODEV instead (I am not sure what H_NOT_AVAILABLE means here).
#define ENODATA 61 /* No data available */
+ case H_BUSY:
+ case H_LONG_BUSY_ORDER_1_MSEC:
+ case H_LONG_BUSY_ORDER_10_MSEC:
+ case H_LONG_BUSY_ORDER_100_MSEC:
+ case H_LONG_BUSY_ORDER_1_SEC:
+ case H_LONG_BUSY_ORDER_10_SEC:
+ case H_LONG_BUSY_ORDER_100_SEC:
Similarly, for the busy return codes above, maybe return -EBUSY instead of -EINVAL:
#define EBUSY 16 /* Device or resource busy */
+ case H_PARAMETER:
+ case H_P2:
+ case H_P3:
+ case H_P4:
+ case H_P5:
+ case H_P6:
+ case H_STATE:
+ case H_AUTHORITY:
+ return -EINVAL;
+ }
+
+ available = BUFFER_SIZE - offset;
+ read_size = min(count, available);
+ *ppos += read_size;
+ return simple_read_from_buffer(ubuf, count, &offset, ent->buf, available);
+}
+
+static const struct file_operations htmdump_fops = {
+ .llseek = default_llseek,
+ .read = htmdump_read,
+ .open = simple_open,
+};
+
+static int htmdump_init_debugfs(void)
+{
+ ent = kcalloc(1, sizeof(struct htmdump_entry), GFP_KERNEL);
+ if (!ent) {
+ pr_err("Failed to allocate ent\n");
+ return -EINVAL;
The return value here can be -ENOMEM, since this is an allocation failure.
+ }
+
+ ent->buf = kmalloc(BUFFER_SIZE, GFP_KERNEL);
+ if (!ent->buf) {
+ pr_err("Failed to allocate htmdump buf\n");
+ return -ENOMEM;
kfree(ent) before returning here? Otherwise ent is leaked on this error path.
+ }
+
+ pr_debug("%s: ent:%lx buf:%lx\n",
+ __func__, (long unsigned int)ent, (long unsigned int)ent->buf);
+
Maybe use %p here?

<Documentation/core-api/printk-formats.rst>
Plain Pointers
--------------

::

	%p	abcdef12 or 00000000abcdef12

Pointers printed without a specifier extension (i.e unadorned %p) are
hashed to prevent leaking information about the kernel memory layout. This
has the added benefit of providing a unique identifier.
<...>
...the aim of printing the address is to provide more information for
debugging, use %p and boot the kernel with the ``no_hash_pointers`` parameter
during debugging, which will print all %p addresses unmodified. If you *really*
always want the unmodified address, see %px below.
+ htmdump_debugfs_dir = debugfs_create_dir("htmdump",
+ arch_debugfs_dir);
+
+ debugfs_create_u32("nodeindex", 0600,
+ htmdump_debugfs_dir, &nodeindex);
+ debugfs_create_u32("nodalchipindex", 0600,
+ htmdump_debugfs_dir, &nodalchipindex);
+ debugfs_create_u32("coreindexonchip", 0600,
+ htmdump_debugfs_dir, &coreindexonchip);
+ debugfs_create_u32("htmtype", 0600,
+ htmdump_debugfs_dir, &htmtype);
+ debugfs_create_file("trace", 0400, htmdump_debugfs_dir, ent, &htmdump_fops);
+
+ return 0;
+}
+
+static int __init htmdump_init(void)
+{
+ if (htmdump_init_debugfs())
+ return -EINVAL;
+
+ return 0;
+}
+
+static void __exit htmdump_exit(void)
+{
+ debugfs_remove_recursive(htmdump_debugfs_dir);
+ kfree(ent->buf);
+ kfree(ent);
+}
+
+module_init(htmdump_init);
+module_exit(htmdump_exit);
+MODULE_DESCRIPTION("PHYP Hardware Trace Macro (HTM) data dumper");
+MODULE_LICENSE("GPL");
--
2.45.2