Re: [PATCH v3 23/27] powerpc/powernv/pmem: Add debug IOCTLs
From: Frederic Barrat <hidden>
Date: 2020-03-04 15:22:07
Also in:
linux-mm, lkml, nvdimm
Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
quoted hunk ↗ jump to hunk
From: Alastair D'Silva <redacted> These IOCTLs provide low level access to the card to aid in debugging controller/FPGA firmware. Signed-off-by: Alastair D'Silva <redacted> --- arch/powerpc/platforms/powernv/pmem/Kconfig | 6 + arch/powerpc/platforms/powernv/pmem/ocxl.c | 249 ++++++++++++++++++++ include/uapi/nvdimm/ocxl-pmem.h | 32 +++ 3 files changed, 287 insertions(+)diff --git a/arch/powerpc/platforms/powernv/pmem/Kconfig b/arch/powerpc/platforms/powernv/pmem/Kconfig index c5d927520920..3f44429d70c9 100644 --- a/arch/powerpc/platforms/powernv/pmem/Kconfig +++ b/arch/powerpc/platforms/powernv/pmem/Kconfig@@ -12,4 +12,10 @@ config OCXL_PMEM Select N if unsure. +config OCXL_PMEM_DEBUG + bool "OpenCAPI Persistent Memory debugging" + depends on OCXL_PMEM + help + Enables low level IOCTLs for OpenCAPI Persistent Memory firmware development + endifdiff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c index e01f6f9fc180..d4ce5e9e0521 100644 --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c@@ -1050,6 +1050,235 @@ int req_controller_health_perf(struct ocxlpmem *ocxlpmem) GLOBAL_MMIO_HCI_REQ_HEALTH_PERF); } +#ifdef CONFIG_OCXL_PMEM_DEBUG +/** + * enable_fwdebug() - Enable FW debug on the controller + * @ocxlpmem: the device metadata + * Return: 0 on success, negative on failure + */ +static int enable_fwdebug(const struct ocxlpmem *ocxlpmem) +{ + return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI, + OCXL_LITTLE_ENDIAN, + GLOBAL_MMIO_HCI_FW_DEBUG); +} + +/** + * disable_fwdebug() - Disable FW debug on the controller + * @ocxlpmem: the device metadata + * Return: 0 on success, negative on failure + */ +static int disable_fwdebug(const struct ocxlpmem *ocxlpmem) +{ + return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCIC, + OCXL_LITTLE_ENDIAN, + GLOBAL_MMIO_HCI_FW_DEBUG); +} + +static int ioctl_fwdebug(struct ocxlpmem *ocxlpmem, + struct ioctl_ocxl_pmem_fwdebug __user *uarg) +{ + struct ioctl_ocxl_pmem_fwdebug args; + u64 val; + int i; + int rc; + + if (copy_from_user(&args, uarg, sizeof(args))) + return -EFAULT; + + // Buffer size must be a multiple of 8 + if ((args.buf_size & 0x07)) + return -EINVAL; + + if (args.buf_size > ocxlpmem->admin_command.data_size) + return -EINVAL; + + mutex_lock(&ocxlpmem->admin_command.lock); + + rc = enable_fwdebug(ocxlpmem); + if (rc) + goto out; + + rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_FW_DEBUG); + if (rc) + goto out; + + // Write DebugAction & FunctionCode + val = ((u64)args.debug_action << 56) | ((u64)args.function_code << 40); + + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, + ocxlpmem->admin_command.request_offset + 0x08, + OCXL_LITTLE_ENDIAN, val); + if (rc) + goto out; + + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, + ocxlpmem->admin_command.request_offset + 0x10, + OCXL_LITTLE_ENDIAN, args.debug_parameter_1); + if (rc) + goto out; + + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, + ocxlpmem->admin_command.request_offset + 0x18, + OCXL_LITTLE_ENDIAN, args.debug_parameter_2); + if (rc) + goto out; + + for (i = 0x20; i < 0x38; i += 0x08) + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, + ocxlpmem->admin_command.request_offset + i, + OCXL_LITTLE_ENDIAN, 0); + if (rc) + goto out;
rc is the for loop body. The rc test is not.
+
+
+ // Populate admin command buffer
+ if (args.buf_size) {
+ for (i = 0; i < args.buf_size; i += sizeof(u64)) {
+ u64 val;
+
+ if (copy_from_user(&val, &args.buf[i], sizeof(u64)))
+ return -EFAULT;need to get rc and goto out because of the mutex
+
+ rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
+ ocxlpmem->admin_command.data_offset + i,
+ OCXL_HOST_ENDIAN, val);
+ if (rc)
+ goto out;
+ }
+ }
+
+ rc = admin_command_execute(ocxlpmem);
+ if (rc)
+ goto out;
+
+ rc = admin_command_complete_timeout(ocxlpmem,
+ ocxlpmem->timeouts[ADMIN_COMMAND_FW_DEBUG]);
+ if (rc < 0)
+ goto out;
+
+ rc = admin_response(ocxlpmem);
+ if (rc < 0)
+ goto out;
+ if (rc != STATUS_SUCCESS) {
+ warn_status(ocxlpmem, "Unexpected status from FW Debug", rc);
+ goto out;
+ }
+
+ if (args.buf_size) {
+ for (i = 0; i < args.buf_size; i += sizeof(u64)) {
+ u64 val;
+
+ rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+ ocxlpmem->admin_command.data_offset + i,
+ OCXL_HOST_ENDIAN, &val);
+ if (rc)
+ goto out;
+
+ if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
+ rc = -EFAULT;
+ goto out;
+ }
+ }
+ }
+
+ rc = admin_response_handled(ocxlpmem);
+ if (rc)
+ goto out;
+
+ rc = disable_fwdebug(ocxlpmem);
+ if (rc)
+ goto out;
+
+out:
+ mutex_unlock(&ocxlpmem->admin_command.lock);
+ return rc;
+}
+
+static int ioctl_shutdown(struct ocxlpmem *ocxlpmem)
+{
+ int rc;
+
+ mutex_lock(&ocxlpmem->admin_command.lock);
+
+ rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_SHUTDOWN);
+ if (rc)
+ goto out;
+
+ rc = admin_command_execute(ocxlpmem);
+ if (rc)
+ goto out;
+
+ rc = admin_command_complete_timeout(ocxlpmem, ADMIN_COMMAND_SHUTDOWN);
+ if (rc < 0) {
+ dev_warn(&ocxlpmem->dev, "Shutdown timed out\n");
+ goto out;
+ }
+
+ rc = 0;
+ goto out;We can remove that goto. No admin_response_handled()? Is that shutting down the full adapter and we have nobody to talk to? What happens next?
+
+out:
+ mutex_unlock(&ocxlpmem->admin_command.lock);
+ return rc;
+}
+
+static int ioctl_mmio_write(struct ocxlpmem *ocxlpmem,
+ struct ioctl_ocxl_pmem_mmio __user *uarg)
+{
+ struct scm_ioctl_mmio args;
+
+ if (copy_from_user(&args, uarg, sizeof(args)))
+ return -EFAULT;
+
+ return ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, args.address,
+ OCXL_LITTLE_ENDIAN, args.val);
+}
+
+static int ioctl_mmio_read(struct ocxlpmem *ocxlpmem,
+ struct ioctl_ocxl_pmem_mmio __user *uarg)
+{
+ struct ioctl_ocxl_pmem_mmio args;
+ int rc;
+
+ if (copy_from_user(&args, uarg, sizeof(args)))
+ return -EFAULT;
+
+ rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, args.address,
+ OCXL_LITTLE_ENDIAN, &args.val);
+ if (rc)
+ return rc;
+
+ if (copy_to_user(uarg, &args, sizeof(args)))
+ return -EFAULT;
+
+ return 0;
+}
+#else /* CONFIG_OCXL_PMEM_DEBUG */
+static int ioctl_fwdebug(struct ocxlpmem *ocxlpmem,
+ struct ioctl_ocxl_pmem_fwdebug __user *uarg)
+{
+ return -EPERM;
+}
+
+static int ioctl_shutdown(struct ocxlpmem *ocxlpmem)
+{
+ return -EPERM;
+}
+
+static int ioctl_mmio_write(struct ocxlpmem *ocxlpmem,
+ struct ioctl_ocxl_pmem_mmio __user *uarg)
+{
+ return -EPERM;
+}
+
+static int ioctl_mmio_read(struct ocxlpmem *ocxlpmem,
+ struct ioctl_ocxl_pmem_mmio __user *uarg)
+{
+ return -EPERM;
+}The 'else' clause could be dropped, the ioctls will return EINVAL, which is fine, I think.
quoted hunk ↗ jump to hunk
+#endif /* CONFIG_OCXL_PMEM_DEBUG */ + static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args) { struct ocxlpmem *ocxlpmem = file->private_data;@@ -1091,6 +1320,26 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args) case IOCTL_OCXL_PMEM_REQUEST_HEALTH: rc = req_controller_health_perf(ocxlpmem); break; + + case IOCTL_OCXL_PMEM_FWDEBUG: + rc = ioctl_fwdebug(ocxlpmem, + (struct ioctl_ocxl_pmem_fwdebug __user *)args); + break; + + case IOCTL_OCXL_PMEM_SHUTDOWN: + rc = ioctl_shutdown(ocxlpmem); + break; + + case IOCTL_OCXL_PMEM_MMIO_WRITE: + rc = ioctl_mmio_write(ocxlpmem, + (struct ioctl_ocxl_pmem_mmio __user *)args); + break; + + case IOCTL_OCXL_PMEM_MMIO_READ: + rc = ioctl_mmio_read(ocxlpmem, + (struct ioctl_ocxl_pmem_mmio __user *)args); + break; + } return rc;diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h index 0d03abb44001..e20a4f8be82a 100644 --- a/include/uapi/nvdimm/ocxl-pmem.h +++ b/include/uapi/nvdimm/ocxl-pmem.h@@ -6,6 +6,28 @@ #include <linux/types.h> #include <linux/ioctl.h> +enum ocxlpmem_fwdebug_action { + OCXL_PMEM_FWDEBUG_READ_CONTROLLER_MEMORY = 0x01, + OCXL_PMEM_FWDEBUG_WRITE_CONTROLLER_MEMORY = 0x02, + OCXL_PMEM_FWDEBUG_ENABLE_FUNCTION = 0x03, + OCXL_PMEM_FWDEBUG_DISABLE_FUNCTION = 0x04, + OCXL_PMEM_FWDEBUG_GET_PEL = 0x05, // Retrieve Persistent Error Log +}; + +struct ioctl_ocxl_pmem_buffer_info { + __u32 admin_command_buffer_size; // out + __u32 near_storage_buffer_size; // out +}; + +struct ioctl_ocxl_pmem_fwdebug { // All args are inputs + enum ocxlpmem_fwdebug_action debug_action;
More kernel ABI problems. My interpretation of the "enumeration specifiers" section of C99 is that we can't rely on the size of the enum.
quoted hunk ↗ jump to hunk
+ __u16 function_code; + __u16 buf_size; // Size of optional data buffer + __u64 debug_parameter_1; + __u64 debug_parameter_2; + __u8 *buf; // Pointer to optional in/out data buffer +}; + #define OCXL_PMEM_ERROR_LOG_ACTION_RESET (1 << (32-32)) #define OCXL_PMEM_ERROR_LOG_ACTION_CHKFW (1 << (53-32)) #define OCXL_PMEM_ERROR_LOG_ACTION_REPLACE (1 << (54-32))@@ -66,6 +88,11 @@ struct ioctl_ocxl_pmem_controller_stats { __u64 cache_write_latency; /* nanoseconds */ }; +struct ioctl_ocxl_pmem_mmio { + __u64 address; /* Offset in global MMIO space */ + __u64 val; /* value to write/was read */ +};
Can we group all the debug data structures together in the header file, with a comment indicating that they may not be available in the kernel, depending on the config? Fred
quoted hunk ↗ jump to hunk
+ struct ioctl_ocxl_pmem_eventfd { __s32 eventfd; __u32 reserved;@@ -92,4 +119,9 @@ struct ioctl_ocxl_pmem_eventfd { #define IOCTL_OCXL_PMEM_EVENT_CHECK _IOR(OCXL_PMEM_MAGIC, 0x07, __u64) #define IOCTL_OCXL_PMEM_REQUEST_HEALTH _IO(OCXL_PMEM_MAGIC, 0x08) +#define IOCTL_OCXL_PMEM_FWDEBUG _IOWR(OCXL_PMEM_MAGIC, 0xf0, struct ioctl_ocxl_pmem_fwdebug) +#define IOCTL_OCXL_PMEM_MMIO_WRITE _IOW(OCXL_PMEM_MAGIC, 0xf1, struct ioctl_ocxl_pmem_mmio) +#define IOCTL_OCXL_PMEM_MMIO_READ _IOWR(OCXL_PMEM_MAGIC, 0xf2, struct ioctl_ocxl_pmem_mmio) +#define IOCTL_OCXL_PMEM_SHUTDOWN _IO(OCXL_PMEM_MAGIC, 0xf3) + #endif /* _UAPI_OCXL_SCM_H */