Thread (26 messages) 26 messages, 8 authors, 2017-03-28

[PATCH V13 08/10] ras: acpi / apei: generate trace event for unrecognized CPER section

From: Shiju Jose <hidden>
Date: 2017-03-23 18:44:30
Also in: kvmarm

Tested-by: Shiju Jose <redacted>
quoted hunk ↗ jump to hunk
-----Original Message-----
From: Tyler Baicar [mailto:tbaicar at codeaurora.org]
Sent: 21 March 2017 22:47
To: christoffer.dall at linaro.org; marc.zyngier at arm.com;
pbonzini at redhat.com; rkrcmar at redhat.com; linux at armlinux.org.uk;
catalin.marinas at arm.com; will.deacon at arm.com; rjw at rjwysocki.net;
lenb at kernel.org; matt at codeblueprint.co.uk; robert.moore at intel.com;
lv.zheng at intel.com; nkaje at codeaurora.org; zjzhang at codeaurora.org;
mark.rutland at arm.com; james.morse at arm.com; akpm at linux-foundation.org;
eun.taik.lee at samsung.com; sandeepa.s.prabhu at gmail.com;
labbott at redhat.com; shijie.huang at arm.com; rruigrok at codeaurora.org;
paul.gortmaker at windriver.com; tn at semihalf.com; fu.wei at linaro.org;
rostedt at goodmis.org; bristot at redhat.com; linux-arm-
kernel at lists.infradead.org; kvmarm at lists.cs.columbia.edu;
kvm at vger.kernel.org; linux-kernel at vger.kernel.org; linux-
acpi at vger.kernel.org; linux-efi at vger.kernel.org; devel at acpica.org;
Suzuki.Poulose at arm.com; punit.agrawal at arm.com; astone at redhat.com;
harba at codeaurora.org; hanjun.guo at linaro.org; John Garry; Shiju Jose;
joe at perches.com
Cc: Tyler Baicar
Subject: [PATCH V13 08/10] ras: acpi / apei: generate trace event for
unrecognized CPER section

The UEFI spec allows for non-standard sections in a Common Platform
Error Record. This is defined in section N.2.3 of UEFI version 2.5.

Currently, if a CPER section's type (UUID) does not match any section
type that the kernel knows how to parse, no trace event is generated
for that section. As a result, the user cannot learn that such a
hardware error occurred, or see the error record of a non-standard
section.

This commit generates a trace event which contains the raw error data
for an unrecognized CPER section.

Signed-off-by: Tyler Baicar <redacted>
CC: Jonathan (Zhixiong) Zhang <redacted>
---
 drivers/acpi/apei/ghes.c | 24 ++++++++++++++++++++++--
 drivers/ras/ras.c        |  1 +
 include/ras/ras_event.h  | 45
+++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 2 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index
7e3e5e0..3ecbacc 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -45,11 +45,13 @@
 #include <linux/aer.h>
 #include <linux/nmi.h>
 #include <linux/sched/clock.h>
+#include <linux/uuid.h>

 #include <acpi/actbl1.h>
 #include <acpi/ghes.h>
 #include <acpi/apei.h>
 #include <asm/tlbflush.h>
+#include <ras/ras_event.h>

 #include "apei-internal.h"
@@ -454,11 +456,21 @@ static void ghes_do_proc(struct ghes *ghes,  {
 	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
+	uuid_le sec_type;
+	uuid_le *fru_id = &NULL_UUID_LE;
+	char *fru_text = "";

 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
 		sec_sev = ghes_severity(gdata->error_severity);
-		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+		sec_type = *(uuid_le *)gdata->section_type;
+
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+			fru_id = (uuid_le *)gdata->fru_id;
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+			fru_text = gdata->fru_text;
+
+		if (!uuid_le_cmp(sec_type,
 				 CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err;
@@ -469,7 +481,7 @@ static void ghes_do_proc(struct ghes *ghes,
 			ghes_handle_memory_failure(gdata, sev);
 		}
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-		else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+		else if (!uuid_le_cmp(sec_type,
 				      CPER_SEC_PCIE)) {
 			struct cper_sec_pcie *pcie_err;
@@ -502,6 +514,14 @@ static void ghes_do_proc(struct ghes *ghes,

 		}
 #endif
+#ifdef CONFIG_RAS
+		else if (trace_unknown_sec_event_enabled()) {
+			void *unknown_err =
acpi_hest_generic_data_payload(gdata);
+			trace_unknown_sec_event(&sec_type,
+					fru_id, fru_text, sec_sev,
+					unknown_err, gdata->error_data_length);
+		}
+#endif
 	}
 }
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index
b67dd36..fb2500b 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -27,3 +27,4 @@ static int __init ras_init(void)
EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 #endif
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(unknown_sec_event);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index
1791a12..5861b6f 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -162,6 +162,51 @@
 );

 /*
+ * Unknown Section Report
+ *
+ * This event is generated when hardware detects an error whose
+ * section is either non-standard, as defined in the UEFI spec
+ * appendix "Common Platform Error Record", or of a type for which
+ * no TRACE_EVENT is defined.
+ *
+ */
+TRACE_EVENT(unknown_sec_event,
+
+	TP_PROTO(const uuid_le *sec_type,
+		 const uuid_le *fru_id,
+		 const char *fru_text,
+		 const u8 sev,
+		 const u8 *err,
+		 const u32 len),
+
+	TP_ARGS(sec_type, fru_id, fru_text, sev, err, len),
+
+	TP_STRUCT__entry(
+		__array(char, sec_type, 16)
+		__array(char, fru_id, 16)
+		__string(fru_text, fru_text)
+		__field(u8, sev)
+		__field(u32, len)
+		__dynamic_array(u8, buf, len)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->sec_type, sec_type, sizeof(uuid_le));
+		memcpy(__entry->fru_id, fru_id, sizeof(uuid_le));
+		__assign_str(fru_text, fru_text);
+		__entry->sev = sev;
+		__entry->len = len;
+		memcpy(__get_dynamic_array(buf), err, len);
+	),
+
+	TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d;
raw data:%s",
+		  __entry->sev, __entry->sec_type,
+		  __entry->fru_id, __get_str(fru_text),
+		  __entry->len,
+		  __print_hex(__get_dynamic_array(buf), __entry->len)) );
+
+/*
  * PCIe AER Trace event
  *
  * These events are generated when hardware detects a corrected or
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a
Linux Foundation Collaborative Project.
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help