Inter-revision diff: patch 5

Comparing v1 (message) to v5 (message)

--- v1
+++ v5
@@ -1,55 +1,70 @@
-In the EDR path, AER registers are cleared *after* DPC error event is
-processed. The process stack in EDR is:
+Print the contents of the Device Control Register of the device that
+detected the error. This might help in faster error diagnosis.
 
-  edr_handle_event()
-    dpc_process_error()
-    pci_aer_raw_clear_status()
-    pcie_do_recovery()
+It is easy to test this by using aer-inject:
 
-But in DPC path, AER status registers are cleared *while* processing
-the error. The process stack in DPC is:
+  $ aer-inject -s 00:03.0 corr-err-file
 
-  dpc_handler()
-    dpc_process_error()
-      pci_aer_clear_status()
-    pcie_do_recovery()
+The contents of corr-err-file are shown below:
 
-In EDR path, AER status registers are cleared irrespective of whether
-the error was an RP PIO or unmasked uncorrectable error. But in DPC, the
-AER status registers are cleared only when it's an unmasked uncorrectable
-error.
+  AER
+  COR_STATUS BAD_TLP
+  HEADER_LOG 0 1 2 3
 
-This leads to two different behaviours for the same task (handling of
-DPC errors) in FFS systems and when native OS has control.
+Sample output from a dummy error injected by aer-inject:
 
-Bring the same semantics for clearing the AER status register in EDR
-path and DPC path.
+  pcieport 0000:00:03.0: AER: Corrected error received: 0000:00:03.0
+  pcieport 0000:00:03.0: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Receiver)
+  pcieport 0000:00:03.0:   device [1b36:000c] error status/mask=00000040/0000e000, devctl=0x000f <-- devctl added to the error log
+  pcieport 0000:00:03.0:    [ 6] BadTLP
 
 Signed-off-by: Naveen Naidu <naveennaidu479@gmail.com>
 ---
- drivers/pci/pcie/dpc.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
+ drivers/pci/pci.h      |  2 ++
+ drivers/pci/pcie/aer.c | 10 ++++++++--
+ 2 files changed, 10 insertions(+), 2 deletions(-)
 
-diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
-index faf4a1e77fab..68899a3db126 100644
---- a/drivers/pci/pcie/dpc.c
-+++ b/drivers/pci/pcie/dpc.c
-@@ -288,7 +288,6 @@ void dpc_process_error(struct pci_dev *pdev)
- 		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
- 		 aer_get_device_error_info(pdev, &info)) {
- 		aer_print_error(pdev, &info);
--		pci_aer_clear_status(pdev);
- 	}
- }
+diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
+index eb88d8bfeaf7..48ed7f91113b 100644
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -437,6 +437,8 @@ struct aer_err_info {
+ 	u32 status;		/* COR/UNCOR Error Status */
+ 	u32 mask;		/* COR/UNCOR Error Mask */
+ 	struct aer_header_log_regs tlp;	/* TLP Header */
++
++	u16 devctl;
+ };
  
-@@ -297,6 +296,7 @@ static irqreturn_t dpc_handler(int irq, void *context)
- 	struct pci_dev *pdev = context;
+ /* Preliminary AER error information processed from Root port */
+diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
+index d3937f5384e4..fdeef9deb016 100644
+--- a/drivers/pci/pcie/aer.c
++++ b/drivers/pci/pcie/aer.c
+@@ -729,8 +729,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
+ 		   aer_error_severity_string[info->severity],
+ 		   aer_error_layer[layer], aer_agent_string[agent]);
  
- 	dpc_process_error(pdev);
-+	pci_aer_clear_status(pdev);
+-	pci_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
+-		   dev->vendor, dev->device, info->status, info->mask);
++	pci_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x, devctl=%#06x\n",
++		   dev->vendor, dev->device, info->status, info->mask, info->devctl);
  
- 	/* We configure DPC so it only triggers on ERR_FATAL */
- 	pcie_do_recovery(pdev, pci_channel_io_frozen, dpc_reset_link);
+ 	__aer_print_error(dev, info);
+ 
+@@ -1083,6 +1083,12 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
+ 	if (!aer)
+ 		return 0;
+ 
++	/*
++	 * Cache the value of Device Control Register now, because later the
++	 * device might not be available
++	 */
++	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &info->devctl);
++
+ 	if (info->severity == AER_CORRECTABLE) {
+ 		pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS,
+ 			&info->status);
 -- 
 2.25.1
 
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help