--- v3
+++ v5
@@ -1,55 +1,70 @@
-In the EDR path, AER registers are cleared *after* the DPC error event is
-processed. The process stack in EDR is:
+Print the contents of the Device Control Register of the device that
+detected the error. This may help diagnose errors faster.
- edr_handle_event()
- dpc_process_error()
- pci_aer_raw_clear_status()
- pcie_do_recovery()
+It is easy to test this by using aer-inject:
-But in the DPC path, AER status registers are cleared *while* processing
-the error. The process stack in DPC is:
+ $ aer-inject -s 00:03:0 corr-err-file
- dpc_handler()
- dpc_process_error()
- pci_aer_clear_status()
- pcie_do_recovery()
+The contents of corr-err-file are as follows:
-In the EDR path, AER status registers are cleared irrespective of whether
-the error was an RP PIO or an unmasked uncorrectable error. But in DPC,
-the AER status registers are cleared only when it's an unmasked
-uncorrectable error.
+ AER
+ COR_STATUS BAD_TLP
+ HEADER_LOG 0 1 2 3
-This leads to two different behaviours for the same task (handling of
-DPC errors) on FFS systems and when the native OS has control.
+Sample output from a dummy error injected by aer-inject:
-Use the same semantics for clearing the AER status registers in both
-the EDR path and the DPC path.
+ pcieport 0000:00:03.0: AER: Corrected error received: 0000:00:03.0
+ pcieport 0000:00:03.0: PCIe Bus Error: severity=Corrected, type=Data Link Layer, (Receiver)
+ pcieport 0000:00:03.0: device [1b36:000c] error status/mask=00000040/0000e000, devctl=0x000f <-- devctl added to the error log
+ pcieport 0000:00:03.0: [ 6] BadTLP
Signed-off-by: Naveen Naidu <naveennaidu479@gmail.com>
---
- drivers/pci/pcie/dpc.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
+ drivers/pci/pci.h | 2 ++
+ drivers/pci/pcie/aer.c | 10 ++++++++--
+ 2 files changed, 10 insertions(+), 2 deletions(-)
-diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
-index faf4a1e77fab..68899a3db126 100644
---- a/drivers/pci/pcie/dpc.c
-+++ b/drivers/pci/pcie/dpc.c
-@@ -288,7 +288,6 @@ void dpc_process_error(struct pci_dev *pdev)
- dpc_get_aer_uncorrect_severity(pdev, &info) &&
- aer_get_device_error_info(pdev, &info)) {
- aer_print_error(pdev, &info);
-- pci_aer_clear_status(pdev);
- }
- }
+diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
+index eb88d8bfeaf7..48ed7f91113b 100644
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -437,6 +437,8 @@ struct aer_err_info {
+ u32 status; /* COR/UNCOR Error Status */
+ u32 mask; /* COR/UNCOR Error Mask */
+ struct aer_header_log_regs tlp; /* TLP Header */
++
++ u16 devctl;
+ };
-@@ -297,6 +296,7 @@ static irqreturn_t dpc_handler(int irq, void *context)
- struct pci_dev *pdev = context;
+ /* Preliminary AER error information processed from Root port */
+diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
+index d3937f5384e4..fdeef9deb016 100644
+--- a/drivers/pci/pcie/aer.c
++++ b/drivers/pci/pcie/aer.c
+@@ -729,8 +729,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
+ aer_error_severity_string[info->severity],
+ aer_error_layer[layer], aer_agent_string[agent]);
- dpc_process_error(pdev);
-+ pci_aer_clear_status(pdev);
+- pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
+- dev->vendor, dev->device, info->status, info->mask);
++ pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x, devctl=%#06x\n",
++ dev->vendor, dev->device, info->status, info->mask, info->devctl);
- /* We configure DPC so it only triggers on ERR_FATAL */
- pcie_do_recovery(pdev, pci_channel_io_frozen, dpc_reset_link);
+ __aer_print_error(dev, info);
+
+@@ -1083,6 +1083,12 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
+ if (!aer)
+ return 0;
+
++ /*
++ * Cache the value of Device Control Register now, because later the
++ * device might not be available
++ */
++ pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &info->devctl);
++
+ if (info->severity == AER_CORRECTABLE) {
+ pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS,
+ &info->status);
--
2.25.1