RE: [net-next,v1,7/7] net: wangxun: add pcie error handler
From: Jiawen Wu <jiawenwu@trustnetic.com>
Date: 2026-02-04 06:11:20
On Wed, Feb 4, 2026 10:23 AM, Jakub Kicinski wrote:
This is an AI-generated review of your patch. The human sending this email says "Sending this out without checking since you have to repost. Please note that the AI code review may be wrong especially around defensive programming (which we recommend against)." For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html --- net: wangxun: add pcie error handler Support to check pcie error and invoke aer driver to recover pcie.quoted
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c index bb77bdce69d2..36eb06bd1798 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_err.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c[ ... ]quoted
@@ -3,10 +3,228 @@ #include <linux/netdevice.h> #include <linux/pci.h> +#include <linux/aer.h> #include "wx_type.h" #include "wx_lib.h" #include "wx_err.h" +#include "wx_hw.h" + +/** + * wx_io_error_detected - called when PCI error is detected + * @pdev: Pointer to PCI device + * @state: The current pci connection state + * + * This function is called after a PCI bus error affecting + * this device has been detected. + */ +static pci_ers_result_t wx_io_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct wx *wx = pci_get_drvdata(pdev); + struct net_device *netdev; + + netdev = wx->netdev; + if (!netif_device_present(netdev)) + return PCI_ERS_RESULT_DISCONNECT; + + rtnl_lock(); + netif_device_detach(netdev); + + wx->io_err = true; + if (netif_running(netdev)) + wx->close_suspend(wx); + + if (state == pci_channel_io_perm_failure) { + rtnl_unlock(); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state)) + pci_disable_device(pdev); + rtnl_unlock(); + + /* Request a slot reset. */ + return PCI_ERS_RESULT_NEED_RESET; +} + +/** + * wx_io_slot_reset - called after the pci bus has been reset. + * @pdev: Pointer to PCI device + * + * Restart the card from scratch, as if from a cold-boot. + */ +static pci_ers_result_t wx_io_slot_reset(struct pci_dev *pdev) +{ + struct wx *wx = pci_get_drvdata(pdev); + pci_ers_result_t result; + + if (pci_enable_device_mem(pdev)) { + wx_err(wx, "Cannot re-enable PCI device after reset.\n"); + result = PCI_ERS_RESULT_DISCONNECT; + } else { + /* make all bar access done before reset. */ + smp_mb__before_atomic(); + clear_bit(WX_STATE_DISABLED, wx->state); + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + pci_wake_from_d3(pdev, false); + + wx->do_reset(wx->netdev, false); + result = PCI_ERS_RESULT_RECOVERED; + } + + pci_aer_clear_nonfatal_status(pdev); + + return result; +} + +/** + * wx_io_resume - called when traffic can start flowing again. + * @pdev: Pointer to PCI device + * + * This callback is called when the error recovery driver tells us that + * its OK to resume normal operation. + */ +static void wx_io_resume(struct pci_dev *pdev) +{ + struct wx *wx = pci_get_drvdata(pdev); + struct net_device *netdev; + + netdev = wx->netdev; + rtnl_lock(); + if (netif_running(netdev)) + netdev->netdev_ops->ndo_open(netdev); + + wx->io_err = false; + netif_device_attach(netdev); + rtnl_unlock(); +} + +const struct pci_error_handlers wx_err_handler = { + .error_detected = wx_io_error_detected, + .slot_reset = wx_io_slot_reset, + .resume = wx_io_resume, +}; +EXPORT_SYMBOL(wx_err_handler); + +static bool wx_check_pcie_error(struct wx *wx) +{ + u16 vid, pci_cmd, devctl2; + u32 value; + + pci_read_config_word(wx->pdev, PCI_VENDOR_ID, &vid); + wx_warn(wx, "PCI vendor id is 0x%x\n", vid); + pci_read_config_word(wx->pdev, PCI_COMMAND, &pci_cmd); + wx_warn(wx, "PCI command reg is 0x%x\n", pci_cmd); + pcie_capability_read_word(wx->pdev, PCI_EXP_DEVCTL2, &devctl2); + wx_warn(wx, "Device Control2 Register: 0x%04x\n", devctl2); + + value = rd32(wx, WX_MIS_PWR); + wx_warn(wx, "MIS_PWR value is 0x%08x\n", value); + value = rd32(wx, WX_PX_IMS(0)); + wx_warn(wx, "PX_IMS0 value is 0x%08x\n", value); + if (test_bit(WX_FLAG_MULTI_64_FUNC, wx->flags)) { + value = rd32(wx, WX_PX_IMS(1)); + wx_warn(wx, "PX_IMS1 value is 0x%08x\n", value); + } + value = rd32(wx, WX_TDB_TFCS); + wx_warn(wx, "Tx flow control Status[TDB_TFCS 0xCE00]: 0x%x\n", value); + + /* PCIe link loss or memory space can't access */ + if (vid == WX_FAILED_READ_CFG_WORD || !(pci_cmd & 0x2)) + return true; + + return false; +} + +static void wx_check_error_subtask(struct wx *wx) +{ + u32 sm; + + if (test_bit(WX_FLAG_ERROR_CHECK, wx->flags)) { + /* get PF semaphore */ + wr32(wx, WX_MIS_PF_SM, 1); + clear_bit(WX_FLAG_ERROR_CHECK, wx->flags); + } + + sm = rd32(wx, WX_MIS_PF_SM); + /* PCIe memory space access error */ + if (sm == U32_MAX) + goto out; + + /* PCIe error may be occurred in another port */ + if ((sm == 1 && wx_check_first_lan_up(wx))) + goto out; + + return; +out: + set_bit(WX_FLAG_NEED_PCIE_RECOVER, wx->flags); + wx_warn(wx, "Set PCIe recover on LAN %d\n", wx->bus.func); +} + +static bool wx_check_recovery_capability(struct pci_dev *dev) +{ +#if defined(__i386__) || defined(__x86_64__) + return true; +#else + /* check upstream bridge is root or PLX bridge, + * or if CPU is kunpeng 920 + */ + if (dev->bus->self->vendor == PCI_VENDOR_ID_PLX || + dev->bus->self->vendor == PCI_VENDOR_ID_HUAWEI)Can this dereference dev->bus->self without checking if it's NULL first? When the PCI device is directly attached to the root bus, dev->bus->self is NULL. On non-x86 platforms, this code checks the upstream bridge vendor without verifying that an upstream bridge exists. The function comment states "check upstream bridge is root or PLX bridge" but the code doesn't actually check for the root bus case. The x86 path returns true unconditionally, but the non-x86 path assumes dev->bus->self is non-NULL.
I think the comment here should be revised. On the non-x86 path, it should returns true only when the upstream bridge is PLX or CPU is Kunpeng 920. When dev->bus->self is null, it is necessary to return false. Although in reality, it is not null either when device is directly attached to the root bus.
quoted
+ return true; + else + return false; +#endif +}[ ... ]quoted
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 3171a98d81a6..653a98d9835b 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c@@ -234,6 +234,12 @@ static irqreturn_t __ngbe_msix_misc(struct wx *wx, u32 eicr) if (eicr & NGBE_PX_MISC_IC_VF_MBOX) wx_msg_task(wx); + if (eicr & NGBE_PX_MISC_PCIE_REQ_ERR) { + wx_warn(wx, "PCIe Request Error founded on Lan %d\n", + wx->bus.func);This isn't a bug, but there's a typo: 'founded' should be 'found'.quoted
+ wx_pcie_error_handler(wx); + } + if (unlikely(eicr & NGBE_PX_MISC_IC_TIMESYNC)) wx_ptp_check_pps_event(wx);[ ... ]quoted
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c index aa14958d439a..c832100bcb61 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c@@ -6,6 +6,7 @@ #include "../libwx/wx_type.h" #include "../libwx/wx_lib.h" +#include "../libwx/wx_err.h" #include "../libwx/wx_ptp.h" #include "../libwx/wx_hw.h" #include "../libwx/wx_sriov.h"@@ -178,6 +179,12 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data) handle_nested_irq(sub_irq); nhandled++; } + if (eicr & TXGBE_PX_MISC_PCIE_REQ_ERR) { + wx_warn(wx, "PCIe Request Error founded on Lan %d\n", + wx->bus.func);This isn't a bug, but there's a typo: 'founded' should be 'found'.quoted
+ wx_pcie_error_handler(wx); + nhandled++; + } if (unlikely(eicr & TXGBE_PX_MISC_IC_TIMESYNC)) { wx_ptp_check_pps_event(wx); nhandled++;