Re: [PATCH net-next v4 5/5] net: wangxun: implement pci_error_handlers ops
From: Larysa Zaremba <hidden>
Date: 2026-06-02 11:30:34
On Mon, Jun 01, 2026 at 03:22:21PM +0800, Jiawen Wu wrote:
quoted hunk ↗ jump to hunk
Support AER driver to handle the PCIe errors. Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com> --- drivers/net/ethernet/wangxun/libwx/wx_err.c | 111 ++++++++++++++++++ drivers/net/ethernet/wangxun/libwx/wx_err.h | 2 + drivers/net/ethernet/wangxun/libwx/wx_type.h | 1 + drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 9 +- .../net/ethernet/wangxun/txgbe/txgbe_main.c | 8 +- 5 files changed, 127 insertions(+), 4 deletions(-)diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c index e249cea874be..ae5d14071f3d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_err.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c@@ -4,11 +4,122 @@ #include <linux/netdevice.h> #include <linux/pci.h> +#include <linux/aer.h> #include "wx_type.h" #include "wx_lib.h" #include "wx_err.h" +/** + * wx_io_error_detected - called when PCI error is detected + * @pdev: Pointer to PCI device + * @state: The current pci connection state + * + * Return: pci_ers_result_t. + * + * This function is called after a PCI bus error affecting + * this device has been detected. + */ +static pci_ers_result_t wx_io_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct wx *wx = pci_get_drvdata(pdev); + struct net_device *netdev; + + if (!wx) + return PCI_ERS_RESULT_DISCONNECT; + + netdev = wx->netdev; + if (!netif_device_present(netdev)) + return PCI_ERS_RESULT_DISCONNECT; + + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + rtnl_lock(); + netif_device_detach(netdev);
Sashiko says: "Calling netif_device_detach() stops new packets from entering the TX queue but doesn't wait for currently executing ndo_start_xmit operations to complete, which netif_tx_disable() would do."

netif_device_detach() does not take __netif_tx_lock(), so it does seem to leave a window in which an ndo_start_xmit() call that is already in flight can race with the rest of this error handler.
quoted hunk ↗ jump to hunk
+ + if (netif_running(netdev) && wx->soft_quiesce) + wx->soft_quiesce(wx); + + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state)) + pci_disable_device(pdev); + rtnl_unlock(); + + /* Request a slot reset. */ + return PCI_ERS_RESULT_NEED_RESET; +} + +/** + * wx_io_slot_reset - called after the pci bus has been reset. + * @pdev: Pointer to PCI device + * + * Return: pci_ers_result_t. + * + * Restart the card from scratch, as if from a cold-boot. + */ +static pci_ers_result_t wx_io_slot_reset(struct pci_dev *pdev) +{ + struct wx *wx = pci_get_drvdata(pdev); + pci_ers_result_t result; + + if (pci_enable_device_mem(pdev)) { + wx_err(wx, "Cannot re-enable PCI device after reset.\n"); + result = PCI_ERS_RESULT_DISCONNECT; + } else { + /* make all memory operations done before clearing the flag */ + smp_mb__before_atomic(); + clear_bit(WX_STATE_DISABLED, wx->state); + pci_set_master(pdev); + pci_restore_state(pdev); + pci_wake_from_d3(pdev, false); + + rtnl_lock(); + if (wx->do_reset) + wx->do_reset(wx->netdev, false); + rtnl_unlock(); + result = PCI_ERS_RESULT_RECOVERED; + } + + pci_aer_clear_nonfatal_status(pdev); + + return result; +} + +/** + * wx_io_resume - called when traffic can start flowing again. + * @pdev: Pointer to PCI device + * + * This callback is called when the error recovery driver tells us that + * its OK to resume normal operation. 
+ */ +static void wx_io_resume(struct pci_dev *pdev) +{ + struct wx *wx = pci_get_drvdata(pdev); + struct net_device *netdev; + int err; + + netdev = wx->netdev; + rtnl_lock(); + if (netif_running(netdev)) { + err = netdev->netdev_ops->ndo_open(netdev); + if (err) { + wx_err(wx, "Failed to open netdev after reset\n"); + goto out; + } + } + netif_device_attach(netdev); +out: + rtnl_unlock(); +} + +const struct pci_error_handlers wx_err_handler = { + .error_detected = wx_io_error_detected, + .slot_reset = wx_io_slot_reset, + .resume = wx_io_resume, +}; +EXPORT_SYMBOL(wx_err_handler); + static void wx_pf_reset_subtask(struct wx *wx) { if (!test_and_clear_bit(WX_FLAG_NEED_PF_RESET, wx->flags))diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.h b/drivers/net/ethernet/wangxun/libwx/wx_err.h index a529bf54eeac..de5d02316230 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_err.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_err.h@@ -7,6 +7,8 @@ #ifndef _WX_ERR_H_ #define _WX_ERR_H_ +extern const struct pci_error_handlers wx_err_handler; + void wx_check_err_subtask(struct wx *wx); int wx_init_err_task(struct wx *wx); void wx_check_tx_hang_subtask(struct wx *wx);diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 1b25a52188f7..778e186e4555 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h@@ -1221,6 +1221,7 @@ enum wx_state { WX_STATE_PTP_RUNNING, WX_STATE_PTP_TX_IN_PROGRESS, WX_STATE_SERVICE_SCHED, + WX_STATE_DISABLED, WX_STATE_NBITS /* must be last */ };diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 2484d3177034..8e5a708ea809 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c@@ -589,7 +589,8 @@ static void ngbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake) *enable_wake = !!wufc; wx_control_hw(wx, false); - 
pci_disable_device(pdev); + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state)) + pci_disable_device(pdev); } static void ngbe_shutdown(struct pci_dev *pdev)@@ -879,6 +880,7 @@ static int ngbe_probe(struct pci_dev *pdev, goto err_register; pci_set_drvdata(pdev, wx); + pci_save_state(pdev); return 0;@@ -934,7 +936,8 @@ static void ngbe_remove(struct pci_dev *pdev) kfree(wx->mac_table); wx_clear_interrupt_scheme(wx); - pci_disable_device(pdev); + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state)) + pci_disable_device(pdev); } static int ngbe_suspend(struct pci_dev *pdev, pm_message_t state)@@ -961,6 +964,7 @@ static int ngbe_resume(struct pci_dev *pdev) wx_err(wx, "Cannot enable PCI device from suspend\n"); return err; } + clear_bit(WX_STATE_DISABLED, wx->state); pci_set_master(pdev); device_wakeup_disable(&pdev->dev);@@ -985,6 +989,7 @@ static struct pci_driver ngbe_driver = { .resume = ngbe_resume, .shutdown = ngbe_shutdown, .sriov_configure = wx_pci_sriov_configure, + .err_handler = &wx_err_handler, }; module_pci_driver(ngbe_driver);diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 816aba4a9099..e913e3553ced 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c@@ -570,7 +570,8 @@ static void txgbe_dev_shutdown(struct pci_dev *pdev) wx_control_hw(wx, false); - pci_disable_device(pdev); + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state)) + pci_disable_device(pdev); } static void txgbe_shutdown(struct pci_dev *pdev)@@ -922,6 +923,7 @@ static int txgbe_probe(struct pci_dev *pdev, goto err_remove_phy; pci_set_drvdata(pdev, wx); + pci_save_state(pdev); netif_tx_stop_all_queues(netdev);@@ -996,7 +998,8 @@ static void txgbe_remove(struct pci_dev *pdev) kfree(wx->mac_table); wx_clear_interrupt_scheme(wx); - pci_disable_device(pdev); + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state)) + pci_disable_device(pdev); } static struct pci_driver 
txgbe_driver = {@@ -1006,6 +1009,7 @@ static struct pci_driver txgbe_driver = { .remove = txgbe_remove, .shutdown = txgbe_shutdown, .sriov_configure = wx_pci_sriov_configure, + .err_handler = &wx_err_handler, }; module_pci_driver(txgbe_driver);-- 2.51.0