RE: [PATCH net-next v4 2/5] net: wangxun: add Tx timeout process
From: Jiawen Wu <jiawenwu@trustnetic.com>
Date: 2026-06-03 02:23:53
On Tue, Jun 2, 2026 6:32 PM, Larysa Zaremba wrote:
On Mon, Jun 01, 2026 at 03:22:18PM +0800, Jiawen Wu wrote:
Implement .ndo_tx_timeout to handle Tx side timeout events. When a Tx timeout event occurs, it will trigger the driver into the reset process. The WX_HANG_CHECK_ARMED bit is set to indicate a potential hang. It will be cleared if a pause frame is received to avoid false hang detection caused by pause frames.
In general, the logic seems sound; one small nit below. There is also a seemingly sensible comment from Sashiko (pasted below), which I agree with. If you do not schedule NAPI every time to check for a hang, then without Rx you are relying on dev_watchdog anyway, so maybe internal hang detection is not that necessary?
The purpose of WX_TX_DETECT_HANG is to reuse the existing Tx cleanup path and detect hangs opportunistically when NAPI is already running due to normal Tx/Rx interrupt activity. For workloads with active traffic, this allows earlier detection than the generic dev_watchdog() timeout and provides driver-specific diagnostics before a full reset is triggered.
quoted
Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com> --- drivers/net/ethernet/wangxun/libwx/Makefile | 2 +- drivers/net/ethernet/wangxun/libwx/wx_err.c | 170 ++++++++++++++++++ drivers/net/ethernet/wangxun/libwx/wx_err.h | 16 ++ drivers/net/ethernet/wangxun/libwx/wx_hw.c | 17 +- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 37 ++++ drivers/net/ethernet/wangxun/libwx/wx_type.h | 19 +- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 14 ++ .../net/ethernet/wangxun/txgbe/txgbe_main.c | 14 ++ 8 files changed, 284 insertions(+), 5 deletions(-) create mode 100644 drivers/net/ethernet/wangxun/libwx/wx_err.c create mode 100644 drivers/net/ethernet/wangxun/libwx/wx_err.hdiff --git a/drivers/net/ethernet/wangxun/libwx/Makefile b/drivers/net/ethernet/wangxun/libwx/Makefile index a71b0ad77de3..c8724bb129aa 100644 --- a/drivers/net/ethernet/wangxun/libwx/Makefile +++ b/drivers/net/ethernet/wangxun/libwx/Makefile@@ -4,5 +4,5 @@ obj-$(CONFIG_LIBWX) += libwx.o -libwx-objs := wx_hw.o wx_lib.o wx_ethtool.o wx_ptp.o wx_mbx.o wx_sriov.o +libwx-objs := wx_hw.o wx_lib.o wx_ethtool.o wx_ptp.o wx_mbx.o wx_sriov.o wx_err.o libwx-objs += wx_vf.o wx_vf_lib.o wx_vf_common.odiff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c new file mode 100644 index 000000000000..982a438d009e --- /dev/null +++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c@@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2015 - 2026 Beijing WangXun Technology Co., Ltd. */ +/* Copyright (c) 1999 - 2026 Intel Corporation. 
*/ + +#include <linux/netdevice.h> +#include <linux/pci.h> + +#include "wx_type.h" +#include "wx_lib.h" +#include "wx_err.h" + +static void wx_pf_reset_subtask(struct wx *wx) +{ + if (!test_and_clear_bit(WX_FLAG_NEED_PF_RESET, wx->flags)) + return; + + wx_warn(wx, "Reset adapter.\n"); + if (wx->do_reset) + wx->do_reset(wx->netdev); +} + +static void wx_reset_task(struct work_struct *work) +{ + struct wx *wx = container_of(work, struct wx, reset_task); + + rtnl_lock(); + + if (test_bit(WX_STATE_DOWN, wx->state) || + test_bit(WX_STATE_RESETTING, wx->state)) + goto out; + + wx_pf_reset_subtask(wx); + +out: + rtnl_unlock(); +} + +void wx_check_err_subtask(struct wx *wx) +{ + if (test_bit(WX_FLAG_NEED_PF_RESET, wx->flags)) + queue_work(wx->reset_wq, &wx->reset_task); +} +EXPORT_SYMBOL(wx_check_err_subtask); + +int wx_init_err_task(struct wx *wx) +{ + wx->reset_wq = alloc_workqueue("wx_reset_wq", WQ_UNBOUND | WQ_HIGHPRI, 1); + if (!wx->reset_wq) { + pr_err("Failed to create wx_reset_wq workqueue\n");This driver does not generally use pr_err().
I'll change it to wx_err().
quoted
+ return -ENOMEM; + } + + INIT_WORK(&wx->reset_task, wx_reset_task); + return 0; +} +EXPORT_SYMBOL(wx_init_err_task); + +static bool wx_ring_tx_pending(struct wx *wx) +{ + int i; + + for (i = 0; i < wx->num_tx_queues; i++) { + struct wx_ring *tx_ring = wx->tx_ring[i]; + + if (tx_ring->next_to_use != tx_ring->next_to_clean) + return true; + } + + return false; +} + +static bool wx_vf_tx_pending(struct wx *wx) +{ + struct wx_ring_feature *vmdq = &wx->ring_feature[RING_F_VMDQ]; + u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask); + u32 i, j; + + if (!wx->num_vfs) + return false; + + for (i = 0; i < wx->num_vfs; i++) { + for (j = 0; j < q_per_pool; j++) { + u32 h, t; + + h = rd32(wx, WX_PX_TR_RP_PV(q_per_pool, i, j)); + t = rd32(wx, WX_PX_TR_WP_PV(q_per_pool, i, j)); + + if (h != t) + return true; + } + } + + return false; +} + +static void wx_watchdog_flush_tx(struct wx *wx) +{ + if (!netif_running(wx->netdev)) + return; + if (netif_carrier_ok(wx->netdev)) + return; + + if (wx_ring_tx_pending(wx) || wx_vf_tx_pending(wx)) { + /* We've lost link, so the controller stops DMA, + * but we've got queued Tx work that's never going + * to get done, so reset controller to flush Tx. + * (Do the reset outside of interrupt context). + */ + wx_warn(wx, "initiating reset due to lost link with pending Tx work\n"); + set_bit(WX_FLAG_NEED_PF_RESET, wx->flags); + } +} + +static void wx_check_tx_hang(struct wx *wx) +{ + int i; + + /* If we're down or resetting, just bail */ + if (!netif_running(wx->netdev) || + test_bit(WX_STATE_RESETTING, wx->state)) + return; + + /* Force detection of hung controller */ + if (netif_carrier_ok(wx->netdev)) { + for (i = 0; i < wx->num_tx_queues; i++) + set_bit(WX_TX_DETECT_HANG, wx->tx_ring[i]->state);Sashiko says: If a software interrupt is not triggered, wouldn't the evaluation of this flag in wx_clean_tx_irq() fail to run unless incoming Rx traffic happens to arrive on the same interrupt vector?quoted
+ } +} + +void wx_check_tx_hang_subtask(struct wx *wx) +{ + wx_watchdog_flush_tx(wx); + wx_check_tx_hang(wx); +} +EXPORT_SYMBOL(wx_check_tx_hang_subtask); + +static void wx_tx_timeout_reset(struct wx *wx) +{ + if (test_bit(WX_STATE_DOWN, wx->state)) + return; + + set_bit(WX_FLAG_NEED_PF_RESET, wx->flags); + wx_warn(wx, "initiating reset due to tx timeout\n"); + wx_service_event_schedule(wx); +} + +void wx_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue) +{ + struct wx *wx = netdev_priv(netdev); + + wx_tx_timeout_reset(wx); +} +EXPORT_SYMBOL(wx_tx_timeout); + +void wx_handle_tx_hang(struct wx_ring *tx_ring, unsigned int next) +{ + struct wx *wx = netdev_priv(tx_ring->netdev); + + wx_warn(wx, + "Detected Tx Unit Hang: Queue %d, TDH %x, TDT %x, ntu %x, ntc %x, ntc.time_stamp %lx, jiffies %lx\n", + tx_ring->queue_index, + rd32(wx, WX_PX_TR_RP(tx_ring->reg_idx)), + rd32(wx, WX_PX_TR_WP(tx_ring->reg_idx)), + tx_ring->next_to_use, next, + tx_ring->tx_buffer_info[next].time_stamp, jiffies); + + netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); + + wx_tx_timeout_reset(wx); +}[...]