Re: Re: [PATCH net v2] net: stmmac: fix transmit queue timed out after resume
From: "Russell King (Oracle)" <linux@armlinux.org.uk>
Date: 2026-01-15 12:09:40
Also in:
linux-doc, lkml
Subsystem:
networking drivers, stmmac ethernet driver, the rest · Maintainers:
Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
On Thu, Jan 15, 2026 at 03:08:53PM +0800, Tao Wang wrote:
quoted
While I agree with the change for stmmac_tso_xmit(), please explain why the change in stmmac_free_tx_buffer() is necessary. It seems to me that if this is missing in stmmac_free_tx_buffer(), the driver should have more problems than just TSO.The change in stmmac_free_tx_buffer() is intended to be generic for all users of last_segment, not only for the TSO path.
However, transmit is a hotpath, so work needs to be minimised for good performance. We don't want anything that is unnecessary in these paths. If we always explicitly set .last_segment when adding any packet to the ring, then there is absolutely no need to also do so when freeing them. Also, I think there's a similar issue with .is_jumbo. So, I think it would make more sense to have some helpers for setting up the tx_skbuff_dma entry. Maybe something like the below? I'll see if I can measure the performance impact of this later today, but I can't guarantee I'll get to that. The idea here is to ensure that all members with the exception of xsk_meta are fully initialised when an entry is populated. I haven't removed anything in the tx_q->tx_skbuff_dma entry release path yet, but with this in place, we should be able to eliminate the clearance of these in stmmac_tx_clean() and stmmac_free_tx_buffer(). Note that the driver assumes setting .buf to zero means the entry is cleared. dma_addr_t is a cookie which is device specific, and zero may be a valid DMA cookie. Only DMA_MAPPING_ERROR is invalid, and can be assumed to hold any meaning in driver code. So that needs fixing as well.
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a8a78fe7d01f..0e605d0f6a94 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c@@ -1874,6 +1874,34 @@ static int init_dma_rx_desc_rings(struct net_device *dev, return ret; } +static void stmmac_set_tx_dma_entry(struct stmmac_tx_queue *tx_q, + unsigned int entry, + enum stmmac_txbuf_type type, + dma_addr_t addr, size_t len, + bool map_as_page) +{ + tx_q->tx_skbuff_dma[entry].buf = addr; + tx_q->tx_skbuff_dma[entry].len = len; + tx_q->tx_skbuff_dma[entry].buf_type = type; + tx_q->tx_skbuff_dma[entry].map_as_page = map_as_page; + tx_q->tx_skbuff_dma[entry].last_segment = false; + tx_q->tx_skbuff_dma[entry].is_jumbo = false; +} + +static void stmmac_set_tx_skb_dma_entry(struct stmmac_tx_queue *tx_q, + unsigned int entry, dma_addr_t addr, + size_t len, bool map_as_page) +{ + stmmac_set_tx_dma_entry(tx_q, entry, STMMAC_TXBUF_T_SKB, addr, len, + map_as_page); +} + +static void stmmac_set_tx_dma_last_segment(struct stmmac_tx_queue *tx_q, + unsigned int entry) +{ + tx_q->tx_skbuff_dma[entry].last_segment = true; +} + /** * __init_dma_tx_desc_rings - init the TX descriptor ring (per queue) * @priv: driver private structure
@@ -1919,11 +1947,8 @@ static int __init_dma_tx_desc_rings(struct stmmac_priv *priv, p = tx_q->dma_tx + i; stmmac_clear_desc(priv, p); + stmmac_set_tx_skb_dma_entry(tx_q, i, 0, 0, false); - tx_q->tx_skbuff_dma[i].buf = 0; - tx_q->tx_skbuff_dma[i].map_as_page = false; - tx_q->tx_skbuff_dma[i].len = 0; - tx_q->tx_skbuff_dma[i].last_segment = false; tx_q->tx_skbuff[i] = NULL; }
@@ -2649,19 +2674,15 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) meta = xsk_buff_get_metadata(pool, xdp_desc.addr); xsk_buff_raw_dma_sync_for_device(pool, dma_addr, xdp_desc.len); - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XSK_TX; - /* To return XDP buffer to XSK pool, we simple call * xsk_tx_completed(), so we don't need to fill up * 'buf' and 'xdpf'. */ - tx_q->tx_skbuff_dma[entry].buf = 0; - tx_q->xdpf[entry] = NULL; + stmmac_set_tx_dma_entry(tx_q, entry, STMMAC_TXBUF_T_XSK_TX, + 0, xdp_desc.len, false); + stmmac_set_tx_dma_last_segment(tx_q, entry); - tx_q->tx_skbuff_dma[entry].map_as_page = false; - tx_q->tx_skbuff_dma[entry].len = xdp_desc.len; - tx_q->tx_skbuff_dma[entry].last_segment = true; - tx_q->tx_skbuff_dma[entry].is_jumbo = false; + tx_q->xdpf[entry] = NULL; stmmac_set_desc_addr(priv, tx_desc, dma_addr);
@@ -2836,6 +2857,9 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue, tx_q->tx_skbuff_dma[entry].map_as_page = false; } + /* This looks at tx_q->tx_skbuff_dma[tx_q->dirty_tx].is_jumbo + * and tx_q->tx_skbuff_dma[tx_q->dirty_tx].last_segment + */ stmmac_clean_desc3(priv, tx_q, p); tx_q->tx_skbuff_dma[entry].last_segment = false;
@@ -4494,10 +4518,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) * this DMA buffer right after the DMA engine completely finishes the * full buffer transmission. */ - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des; - tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_headlen(skb); - tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = false; - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB; + stmmac_set_tx_skb_dma_entry(tx_q, tx_q->cur_tx, des, skb_headlen(skb), + false); /* Prepare fragments */ for (i = 0; i < nfrags; i++) {
@@ -4512,17 +4534,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_tso_allocator(priv, des, skb_frag_size(frag), (i == nfrags - 1), queue); - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des; - tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag); - tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true; - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB; + stmmac_set_tx_skb_dma_entry(tx_q, tx_q->cur_tx, des, + skb_frag_size(frag), true); } - tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true; + stmmac_set_tx_dma_last_segment(tx_q, tx_q->cur_tx); /* Only the last descriptor gets to point to the skb. */ tx_q->tx_skbuff[tx_q->cur_tx] = skb; - tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB; /* Manage tx mitigation */ tx_packets = (tx_q->cur_tx + 1) - first_tx;
@@ -4774,23 +4793,18 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (dma_mapping_error(priv->device, des)) goto dma_map_err; /* should reuse desc w/o issues */ - tx_q->tx_skbuff_dma[entry].buf = des; - + stmmac_set_tx_skb_dma_entry(tx_q, entry, des, len, true); stmmac_set_desc_addr(priv, desc, des); - tx_q->tx_skbuff_dma[entry].map_as_page = true; - tx_q->tx_skbuff_dma[entry].len = len; - tx_q->tx_skbuff_dma[entry].last_segment = last_segment; - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB; - /* Prepare the descriptor and set the own bit too */ stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion, priv->mode, 1, last_segment, skb->len); } + stmmac_set_tx_dma_last_segment(tx_q, entry); + /* Only the last descriptor gets to point to the skb. */ tx_q->tx_skbuff[entry] = skb; - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB; /* According to the coalesce parameter the IC bit for the latest * segment is reset and the timer re-started to clean the tx status.
@@ -4869,14 +4883,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (dma_mapping_error(priv->device, des)) goto dma_map_err; - tx_q->tx_skbuff_dma[first_entry].buf = des; - tx_q->tx_skbuff_dma[first_entry].buf_type = STMMAC_TXBUF_T_SKB; - tx_q->tx_skbuff_dma[first_entry].map_as_page = false; + stmmac_set_tx_skb_dma_entry(tx_q, first_entry, des, nopaged_len, + false); stmmac_set_desc_addr(priv, first, des); - tx_q->tx_skbuff_dma[first_entry].len = nopaged_len; - tx_q->tx_skbuff_dma[first_entry].last_segment = last_segment; + if (last_segment) + stmmac_set_tx_dma_last_segment(tx_q, first_entry); if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en)) {
@@ -5064,6 +5077,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; bool csum = !priv->plat->tx_queues_cfg[queue].coe_unsupported; unsigned int entry = tx_q->cur_tx; + enum stmmac_txbuf_type buf_type; struct dma_desc *tx_desc; dma_addr_t dma_addr; bool set_ic;
@@ -5091,7 +5105,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, if (dma_mapping_error(priv->device, dma_addr)) return STMMAC_XDP_CONSUMED; - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_NDO; + buf_type = STMMAC_TXBUF_T_XDP_NDO; } else { struct page *page = virt_to_page(xdpf->data);
@@ -5100,14 +5114,12 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, dma_sync_single_for_device(priv->device, dma_addr, xdpf->len, DMA_BIDIRECTIONAL); - tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_TX; + buf_type = STMMAC_TXBUF_T_XDP_TX; } - tx_q->tx_skbuff_dma[entry].buf = dma_addr; - tx_q->tx_skbuff_dma[entry].map_as_page = false; - tx_q->tx_skbuff_dma[entry].len = xdpf->len; - tx_q->tx_skbuff_dma[entry].last_segment = true; - tx_q->tx_skbuff_dma[entry].is_jumbo = false; + stmmac_set_tx_dma_entry(tx_q, entry, buf_type, dma_addr, xdpf->len, + false); + stmmac_set_tx_dma_last_segment(tx_q, entry); tx_q->xdpf[entry] = xdpf;
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!