[PATCH net-next v6 1/2] net: mana: Handle SKB if TX SGEs exceed hardware limit
From: Aditya Garg <hidden>
Date: 2025-11-18 11:26:22
Also in:
linux-hyperv, linux-rdma, lkml
Subsystem:
hyper-v/azure core and drivers, networking drivers, networking [general], the rest · Maintainers:
"K. Y. Srinivasan", Haiyang Zhang, Wei Liu, Dexuan Cui, Long Li, Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs) per TX WQE. Exceeding this limit can cause TX failures. Add ndo_features_check() callback to validate SKB layout before transmission. For GSO SKBs that would exceed the hardware SGE limit, clear NETIF_F_GSO_MASK to enforce software segmentation in the stack. Add a fallback in mana_start_xmit() to linearize non-GSO SKBs that still exceed the SGE limit. Also, Add ethtool counter for SKBs linearized Co-developed-by: Dipayaan Roy <redacted> Signed-off-by: Dipayaan Roy <redacted> Signed-off-by: Aditya Garg <redacted> Reviewed-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com> --- Changes in v6: * Replace #if logic with constant expression in if(). Changes in v5: * Drop skb_is_gso() check for disabling GSO in mana_features_check(). * Register .ndo_features_check conditionally to avoid unnecessary call. Changes in v4: * No change. --- drivers/net/ethernet/microsoft/mana/mana_en.c | 40 ++++++++++++++++++- .../ethernet/microsoft/mana/mana_ethtool.c | 2 + include/net/mana/gdma.h | 8 +++- include/net/mana/mana.h | 1 + 4 files changed, 48 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 13f47be7aca6..7b49ab005e2d 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c@@ -11,6 +11,7 @@ #include <linux/mm.h> #include <linux/pci.h> #include <linux/export.h> +#include <linux/skbuff.h> #include <net/checksum.h> #include <net/ip6_checksum.h>
@@ -329,6 +330,21 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) cq = &apc->tx_qp[txq_idx].tx_cq; tx_stats = &txq->stats; + BUILD_BUG_ON(MAX_TX_WQE_SGL_ENTRIES != MANA_MAX_TX_WQE_SGL_ENTRIES); + if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES && + skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) { + /* GSO skb with Hardware SGE limit exceeded is not expected here + * as they are handled in mana_features_check() callback + */ + if (skb_linearize(skb)) { + netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n", + skb_shinfo(skb)->nr_frags, + skb_is_gso(skb)); + goto tx_drop_count; + } + apc->eth_stats.tx_linear_pkt_cnt++; + } + pkg.tx_oob.s_oob.vcq_num = cq->gdma_id; pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
@@ -442,8 +458,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) } } - WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES); - if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) { pkg.wqe_req.sgl = pkg.sgl_array; } else {
@@ -518,6 +532,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_OK; } +#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) +static netdev_features_t mana_features_check(struct sk_buff *skb, + struct net_device *ndev, + netdev_features_t features) +{ + if (skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) { + /* Exceeds HW SGE limit. + * GSO case: + * Disable GSO so the stack will software-segment the skb + * into smaller skbs that fit the SGE budget. + * Non-GSO case: + * The xmit path will attempt skb_linearize() as a fallback. + */ + features &= ~NETIF_F_GSO_MASK; + } + return features; +} +#endif + static void mana_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *st) {
@@ -883,6 +916,9 @@ static const struct net_device_ops mana_devops = { .ndo_open = mana_open, .ndo_stop = mana_close, .ndo_select_queue = mana_select_queue, +#if (MAX_SKB_FRAGS + 2 > MANA_MAX_TX_WQE_SGL_ENTRIES) + .ndo_features_check = mana_features_check, +#endif .ndo_start_xmit = mana_start_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_get_stats64 = mana_get_stats64,
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index 99e811208683..0e2f4343ac67 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c@@ -18,6 +18,8 @@ static const struct mana_stats_desc mana_eth_stats[] = { {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)}, {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, tx_cqe_unknown_type)}, + {"tx_linear_pkt_cnt", offsetof(struct mana_ethtool_stats, + tx_linear_pkt_cnt)}, {"rx_coalesced_err", offsetof(struct mana_ethtool_stats, rx_coalesced_err)}, {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 2e4f2f3175e5..a4cf307859f8 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h@@ -486,6 +486,8 @@ struct gdma_wqe { #define INLINE_OOB_SMALL_SIZE 8 #define INLINE_OOB_LARGE_SIZE 24 +#define MANA_MAX_TX_WQE_SGL_ENTRIES 30 + #define MAX_TX_WQE_SIZE 512 #define MAX_RX_WQE_SIZE 256
@@ -592,6 +594,9 @@ enum { #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17) #define GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE BIT(6) +/* Driver supports linearizing the skb when num_sge exceeds hardware limit */ +#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20) + /* Driver can send HWC periodically to query stats */ #define GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY BIT(21)
@@ -605,7 +610,8 @@ enum { GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \ GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \ GDMA_DRV_CAP_FLAG_1_HW_VPORT_LINK_AWARE | \ - GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY) + GDMA_DRV_CAP_FLAG_1_PERIODIC_STATS_QUERY | \ + GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE) #define GDMA_DRV_CAP_FLAGS2 0
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index d37f4cea0ac3..fb28b3cac067 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h@@ -377,6 +377,7 @@ struct mana_ethtool_stats { u64 wake_queue; u64 tx_cqe_err; u64 tx_cqe_unknown_type; + u64 tx_linear_pkt_cnt; u64 rx_coalesced_err; u64 rx_cqe_unknown_type; };
--
2.43.0