[PATCH v1 13/18] ibmveth: Add per-queue TX statistics reporting
From: Mingming Cao <hidden>
Date: 2026-06-30 14:55:16
Also in:
netdev
Subsystem:
ibm power virtual ethernet device driver, linux for powerpc (32-bit and 64-bit), networking drivers, the rest · Maintainers:
Nick Child, Madhavan Srinivasan, Michael Ellerman, Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
Track transmit counters per TX queue to avoid cache line contention in the
xmit hot path. Expose the per-queue counters via ethtool -S and sum them in
ndo_get_stats64().

The global tx_large_packets and tx_send_failed counters are still reported;
they are now summed from the per-queue counters on the ethtool read path,
for backward compatibility with existing tools.

Signed-off-by: Mingming Cao <redacted>
Reviewed-by: Dave Marquardt <redacted>
---
 drivers/net/ethernet/ibm/ibmveth.c | 129 +++++++++++++++++++++++++----
 drivers/net/ethernet/ibm/ibmveth.h |  13 +++
 2 files changed, 124 insertions(+), 18 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 1c08082ffbd6..4e3f49b6346f 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -252,6 +252,33 @@ static void ibmveth_free_rx_qstats(struct ibmveth_adapter *adapter)
 	adapter->rx_qstats = NULL;
 }
 
+/**
+ * ibmveth_alloc_tx_qstats - Allocate zeroed per-queue TX statistics
+ * @adapter: ibmveth adapter structure; tx_qstats gets IBMVETH_MAX_QUEUES slots
+ *
+ * Return: 0 on success, -ENOMEM on failure
+ */
+static int ibmveth_alloc_tx_qstats(struct ibmveth_adapter *adapter)
+{
+	adapter->tx_qstats = kcalloc(IBMVETH_MAX_QUEUES,
+				     sizeof(*adapter->tx_qstats),
+				     GFP_KERNEL);
+	if (!adapter->tx_qstats)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * ibmveth_free_tx_qstats - Free per-queue TX statistics
+ * @adapter: ibmveth adapter structure; tx_qstats is reset to NULL
+ */
+static void ibmveth_free_tx_qstats(struct ibmveth_adapter *adapter)
+{
+	kfree(adapter->tx_qstats);
+	adapter->tx_qstats = NULL;
+}
+
 /**
  * ibmveth_alloc_rx_queues - Allocate per-queue RX resources
  * @adapter: ibmveth adapter structure
@@ -1628,6 +1655,10 @@ static int ibmveth_open(struct net_device *netdev) if (rc) goto out_cleanup_rx_interrupts; + rc = ibmveth_alloc_tx_qstats(adapter); + if (rc) + goto out_free_tx_resources; + netif_tx_start_all_queues(netdev); netdev_dbg(netdev, "open complete\n");
@@ -1668,6 +1699,7 @@ static int ibmveth_close(struct net_device *netdev) } } + ibmveth_free_tx_qstats(adapter); ibmveth_free_tx_resources(adapter); ibmveth_cleanup_rx_interrupts(adapter); ibmveth_update_rx_no_buffer(adapter);
@@ -1960,6 +1992,32 @@ static void ibmveth_aggregate_rx_qstats(struct ibmveth_adapter *adapter)
 	adapter->rx_large_packets = total_large;
 }
 
+/**
+ * ibmveth_aggregate_tx_qstats - Fold per-queue TX counters into the globals
+ * @adapter: ibmveth adapter
+ *
+ * Used on the ethtool read path so the legacy tx_large_packets and
+ * tx_send_failed totals stay current; no-op while tx_qstats is NULL.
+ */
+static void ibmveth_aggregate_tx_qstats(struct ibmveth_adapter *adapter)
+{
+	const struct ibmveth_tx_queue_stats *qs = adapter->tx_qstats;
+	u64 send_failed_sum = 0;
+	u64 large_sum = 0;
+	int q;
+
+	if (!qs)
+		return;
+
+	for (q = 0; q < adapter->netdev->real_num_tx_queues; q++, qs++) {
+		large_sum += qs->large_packets;
+		send_failed_sum += qs->send_failures;
+	}
+
+	adapter->tx_send_failed = send_failed_sum;
+	adapter->tx_large_packets = large_sum;
+}
+
 static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	struct ibmveth_adapter *adapter = netdev_priv(dev);
@@ -1984,6 +2042,15 @@ static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data) ethtool_sprintf(&p, "rx%d_no_buffer_drops", i); } + for (i = 0; i < dev->real_num_tx_queues; i++) { + ethtool_sprintf(&p, "tx%d_packets", i); + ethtool_sprintf(&p, "tx%d_bytes", i); + ethtool_sprintf(&p, "tx%d_large_packets", i); + ethtool_sprintf(&p, "tx%d_dropped_packets", i); + ethtool_sprintf(&p, "tx%d_send_failures", i); + ethtool_sprintf(&p, "tx%d_checksum_offload", i); + } + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { ethtool_sprintf(&p, "pool%d_size", i); ethtool_sprintf(&p, "pool%d_active", i);
@@ -1999,6 +2066,7 @@ static int ibmveth_get_sset_count(struct net_device *dev, int sset) case ETH_SS_STATS: return ARRAY_SIZE(ibmveth_stats) + adapter->num_rx_queues * IBMVETH_NUM_RX_QSTATS + + dev->real_num_tx_queues * IBMVETH_NUM_TX_QSTATS + IBMVETH_NUM_BUFF_POOLS * 3; default: return -EOPNOTSUPP;
@@ -2012,6 +2080,7 @@ static void ibmveth_get_ethtool_stats(struct net_device *dev, int i, j; ibmveth_aggregate_rx_qstats(adapter); + ibmveth_aggregate_tx_qstats(adapter); for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++) data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
@@ -2030,6 +2099,19 @@ static void ibmveth_get_ethtool_stats(struct net_device *dev, } } + for (j = 0; j < dev->real_num_tx_queues; j++) { + if (adapter->tx_qstats) { + data[i++] = adapter->tx_qstats[j].packets; + data[i++] = adapter->tx_qstats[j].bytes; + data[i++] = adapter->tx_qstats[j].large_packets; + data[i++] = adapter->tx_qstats[j].dropped_packets; + data[i++] = adapter->tx_qstats[j].send_failures; + data[i++] = adapter->tx_qstats[j].checksum_offload; + } else { + i += IBMVETH_NUM_TX_QSTATS; + } + } + for (j = 0; j < IBMVETH_NUM_BUFF_POOLS; j++) { data[i++] = adapter->rx_buff_pool[0][j].size; data[i++] = adapter->rx_buff_pool[0][j].active;
@@ -2152,8 +2234,10 @@ static int ibmveth_send(struct ibmveth_adapter *adapter, } static int ibmveth_is_packet_unsupported(struct sk_buff *skb, - struct net_device *netdev) + struct ibmveth_adapter *adapter, + int queue_num) { + struct net_device *netdev = adapter->netdev; struct ethhdr *ether_header; int ret = 0;
@@ -2161,7 +2245,8 @@ static int ibmveth_is_packet_unsupported(struct sk_buff *skb, if (ether_addr_equal(ether_header->h_dest, netdev->dev_addr)) { netdev_dbg(netdev, "veth doesn't support loopback packets, dropping packet.\n"); - netdev->stats.tx_dropped++; + if (adapter->tx_qstats) + adapter->tx_qstats[queue_num].dropped_packets++; ret = -EOPNOTSUPP; }
@@ -2177,7 +2262,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, int i, queue_num = skb_get_queue_mapping(skb); unsigned long mss = 0; - if (ibmveth_is_packet_unsupported(skb, netdev)) + if (ibmveth_is_packet_unsupported(skb, adapter, queue_num)) goto out; /* veth can't checksum offload UDP */ if (skb->ip_summed == CHECKSUM_PARTIAL &&
@@ -2188,7 +2273,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, skb_checksum_help(skb)) { netdev_err(netdev, "tx: failed to checksum packet\n"); - netdev->stats.tx_dropped++; + adapter->tx_qstats[queue_num].dropped_packets++; goto out; }
@@ -2200,6 +2285,8 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD); + adapter->tx_qstats[queue_num].checksum_offload++; + /* Need to zero out the checksum */ buf[0] = 0; buf[1] = 0;
@@ -2211,7 +2298,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) { if (adapter->fw_large_send_support) { mss = (unsigned long)skb_shinfo(skb)->gso_size; - adapter->tx_large_packets++; + adapter->tx_qstats[queue_num].large_packets++; } else if (!skb_is_gso_v6(skb)) { /* Put -1 in the IP checksum to tell phyp it * is a largesend packet. Put the mss in
@@ -2220,7 +2307,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, ip_hdr(skb)->check = 0xffff; tcp_hdr(skb)->check = cpu_to_be16(skb_shinfo(skb)->gso_size); - adapter->tx_large_packets++; + adapter->tx_qstats[queue_num].large_packets++; } }
@@ -2228,7 +2315,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, if (unlikely(skb->len > adapter->tx_ltb_size)) { netdev_err(adapter->netdev, "tx: packet size (%u) exceeds ltb (%u)\n", skb->len, adapter->tx_ltb_size); - netdev->stats.tx_dropped++; + adapter->tx_qstats[queue_num].dropped_packets++; goto out; } memcpy(adapter->tx_ltb_ptr[queue_num], skb->data, skb_headlen(skb));
@@ -2245,7 +2332,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, if (unlikely(total_bytes != skb->len)) { netdev_err(adapter->netdev, "tx: incorrect packet len copied into ltb (%u != %u)\n", skb->len, total_bytes); - netdev->stats.tx_dropped++; + adapter->tx_qstats[queue_num].dropped_packets++; goto out; } desc.fields.flags_len = desc_flags | skb->len;
@@ -2254,11 +2341,11 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, dma_wmb(); if (ibmveth_send(adapter, desc.desc, mss)) { - adapter->tx_send_failed++; - netdev->stats.tx_dropped++; + adapter->tx_qstats[queue_num].send_failures++; + adapter->tx_qstats[queue_num].dropped_packets++; } else { - netdev->stats.tx_packets++; - netdev->stats.tx_bytes += skb->len; + adapter->tx_qstats[queue_num].packets++; + adapter->tx_qstats[queue_num].bytes += skb->len; } out:
@@ -2759,12 +2846,13 @@ static netdev_features_t ibmveth_features_check(struct sk_buff *skb, } /** - * ibmveth_get_stats64 - Return aggregated per-queue RX statistics + * ibmveth_get_stats64 - Return aggregated per-queue statistics * @dev: network device * @stats: rtnl link statistics storage * - * Sums per-queue rx_qstats into rx_packets/rx_bytes for multi-queue mode. - * TX counters continue to come from netdev->stats (updated in start_xmit). + * Sums per-queue rx_qstats and tx_qstats into the rtnl counters. + * Callers use ndo_get_stats64(); avoid updating netdev->stats on the + * xmit/poll paths to keep per-queue counters off the hot cache line. */ static void ibmveth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
@@ -2779,9 +2867,14 @@ static void ibmveth_get_stats64(struct net_device *dev, } } - stats->tx_packets = dev->stats.tx_packets; - stats->tx_bytes = dev->stats.tx_bytes; - stats->tx_dropped = dev->stats.tx_dropped; + if (adapter->tx_qstats) { + for (i = 0; i < dev->real_num_tx_queues; i++) { + stats->tx_packets += adapter->tx_qstats[i].packets; + stats->tx_bytes += adapter->tx_qstats[i].bytes; + stats->tx_dropped += adapter->tx_qstats[i].dropped_packets; + } + } + stats->tx_errors = dev->stats.tx_errors; }
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index f7b20fd01acb..390c660af979 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -316,9 +316,21 @@ struct ibmveth_rx_queue_stats {
 	u64 no_buffer_drops;
 };
 
+struct ibmveth_tx_queue_stats {	/* one entry per TX queue; u64 fields only */
+	u64 packets;		/* frames for which ibmveth_send() succeeded */
+	u64 bytes;		/* skb->len summed over successful sends */
+	u64 large_packets;	/* GSO frames passed down as large-send */
+	u64 dropped_packets;	/* frames dropped in the xmit path, any reason */
+	u64 send_failures;	/* ibmveth_send() returned an error */
+	u64 checksum_offload;	/* frames sent with checksum-offload flags set */
+};
+
 #define IBMVETH_NUM_RX_QSTATS \
 	(sizeof(struct ibmveth_rx_queue_stats) / sizeof(u64))
 
+#define IBMVETH_NUM_TX_QSTATS \
+	(sizeof(struct ibmveth_tx_queue_stats) / sizeof(u64))
+
 struct ibmveth_buff_pool {
 	u32 size;
 	u32 index;
@@ -386,6 +398,7 @@ struct ibmveth_adapter { /* Multi-queue statistics */ struct ibmveth_hcall_stats hcall_stats; struct ibmveth_rx_queue_stats *rx_qstats; + struct ibmveth_tx_queue_stats *tx_qstats; /* Ethtool settings */ u8 duplex;
--
2.39.3 (Apple Git-146)