Thread (15 messages) 15 messages, 8 authors, 2024-11-04

Re: [resend PATCH 2/2] dim: pass dim_sample to net_dim() by reference

From: Caleb Sander <hidden>
Date: 2024-10-31 17:20:07
Also in: intel-wired-lan, linux-doc, linux-mediatek, linux-rdma, lkml, netdev, virtualization

On Thu, Oct 31, 2024 at 5:49 AM Louis Peens [off-list ref] wrote:
quoted hunk ↗ jump to hunk
On Wed, Oct 30, 2024 at 06:23:26PM -0600, Caleb Sander Mateos wrote:
quoted
net_dim() is currently passed a struct dim_sample argument by value.
struct dim_sample is 24 bytes. Since this is greater than 16 bytes, x86-64
passes it on the stack. All callers have already initialized dim_sample
on the stack, so passing it by value requires pushing a duplicated copy
to the stack. Either writing to the stack and immediately reading it, or
perhaps dereferencing addresses relative to the stack pointer in a chain
of push instructions, seems to perform quite poorly.

In a heavy TCP workload, mlx5e_handle_rx_dim() consumes 3% of CPU time,
94% of which is attributed to the first push instruction to copy
dim_sample on the stack for the call to net_dim():
// Call ktime_get()
  0.26 |4ead2:   call   4ead7 <mlx5e_handle_rx_dim+0x47>
// Pass the address of struct dim in %rdi
       |4ead7:   lea    0x3d0(%rbx),%rdi
// Set dim_sample.pkt_ctr
       |4eade:   mov    %r13d,0x8(%rsp)
// Set dim_sample.byte_ctr
       |4eae3:   mov    %r12d,0xc(%rsp)
// Set dim_sample.event_ctr
  0.15 |4eae8:   mov    %bp,0x10(%rsp)
// Duplicate dim_sample on the stack
 94.16 |4eaed:   push   0x10(%rsp)
  2.79 |4eaf1:   push   0x10(%rsp)
  0.07 |4eaf5:   push   %rax
// Call net_dim()
  0.21 |4eaf6:   call   4eafb <mlx5e_handle_rx_dim+0x6b>

To allow the caller to reuse the struct dim_sample already on the stack,
pass the struct dim_sample by reference to net_dim().

Signed-off-by: Caleb Sander Mateos <redacted>
---
 Documentation/networking/net_dim.rst                   |  2 +-
 drivers/net/ethernet/amazon/ena/ena_netdev.c           |  2 +-
 drivers/net/ethernet/broadcom/bcmsysport.c             |  2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.c              |  4 ++--
 drivers/net/ethernet/broadcom/genet/bcmgenet.c         |  2 +-
 drivers/net/ethernet/freescale/enetc/enetc.c           |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c        |  4 ++--
 drivers/net/ethernet/intel/ice/ice_txrx.c              |  4 ++--
 drivers/net/ethernet/intel/idpf/idpf_txrx.c            |  4 ++--
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c |  2 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c            |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c      |  4 ++--
 drivers/net/ethernet/netronome/nfp/nfd3/dp.c           |  4 ++--
 drivers/net/ethernet/netronome/nfp/nfdk/dp.c           |  4 ++--
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c       |  2 +-
 drivers/net/virtio_net.c                               |  2 +-
 drivers/soc/fsl/dpio/dpio-service.c                    |  2 +-
 include/linux/dim.h                                    |  2 +-
 lib/dim/net_dim.c                                      | 10 +++++-----
 19 files changed, 31 insertions(+), 31 deletions(-)
--- snip ---
quoted
diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
index d215efc6cad0..f1c6c47564b1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
@@ -1177,11 +1177,11 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget)
                      pkts = r_vec->rx_pkts;
                      bytes = r_vec->rx_bytes;
              } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));

              dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
-             net_dim(&r_vec->rx_dim, dim_sample);
+             net_dim(&r_vec->rx_dim, &dim_sample);
      }

      if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
              struct dim_sample dim_sample = {};
              unsigned int start;
@@ -1192,11 +1192,11 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget)
                      pkts = r_vec->tx_pkts;
                      bytes = r_vec->tx_bytes;
              } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));

              dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
-             net_dim(&r_vec->tx_dim, dim_sample);
+             net_dim(&r_vec->tx_dim, &dim_sample);
      }

      return pkts_polled;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
index dae5af7d1845..ebeb6ab4465c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
@@ -1287,11 +1287,11 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget)
                      pkts = r_vec->rx_pkts;
                      bytes = r_vec->rx_bytes;
              } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));

              dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
-             net_dim(&r_vec->rx_dim, dim_sample);
+             net_dim(&r_vec->rx_dim, &dim_sample);
      }

      if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
              struct dim_sample dim_sample = {};
              unsigned int start;
@@ -1302,11 +1302,11 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget)
                      pkts = r_vec->tx_pkts;
                      bytes = r_vec->tx_bytes;
              } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));

              dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
-             net_dim(&r_vec->tx_dim, dim_sample);
+             net_dim(&r_vec->tx_dim, &dim_sample);
      }

      return pkts_polled;
 }
--- snip ---
Hi Caleb. Looks like a fair enough update to me in general, but I am not an
expert on 'dim'. For the corresponding nfp driver changes feel free to add:

Signed-off-by: Louis Peens <redacted>
Hi Louis,
Thanks for the review. Did you mean "Reviewed-by"? If there was a
change you were suggesting, I missed it.

Best,
Caleb
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help