[PATCH net-next 17/24] net: amazon, aquanti, broadcom, cavium, engleder: Use nested-BH locking for XDP redirect.
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: 2023-12-15 17:10:41
Also in:
lkml
Subsystem:
amazon ethernet drivers, aquantia ethernet driver (atlantic), arm/cavium thunder network driver, broadcom bnxt_en 50 gigabit ethernet driver, networking drivers, the rest, xdp (express data path) · Maintainers:
Arthur Kiyanovski, David Arinzon, Sukhdeep Singh, Sunil Goutham, Michael Chan, Pavan Chebbi, Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds, Alexei Starovoitov, Daniel Borkmann, David S. Miller, Jesper Dangaard Brouer, John Fastabend
The per-CPU variables used during bpf_prog_run_xdp() invocation and later during xdp_do_redirect() rely on disabled BH for their protection. Without locking in local_bh_disable() on PREEMPT_RT these data structure require explicit locking. This is a follow-up on the previous change which introduced bpf_run_lock.redirect_lock and uses it now within drivers. The simple way is to acquire the lock before bpf_prog_run_xdp() is invoked and hold it until the end of function. This does not always work because some drivers (cpsw, atlantic) invoke xdp_do_flush() in the same context. Acquiring the lock in bpf_prog_run_xdp() and dropping in xdp_do_redirect() (without touching drivers) does not work because not all driver, which use bpf_prog_run_xdp(), do support XDP_REDIRECT (and invoke xdp_do_redirect()). Ideally the minimal locking scope would be bpf_prog_run_xdp() + xdp_do_redirect() and everything else (error recovery, DMA unmapping, free/ alloc of memory, …) would happen outside of the locked section. Cc: Alexei Starovoitov <ast@kernel.org> Cc: Arthur Kiyanovski <akiyano@amazon.com> Cc: David Arinzon <darinzon@amazon.com> Cc: Igor Russkikh <redacted> Cc: Jesper Dangaard Brouer <hawk@kernel.org> Cc: John Fastabend <john.fastabend@gmail.com> Cc: Michael Chan <michael.chan@broadcom.com> Cc: Noam Dagan <redacted> Cc: Saeed Bishara <redacted> Cc: Shay Agroskin <redacted> Cc: Sunil Goutham <sgoutham@marvell.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 + .../net/ethernet/aquantia/atlantic/aq_ring.c | 26 ++++++++++++------- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 1 + .../net/ethernet/cavium/thunder/nicvf_main.c | 3 ++- drivers/net/ethernet/engleder/tsnep_main.c | 17 +++++++----- 5 files changed, 31 insertions(+), 17 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index b5bca48148309..cf075bc5e2b13 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c@@ -385,6 +385,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) if (!xdp_prog) goto out; + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); verdict = bpf_prog_run_xdp(xdp_prog, xdp); switch (verdict) {
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 694daeaf3e615..5d33d478d5109 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c@@ -458,7 +458,24 @@ static struct sk_buff *aq_xdp_run_prog(struct aq_nic_s *aq_nic, if (xdp_buff_has_frags(xdp) && !prog->aux->xdp_has_frags) goto out_aborted; + local_lock_nested_bh(&bpf_run_lock.redirect_lock); act = bpf_prog_run_xdp(prog, xdp); + if (act == XDP_REDIRECT) { + if (xdp_do_redirect(aq_nic->ndev, xdp, prog) < 0) { + local_unlock_nested_bh(&bpf_run_lock.redirect_lock); + goto out_aborted; + } + local_unlock_nested_bh(&bpf_run_lock.redirect_lock); + + xdp_do_flush(); + u64_stats_update_begin(&rx_ring->stats.rx.syncp); + ++rx_ring->stats.rx.xdp_redirect; + u64_stats_update_end(&rx_ring->stats.rx.syncp); + aq_get_rxpages_xdp(buff, xdp); + } else { + local_unlock_nested_bh(&bpf_run_lock.redirect_lock); + } + switch (act) { case XDP_PASS: skb = aq_xdp_build_skb(xdp, aq_nic->ndev, buff);
@@ -481,15 +498,6 @@ static struct sk_buff *aq_xdp_run_prog(struct aq_nic_s *aq_nic, u64_stats_update_end(&rx_ring->stats.rx.syncp); aq_get_rxpages_xdp(buff, xdp); break; - case XDP_REDIRECT: - if (xdp_do_redirect(aq_nic->ndev, xdp, prog) < 0) - goto out_aborted; - xdp_do_flush(); - u64_stats_update_begin(&rx_ring->stats.rx.syncp); - ++rx_ring->stats.rx.xdp_redirect; - u64_stats_update_end(&rx_ring->stats.rx.syncp); - aq_get_rxpages_xdp(buff, xdp); - break; default: fallthrough; case XDP_ABORTED:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 96f5ca778c67d..c4d989da7fade 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c@@ -253,6 +253,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, /* BNXT_RX_PAGE_MODE(bp) when XDP enabled */ orig_data = xdp.data; + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); act = bpf_prog_run_xdp(xdp_prog, &xdp); tx_avail = bnxt_tx_avail(bp, txr);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index eff350e0bc2a8..8e1406101f71b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c@@ -554,7 +554,8 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, xdp_prepare_buff(&xdp, hard_start, data - hard_start, len, false); orig_data = xdp.data; - action = bpf_prog_run_xdp(prog, &xdp); + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) + action = bpf_prog_run_xdp(prog, &xdp); len = xdp.data_end - xdp.data; /* Check if XDP program has changed headers */
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index df40c720e7b23..acda3502d274f 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c@@ -1268,6 +1268,7 @@ static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog, length = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM; + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); act = bpf_prog_run_xdp(prog, xdp); switch (act) { case XDP_PASS:
@@ -1309,14 +1310,16 @@ static bool tsnep_xdp_run_prog_zc(struct tsnep_rx *rx, struct bpf_prog *prog, { u32 act; - act = bpf_prog_run_xdp(prog, xdp); + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) { + act = bpf_prog_run_xdp(prog, xdp); - /* XDP_REDIRECT is the main action for zero-copy */ - if (likely(act == XDP_REDIRECT)) { - if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0) - goto out_failure; - *status |= TSNEP_XDP_REDIRECT; - return true; + /* XDP_REDIRECT is the main action for zero-copy */ + if (likely(act == XDP_REDIRECT)) { + if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0) + goto out_failure; + *status |= TSNEP_XDP_REDIRECT; + return true; + } } switch (act) {
--
2.43.0