[net-next V8 PATCH 16/16] xdp: transition into using xdp_frame for ndo_xdp_xmit
From: Jesper Dangaard Brouer <hidden>
Date: 2018-03-31 12:07:09
Subsystem:
bpf [general] (safe dynamic programs and tools), bpf [networking] (tcx & tc bpf, sock_addr), intel ethernet drivers, networking drivers, networking [general], the rest, tun/tap driver, virtio net driver · Maintainers:
Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman, Kumar Kartikeya Dwivedi, Martin KaFai Lau, Tony Nguyen, Przemek Kitszel, Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds, Willem de Bruijn, Jason Wang, "Michael S. Tsirkin"
Change the ndo_xdp_xmit API to take a struct xdp_frame instead of a struct
xdp_buff. This brings xdp_return_frame and ndo_xdp_xmit in sync.
This builds towards changing the API further to become a bulk API,
because xdp_buff is not a queue-able object while xdp_frame is.
V4: Adjust for commit 59655a5b6c83 ("tuntap: XDP_TX can use native XDP")
V7: Adjust for commit d9314c474d4f ("i40e: add support for XDP_REDIRECT")
Signed-off-by: Jesper Dangaard Brouer <redacted>
---
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 30 ++++++++++++++-----------
drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 21 +++++++++---------
drivers/net/tun.c | 19 ++++++++++------
drivers/net/virtio_net.c | 24 ++++++++++++--------
include/linux/netdevice.h | 4 ++-
net/core/filter.c | 17 +++++++++++++-
7 files changed, 72 insertions(+), 45 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index c8bf4d35fdea..87fb27ab9c24 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c@@ -2203,9 +2203,20 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, #define I40E_XDP_CONSUMED 1 #define I40E_XDP_TX 2 -static int i40e_xmit_xdp_ring(struct xdp_buff *xdp, +static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf, struct i40e_ring *xdp_ring); +static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, + struct i40e_ring *xdp_ring) +{ + struct xdp_frame *xdpf = convert_to_xdp_frame(xdp); + + if (unlikely(!xdpf)) + return I40E_XDP_CONSUMED; + + return i40e_xmit_xdp_ring(xdpf, xdp_ring); +} + /** * i40e_run_xdp - run an XDP program * @rx_ring: Rx ring being processed
@@ -2233,7 +2244,7 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, break; case XDP_TX: xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; - result = i40e_xmit_xdp_ring(xdp, xdp_ring); + result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring); break; case XDP_REDIRECT: err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
@@ -3480,21 +3491,14 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, * @xdp: data to transmit * @xdp_ring: XDP Tx ring **/ -static int i40e_xmit_xdp_ring(struct xdp_buff *xdp, +static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf, struct i40e_ring *xdp_ring) { u16 i = xdp_ring->next_to_use; struct i40e_tx_buffer *tx_bi; struct i40e_tx_desc *tx_desc; - struct xdp_frame *xdpf; + u32 size = xdpf->len; dma_addr_t dma; - u32 size; - - xdpf = convert_to_xdp_frame(xdp); - if (unlikely(!xdpf)) - return I40E_XDP_CONSUMED; - - size = xdpf->len; if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) { xdp_ring->tx_stats.tx_busy++;
@@ -3684,7 +3688,7 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) * * Returns Zero if sent, else an error code **/ -int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) { struct i40e_netdev_priv *np = netdev_priv(dev); unsigned int queue_index = smp_processor_id();
@@ -3697,7 +3701,7 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs) return -ENXIO; - err = i40e_xmit_xdp_ring(xdp, vsi->xdp_rings[queue_index]); + err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]); if (err != I40E_XDP_TX) return -ENOSPC;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 857b1d743c8d..4bf318b8be85 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h@@ -511,7 +511,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); void i40e_detect_recover_hung(struct i40e_vsi *vsi); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40e_chk_linearize(struct sk_buff *skb); -int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp); +int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf); void i40e_xdp_flush(struct net_device *dev); /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 4f2864165723..0daccaf72a30 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c@@ -2262,7 +2262,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, #define IXGBE_XDP_TX 2 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, - struct xdp_buff *xdp); + struct xdp_frame *xdpf); static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, struct ixgbe_ring *rx_ring,
@@ -2270,6 +2270,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, { int err, result = IXGBE_XDP_PASS; struct bpf_prog *xdp_prog; + struct xdp_frame *xdpf; u32 act; rcu_read_lock();
@@ -2283,7 +2284,12 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, case XDP_PASS: break; case XDP_TX: - result = ixgbe_xmit_xdp_ring(adapter, xdp); + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) { + result = IXGBE_XDP_CONSUMED; + break; + } + result = ixgbe_xmit_xdp_ring(adapter, xdpf); break; case XDP_REDIRECT: err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
@@ -8344,20 +8350,15 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb, } static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, - struct xdp_buff *xdp) + struct xdp_frame *xdpf) { struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()]; struct ixgbe_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; - struct xdp_frame *xdpf; u32 len, cmd_type; dma_addr_t dma; u16 i; - xdpf = convert_to_xdp_frame(xdp); - if (unlikely(!xdpf)) - return -EOVERFLOW; - len = xdpf->len; if (unlikely(!ixgbe_desc_unused(ring)))
@@ -10010,7 +10011,7 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } -static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_ring *ring;
@@ -10026,7 +10027,7 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) if (unlikely(!ring)) return -ENXIO; - err = ixgbe_xmit_xdp_ring(adapter, xdp); + err = ixgbe_xmit_xdp_ring(adapter, xdpf); if (err != IXGBE_XDP_TX) return -ENOSPC;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a6a74e896430..46ac5dd79fa3 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c@@ -1294,18 +1294,13 @@ static const struct net_device_ops tun_netdev_ops = { .ndo_get_stats64 = tun_net_get_stats64, }; -static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame) { struct tun_struct *tun = netdev_priv(dev); - struct xdp_frame *frame; struct tun_file *tfile; u32 numqueues; int ret = 0; - frame = convert_to_xdp_frame(xdp); - if (unlikely(!frame)) - return -EOVERFLOW; - rcu_read_lock(); numqueues = READ_ONCE(tun->numqueues);
@@ -1329,6 +1324,16 @@ static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) return ret; } +static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) +{ + struct xdp_frame *frame = convert_to_xdp_frame(xdp); + + if (unlikely(!frame)) + return -EOVERFLOW; + + return tun_xdp_xmit(dev, frame); +} + static void tun_xdp_flush(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev);
@@ -1676,7 +1681,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, case XDP_TX: get_page(alloc_frag->page); alloc_frag->offset += buflen; - if (tun_xdp_xmit(tun->dev, &xdp)) + if (tun_xdp_tx(tun->dev, &xdp)) goto err_redirect; tun_xdp_flush(tun->dev); rcu_read_unlock();
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 026443ae945f..8307a85d634d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c@@ -416,10 +416,10 @@ static void virtnet_xdp_flush(struct net_device *dev) } static bool __virtnet_xdp_xmit(struct virtnet_info *vi, - struct xdp_buff *xdp) + struct xdp_frame *xdpf) { struct virtio_net_hdr_mrg_rxbuf *hdr; - struct xdp_frame *xdpf, *xdpf_sent; + struct xdp_frame *xdpf_sent; struct send_queue *sq; unsigned int len; unsigned int qp;
@@ -432,10 +432,6 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi, while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) xdp_return_frame(xdpf_sent); - xdpf = convert_to_xdp_frame(xdp); - if (unlikely(!xdpf)) - return -EOVERFLOW; - /* virtqueue want to use data area in-front of packet */ if (unlikely(xdpf->metasize > 0)) return -EOPNOTSUPP;
@@ -459,7 +455,7 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi, return true; } -static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq = vi->rq;
@@ -473,7 +469,7 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) if (!xdp_prog) return -ENXIO; - sent = __virtnet_xdp_xmit(vi, xdp); + sent = __virtnet_xdp_xmit(vi, xdpf); if (!sent) return -ENOSPC; return 0;
@@ -574,6 +570,7 @@ static struct sk_buff *receive_small(struct net_device *dev, xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset; + struct xdp_frame *xdpf; struct xdp_buff xdp; void *orig_data; u32 act;
@@ -616,7 +613,10 @@ static struct sk_buff *receive_small(struct net_device *dev, delta = orig_data - xdp.data; break; case XDP_TX: - sent = __virtnet_xdp_xmit(vi, &xdp); + xdpf = convert_to_xdp_frame(&xdp); + if (unlikely(!xdpf)) + goto err_xdp; + sent = __virtnet_xdp_xmit(vi, xdpf); if (unlikely(!sent)) { trace_xdp_exception(vi->dev, xdp_prog, act); goto err_xdp;
@@ -708,6 +708,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { + struct xdp_frame *xdpf; struct page *xdp_page; struct xdp_buff xdp; void *data;
@@ -772,7 +773,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } break; case XDP_TX: - sent = __virtnet_xdp_xmit(vi, &xdp); + xdpf = convert_to_xdp_frame(&xdp); + if (unlikely(!xdpf)) + goto err_xdp; + sent = __virtnet_xdp_xmit(vi, xdpf); if (unlikely(!sent)) { trace_xdp_exception(vi->dev, xdp_prog, act); if (unlikely(xdp_page != page))
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cf44503ea81a..14e0777ffcfb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h@@ -1165,7 +1165,7 @@ struct dev_ifalias { * This function is used to set or query state related to XDP on the * netdevice and manage BPF offload. See definition of * enum bpf_netdev_command for details. - * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); + * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp); * This function is used to submit a XDP packet for transmit on a * netdevice. * void (*ndo_xdp_flush)(struct net_device *dev);
@@ -1356,7 +1356,7 @@ struct net_device_ops { int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf); int (*ndo_xdp_xmit)(struct net_device *dev, - struct xdp_buff *xdp); + struct xdp_frame *xdp); void (*ndo_xdp_flush)(struct net_device *dev); };
diff --git a/net/core/filter.c b/net/core/filter.c
index 00c711c5f1a2..2f5e185cc82b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c@@ -2748,13 +2748,18 @@ static int __bpf_tx_xdp(struct net_device *dev, struct xdp_buff *xdp, u32 index) { + struct xdp_frame *xdpf; int err; if (!dev->netdev_ops->ndo_xdp_xmit) { return -EOPNOTSUPP; } - err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) + return -EOVERFLOW; + + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf); if (err) return err; dev->netdev_ops->ndo_xdp_flush(dev);
@@ -2770,11 +2775,19 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, if (map->map_type == BPF_MAP_TYPE_DEVMAP) { struct net_device *dev = fwd; + struct xdp_frame *xdpf; if (!dev->netdev_ops->ndo_xdp_xmit) return -EOPNOTSUPP; - err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) + return -EOVERFLOW; + + /* TODO: move to inside map code instead, for bulk support + * err = dev_map_enqueue(dev, xdp); + */ + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf); if (err) return err; __dev_map_insert_ctx(map, index);