[bpf-next V3 PATCH 14/15] xdp: transition into using xdp_frame for return API
From: Jesper Dangaard Brouer <hidden>
Date: 2018-03-09 20:56:16
Subsystem:
bpf [general] (safe dynamic programs and tools), intel ethernet drivers, mellanox ethernet driver (mlx5e), mellanox mlx5 core vpi driver, networking drivers, networking [general], the rest, tun/tap driver, virtio net driver, xdp (express data path) · Maintainers:
Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman, Kumar Kartikeya Dwivedi, Tony Nguyen, Przemek Kitszel, Saeed Mahameed, Tariq Toukan, Mark Bloch, Leon Romanovsky, Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds, Willem de Bruijn, Jason Wang, "Michael S. Tsirkin", David S. Miller, Jesper Dangaard Brouer, John Fastabend
Changing API xdp_return_frame() to take struct xdp_frame as argument, seems like a natural choice. But there are some subtle performance details here that needs extra care, which is a deliberate choice. When de-referencing xdp_frame on a remote CPU during DMA-TX completion, result in the cache-line is change to "Shared" state. Later when the page is reused for RX, then this xdp_frame cache-line is written, which change the state to "Modified". This situation already happens (naturally) for, virtio_net, tun and cpumap as the xdp_frame pointer is the queued object. In tun and cpumap, the ptr_ring is used for efficiently transferring cache-lines (with pointers) between CPUs. Thus, the only option is to de-referencing xdp_frame. It is only the ixgbe driver that had an optimization, in which it can avoid doing the de-reference of xdp_frame. The driver already have TX-ring queue, which (in case of remote DMA-TX completion) have to be transferred between CPUs anyhow. In this data area, we stored a struct xdp_mem_info and a data pointer, which allowed us to avoid de-referencing xdp_frame. To compensate for this, a prefetchw is used for telling the cache coherency protocol about our access pattern. My benchmarks show that this prefetchw is enough to compensate the ixgbe driver. Signed-off-by: Jesper Dangaard Brouer <redacted> --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 4 +--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 17 +++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 + drivers/net/tun.c | 4 ++-- drivers/net/virtio_net.c | 2 +- include/net/xdp.h | 2 +- kernel/bpf/cpumap.c | 6 +++--- net/core/xdp.c | 4 +++- 8 files changed, 23 insertions(+), 17 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index cbc20f199364..dfbc15a45cb4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h@@ -240,8 +240,7 @@ struct ixgbe_tx_buffer { unsigned long time_stamp; union { struct sk_buff *skb; - /* XDP uses address ptr on irq_clean */ - void *data; + struct xdp_frame *xdpf; }; unsigned int bytecount; unsigned short gso_segs;
@@ -249,7 +248,6 @@ struct ixgbe_tx_buffer { DEFINE_DMA_UNMAP_ADDR(dma); DEFINE_DMA_UNMAP_LEN(len); u32 tx_flags; - struct xdp_mem_info xdp_mem; }; struct ixgbe_rx_buffer {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index ff069597fccf..e6e9b28ecfba 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c@@ -1207,7 +1207,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, /* free the skb */ if (ring_is_xdp(tx_ring)) - xdp_return_frame(tx_buffer->data, &tx_buffer->xdp_mem); + xdp_return_frame(tx_buffer->xdpf); else napi_consume_skb(tx_buffer->skb, napi_budget);
@@ -2376,6 +2376,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, xdp.data_hard_start = xdp.data - ixgbe_rx_offset(rx_ring); xdp.data_end = xdp.data + size; + prefetchw(xdp.data_hard_start); /* xdp_frame write */ skb = ixgbe_run_xdp(adapter, rx_ring, &xdp); }
@@ -5787,7 +5788,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring) /* Free all the Tx ring sk_buffs */ if (ring_is_xdp(tx_ring)) - xdp_return_frame(tx_buffer->data, &tx_buffer->xdp_mem); + xdp_return_frame(tx_buffer->xdpf); else dev_kfree_skb_any(tx_buffer->skb);
@@ -8333,16 +8334,21 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()]; struct ixgbe_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; + struct xdp_frame *xdpf; u32 len, cmd_type; dma_addr_t dma; u16 i; - len = xdp->data_end - xdp->data; + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) + return -EOVERFLOW; + + len = xdpf->len; if (unlikely(!ixgbe_desc_unused(ring))) return IXGBE_XDP_CONSUMED; - dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE); + dma = dma_map_single(ring->dev, xdpf->data, len, DMA_TO_DEVICE); if (dma_mapping_error(ring->dev, dma)) return IXGBE_XDP_CONSUMED;
@@ -8357,8 +8363,7 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, dma_unmap_len_set(tx_buffer, len, len); dma_unmap_addr_set(tx_buffer, dma, dma); - tx_buffer->data = xdp->data; - tx_buffer->xdp_mem = xdp->rxq->mem; + tx_buffer->xdpf = xdpf; tx_desc->read.buffer_addr = cpu_to_le64(dma);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 2ac78b88fc3d..00b9b13d9fea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c@@ -896,6 +896,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, di->addr + wi->offset, 0, frag_size, DMA_FROM_DEVICE); + prefetchw(va); /* xdp_frame data area */ prefetch(data); wi->offset += frag_size;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8bb61bd11c6f..e57397d4aec2 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c@@ -663,7 +663,7 @@ static void tun_ptr_free(void *ptr) if (tun_is_xdp_frame(ptr)) { struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); - xdp_return_frame(xdpf->data, &xdpf->mem); + xdp_return_frame(xdpf); } else { __skb_array_destroy_skb(ptr); }
@@ -2185,7 +2185,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); ret = tun_put_user_xdp(tun, tfile, xdpf, to); - xdp_return_frame(xdpf->data, &xdpf->mem); + xdp_return_frame(xdpf); } else { struct sk_buff *skb = ptr;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6c4220450506..9fdb70fe74fe 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c@@ -430,7 +430,7 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi, /* Free up any pending old buffers before queueing new ones. */ while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) - xdp_return_frame(xdpf_sent->data, &xdpf_sent->mem); + xdp_return_frame(xdpf_sent); xdpf = convert_to_xdp_frame(xdp); if (unlikely(!xdpf))
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 98b55eaf8fd7..35aa9825fdd0 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h@@ -103,7 +103,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) return xdp_frame; } -void xdp_return_frame(void *data, struct xdp_mem_info *mem); +void xdp_return_frame(struct xdp_frame *xdpf); int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, struct net_device *dev, u32 queue_index);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index bcdc4dea5ce7..c95b04ec103e 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c@@ -219,7 +219,7 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring) while ((xdpf = ptr_ring_consume(ring))) if (WARN_ON_ONCE(xdpf)) - xdp_return_frame(xdpf->data, &xdpf->mem); + xdp_return_frame(xdpf); } static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
@@ -275,7 +275,7 @@ static int cpu_map_kthread_run(void *data) skb = cpu_map_build_skb(rcpu, xdpf); if (!skb) { - xdp_return_frame(xdpf->data, &xdpf->mem); + xdp_return_frame(xdpf); continue; }
@@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, err = __ptr_ring_produce(q, xdpf); if (err) { drops++; - xdp_return_frame(xdpf->data, &xdpf->mem); + xdp_return_frame(xdpf); } processed++; }
diff --git a/net/core/xdp.c b/net/core/xdp.c
index fe8e87abc266..6ed3d73a73be 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c@@ -294,9 +294,11 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, } EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); -void xdp_return_frame(void *data, struct xdp_mem_info *mem) +void xdp_return_frame(struct xdp_frame *xdpf) { struct xdp_mem_allocator *xa = NULL; + struct xdp_mem_info *mem = &xdpf->mem; + void *data = xdpf->data; rcu_read_lock(); if (mem->id)