--- v11
+++ v9
@@ -1,180 +1,141 @@
-Introduce the capability to map non-linear xdp buffers when running
-mvneta_xdp_submit_frame() for XDP_TX and XDP_REDIRECT.
+From: Eelco Chaudron <echaudro@redhat.com>
+This change adds support for tail growing and shrinking for XDP multi-buff.
+
+Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
- drivers/net/ethernet/marvell/mvneta.c | 112 +++++++++++++++++---------
- 1 file changed, 76 insertions(+), 36 deletions(-)
+ include/net/xdp.h | 7 ++++++
+ net/core/filter.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++
+ net/core/xdp.c | 5 ++--
+ 3 files changed, 72 insertions(+), 2 deletions(-)
-diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
-index b996eb49d813..dde1c28b0ea8 100644
---- a/drivers/net/ethernet/marvell/mvneta.c
-+++ b/drivers/net/ethernet/marvell/mvneta.c
-@@ -1856,8 +1856,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
- bytes_compl += buf->skb->len;
- pkts_compl++;
- dev_kfree_skb_any(buf->skb);
-- } else if (buf->type == MVNETA_TYPE_XDP_TX ||
-- buf->type == MVNETA_TYPE_XDP_NDO) {
-+ } else if ((buf->type == MVNETA_TYPE_XDP_TX ||
-+ buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) {
- if (napi && buf->type == MVNETA_TYPE_XDP_TX)
- xdp_return_frame_rx_napi(buf->xdpf);
- else
-@@ -2051,47 +2051,87 @@ mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+diff --git a/include/net/xdp.h b/include/net/xdp.h
+index 935a6f83115f..3525801c6ed5 100644
+--- a/include/net/xdp.h
++++ b/include/net/xdp.h
+@@ -132,6 +132,11 @@ xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
+ return (struct skb_shared_info *)xdp_data_hard_end(xdp);
+ }
- static int
- mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
-- struct xdp_frame *xdpf, bool dma_map)
-+ struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map)
- {
-- struct mvneta_tx_desc *tx_desc;
-- struct mvneta_tx_buf *buf;
-- dma_addr_t dma_addr;
-+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
-+ struct device *dev = pp->dev->dev.parent;
-+ struct mvneta_tx_desc *tx_desc = NULL;
-+ int i, num_frames = 1;
-+ struct page *page;
++static inline unsigned int xdp_get_frag_tailroom(const skb_frag_t *frag)
++{
++ return PAGE_SIZE - skb_frag_size(frag) - skb_frag_off(frag);
++}
+
-+ if (unlikely(xdp_frame_is_mb(xdpf)))
-+ num_frames += sinfo->nr_frags;
+ struct xdp_frame {
+ void *data;
+ u16 len;
+@@ -259,6 +264,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
+ return xdp_frame;
+ }
-- if (txq->count >= txq->tx_stop_threshold)
-+ if (txq->count + num_frames >= txq->size)
- return MVNETA_XDP_DROPPED;
++void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
++ struct xdp_buff *xdp);
+ void xdp_return_frame(struct xdp_frame *xdpf);
+ void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
+ void xdp_return_buff(struct xdp_buff *xdp);
+diff --git a/net/core/filter.c b/net/core/filter.c
+index caa88955562e..05f574a3d690 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -3859,11 +3859,73 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
+ .arg2_type = ARG_ANYTHING,
+ };
-- tx_desc = mvneta_txq_next_desc_get(txq);
-+ for (i = 0; i < num_frames; i++) {
-+ struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
-+ skb_frag_t *frag = NULL;
-+ int len = xdpf->len;
-+ dma_addr_t dma_addr;
-
-- buf = &txq->buf[txq->txq_put_index];
-- if (dma_map) {
-- /* ndo_xdp_xmit */
-- dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data,
-- xdpf->len, DMA_TO_DEVICE);
-- if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) {
-- mvneta_txq_desc_put(txq);
-- return MVNETA_XDP_DROPPED;
-+ if (unlikely(i)) { /* paged area */
-+ frag = &sinfo->frags[i - 1];
-+ len = skb_frag_size(frag);
- }
-- buf->type = MVNETA_TYPE_XDP_NDO;
-- } else {
-- struct page *page = virt_to_page(xdpf->data);
-
-- dma_addr = page_pool_get_dma_addr(page) +
-- sizeof(*xdpf) + xdpf->headroom;
-- dma_sync_single_for_device(pp->dev->dev.parent, dma_addr,
-- xdpf->len, DMA_BIDIRECTIONAL);
-- buf->type = MVNETA_TYPE_XDP_TX;
-+ tx_desc = mvneta_txq_next_desc_get(txq);
-+ if (dma_map) {
-+ /* ndo_xdp_xmit */
-+ void *data;
++static int bpf_xdp_mb_adjust_tail(struct xdp_buff *xdp, int offset)
++{
++ struct skb_shared_info *sinfo;
+
-+ data = unlikely(frag) ? skb_frag_address(frag)
-+ : xdpf->data;
-+ dma_addr = dma_map_single(dev, data, len,
-+ DMA_TO_DEVICE);
-+ if (dma_mapping_error(dev, dma_addr)) {
-+ mvneta_txq_desc_put(txq);
-+ goto unmap;
++ if (unlikely(!xdp_buff_is_mb(xdp)))
++ return -EINVAL;
++
++ sinfo = xdp_get_shared_info_from_buff(xdp);
++ if (offset >= 0) {
++ skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
++ int size;
++
++ if (unlikely(offset > xdp_get_frag_tailroom(frag)))
++ return -EINVAL;
++
++ size = skb_frag_size(frag);
++ memset(skb_frag_address(frag) + size, 0, offset);
++ skb_frag_size_set(frag, size + offset);
++ sinfo->data_len += offset;
++ } else {
++ int i, n_frags_free = 0, len_free = 0;
++
++ offset = abs(offset);
++ if (unlikely(offset > ((int)(xdp->data_end - xdp->data) +
++ sinfo->data_len - ETH_HLEN)))
++ return -EINVAL;
++
++ for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
++ skb_frag_t *frag = &sinfo->frags[i];
++ int size = skb_frag_size(frag);
++ int shrink = min_t(int, offset, size);
++
++ len_free += shrink;
++ offset -= shrink;
++
++ if (unlikely(size == shrink)) {
++ struct page *page = skb_frag_page(frag);
++
++ __xdp_return(page_address(page), &xdp->rxq->mem,
++ false, NULL);
++ n_frags_free++;
++ } else {
++ skb_frag_size_set(frag, size - shrink);
++ break;
+ }
++ }
++ sinfo->nr_frags -= n_frags_free;
++ sinfo->data_len -= len_free;
+
-+ buf->type = MVNETA_TYPE_XDP_NDO;
-+ } else {
-+ page = unlikely(frag) ? skb_frag_page(frag)
-+ : virt_to_page(xdpf->data);
-+ dma_addr = page_pool_get_dma_addr(page);
-+ if (unlikely(frag))
-+ dma_addr += skb_frag_off(frag);
-+ else
-+ dma_addr += sizeof(*xdpf) + xdpf->headroom;
-+ dma_sync_single_for_device(dev, dma_addr, len,
-+ DMA_BIDIRECTIONAL);
-+ buf->type = MVNETA_TYPE_XDP_TX;
-+ }
-+ buf->xdpf = unlikely(i) ? NULL : xdpf;
++ if (unlikely(!sinfo->nr_frags))
++ xdp_buff_clear_mb(xdp);
+
-+ tx_desc->command = unlikely(i) ? 0 : MVNETA_TXD_F_DESC;
-+ tx_desc->buf_phys_addr = dma_addr;
-+ tx_desc->data_size = len;
-+ *nxmit_byte += len;
-+
-+ mvneta_txq_inc_put(txq);
- }
-- buf->xdpf = xdpf;
-
-- tx_desc->command = MVNETA_TXD_FLZ_DESC;
-- tx_desc->buf_phys_addr = dma_addr;
-- tx_desc->data_size = xdpf->len;
-+ /* last descriptor */
-+ if (likely(tx_desc))
-+ tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD;
-
-- mvneta_txq_inc_put(txq);
-- txq->pending++;
-- txq->count++;
-+ txq->pending += num_frames;
-+ txq->count += num_frames;
-
- return MVNETA_XDP_TX;
-+
-+unmap:
-+ for (i--; i >= 0; i--) {
-+ mvneta_txq_desc_put(txq);
-+ tx_desc = txq->descs + txq->next_desc_to_proc;
-+ dma_unmap_single(dev, tx_desc->buf_phys_addr,
-+ tx_desc->data_size,
-+ DMA_TO_DEVICE);
++ if (unlikely(offset > 0))
++ xdp->data_end -= offset;
+ }
+
-+ return MVNETA_XDP_DROPPED;
++ return 0;
++}
++
+ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
+ {
+ void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
+ void *data_end = xdp->data_end + offset;
+
++ if (unlikely(xdp_buff_is_mb(xdp)))
++ return bpf_xdp_mb_adjust_tail(xdp, offset);
++
+ /* Notice that xdp_data_hard_end have reserved some tailroom */
+ if (unlikely(data_end > data_hard_end))
+ return -EINVAL;
+diff --git a/net/core/xdp.c b/net/core/xdp.c
+index 71bedf6049a1..ffd70d3e9e5d 100644
+--- a/net/core/xdp.c
++++ b/net/core/xdp.c
+@@ -338,8 +338,8 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
+ * is used for those calls sites. Thus, allowing for faster recycling
+ * of xdp_frames/pages in those cases.
+ */
+-static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+- struct xdp_buff *xdp)
++void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
++ struct xdp_buff *xdp)
+ {
+ struct xdp_mem_allocator *xa;
+ struct page *page;
+@@ -372,6 +372,7 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+ break;
+ }
}
++EXPORT_SYMBOL_GPL(__xdp_return);
- static int
-@@ -2100,8 +2140,8 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
- struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
- struct mvneta_tx_queue *txq;
- struct netdev_queue *nq;
-+ int cpu, nxmit_byte = 0;
- struct xdp_frame *xdpf;
-- int cpu;
- u32 ret;
-
- xdpf = xdp_convert_buff_to_frame(xdp);
-@@ -2113,10 +2153,10 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
- nq = netdev_get_tx_queue(pp->dev, txq->id);
-
- __netif_tx_lock(nq, cpu);
-- ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false);
-+ ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false);
- if (ret == MVNETA_XDP_TX) {
- u64_stats_update_begin(&stats->syncp);
-- stats->es.ps.tx_bytes += xdpf->len;
-+ stats->es.ps.tx_bytes += nxmit_byte;
- stats->es.ps.tx_packets++;
- stats->es.ps.xdp_tx++;
- u64_stats_update_end(&stats->syncp);
-@@ -2155,11 +2195,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame,
-
- __netif_tx_lock(nq, cpu);
- for (i = 0; i < num_frame; i++) {
-- ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
-+ ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte,
-+ true);
- if (ret != MVNETA_XDP_TX)
- break;
-
-- nxmit_byte += frames[i]->len;
- nxmit++;
- }
-
+ void xdp_return_frame(struct xdp_frame *xdpf)
+ {
--
2.31.1