Re: [EXT] Re: [PATCH 1/1] net: fec: add initial XDP support
From: Jesper Dangaard Brouer <hidden>
Date: 2022-09-29 15:46:16
Also in:
imx, lkml
On 29/09/2022 15.11, Shenwei Wang wrote:
quoted
From: Jesper Dangaard Brouer <redacted>
>>
quoted
quoted
diff --git a/drivers/net/ethernet/freescale/fec.hb/drivers/net/ethernet/freescale/fec.h index b0100fe3c9e4..f7531503aa95 100644--- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h@@ -346,8 +346,10 @@ struct bufdesc_ex { * the skbuffer directly. */ +#define FEC_ENET_XDP_HEADROOM (512) /* XDP_PACKET_HEADROOM+ NET_IP_ALIGN) */ Why the large headroom?The accurate value here should be "XDP_PACKET_HEADROOM (256) + NET_IP_ALIGN" which then aligns with 64 bytes. So 256 + 64 should be enough here.
Most other XDP drivers have 256 bytes headroom. I don't understand why you just don't keep this at 256, like other drivers ?
quoted
quoted
+ #define FEC_ENET_RX_PAGES 256 -#define FEC_ENET_RX_FRSIZE 2048 +#define FEC_ENET_RX_FRSIZE (PAGE_SIZE - FEC_ENET_XDP_HEADROOM)This FEC_ENET_RX_FRSIZE is likely wrong, because you also need to reserve 320 bytes at the end for struct skb_shared_info. (320 calculated as SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))quoted
#define FEC_ENET_RX_FRPPG (PAGE_SIZE / FEC_ENET_RX_FRSIZE) #define RX_RING_SIZE (FEC_ENET_RX_FRPPG *FEC_ENET_RX_PAGES)quoted
#define FEC_ENET_TX_FRSIZE 2048@@ -517,6 +519,22 @@ struct bufdesc_prop {[...]quoted
diff --git a/drivers/net/ethernet/freescale/fec_main.cb/drivers/net/ethernet/freescale/fec_main.c index 59921218a8a4..2e30182ed770 100644--- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c@@ -66,6 +66,8 @@ #include <linux/mfd/syscon.h> #include <linux/regmap.h> #include <soc/imx/cpuidle.h> +#include <linux/filter.h> +#include <linux/bpf.h> #include <asm/cacheflush.h>@@ -87,6 +89,11 @@ static const u16 fec_enet_vlan_pri_to_queue[8] = {0, 0,1, 1, 1, 2, 2, 2};quoted
#define FEC_ENET_OPD_V 0xFFF0 #define FEC_MDIO_PM_TIMEOUT 100 /* ms */ +#define FEC_ENET_XDP_PASS 0 +#define FEC_ENET_XDP_CONSUMED BIT(0) +#define FEC_ENET_XDP_TX BIT(1) +#define FEC_ENET_XDP_REDIR BIT(2) + struct fec_devinfo { u32 quirks; };@@ -422,6 +429,49 @@ fec_enet_clear_csum(struct sk_buff *skb, structnet_device *ndev)quoted
return 0; } +static int +fec_enet_create_page_pool(struct fec_enet_private *fep, + struct fec_enet_priv_rx_q *rxq, int size) { + struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog); + struct page_pool_params pp_params = { + .order = 0, + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .pool_size = size, + .nid = dev_to_node(&fep->pdev->dev), + .dev = &fep->pdev->dev, + .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, + .offset = FEC_ENET_XDP_HEADROOM, + .max_len = FEC_ENET_RX_FRSIZE,XDP BPF-prog cannot access last 320 bytes, so FEC_ENET_RX_FRSIZE is wrong here.So the FEC_ENET_RX_FRSIZE should subtract the sizeof(struct skb_shared_info) in the definition, right?
Yes correct, but use: SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
quoted
quoted
+ }; + int err; + + rxq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rxq->page_pool)) { + err = PTR_ERR(rxq->page_pool); + rxq->page_pool = NULL; + return err; + } + + err = xdp_rxq_info_reg(&rxq->xdp_rxq, fep->netdev, rxq->id, 0); + if (err < 0) + goto err_free_pp; + + err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL, + rxq->page_pool); + if (err) + goto err_unregister_rxq; + + return 0; + +err_unregister_rxq: + xdp_rxq_info_unreg(&rxq->xdp_rxq); +err_free_pp: + page_pool_destroy(rxq->page_pool); + rxq->page_pool = NULL; + return err; +} + static struct bufdesc * fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb, @@ -1285,7 +1335,6 @@ fec_stop(struct net_device *ndev) } } - static void fec_timeout(struct net_device *ndev, unsigned int txqueue) {@@ -1450,7 +1499,7 @@ static void fec_enet_tx(struct net_device *ndev) fec_enet_tx_queue(ndev, i); } -static int +static int __maybe_unused fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct sk_buff *skb) { struct fec_enet_private *fep = netdev_priv(ndev); @@ -1470,8 +1519,9 @@ fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc*bdp, struct sk_buffquoted
return 0; } -static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb, - struct bufdesc *bdp, u32 length, bool swap) +static bool __maybe_unused +fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb, + struct bufdesc *bdp, u32 length, bool swap) { struct fec_enet_private *fep = netdev_priv(ndev); struct sk_buff *new_skb;@@ -1496,6 +1546,78 @@ static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb, return true; } +static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq, + struct bufdesc *bdp, int index) { + struct page *new_page; + dma_addr_t phys_addr; + + new_page = page_pool_dev_alloc_pages(rxq->page_pool); + WARN_ON(!new_page); + rxq->rx_skb_info[index].page = new_page; + + rxq->rx_skb_info[index].offset = FEC_ENET_XDP_HEADROOM; + phys_addr = page_pool_get_dma_addr(new_page) + FEC_ENET_XDP_HEADROOM; + bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); } + +static u32 +fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog, + struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, +int index) { + unsigned int sync, len = xdp->data_end - xdp->data; + u32 ret = FEC_ENET_XDP_PASS; + struct page *page; + int err; + u32 act; + + act = bpf_prog_run_xdp(prog, xdp); + + /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */ + sync = xdp->data_end - xdp->data_hard_start - FEC_ENET_XDP_HEADROOM; + sync = max(sync, len); + + switch (act) { + case XDP_PASS: + rxq->stats.xdp_pass++; + ret = FEC_ENET_XDP_PASS; + break; + + case XDP_TX: + rxq->stats.xdp_tx++; + bpf_warn_invalid_xdp_action(fep->netdev, prog, act); + fallthrough;This fallthrough looks wrong. The next xdp_do_redirect() call will pickup left- overs in per CPU bpf_redirect_info.So before the XDP_TX is implemented, this part of codes should reside below the XDP_REDIRECT case?
If that fallthrough goes to dropping packet, then yes.
quoted
quoted
+ + case XDP_REDIRECT: + err = xdp_do_redirect(fep->netdev, xdp, prog); + rxq->stats.xdp_redirect++; - dma_unmap_single(&fep->pdev->dev,...quoted
quoted
- fec32_to_cpu(bdp->cbd_bufaddr), - FEC_ENET_RX_FRSIZE - fep->rx_align, - DMA_FROM_DEVICE); - } - - prefetch(skb->data - NET_IP_ALIGN); + skb = build_skb(page_address(page), FEC_ENET_RX_FRSIZE);This looks wrong, I think FEC_ENET_RX_FRSIZE should be replaced by PAGE_SIZE. As build_skb() does: size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));Agree. As the current FRSIZE definition did not subtract the sizeof(struct skb_shared_info), I happened to not see the problem during the testing.
As I wrote use PAGE_SIZE here.
quoted
quoted
+ skb_reserve(skb, FEC_ENET_XDP_HEADROOM);The skb_reserve looks correct.quoted
skb_put(skb, pkt_len - 4); data = skb->data; + page_pool_release_page(rxq->page_pool, page);Today page_pool have SKB recycle support (you might have looked at drivers that didn't utilize this yet), thus you don't need to release the page (page_pool_release_page) here. Instead you could simply mark the SKB for recycling, unless driver does some page refcnt tricks I didn't notice. skb_mark_for_recycle(skb);
I hope you try out the above proposed change.
quoted
quoted
- if (!is_copybreak && need_swap) + if (need_swap) swap_buffer(data, pkt_len); #if !defined(CONFIG_M5272)@@ -1649,16 +1781,6 @@ fec_enet_rx_queue(struct net_device *ndev, intbudget, u16 queue_id)[...]