Thread (28 messages) 28 messages, 5 authors, 2022-10-07

Re: [EXT] Re: [PATCH 1/1] net: fec: add initial XDP support

From: Jesper Dangaard Brouer <hidden>
Date: 2022-09-29 15:46:16
Also in: imx, lkml


On 29/09/2022 15.11, Shenwei Wang wrote:
quoted
From: Jesper Dangaard Brouer <redacted>
 >>
quoted
quoted
diff --git a/drivers/net/ethernet/freescale/fec.h
b/drivers/net/ethernet/freescale/fec.h
index b0100fe3c9e4..f7531503aa95 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -346,8 +346,10 @@ struct bufdesc_ex {
    * the skbuffer directly.
    */

+#define FEC_ENET_XDP_HEADROOM        (512) /* XDP_PACKET_HEADROOM
+ NET_IP_ALIGN) */

Why the large headroom?
The accurate value here should be "XDP_PACKET_HEADROOM (256) +
NET_IP_ALIGN" which then aligns with 64 bytes. So 256 + 64 should be
enough here.
Most other XDP drivers have 256 bytes headroom.
I don't understand why you just don't keep this at 256, like other drivers ?
quoted
quoted
+
   #define FEC_ENET_RX_PAGES   256
-#define FEC_ENET_RX_FRSIZE   2048
+#define FEC_ENET_RX_FRSIZE   (PAGE_SIZE - FEC_ENET_XDP_HEADROOM)
This FEC_ENET_RX_FRSIZE is likely wrong, because you also need to reserve 320
bytes at the end for struct skb_shared_info.
(320 calculated as SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
quoted
   #define FEC_ENET_RX_FRPPG   (PAGE_SIZE / FEC_ENET_RX_FRSIZE)
   #define RX_RING_SIZE                (FEC_ENET_RX_FRPPG *
FEC_ENET_RX_PAGES)
quoted
   #define FEC_ENET_TX_FRSIZE  2048
@@ -517,6 +519,22 @@ struct bufdesc_prop {
[...]
quoted
diff --git a/drivers/net/ethernet/freescale/fec_main.c
b/drivers/net/ethernet/freescale/fec_main.c
index 59921218a8a4..2e30182ed770 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -66,6 +66,8 @@
   #include <linux/mfd/syscon.h>
   #include <linux/regmap.h>
   #include <soc/imx/cpuidle.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>

   #include <asm/cacheflush.h>
@@ -87,6 +89,11 @@ static const u16 fec_enet_vlan_pri_to_queue[8] = {0, 0,
1, 1, 1, 2, 2, 2};
quoted
   #define FEC_ENET_OPD_V      0xFFF0
   #define FEC_MDIO_PM_TIMEOUT  100 /* ms */

+#define FEC_ENET_XDP_PASS          0
+#define FEC_ENET_XDP_CONSUMED      BIT(0)
+#define FEC_ENET_XDP_TX            BIT(1)
+#define FEC_ENET_XDP_REDIR         BIT(2)
+
   struct fec_devinfo {
       u32 quirks;
   };
@@ -422,6 +429,49 @@ fec_enet_clear_csum(struct sk_buff *skb, struct
net_device *ndev)
quoted
       return 0;
   }

+static int
+fec_enet_create_page_pool(struct fec_enet_private *fep,
+                       struct fec_enet_priv_rx_q *rxq, int size) {
+     struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
+     struct page_pool_params pp_params = {
+             .order = 0,
+             .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+             .pool_size = size,
+             .nid = dev_to_node(&fep->pdev->dev),
+             .dev = &fep->pdev->dev,
+             .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+             .offset = FEC_ENET_XDP_HEADROOM,
+             .max_len = FEC_ENET_RX_FRSIZE,
XDP BPF-prog cannot access last 320 bytes, so FEC_ENET_RX_FRSIZE is wrong
here.
So the FEC_ENET_RX_FRSIZE should subtract the sizeof(struct
skb_shared_info) in the definition, right?
Yes correct, but use:
   SKB_DATA_ALIGN(sizeof(struct skb_shared_info))

quoted
quoted
+     };
+     int err;
+
+     rxq->page_pool = page_pool_create(&pp_params);
+     if (IS_ERR(rxq->page_pool)) {
+             err = PTR_ERR(rxq->page_pool);
+             rxq->page_pool = NULL;
+             return err;
+     }
+
+     err = xdp_rxq_info_reg(&rxq->xdp_rxq, fep->netdev, rxq->id, 0);
+     if (err < 0)
+             goto err_free_pp;
+
+     err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
+                                      rxq->page_pool);
+     if (err)
+             goto err_unregister_rxq;
+
+     return 0;
+
+err_unregister_rxq:
+     xdp_rxq_info_unreg(&rxq->xdp_rxq);
+err_free_pp:
+     page_pool_destroy(rxq->page_pool);
+     rxq->page_pool = NULL;
+     return err;
+}
+
   static struct bufdesc *
   fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
                            struct sk_buff *skb, @@ -1285,7 +1335,6 @@
fec_stop(struct net_device *ndev)
       }
   }

-
   static void
   fec_timeout(struct net_device *ndev, unsigned int txqueue)
   {
@@ -1450,7 +1499,7 @@ static void fec_enet_tx(struct net_device *ndev)
               fec_enet_tx_queue(ndev, i);
   }

-static int
+static int __maybe_unused
   fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct sk_buff *skb)
   {
       struct  fec_enet_private *fep = netdev_priv(ndev); @@ -1470,8
+1519,9 @@ fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc
*bdp, struct sk_buff
quoted
       return 0;
   }

-static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
-                            struct bufdesc *bdp, u32 length, bool swap)
+static bool __maybe_unused
+fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
+                struct bufdesc *bdp, u32 length, bool swap)
   {
       struct  fec_enet_private *fep = netdev_priv(ndev);
       struct sk_buff *new_skb;
@@ -1496,6 +1546,78 @@ static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
       return true;
   }

+static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
+                             struct bufdesc *bdp, int index) {
+     struct page *new_page;
+     dma_addr_t phys_addr;
+
+     new_page = page_pool_dev_alloc_pages(rxq->page_pool);
+     WARN_ON(!new_page);
+     rxq->rx_skb_info[index].page = new_page;
+
+     rxq->rx_skb_info[index].offset = FEC_ENET_XDP_HEADROOM;
+     phys_addr = page_pool_get_dma_addr(new_page) + FEC_ENET_XDP_HEADROOM;
+     bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); }
+
+static u32
+fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
+              struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq,
+int index) {
+     unsigned int sync, len = xdp->data_end - xdp->data;
+     u32 ret = FEC_ENET_XDP_PASS;
+     struct page *page;
+     int err;
+     u32 act;
+
+     act = bpf_prog_run_xdp(prog, xdp);
+
+     /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
+     sync = xdp->data_end - xdp->data_hard_start - FEC_ENET_XDP_HEADROOM;
+     sync = max(sync, len);
+
+     switch (act) {
+     case XDP_PASS:
+             rxq->stats.xdp_pass++;
+             ret = FEC_ENET_XDP_PASS;
+             break;
+
+     case XDP_TX:
+             rxq->stats.xdp_tx++;
+             bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
+             fallthrough;
This fallthrough looks wrong. The next xdp_do_redirect() call will pickup left-
overs in per CPU bpf_redirect_info.
So before the XDP_TX is implemented, this part of codes should reside below the XDP_REDIRECT case?
If that fallthrough goes to dropping packet, then yes.
quoted
quoted
+
+     case XDP_REDIRECT:
+             err = xdp_do_redirect(fep->netdev, xdp, prog);
+             rxq->stats.xdp_redirect++;
-                     dma_unmap_single(&fep->pdev->dev,
...
quoted
quoted
-                                      fec32_to_cpu(bdp->cbd_bufaddr),
-                                      FEC_ENET_RX_FRSIZE - fep->rx_align,
-                                      DMA_FROM_DEVICE);
-             }
-
-             prefetch(skb->data - NET_IP_ALIGN);
+             skb = build_skb(page_address(page), FEC_ENET_RX_FRSIZE);
This looks wrong, I think FEC_ENET_RX_FRSIZE should be replaced by PAGE_SIZE.
As build_skb() does:

   size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
Agree. As the current FRSIZE definition did not subtract the sizeof(struct skb_shared_info), I happened to not see the problem during the testing.
As I wrote use PAGE_SIZE here.

quoted
quoted
+             skb_reserve(skb, FEC_ENET_XDP_HEADROOM);
The skb_reserve looks correct.
quoted
               skb_put(skb, pkt_len - 4);
               data = skb->data;
+             page_pool_release_page(rxq->page_pool, page);
Today page_pool have SKB recycle support (you might have looked at drivers
that didn't utilize this yet), thus you don't need to release the page
(page_pool_release_page) here.  Instead you could simply mark the SKB for
recycling, unless driver does some page refcnt tricks I didn't notice.

   skb_mark_for_recycle(skb);
I hope you try out the above proposed change.
quoted
quoted
-             if (!is_copybreak && need_swap)
+             if (need_swap)
                       swap_buffer(data, pkt_len);

   #if !defined(CONFIG_M5272)
@@ -1649,16 +1781,6 @@ fec_enet_rx_queue(struct net_device *ndev, int
budget, u16 queue_id)
[...]
  
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help