Inter-revision diff: patch 10

Comparing v20 (message) to v9 (message)

--- v20
+++ v9
@@ -1,52 +1,324 @@
-Enable the capability to receive jumbo frames even if the interface is
-running in XDP mode, if the loaded program declares that it properly
-supports XDP multi-buffer. At the same time, reject an XDP program that
-does not support it if the driver is running in XDP multi-buffer mode.
+From: Eelco Chaudron <echaudro@redhat.com>
 
-Acked-by: John Fastabend <john.fastabend@gmail.com>
+This patch adds support for multi-buffer for the following helpers:
+  - bpf_xdp_output()
+  - bpf_perf_event_output()
+
+Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
 Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
 ---
- drivers/net/ethernet/marvell/mvneta.c | 13 +++++++++----
- 1 file changed, 9 insertions(+), 4 deletions(-)
+ kernel/trace/bpf_trace.c                      |   3 +
+ net/core/filter.c                             |  72 +++++++++-
+ .../selftests/bpf/prog_tests/xdp_bpf2bpf.c    | 127 ++++++++++++------
+ .../selftests/bpf/progs/test_xdp_bpf2bpf.c    |   2 +-
+ 4 files changed, 160 insertions(+), 44 deletions(-)
 
-diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
-index 332699960b53..98db3d03116a 100644
---- a/drivers/net/ethernet/marvell/mvneta.c
-+++ b/drivers/net/ethernet/marvell/mvneta.c
-@@ -3750,6 +3750,7 @@ static void mvneta_percpu_disable(void *arg)
- static int mvneta_change_mtu(struct net_device *dev, int mtu)
+diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
+index d2d7cf6cfe83..ee926ec64f78 100644
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -1365,6 +1365,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
+ 
+ extern const struct bpf_func_proto bpf_skb_output_proto;
+ extern const struct bpf_func_proto bpf_xdp_output_proto;
++extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
+ 
+ BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
+ 	   struct bpf_map *, map, u64, flags)
+@@ -1460,6 +1461,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+ 		return &bpf_sock_from_file_proto;
+ 	case BPF_FUNC_get_socket_cookie:
+ 		return &bpf_get_socket_ptr_cookie_proto;
++	case BPF_FUNC_xdp_get_buff_len:
++		return &bpf_xdp_get_buff_len_trace_proto;
+ #endif
+ 	case BPF_FUNC_seq_printf:
+ 		return prog->expected_attach_type == BPF_TRACE_ITER ?
+diff --git a/net/core/filter.c b/net/core/filter.c
+index b0855f2d4726..f7211b7908a9 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -3939,6 +3939,15 @@ const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
+ 	.arg1_type	= ARG_PTR_TO_CTX,
+ };
+ 
++BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
++
++const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
++	.func		= bpf_xdp_get_buff_len,
++	.gpl_only	= false,
++	.arg1_type	= ARG_PTR_TO_BTF_ID,
++	.arg1_btf_id	= &bpf_xdp_get_buff_len_bpf_ids[0],
++};
++
+ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
  {
- 	struct mvneta_port *pp = netdev_priv(dev);
-+	struct bpf_prog *prog = pp->xdp_prog;
- 	int ret;
- 
- 	if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
-@@ -3758,8 +3759,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
- 		mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
- 	}
- 
--	if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
--		netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
-+	if (prog && !prog->aux->xdp_mb && mtu > MVNETA_MAX_RX_BUF_SIZE) {
-+		netdev_info(dev,
-+			    "Illegal MTU %d for XDP prog without multi-buf\n",
-+			    mtu);
-+
+ 	void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
+@@ -4606,10 +4615,56 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
+ };
+ #endif
+ 
+-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
++static unsigned long bpf_xdp_copy(void *dst_buff, const void *ctx,
+ 				  unsigned long off, unsigned long len)
+ {
+-	memcpy(dst_buff, src_buff + off, len);
++	struct xdp_buff *xdp = (struct xdp_buff *)ctx;
++	struct skb_shared_info *sinfo;
++	unsigned long base_len;
++
++	if (likely(!xdp_buff_is_mb(xdp))) {
++		memcpy(dst_buff, xdp->data + off, len);
++		return 0;
++	}
++
++	base_len = xdp->data_end - xdp->data;
++	sinfo = xdp_get_shared_info_from_buff(xdp);
++	do {
++		const void *src_buff = NULL;
++		unsigned long copy_len = 0;
++
++		if (off < base_len) {
++			src_buff = xdp->data + off;
++			copy_len = min(len, base_len - off);
++		} else {
++			unsigned long frag_off_total = base_len;
++			int i;
++
++			for (i = 0; i < sinfo->nr_frags; i++) {
++				skb_frag_t *frag = &sinfo->frags[i];
++				unsigned long frag_len, frag_off;
++
++				frag_len = skb_frag_size(frag);
++				frag_off = off - frag_off_total;
++				if (frag_off < frag_len) {
++					src_buff = skb_frag_address(frag) +
++						   frag_off;
++					copy_len = min(len,
++						       frag_len - frag_off);
++					break;
++				}
++				frag_off_total += frag_len;
++			}
++		}
++		if (!src_buff)
++			break;
++
++		memcpy(dst_buff, src_buff, copy_len);
++		off += copy_len;
++		len -= copy_len;
++		dst_buff += copy_len;
++	} while (len);
++
+ 	return 0;
+ }
+ 
+@@ -4621,10 +4676,19 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
+ 	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
  		return -EINVAL;
- 	}
- 
-@@ -4428,8 +4432,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
- 	struct mvneta_port *pp = netdev_priv(dev);
- 	struct bpf_prog *old_prog;
- 
--	if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
--		NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
-+	if (prog && !prog->aux->xdp_mb && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
-+		NL_SET_ERR_MSG_MOD(extack,
-+				   "prog does not support XDP multi-buff");
- 		return -EOPNOTSUPP;
- 	}
- 
+ 	if (unlikely(!xdp ||
+-		     xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
++		     (likely(!xdp_buff_is_mb(xdp)) &&
++		      xdp_size > (unsigned long)(xdp->data_end - xdp->data))))
+ 		return -EFAULT;
++	if (unlikely(xdp_buff_is_mb(xdp))) {
++		struct skb_shared_info *sinfo;
++
++		sinfo = xdp_get_shared_info_from_buff(xdp);
++		if (unlikely(xdp_size > ((int)(xdp->data_end - xdp->data) +
++					 sinfo->data_len)))
++			return -EFAULT;
++	}
+ 
+-	return bpf_event_output(map, flags, meta, meta_size, xdp->data,
++	return bpf_event_output(map, flags, meta, meta_size, xdp,
+ 				xdp_size, bpf_xdp_copy);
+ }
+ 
+diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+index 3bd5904b4db5..cc9be5912be8 100644
+--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
++++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+@@ -10,11 +10,20 @@ struct meta {
+ 	int pkt_len;
+ };
+ 
++struct test_ctx_s {
++	bool passed;
++	int pkt_size;
++};
++
++struct test_ctx_s test_ctx;
++
+ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+ {
+-	int duration = 0;
+ 	struct meta *meta = (struct meta *)data;
+ 	struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
++	unsigned char *raw_pkt = data + sizeof(*meta);
++	struct test_ctx_s *tst_ctx = ctx;
++	int duration = 0;
+ 
+ 	if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
+ 		  "check_size", "size %u < %zu\n",
+@@ -25,25 +34,90 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+ 		  "meta->ifindex = %d\n", meta->ifindex))
+ 		return;
+ 
+-	if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
+-		  "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
++	if (CHECK(meta->pkt_len != tst_ctx->pkt_size, "check_meta_pkt_len",
++		  "meta->pkt_len = %d\n", tst_ctx->pkt_size))
+ 		return;
+ 
+ 	if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
+ 		  "check_packet_content", "content not the same\n"))
+ 		return;
+ 
+-	*(bool *)ctx = true;
++	if (meta->pkt_len > sizeof(pkt_v4)) {
++		for (int i = 0; i < (meta->pkt_len - sizeof(pkt_v4)); i++) {
++			if (raw_pkt[i + sizeof(pkt_v4)] != (unsigned char)i) {
++				CHECK(true, "check_packet_content",
++				      "byte %zu does not match %u != %u\n",
++				      i + sizeof(pkt_v4),
++				      raw_pkt[i + sizeof(pkt_v4)],
++				      (unsigned char)i);
++				break;
++			}
++		}
++	}
++
++	tst_ctx->passed = true;
+ }
+ 
+-void test_xdp_bpf2bpf(void)
++static int run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
++				    struct test_xdp_bpf2bpf *ftrace_skel,
++				    int pkt_size)
+ {
+ 	__u32 duration = 0, retval, size;
+-	char buf[128];
++	unsigned char buf_in[9000];
++	unsigned char buf[9000];
++	int err;
++
++	if (pkt_size > sizeof(buf_in) || pkt_size < sizeof(pkt_v4))
++		return -EINVAL;
++
++	test_ctx.passed = false;
++	test_ctx.pkt_size = pkt_size;
++
++	memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
++	if (pkt_size > sizeof(pkt_v4)) {
++		for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
++			buf_in[i + sizeof(pkt_v4)] = i;
++	}
++
++	/* Run test program */
++	err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size,
++				buf, &size, &retval, &duration);
++
++	if (CHECK(err || retval != XDP_PASS || size != pkt_size,
++		  "ipv4", "err %d errno %d retval %d size %d\n",
++		  err, errno, retval, size))
++		return -1;
++
++	/* Make sure bpf_xdp_output() was triggered and it sent the expected
++	 * data to the perf ring buffer.
++	 */
++	err = perf_buffer__poll(pb, 100);
++	if (CHECK(err <= 0, "perf_buffer__poll", "err %d\n", err))
++		return -1;
++
++	if (CHECK_FAIL(!test_ctx.passed))
++		return -1;
++
++	/* Verify test results */
++	if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
++		  "result", "fentry failed err %llu\n",
++		  ftrace_skel->bss->test_result_fentry))
++		return -1;
++
++	if (CHECK(ftrace_skel->bss->test_result_fexit != XDP_PASS, "result",
++		  "fexit failed err %llu\n",
++		  ftrace_skel->bss->test_result_fexit))
++		return -1;
++
++	return 0;
++}
++
++void test_xdp_bpf2bpf(void)
++{
+ 	int err, pkt_fd, map_fd;
+-	bool passed = false;
+-	struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+-	struct iptnl_info value4 = {.family = AF_INET};
++	__u32 duration = 0;
++	int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
++	struct iptnl_info value4 = {.family = AF_INET6};
+ 	struct test_xdp *pkt_skel = NULL;
+ 	struct test_xdp_bpf2bpf *ftrace_skel = NULL;
+ 	struct vip key4 = {.protocol = 6, .family = AF_INET};
+@@ -87,40 +161,15 @@ void test_xdp_bpf2bpf(void)
+ 
+ 	/* Set up perf buffer */
+ 	pb_opts.sample_cb = on_sample;
+-	pb_opts.ctx = &passed;
++	pb_opts.ctx = &test_ctx;
+ 	pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
+-			      1, &pb_opts);
++			      8, &pb_opts);
+ 	if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
+ 		goto out;
+ 
+-	/* Run test program */
+-	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
+-				buf, &size, &retval, &duration);
+-
+-	if (CHECK(err || retval != XDP_TX || size != 74 ||
+-		  iph->protocol != IPPROTO_IPIP, "ipv4",
+-		  "err %d errno %d retval %d size %d\n",
+-		  err, errno, retval, size))
+-		goto out;
+-
+-	/* Make sure bpf_xdp_output() was triggered and it sent the expected
+-	 * data to the perf ring buffer.
+-	 */
+-	err = perf_buffer__poll(pb, 100);
+-	if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+-		goto out;
+-
+-	CHECK_FAIL(!passed);
+-
+-	/* Verify test results */
+-	if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
+-		  "result", "fentry failed err %llu\n",
+-		  ftrace_skel->bss->test_result_fentry))
+-		goto out;
+-
+-	CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
+-	      "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
+-
++	for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
++		run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
++					 pkt_sizes[i]);
+ out:
+ 	if (pb)
+ 		perf_buffer__free(pb);
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+index a038e827f850..902b54190377 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+@@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
+ 	void *data = (void *)(long)xdp->data;
+ 
+ 	meta.ifindex = xdp->rxq->dev->ifindex;
+-	meta.pkt_len = data_end - data;
++	meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
+ 	bpf_xdp_output(xdp, &perf_buf_map,
+ 		       ((__u64) meta.pkt_len << 32) |
+ 		       BPF_F_CURRENT_CPU,
 -- 
-2.33.1
+2.31.1
 
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help