--- v20
+++ v9
@@ -1,52 +1,324 @@
-Enable the capability to receive jumbo frames even if the interface is
-running in XDP mode, if the loaded program declares that it properly
-supports XDP multi-buffer. At the same time, reject an XDP program that
-does not support XDP multi-buffer if the driver runs in multi-buffer mode.
+From: Eelco Chaudron <echaudro@redhat.com>
-Acked-by: John Fastabend <john.fastabend@gmail.com>
+Add multi-buffer support to the following helpers:
+ - bpf_xdp_output()
+ - bpf_perf_event_output()
+
+Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
- drivers/net/ethernet/marvell/mvneta.c | 13 +++++++++----
- 1 file changed, 9 insertions(+), 4 deletions(-)
+ kernel/trace/bpf_trace.c | 3 +
+ net/core/filter.c | 72 +++++++++-
+ .../selftests/bpf/prog_tests/xdp_bpf2bpf.c | 127 ++++++++++++------
+ .../selftests/bpf/progs/test_xdp_bpf2bpf.c | 2 +-
+ 4 files changed, 160 insertions(+), 44 deletions(-)
-diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
-index 332699960b53..98db3d03116a 100644
---- a/drivers/net/ethernet/marvell/mvneta.c
-+++ b/drivers/net/ethernet/marvell/mvneta.c
-@@ -3750,6 +3750,7 @@ static void mvneta_percpu_disable(void *arg)
- static int mvneta_change_mtu(struct net_device *dev, int mtu)
+diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
+index d2d7cf6cfe83..ee926ec64f78 100644
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -1365,6 +1365,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
+
+ extern const struct bpf_func_proto bpf_skb_output_proto;
+ extern const struct bpf_func_proto bpf_xdp_output_proto;
++extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
+
+ BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
+ struct bpf_map *, map, u64, flags)
+@@ -1460,6 +1461,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+ return &bpf_sock_from_file_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_ptr_cookie_proto;
++ case BPF_FUNC_xdp_get_buff_len:
++ return &bpf_xdp_get_buff_len_trace_proto;
+ #endif
+ case BPF_FUNC_seq_printf:
+ return prog->expected_attach_type == BPF_TRACE_ITER ?
+diff --git a/net/core/filter.c b/net/core/filter.c
+index b0855f2d4726..f7211b7908a9 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -3939,6 +3939,15 @@ const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
+ .arg1_type = ARG_PTR_TO_CTX,
+ };
+
++BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
++
++const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
++ .func = bpf_xdp_get_buff_len,
++ .gpl_only = false,
++ .arg1_type = ARG_PTR_TO_BTF_ID,
++ .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0],
++};
++
+ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
- struct mvneta_port *pp = netdev_priv(dev);
-+ struct bpf_prog *prog = pp->xdp_prog;
- int ret;
-
- if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
-@@ -3758,8 +3759,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
- mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
- }
-
-- if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) {
-- netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu);
-+ if (prog && !prog->aux->xdp_mb && mtu > MVNETA_MAX_RX_BUF_SIZE) {
-+ netdev_info(dev,
-+ "Illegal MTU %d for XDP prog without multi-buf\n",
-+ mtu);
-+
+ void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
+@@ -4606,10 +4615,56 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
+ };
+ #endif
+
+-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
++static unsigned long bpf_xdp_copy(void *dst_buff, const void *ctx,
+ unsigned long off, unsigned long len)
+ {
+- memcpy(dst_buff, src_buff + off, len);
++ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
++ struct skb_shared_info *sinfo;
++ unsigned long base_len;
++
++ if (likely(!xdp_buff_is_mb(xdp))) {
++ memcpy(dst_buff, xdp->data + off, len);
++ return 0;
++ }
++
++ base_len = xdp->data_end - xdp->data;
++ sinfo = xdp_get_shared_info_from_buff(xdp);
++ do {
++ const void *src_buff = NULL;
++ unsigned long copy_len = 0;
++
++ if (off < base_len) {
++ src_buff = xdp->data + off;
++ copy_len = min(len, base_len - off);
++ } else {
++ unsigned long frag_off_total = base_len;
++ int i;
++
++ for (i = 0; i < sinfo->nr_frags; i++) {
++ skb_frag_t *frag = &sinfo->frags[i];
++ unsigned long frag_len, frag_off;
++
++ frag_len = skb_frag_size(frag);
++ frag_off = off - frag_off_total;
++ if (frag_off < frag_len) {
++ src_buff = skb_frag_address(frag) +
++ frag_off;
++ copy_len = min(len,
++ frag_len - frag_off);
++ break;
++ }
++ frag_off_total += frag_len;
++ }
++ }
++ if (!src_buff)
++ break;
++
++ memcpy(dst_buff, src_buff, copy_len);
++ off += copy_len;
++ len -= copy_len;
++ dst_buff += copy_len;
++ } while (len);
++
+ return 0;
+ }
+
+@@ -4621,10 +4676,19 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
+ if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
- }
-
-@@ -4428,8 +4432,9 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
- struct mvneta_port *pp = netdev_priv(dev);
- struct bpf_prog *old_prog;
-
-- if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
-- NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
-+ if (prog && !prog->aux->xdp_mb && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
-+ NL_SET_ERR_MSG_MOD(extack,
-+ "prog does not support XDP multi-buff");
- return -EOPNOTSUPP;
- }
-
+ if (unlikely(!xdp ||
+- xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
++ (likely(!xdp_buff_is_mb(xdp)) &&
++ xdp_size > (unsigned long)(xdp->data_end - xdp->data))))
+ return -EFAULT;
++ if (unlikely(xdp_buff_is_mb(xdp))) {
++ struct skb_shared_info *sinfo;
++
++ sinfo = xdp_get_shared_info_from_buff(xdp);
++ if (unlikely(xdp_size > ((int)(xdp->data_end - xdp->data) +
++ sinfo->data_len)))
++ return -EFAULT;
++ }
+
+- return bpf_event_output(map, flags, meta, meta_size, xdp->data,
++ return bpf_event_output(map, flags, meta, meta_size, xdp,
+ xdp_size, bpf_xdp_copy);
+ }
+
+diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+index 3bd5904b4db5..cc9be5912be8 100644
+--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
++++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+@@ -10,11 +10,20 @@ struct meta {
+ int pkt_len;
+ };
+
++struct test_ctx_s {
++ bool passed;
++ int pkt_size;
++};
++
++struct test_ctx_s test_ctx;
++
+ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+ {
+- int duration = 0;
+ struct meta *meta = (struct meta *)data;
+ struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
++ unsigned char *raw_pkt = data + sizeof(*meta);
++ struct test_ctx_s *tst_ctx = ctx;
++ int duration = 0;
+
+ if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
+ "check_size", "size %u < %zu\n",
+@@ -25,25 +34,90 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+ "meta->ifindex = %d\n", meta->ifindex))
+ return;
+
+- if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
+- "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
++ if (CHECK(meta->pkt_len != tst_ctx->pkt_size, "check_meta_pkt_len",
++ "meta->pkt_len = %d\n", tst_ctx->pkt_size))
+ return;
+
+ if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
+ "check_packet_content", "content not the same\n"))
+ return;
+
+- *(bool *)ctx = true;
++ if (meta->pkt_len > sizeof(pkt_v4)) {
++ for (int i = 0; i < (meta->pkt_len - sizeof(pkt_v4)); i++) {
++ if (raw_pkt[i + sizeof(pkt_v4)] != (unsigned char)i) {
++ CHECK(true, "check_packet_content",
++ "byte %zu does not match %u != %u\n",
++ i + sizeof(pkt_v4),
++ raw_pkt[i + sizeof(pkt_v4)],
++ (unsigned char)i);
++ break;
++ }
++ }
++ }
++
++ tst_ctx->passed = true;
+ }
+
+-void test_xdp_bpf2bpf(void)
++static int run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
++ struct test_xdp_bpf2bpf *ftrace_skel,
++ int pkt_size)
+ {
+ __u32 duration = 0, retval, size;
+- char buf[128];
++ unsigned char buf_in[9000];
++ unsigned char buf[9000];
++ int err;
++
++ if (pkt_size > sizeof(buf_in) || pkt_size < sizeof(pkt_v4))
++ return -EINVAL;
++
++ test_ctx.passed = false;
++ test_ctx.pkt_size = pkt_size;
++
++ memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
++ if (pkt_size > sizeof(pkt_v4)) {
++ for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
++ buf_in[i + sizeof(pkt_v4)] = i;
++ }
++
++ /* Run test program */
++ err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size,
++ buf, &size, &retval, &duration);
++
++ if (CHECK(err || retval != XDP_PASS || size != pkt_size,
++ "ipv4", "err %d errno %d retval %d size %d\n",
++ err, errno, retval, size))
++ return -1;
++
++ /* Make sure bpf_xdp_output() was triggered and it sent the expected
++ * data to the perf ring buffer.
++ */
++ err = perf_buffer__poll(pb, 100);
++ if (CHECK(err <= 0, "perf_buffer__poll", "err %d\n", err))
++ return -1;
++
++ if (CHECK_FAIL(!test_ctx.passed))
++ return -1;
++
++ /* Verify test results */
++ if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
++ "result", "fentry failed err %llu\n",
++ ftrace_skel->bss->test_result_fentry))
++ return -1;
++
++ if (CHECK(ftrace_skel->bss->test_result_fexit != XDP_PASS, "result",
++ "fexit failed err %llu\n",
++ ftrace_skel->bss->test_result_fexit))
++ return -1;
++
++ return 0;
++}
++
++void test_xdp_bpf2bpf(void)
++{
+ int err, pkt_fd, map_fd;
+- bool passed = false;
+- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+- struct iptnl_info value4 = {.family = AF_INET};
++ __u32 duration = 0;
++ int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
++ struct iptnl_info value4 = {.family = AF_INET6};
+ struct test_xdp *pkt_skel = NULL;
+ struct test_xdp_bpf2bpf *ftrace_skel = NULL;
+ struct vip key4 = {.protocol = 6, .family = AF_INET};
+@@ -87,40 +161,15 @@ void test_xdp_bpf2bpf(void)
+
+ /* Set up perf buffer */
+ pb_opts.sample_cb = on_sample;
+- pb_opts.ctx = &passed;
++ pb_opts.ctx = &test_ctx;
+ pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
+- 1, &pb_opts);
++ 8, &pb_opts);
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
+ goto out;
+
+- /* Run test program */
+- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
+- buf, &size, &retval, &duration);
+-
+- if (CHECK(err || retval != XDP_TX || size != 74 ||
+- iph->protocol != IPPROTO_IPIP, "ipv4",
+- "err %d errno %d retval %d size %d\n",
+- err, errno, retval, size))
+- goto out;
+-
+- /* Make sure bpf_xdp_output() was triggered and it sent the expected
+- * data to the perf ring buffer.
+- */
+- err = perf_buffer__poll(pb, 100);
+- if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+- goto out;
+-
+- CHECK_FAIL(!passed);
+-
+- /* Verify test results */
+- if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
+- "result", "fentry failed err %llu\n",
+- ftrace_skel->bss->test_result_fentry))
+- goto out;
+-
+- CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
+- "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
+-
++ for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
++ run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
++ pkt_sizes[i]);
+ out:
+ if (pb)
+ perf_buffer__free(pb);
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+index a038e827f850..902b54190377 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+@@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
+ void *data = (void *)(long)xdp->data;
+
+ meta.ifindex = xdp->rxq->dev->ifindex;
+- meta.pkt_len = data_end - data;
++ meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
+ bpf_xdp_output(xdp, &perf_buf_map,
+ ((__u64) meta.pkt_len << 32) |
+ BPF_F_CURRENT_CPU,
--
-2.33.1
+2.31.1