--- v3
+++ v9
@@ -1,130 +1,324 @@
-Introduce xdp multi-buffer selftest for the following ebpf helpers:
-- bpf_xdp_get_frags_total_size
-- bpf_xdp_get_frag_count
+From: Eelco Chaudron <echaudro@redhat.com>
-Co-developed-by: Eelco Chaudron <echaudro@redhat.com>
+Add multi-buffer support to the following helpers:
+ - bpf_xdp_output()
+ - bpf_perf_event_output()
+
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
- .../testing/selftests/bpf/prog_tests/xdp_mb.c | 77 +++++++++++++++++++
- .../selftests/bpf/progs/test_xdp_multi_buff.c | 24 ++++++
- 2 files changed, 101 insertions(+)
- create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_mb.c
- create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_multi_buff.c
+ kernel/trace/bpf_trace.c | 3 +
+ net/core/filter.c | 72 +++++++++-
+ .../selftests/bpf/prog_tests/xdp_bpf2bpf.c | 127 ++++++++++++------
+ .../selftests/bpf/progs/test_xdp_bpf2bpf.c | 2 +-
+ 4 files changed, 160 insertions(+), 44 deletions(-)
-diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_mb.c b/tools/testing/selftests/bpf/prog_tests/xdp_mb.c
-new file mode 100644
-index 000000000000..8cfe7253bf2a
---- /dev/null
-+++ b/tools/testing/selftests/bpf/prog_tests/xdp_mb.c
-@@ -0,0 +1,77 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include <unistd.h>
-+#include <linux/kernel.h>
-+#include <test_progs.h>
-+#include <network_helpers.h>
-+
-+#include "test_xdp_multi_buff.skel.h"
-+
-+static void test_xdp_mb_check_len(void)
+diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
+index d2d7cf6cfe83..ee926ec64f78 100644
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -1365,6 +1365,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
+
+ extern const struct bpf_func_proto bpf_skb_output_proto;
+ extern const struct bpf_func_proto bpf_xdp_output_proto;
++extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
+
+ BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
+ struct bpf_map *, map, u64, flags)
+@@ -1460,6 +1461,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+ return &bpf_sock_from_file_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_ptr_cookie_proto;
++ case BPF_FUNC_xdp_get_buff_len:
++ return &bpf_xdp_get_buff_len_trace_proto;
+ #endif
+ case BPF_FUNC_seq_printf:
+ return prog->expected_attach_type == BPF_TRACE_ITER ?
+diff --git a/net/core/filter.c b/net/core/filter.c
+index b0855f2d4726..f7211b7908a9 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -3939,6 +3939,15 @@ const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
+ .arg1_type = ARG_PTR_TO_CTX,
+ };
+
++BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
++
++const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
++ .func = bpf_xdp_get_buff_len,
++ .gpl_only = false,
++ .arg1_type = ARG_PTR_TO_BTF_ID,
++ .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0],
++};
++
+ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
+ {
+ void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
+@@ -4606,10 +4615,56 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
+ };
+ #endif
+
+-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
++static unsigned long bpf_xdp_copy(void *dst_buff, const void *ctx,
+ unsigned long off, unsigned long len)
+ {
+- memcpy(dst_buff, src_buff + off, len);
++ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
++ struct skb_shared_info *sinfo;
++ unsigned long base_len;
++
++ if (likely(!xdp_buff_is_mb(xdp))) {
++ memcpy(dst_buff, xdp->data + off, len);
++ return 0;
++ }
++
++ base_len = xdp->data_end - xdp->data;
++ sinfo = xdp_get_shared_info_from_buff(xdp);
++ do {
++ const void *src_buff = NULL;
++ unsigned long copy_len = 0;
++
++ if (off < base_len) {
++ src_buff = xdp->data + off;
++ copy_len = min(len, base_len - off);
++ } else {
++ unsigned long frag_off_total = base_len;
++ int i;
++
++ for (i = 0; i < sinfo->nr_frags; i++) {
++ skb_frag_t *frag = &sinfo->frags[i];
++ unsigned long frag_len, frag_off;
++
++ frag_len = skb_frag_size(frag);
++ frag_off = off - frag_off_total;
++ if (frag_off < frag_len) {
++ src_buff = skb_frag_address(frag) +
++ frag_off;
++ copy_len = min(len,
++ frag_len - frag_off);
++ break;
++ }
++ frag_off_total += frag_len;
++ }
++ }
++ if (!src_buff)
++ break;
++
++ memcpy(dst_buff, src_buff, copy_len);
++ off += copy_len;
++ len -= copy_len;
++ dst_buff += copy_len;
++ } while (len);
++
+ return 0;
+ }
+
+@@ -4621,10 +4676,19 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
+ if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
+ return -EINVAL;
+ if (unlikely(!xdp ||
+- xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
++ (likely(!xdp_buff_is_mb(xdp)) &&
++ xdp_size > (unsigned long)(xdp->data_end - xdp->data))))
+ return -EFAULT;
++ if (unlikely(xdp_buff_is_mb(xdp))) {
++ struct skb_shared_info *sinfo;
++
++ sinfo = xdp_get_shared_info_from_buff(xdp);
++ if (unlikely(xdp_size > ((int)(xdp->data_end - xdp->data) +
++ sinfo->data_len)))
++ return -EFAULT;
++ }
+
+- return bpf_event_output(map, flags, meta, meta_size, xdp->data,
++ return bpf_event_output(map, flags, meta, meta_size, xdp,
+ xdp_size, bpf_xdp_copy);
+ }
+
+diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+index 3bd5904b4db5..cc9be5912be8 100644
+--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
++++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+@@ -10,11 +10,20 @@ struct meta {
+ int pkt_len;
+ };
+
++struct test_ctx_s {
++ bool passed;
++ int pkt_size;
++};
++
++struct test_ctx_s test_ctx;
++
+ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+ {
+- int duration = 0;
+ struct meta *meta = (struct meta *)data;
+ struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
++ unsigned char *raw_pkt = data + sizeof(*meta);
++ struct test_ctx_s *tst_ctx = ctx;
++ int duration = 0;
+
+ if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
+ "check_size", "size %u < %zu\n",
+@@ -25,25 +34,90 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+ "meta->ifindex = %d\n", meta->ifindex))
+ return;
+
+- if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
+- "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
++ if (CHECK(meta->pkt_len != tst_ctx->pkt_size, "check_meta_pkt_len",
++ "meta->pkt_len = %d\n", tst_ctx->pkt_size))
+ return;
+
+ if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
+ "check_packet_content", "content not the same\n"))
+ return;
+
+- *(bool *)ctx = true;
++ if (meta->pkt_len > sizeof(pkt_v4)) {
++ for (int i = 0; i < (meta->pkt_len - sizeof(pkt_v4)); i++) {
++ if (raw_pkt[i + sizeof(pkt_v4)] != (unsigned char)i) {
++ CHECK(true, "check_packet_content",
++ "byte %zu does not match %u != %u\n",
++ i + sizeof(pkt_v4),
++ raw_pkt[i + sizeof(pkt_v4)],
++ (unsigned char)i);
++ break;
++ }
++ }
++ }
++
++ tst_ctx->passed = true;
+ }
+
+-void test_xdp_bpf2bpf(void)
++static int run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
++ struct test_xdp_bpf2bpf *ftrace_skel,
++ int pkt_size)
+ {
+ __u32 duration = 0, retval, size;
+- char buf[128];
++ unsigned char buf_in[9000];
++ unsigned char buf[9000];
++ int err;
++
++ if (pkt_size > sizeof(buf_in) || pkt_size < sizeof(pkt_v4))
++ return -EINVAL;
++
++ test_ctx.passed = false;
++ test_ctx.pkt_size = pkt_size;
++
++ memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
++ if (pkt_size > sizeof(pkt_v4)) {
++ for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
++ buf_in[i + sizeof(pkt_v4)] = i;
++ }
++
++ /* Run test program */
++ err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size,
++ buf, &size, &retval, &duration);
++
++ if (CHECK(err || retval != XDP_PASS || size != pkt_size,
++ "ipv4", "err %d errno %d retval %d size %d\n",
++ err, errno, retval, size))
++ return -1;
++
++ /* Make sure bpf_xdp_output() was triggered and it sent the expected
++ * data to the perf ring buffer.
++ */
++ err = perf_buffer__poll(pb, 100);
++ if (CHECK(err <= 0, "perf_buffer__poll", "err %d\n", err))
++ return -1;
++
++ if (CHECK_FAIL(!test_ctx.passed))
++ return -1;
++
++ /* Verify test results */
++ if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
++ "result", "fentry failed err %llu\n",
++ ftrace_skel->bss->test_result_fentry))
++ return -1;
++
++ if (CHECK(ftrace_skel->bss->test_result_fexit != XDP_PASS, "result",
++ "fexit failed err %llu\n",
++ ftrace_skel->bss->test_result_fexit))
++ return -1;
++
++ return 0;
++}
++
++void test_xdp_bpf2bpf(void)
+{
-+ int test_sizes[] = { 128, 4096, 9000 };
-+ struct test_xdp_multi_buff *pkt_skel;
-+ char *pkt_in = NULL, *pkt_out = NULL;
-+ __u32 duration = 0, retval, size;
-+ int err, pkt_fd, i;
-+
-+ /* Load XDP program */
-+ pkt_skel = test_xdp_multi_buff__open_and_load();
-+ if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp_mb skeleton failed\n"))
-+ goto out;
-+
-+ /* Allocate resources */
-+ pkt_out = malloc(test_sizes[ARRAY_SIZE(test_sizes) - 1]);
-+ pkt_in = malloc(test_sizes[ARRAY_SIZE(test_sizes) - 1]);
-+ if (CHECK(!pkt_in || !pkt_out, "malloc",
-+ "Failed malloc, in = %p, out %p\n", pkt_in, pkt_out))
-+ goto out;
-+
-+ pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_check_mb_len);
-+ if (pkt_fd < 0)
-+ goto out;
-+
-+ /* Run test for specific set of packets */
-+ for (i = 0; i < ARRAY_SIZE(test_sizes); i++) {
-+ int frag_count;
-+
-+ /* Run test program */
-+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_in, test_sizes[i],
-+ pkt_out, &size, &retval, &duration);
-+
-+ if (CHECK(err || retval != XDP_PASS, // || size != test_sizes[i],
-+ "test_run", "err %d errno %d retval %d size %d[%d]\n",
-+ err, errno, retval, size, test_sizes[i]))
-+ goto out;
-+
-+ /* Verify test results */
-+ frag_count = DIV_ROUND_UP(
-+ test_sizes[i] - pkt_skel->data->test_result_xdp_len,
-+ getpagesize());
-+
-+ if (CHECK(pkt_skel->data->test_result_frag_count != frag_count,
-+ "result", "frag_count = %llu != %u\n",
-+ pkt_skel->data->test_result_frag_count, frag_count))
-+ goto out;
-+
-+ if (CHECK(pkt_skel->data->test_result_frag_len != test_sizes[i] -
-+ pkt_skel->data->test_result_xdp_len,
-+ "result", "frag_len = %llu != %llu\n",
-+ pkt_skel->data->test_result_frag_len,
-+ test_sizes[i] - pkt_skel->data->test_result_xdp_len))
-+ goto out;
-+ }
-+out:
-+ if (pkt_out)
-+ free(pkt_out);
-+ if (pkt_in)
-+ free(pkt_in);
-+
-+ test_xdp_multi_buff__destroy(pkt_skel);
-+}
-+
-+void test_xdp_mb(void)
-+{
-+ if (test__start_subtest("xdp_mb_check_len_frags"))
-+ test_xdp_mb_check_len();
-+}
-diff --git a/tools/testing/selftests/bpf/progs/test_xdp_multi_buff.c b/tools/testing/selftests/bpf/progs/test_xdp_multi_buff.c
-new file mode 100644
-index 000000000000..1a46e0925282
---- /dev/null
-+++ b/tools/testing/selftests/bpf/progs/test_xdp_multi_buff.c
-@@ -0,0 +1,24 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include <linux/bpf.h>
-+#include <linux/if_ether.h>
-+#include <bpf/bpf_helpers.h>
-+#include <stdint.h>
-+
-+__u64 test_result_frag_len = UINT64_MAX;
-+__u64 test_result_frag_count = UINT64_MAX;
-+__u64 test_result_xdp_len = UINT64_MAX;
-+
-+SEC("xdp_check_mb_len")
-+int _xdp_check_mb_len(struct xdp_md *xdp)
-+{
-+ void *data_end = (void *)(long)xdp->data_end;
-+ void *data = (void *)(long)xdp->data;
-+
-+ test_result_xdp_len = (__u64)(data_end - data);
-+ test_result_frag_len = bpf_xdp_get_frags_total_size(xdp);
-+ test_result_frag_count = bpf_xdp_get_frag_count(xdp);
-+ return XDP_PASS;
-+}
-+
-+char _license[] SEC("license") = "GPL";
+ int err, pkt_fd, map_fd;
+- bool passed = false;
+- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+- struct iptnl_info value4 = {.family = AF_INET};
++ __u32 duration = 0;
++ int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
++ struct iptnl_info value4 = {.family = AF_INET6};
+ struct test_xdp *pkt_skel = NULL;
+ struct test_xdp_bpf2bpf *ftrace_skel = NULL;
+ struct vip key4 = {.protocol = 6, .family = AF_INET};
+@@ -87,40 +161,15 @@ void test_xdp_bpf2bpf(void)
+
+ /* Set up perf buffer */
+ pb_opts.sample_cb = on_sample;
+- pb_opts.ctx = &passed;
++ pb_opts.ctx = &test_ctx;
+ pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
+- 1, &pb_opts);
++ 8, &pb_opts);
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
+ goto out;
+
+- /* Run test program */
+- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
+- buf, &size, &retval, &duration);
+-
+- if (CHECK(err || retval != XDP_TX || size != 74 ||
+- iph->protocol != IPPROTO_IPIP, "ipv4",
+- "err %d errno %d retval %d size %d\n",
+- err, errno, retval, size))
+- goto out;
+-
+- /* Make sure bpf_xdp_output() was triggered and it sent the expected
+- * data to the perf ring buffer.
+- */
+- err = perf_buffer__poll(pb, 100);
+- if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+- goto out;
+-
+- CHECK_FAIL(!passed);
+-
+- /* Verify test results */
+- if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
+- "result", "fentry failed err %llu\n",
+- ftrace_skel->bss->test_result_fentry))
+- goto out;
+-
+- CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
+- "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
+-
++ for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
++ run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
++ pkt_sizes[i]);
+ out:
+ if (pb)
+ perf_buffer__free(pb);
+diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+index a038e827f850..902b54190377 100644
+--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
++++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+@@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
+ void *data = (void *)(long)xdp->data;
+
+ meta.ifindex = xdp->rxq->dev->ifindex;
+- meta.pkt_len = data_end - data;
++ meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
+ bpf_xdp_output(xdp, &perf_buf_map,
+ ((__u64) meta.pkt_len << 32) |
+ BPF_F_CURRENT_CPU,
--
-2.26.2
+2.31.1