Thread (17 messages) 17 messages, 3 authors, 2021-10-01

Re: [PATCH bpf-next v6 8/9] libbpf: Update gen_loader to emit BTF_KIND_FUNC relocations

From: Alexei Starovoitov <hidden>
Date: 2021-10-01 19:56:51
Also in: bpf

On Thu, Sep 30, 2021 at 11:59:47AM +0530, Kumar Kartikeya Dwivedi wrote:
quoted hunk ↗ jump to hunk
This change updates the BPF syscall loader to relocate BTF_KIND_FUNC
relocations, with support for weak kfunc relocations. The general idea
is to move map_fds to loader map, and also use the data for storing
kfunc BTF fds. Since both reuse the fd_array parameter, they need to be
kept together.

For map_fds, we reserve MAX_USED_MAPS slots in a region, and for kfunc,
we reserve MAX_KFUNC_DESCS. This is done so that insn->off has more
chances of being <= INT16_MAX than treating data map as a sparse array
and adding fd as needed.

When the MAX_KFUNC_DESCS limit is reached, we fall back to the sparse
array model, so that as long as it does remain <= INT16_MAX, we pass an
index relative to the start of fd_array.

We store all ksyms in an array where we try to avoid calling the
bpf_btf_find_by_name_kind helper, and also reuse the BTF fd that was
already stored. This also speeds up the loading process compared to
emitting calls in all cases, in later tests.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 tools/lib/bpf/bpf_gen_internal.h |  16 +-
 tools/lib/bpf/gen_loader.c       | 323 ++++++++++++++++++++++++++-----
 tools/lib/bpf/libbpf.c           |   8 +-
 3 files changed, 292 insertions(+), 55 deletions(-)
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
index 615400391e57..70eccbffefb1 100644
--- a/tools/lib/bpf/bpf_gen_internal.h
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -7,6 +7,15 @@ struct ksym_relo_desc {
 	const char *name;
 	int kind;
 	int insn_idx;
+	bool is_weak;
+};
+
+struct ksym_desc {
+	const char *name;
+	int ref;
+	int kind;
+	int off;
+	int insn;
 };
 
 struct bpf_gen {
@@ -24,6 +33,10 @@ struct bpf_gen {
 	int relo_cnt;
 	char attach_target[128];
 	int attach_kind;
+	struct ksym_desc *ksyms;
+	__u32 nr_ksyms;
+	int fd_array;
+	int nr_fd_array;
 };
 
 void bpf_gen__init(struct bpf_gen *gen, int log_level);
@@ -36,6 +49,7 @@ void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_a
 void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size);
 void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
 void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
-void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx);
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, int kind,
+			    int insn_idx);
 
 #endif
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 80087b13877f..9902f2f47fb2 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -14,8 +14,10 @@
 #include "bpf_gen_internal.h"
 #include "skel_internal.h"
 
-#define MAX_USED_MAPS 64
-#define MAX_USED_PROGS 32
+#define MAX_USED_MAPS	64
+#define MAX_USED_PROGS	32
+#define MAX_KFUNC_DESCS 256
+#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS)
 
 /* The following structure describes the stack layout of the loader program.
  * In addition R6 contains the pointer to context.
@@ -30,7 +32,6 @@
  */
 struct loader_stack {
 	__u32 btf_fd;
-	__u32 map_fd[MAX_USED_MAPS];
 	__u32 prog_fd[MAX_USED_PROGS];
 	__u32 inner_map_fd;
 };
@@ -143,13 +144,58 @@ static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
 	if (realloc_data_buf(gen, size8))
 		return 0;
 	prev = gen->data_cur;
-	memcpy(gen->data_cur, data, size);
+	if (data)
+		memcpy(gen->data_cur, data, size);
Manpage says that realloc doesn't init the memory.
Lets zero it here instead of leaving uninitialized.
 	gen->data_cur += size;
 	memcpy(gen->data_cur, &zero, size8 - size);
 	gen->data_cur += size8 - size;
 	return prev - gen->data_start;
 }
 
+/* Get index for map_fd/btf_fd slot in reserved fd_array, or in data relative
+ * to start of fd_array. Caller can decide if it is usable or not.
+ */
+static int fd_array_init(struct bpf_gen *gen)
+{
+	if (!gen->fd_array)
+		gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
+	return gen->fd_array;
+}
+
+static int add_map_fd(struct bpf_gen *gen)
+{
+	if (!fd_array_init(gen))
+		return -1;
Pls drop the error check here and other places.
In out-of-mem situation gen->error will be set and all subsequent ops
will be using zero. The final check is in finish().
+	if (gen->nr_maps == MAX_USED_MAPS) {
+		pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS);
+		gen->error = -E2BIG;
+		return -1;
+	}
+	return gen->nr_maps++;
+}
+
+static int add_kfunc_btf_fd(struct bpf_gen *gen)
+{
+	int cur;
+
+	if (!fd_array_init(gen))
+		return -1;
drop it.
+	if (gen->nr_fd_array == MAX_KFUNC_DESCS) {
+		cur = add_data(gen, NULL, sizeof(int));
+		if (!cur)
+			return -1;
drop it.
+		return (cur - gen->fd_array) / sizeof(int);
+	}
+	return MAX_USED_MAPS + gen->nr_fd_array++;
+}
+
+static int blob_fd_array_off(struct bpf_gen *gen, int index)
+{
+	if (!gen->fd_array)
+		return 0;
drop it.
+	return gen->fd_array + (index * sizeof(int));
no need for extra ()
quoted hunk ↗ jump to hunk
+}
+
 static int insn_bytes_to_bpf_size(__u32 sz)
 {
 	switch (sz) {
@@ -171,14 +217,22 @@ static void emit_rel_store(struct bpf_gen *gen, int off, int data)
 	emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
 }
 
-/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */
-static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off)
+static void move_blob2blob(struct bpf_gen *gen, int off, int size, int blob_off)
 {
-	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10));
-	emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off));
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, blob_off));
+	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_2, 0));
 	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
 					 0, 0, 0, off));
-	emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
+	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
+}
+
+static void move_blob2ctx(struct bpf_gen *gen, int ctx_off, int size, int blob_off)
+{
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, blob_off));
+	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_1, 0));
+	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off));
 }
 
 static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off,
@@ -326,11 +380,11 @@ int bpf_gen__finish(struct bpf_gen *gen)
 			       offsetof(struct bpf_prog_desc, prog_fd), 4,
 			       stack_off(prog_fd[i]));
 	for (i = 0; i < gen->nr_maps; i++)
-		move_stack2ctx(gen,
-			       sizeof(struct bpf_loader_ctx) +
-			       sizeof(struct bpf_map_desc) * i +
-			       offsetof(struct bpf_map_desc, map_fd), 4,
-			       stack_off(map_fd[i]));
+		move_blob2ctx(gen,
+			      sizeof(struct bpf_loader_ctx) +
+			      sizeof(struct bpf_map_desc) * i +
+			      offsetof(struct bpf_map_desc, map_fd), 4,
+			      blob_fd_array_off(gen, i));
 	emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
 	emit(gen, BPF_EXIT_INSN());
 	pr_debug("gen: finish %d\n", gen->error);
@@ -390,7 +444,7 @@ void bpf_gen__map_create(struct bpf_gen *gen,
 {
 	int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id);
 	bool close_inner_map_fd = false;
-	int map_create_attr;
+	int map_create_attr, idx;
 	union bpf_attr attr;
 
 	memset(&attr, 0, attr_size);
@@ -467,9 +521,13 @@ void bpf_gen__map_create(struct bpf_gen *gen,
 		gen->error = -EDOM; /* internal bug */
 		return;
 	} else {
-		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
-				      stack_off(map_fd[map_idx])));
-		gen->nr_maps++;
+		/* add_map_fd does gen->nr_maps++ */
+		idx = add_map_fd(gen);
+		if (idx < 0)
+			return;
drop it.
quoted hunk ↗ jump to hunk
+		emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+						 0, 0, 0, blob_fd_array_off(gen, idx)));
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 0));
 	}
 	if (close_inner_map_fd)
 		emit_sys_close_stack(gen, stack_off(inner_map_fd));
@@ -511,8 +569,8 @@ static void emit_find_attach_target(struct bpf_gen *gen)
 	 */
 }
 
-void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind,
-			    int insn_idx)
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,
+			    int kind, int insn_idx)
 {
 	struct ksym_relo_desc *relo;
 
@@ -524,38 +582,196 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind,
 	gen->relos = relo;
 	relo += gen->relo_cnt;
 	relo->name = name;
+	relo->is_weak = is_weak;
 	relo->kind = kind;
 	relo->insn_idx = insn_idx;
 	gen->relo_cnt++;
 }
 
-static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns)
+/* returns existing ksym_desc with ref incremented, or inserts a new one */
+static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo)
 {
-	int name, insn, len = strlen(relo->name) + 1;
+	struct ksym_desc *kdesc;
 
-	pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx);
-	name = add_data(gen, relo->name, len);
+	for (int i = 0; i < gen->nr_ksyms; i++) {
+		if (!strcmp(gen->ksyms[i].name, relo->name)) {
+			gen->ksyms[i].ref++;
+			return &gen->ksyms[i];
+		}
+	}
+	kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc));
+	if (!kdesc) {
+		gen->error = -ENOMEM;
+		return NULL;
+	}
+	gen->ksyms = kdesc;
+	kdesc = &gen->ksyms[gen->nr_ksyms++];
+	kdesc->name = relo->name;
+	kdesc->kind = relo->kind;
+	kdesc->ref = 1;
+	kdesc->off = 0;
+	kdesc->insn = 0;
+	return kdesc;
+}
+
+/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7}
+ * Returns result in BPF_REG_7
+ */
+static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_desc *relo)
+{
+	int name_off, len = strlen(relo->name) + 1;
 
+	name_off = add_data(gen, relo->name, len);
+	if (!name_off)
+		return;
drop it.
 	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
-					 0, 0, 0, name));
+					 0, 0, 0, name_off));
 	emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
 	emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind));
 	emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
 	emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
 	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
 	debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind);
-	emit_check_err(gen);
+}
+
+/* Expects:
+ * BPF_REG_8 - pointer to instruction
+ *
+ * We need to reuse BTF fd for same symbol otherwise each relocation takes a new
+ * index, while kernel limits total kfunc BTFs to 256. For duplicate symbols,
+ * this would mean a new BTF fd index for each entry. By pairing symbol name
+ * with index, we get the insn->imm, insn->off pairing that kernel uses for
+ * kfunc_tab, which becomes the effective limit even though all of them may
+ * share same index in fd_array (such that kfunc_btf_tab has 1 element).
+ */
+static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn)
+{
+	struct ksym_desc *kdesc;
+	int btf_fd_idx;
+
+	kdesc = get_ksym_desc(gen, relo);
+	if (!kdesc)
+		return;
+	/* try to copy from existing bpf_insn */
+	if (kdesc->ref > 1) {
+		move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
+			       kdesc->insn + offsetof(struct bpf_insn, imm));
+		move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2,
+			       kdesc->insn + offsetof(struct bpf_insn, off));
+		goto log;
+	}
+	/* remember insn offset, so we can copy BTF ID and FD later */
+	kdesc->insn = insn;
+	emit_bpf_find_by_name_kind(gen, relo);
+	if (!relo->is_weak)
+		emit_check_err(gen);
+	/* get index in fd_array to store BTF FD at */
+	btf_fd_idx = add_kfunc_btf_fd(gen);
+	if (btf_fd_idx < 0)
+		return;
drop it.
+	if (btf_fd_idx > INT16_MAX) {
+		pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n",
+			btf_fd_idx, relo->name);
+		gen->error = -E2BIG;
+		return;
this one is necessary. keep it.
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help