Re: [RFC PATCH bpf-next v2 03/11] bpf: Unify dynptr handling in the verifier
From: Mykyta Yatsenko <hidden>
Date: 2026-03-11 22:22:32
Also in:
bpf
Amery Hung [off-list ref] writes:
On Wed, Mar 11, 2026 at 9:03 AM Mykyta Yatsenko [off-list ref] wrote:quoted
Amery Hung [off-list ref] writes:quoted
Simplify dynptr checking for helper and kfunc by unifying it. Remember initialized dynptr in process_dynptr_func() so that we can easily retrieve the information for verification later. Signed-off-by: Amery Hung <redacted> --- kernel/bpf/verifier.c | 179 +++++++++--------------------------------- 1 file changed, 36 insertions(+), 143 deletions(-)diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0f77c4c5b510..d52780962adb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c@@ -277,8 +277,15 @@ struct bpf_map_desc { int uid; }; +struct bpf_dynptr_desc { + enum bpf_dynptr_type type; + u32 id; + u32 ref_obj_id;nit: let's add a comment here explaining what this field is for.We are about to change the meaning of id and ref_obj_id. I can add comments explaining id, ref_obj_id and parent_id in the refactor patch (#6). That said, the meaning of these fields will apply to all objects tracked by the verifier, not just limited to dynptr, and is already documented when we define bpf_reg_state in include/linux/bpf_verifier.h. Can you share a bit what info you are looking for?
the description from commit message would help: /* id of the referenced object; objects with same ref_obj_id have the same lifetime */ Oftentimes when I work on verifier, it's difficult to understand what some data field is for. It's easier now with the AI, but still I see a lot of value to have that inline. Essentially ref_obj_id does not have obvious meaning (at least to me).
quoted
quoted
+}; + struct bpf_call_arg_meta { struct bpf_map_desc map; + struct bpf_dynptr_desc initialized_dynptr; bool raw_mode; bool pkt_access; u8 release_regno;@@ -287,7 +294,6 @@ struct bpf_call_arg_meta { int mem_size; u64 msize_max_value; int ref_obj_id; - int dynptr_id; int func_id; struct btf *btf; u32 btf_id;@@ -346,16 +352,12 @@ struct bpf_kfunc_call_arg_meta { struct { struct btf_field *field; } arg_rbtree_root; - struct { - enum bpf_dynptr_type type; - u32 id; - u32 ref_obj_id; - } initialized_dynptr; struct { u8 spi; u8 frameno; } iter; struct bpf_map_desc map; + struct bpf_dynptr_desc initialized_dynptr; u64 mem_size; };@@ -511,11 +513,6 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id) func_id == BPF_FUNC_skc_to_tcp_request_sock; } -static bool is_dynptr_ref_function(enum bpf_func_id func_id) -{ - return func_id == BPF_FUNC_dynptr_data; -} - static bool is_sync_callback_calling_kfunc(u32 btf_id); static bool is_async_callback_calling_kfunc(u32 btf_id); static bool is_callback_calling_kfunc(u32 btf_id);@@ -597,8 +594,6 @@ static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id, ref_obj_uses++; if (is_acquire_function(func_id, map)) ref_obj_uses++; - if (is_dynptr_ref_function(func_id)) - ref_obj_uses++; return ref_obj_uses > 1; }@@ -8750,7 +8745,8 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, * type, and declare it as 'const struct bpf_dynptr *' in their prototype. */ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx, - enum bpf_arg_type arg_type, int clone_ref_obj_id) + enum bpf_arg_type arg_type, int clone_ref_obj_id, + struct bpf_dynptr_desc *initialized_dynptr) { struct bpf_reg_state *reg = reg_state(env, regno); int err;@@ -8825,6 +8821,20 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn } err = mark_dynptr_read(env, reg); + + if (initialized_dynptr) { + struct bpf_func_state *state = func(env, reg);state is only used if reg->type != CONST_PTR_TO_DYNPTR, does it make sense to move state = func(env, reg); to the corresponding if block?I think this is fine. It looks less cluttered this way.quoted
quoted
+ int spi; + + if (reg->type != CONST_PTR_TO_DYNPTR) { + spi = dynptr_get_spi(env, reg);looking at the deleted dynptr_id() and dynptr_ref_obj_id() spi can be negative, what changed here that we no longer need this check?is_dynptr_reg_valid_init() above already makes sure reg points to a valid dynptr so we don't need to check it again.quoted
quoted
+ reg = &state->stack[spi].spilled_ptr; + } + + initialized_dynptr->id = reg->id; + initialized_dynptr->type = reg->dynptr.type; + initialized_dynptr->ref_obj_id = reg->ref_obj_id; + } } return err; }@@ -9587,72 +9597,6 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env, } } -static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env, - const struct bpf_func_proto *fn, - struct bpf_reg_state *regs) -{ - struct bpf_reg_state *state = NULL; - int i; - - for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) - if (arg_type_is_dynptr(fn->arg_type[i])) { - if (state) { - verbose(env, "verifier internal error: multiple dynptr args\n"); - return NULL; - } - state = ®s[BPF_REG_1 + i]; - } - - if (!state) - verbose(env, "verifier internal error: no dynptr arg found\n"); - - return state; -} - -static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) -{ - struct bpf_func_state *state = func(env, reg); - int spi; - - if (reg->type == CONST_PTR_TO_DYNPTR) - return reg->id; - spi = dynptr_get_spi(env, reg); - if (spi < 0) - return spi; - return state->stack[spi].spilled_ptr.id; -} - -static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) -{ - struct bpf_func_state *state = func(env, reg); - int spi; - - if (reg->type == CONST_PTR_TO_DYNPTR) - return reg->ref_obj_id; - spi = dynptr_get_spi(env, reg); - if (spi < 0) - return spi; - return state->stack[spi].spilled_ptr.ref_obj_id; -} - -static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env, - struct bpf_reg_state *reg) -{ - struct bpf_func_state *state = func(env, reg); - int spi; - - if (reg->type == CONST_PTR_TO_DYNPTR) - return reg->dynptr.type; - - spi = __get_spi(reg->var_off.value); - if (spi < 0) { - verbose(env, "verifier internal error: invalid spi when querying dynptr type\n"); - return BPF_DYNPTR_TYPE_INVALID; - } - - return state->stack[spi].spilled_ptr.dynptr.type; -} - static int check_reg_const_str(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno) {@@ -10007,7 +9951,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, true, meta); break; case ARG_PTR_TO_DYNPTR: - err = process_dynptr_func(env, regno, insn_idx, arg_type, 0); + err = process_dynptr_func(env, regno, insn_idx, arg_type, 0, + &meta->initialized_dynptr); if (err) return err; break;@@ -10666,7 +10611,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, if (ret) return ret; - ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0); + ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0, NULL); if (ret) return ret; } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {@@ -11771,52 +11716,10 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } } break; - case BPF_FUNC_dynptr_data: - { - struct bpf_reg_state *reg; - int id, ref_obj_id; - - reg = get_dynptr_arg_reg(env, fn, regs); - if (!reg) - return -EFAULT; - - - if (meta.dynptr_id) { - verifier_bug(env, "meta.dynptr_id already set"); - return -EFAULT; - } - if (meta.ref_obj_id) { - verifier_bug(env, "meta.ref_obj_id already set"); - return -EFAULT; - } - - id = dynptr_id(env, reg); - if (id < 0) { - verifier_bug(env, "failed to obtain dynptr id"); - return id; - } - - ref_obj_id = dynptr_ref_obj_id(env, reg); - if (ref_obj_id < 0) { - verifier_bug(env, "failed to obtain dynptr ref_obj_id"); - return ref_obj_id; - } - - meta.dynptr_id = id; - meta.ref_obj_id = ref_obj_id; - - break; - } case BPF_FUNC_dynptr_write: { - enum bpf_dynptr_type dynptr_type; - struct bpf_reg_state *reg; - - reg = get_dynptr_arg_reg(env, fn, regs); - if (!reg) - return -EFAULT; + enum bpf_dynptr_type dynptr_type = meta.initialized_dynptr.type; - dynptr_type = dynptr_get_type(env, reg); if (dynptr_type == BPF_DYNPTR_TYPE_INVALID) return -EFAULT;@@ -12007,10 +11910,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EFAULT; } - if (is_dynptr_ref_function(func_id)) - regs[BPF_REG_0].dynptr_id = meta.dynptr_id; - - if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) { + if (is_ptr_cast_function(func_id)) { /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; } else if (is_acquire_function(func_id, meta.map.ptr)) {@@ -12024,6 +11924,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].ref_obj_id = id; } + if (func_id == BPF_FUNC_dynptr_data) { + regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id; + regs[BPF_REG_0].ref_obj_id = meta.initialized_dynptr.ref_obj_id; + } + err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta); if (err) return err;@@ -13559,22 +13464,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } } - ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id); + ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id, + &meta->initialized_dynptr); if (ret < 0) return ret; - - if (!(dynptr_arg_type & MEM_UNINIT)) { - int id = dynptr_id(env, reg); - - if (id < 0) { - verifier_bug(env, "failed to obtain dynptr id"); - return id; - } - meta->initialized_dynptr.id = id; - meta->initialized_dynptr.type = dynptr_get_type(env, reg); - meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg); - } - break; } case KF_ARG_PTR_TO_ITER: --2.47.3