Re: [PATCH bpf-next v6 03/10] bpf: per-cgroup lsm flavor
From: Martin KaFai Lau <hidden>
Date: 2022-05-06 23:03:10
Also in:
bpf
On Fri, Apr 29, 2022 at 02:15:33PM -0700, Stanislav Fomichev wrote:
quoted hunk ↗ jump to hunk
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 064eccba641d..a0e68ef5dfb1 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c@@ -16,6 +16,7 @@ #include <linux/bpf_local_storage.h> #include <linux/btf_ids.h> #include <linux/ima.h> +#include <linux/bpf-cgroup.h> /* For every LSM hook that allows attachment of BPF programs, declare a nop * function where a BPF program can be attached.@@ -35,6 +36,66 @@ BTF_SET_START(bpf_lsm_hooks) #undef LSM_HOOK BTF_SET_END(bpf_lsm_hooks) +/* List of LSM hooks that should operate on 'current' cgroup regardless + * of function signature. + */ +BTF_SET_START(bpf_lsm_current_hooks) +/* operate on freshly allocated sk without any cgroup association */ +BTF_ID(func, bpf_lsm_sk_alloc_security) +BTF_ID(func, bpf_lsm_sk_free_security) +BTF_SET_END(bpf_lsm_current_hooks) + +int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, + bpf_func_t *bpf_func) +{ + const struct btf_type *first_arg_type; + const struct btf_type *sock_type; + const struct btf_type *sk_type; + const struct btf *btf_vmlinux; + const struct btf_param *args; + s32 type_id; + + if (!prog->aux->attach_func_proto || + !btf_type_is_func_proto(prog->aux->attach_func_proto))
Also remove these tests during the attach time. These should have already been checked during the load time.
+ return -EINVAL;
+
+ if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
+ btf_id_set_contains(&bpf_lsm_current_hooks,
+ prog->aux->attach_btf_id)) {
+ *bpf_func = __cgroup_bpf_run_lsm_current;
+ return 0;
+ }
+
+ args = btf_params(prog->aux->attach_func_proto);
+
+ btf_vmlinux = bpf_get_btf_vmlinux();
+
+ type_id = btf_find_by_name_kind(btf_vmlinux, "socket", BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ sock_type = btf_type_by_id(btf_vmlinux, type_id);
+
+ type_id = btf_find_by_name_kind(btf_vmlinux, "sock", BTF_KIND_STRUCT);
+ if (type_id < 0)
+ return -EINVAL;
+ sk_type = btf_type_by_id(btf_vmlinux, type_id);Although practical kconfig should have CONFIG_NET, not sure btf has "socket" and "sock" in some unusual kconfig setup. If it does not, it will return error too early for other hooks. May be put them under "#ifdef CONFIG_NET"? While adding CONFIG_NET, it will be easier to just use the btf_sock_ids[]. "socket" needs to be added to btf_sock_ids[]. Take a look at btf_ids.h and filter.c.
+ + first_arg_type = btf_type_resolve_ptr(btf_vmlinux, args[0].type, NULL); + if (first_arg_type == sock_type) + *bpf_func = __cgroup_bpf_run_lsm_socket; + else if (first_arg_type == sk_type) + *bpf_func = __cgroup_bpf_run_lsm_sock; + else + *bpf_func = __cgroup_bpf_run_lsm_current; + + return 0; +}
[ ... ]
quoted hunk ↗ jump to hunk
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 134785ab487c..9cc38454e402 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c@@ -14,6 +14,9 @@ #include <linux/string.h> #include <linux/bpf.h> #include <linux/bpf-cgroup.h> +#include <linux/btf_ids.h> +#include <linux/bpf_lsm.h> +#include <linux/bpf_verifier.h> #include <net/sock.h> #include <net/bpf_sk_storage.h>@@ -61,6 +64,85 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp, return run_ctx.retval; } +unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx, + const struct bpf_insn *insn) +{ + const struct bpf_prog *shim_prog; + struct sock *sk; + struct cgroup *cgrp; + int ret = 0; + u64 *regs; + + regs = (u64 *)ctx; + sk = (void *)(unsigned long)regs[BPF_REG_0]; + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); + + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + if (likely(cgrp)) + ret = bpf_prog_run_array_cg(&cgrp->bpf, + shim_prog->aux->cgroup_atype, + ctx, bpf_prog_run, 0, NULL); + return ret; +} + +unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx, + const struct bpf_insn *insn) +{ + const struct bpf_prog *shim_prog; + struct socket *sock; + struct cgroup *cgrp; + int ret = 0; + u64 *regs; + + regs = (u64 *)ctx; + sock = (void *)(unsigned long)regs[BPF_REG_0]; + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); + + cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data); + if (likely(cgrp)) + ret = bpf_prog_run_array_cg(&cgrp->bpf, + shim_prog->aux->cgroup_atype, + ctx, bpf_prog_run, 0, NULL); + return ret; +} + +unsigned int __cgroup_bpf_run_lsm_current(const void *ctx, + const struct bpf_insn *insn) +{ + const struct bpf_prog *shim_prog; + struct cgroup *cgrp; + int ret = 0;
From lsm_hook_defs.h, there are some default return values that are not 0. Is it ok to always return 0 in cases like the cgroup array is empty ?
+ + if (unlikely(!current)) + return 0; + + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); + + rcu_read_lock(); + cgrp = task_dfl_cgroup(current); + if (likely(cgrp)) + ret = bpf_prog_run_array_cg(&cgrp->bpf, + shim_prog->aux->cgroup_atype, + ctx, bpf_prog_run, 0, NULL); + rcu_read_unlock(); + return ret; +}
[ ... ]
quoted hunk ↗ jump to hunk
@@ -479,6 +572,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *old_prog = NULL; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; + struct bpf_attach_target_info tgt_info = {}; enum cgroup_bpf_attach_type atype; struct bpf_prog_list *pl; struct hlist_head *progs;@@ -495,9 +589,34 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, /* replace_prog implies BPF_F_REPLACE, and vice versa */ return -EINVAL; - atype = to_cgroup_bpf_attach_type(type); - if (atype < 0) - return -EINVAL; + if (type == BPF_LSM_CGROUP) { + struct bpf_prog *p = prog ? : link->link.prog;
nit. This "prog ? ...." has been done multiple times (new and existing codes) in this function. may be do it once at the very beginning.
+
+ if (replace_prog) {
+ /* Reusing shim from the original program. */
+ if (replace_prog->aux->attach_btf_id !=
+ p->aux->attach_btf_id)
+ return -EINVAL;
+
+ atype = replace_prog->aux->cgroup_atype;
+ } else {
+ err = bpf_check_attach_target(NULL, p, NULL,
+ p->aux->attach_btf_id,
+ &tgt_info);
+ if (err)
+ return -EINVAL;return err;
+
+ atype = bpf_lsm_attach_type_get(p->aux->attach_btf_id);
+ if (atype < 0)
+ return atype;
+ }
+
+ p->aux->cgroup_atype = atype;
+ } else {
+ atype = to_cgroup_bpf_attach_type(type);
+ if (atype < 0)
+ return -EINVAL;
+ }
progs = &cgrp->bpf.progs[atype];[ ... ]
quoted hunk ↗ jump to hunk
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 0c4fd194e801..77dfa517c47c 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c@@ -11,6 +11,8 @@ #include <linux/rcupdate_wait.h> #include <linux/module.h> #include <linux/static_call.h> +#include <linux/bpf_verifier.h> +#include <linux/bpf_lsm.h> /* dummy _ops. The verifier will operate on target program's ops. */ const struct bpf_verifier_ops bpf_extension_verifier_ops = {@@ -485,6 +487,147 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) return err; } +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +static struct bpf_prog *cgroup_shim_alloc(const struct bpf_prog *prog, + bpf_func_t bpf_func) +{ + struct bpf_prog *p; + + p = bpf_prog_alloc(1, 0); + if (!p) + return NULL; + + p->jited = false; + p->bpf_func = bpf_func; + + p->aux->cgroup_atype = prog->aux->cgroup_atype; + p->aux->attach_func_proto = prog->aux->attach_func_proto; + p->aux->attach_btf_id = prog->aux->attach_btf_id; + p->aux->attach_btf = prog->aux->attach_btf; + btf_get(p->aux->attach_btf); + p->type = BPF_PROG_TYPE_LSM; + p->expected_attach_type = BPF_LSM_MAC; + bpf_prog_inc(p); + + return p; +} + +static struct bpf_prog *cgroup_shim_find(struct bpf_trampoline *tr, + bpf_func_t bpf_func) +{ + const struct bpf_prog_aux *aux; + int kind; + + for (kind = 0; kind < BPF_TRAMP_MAX; kind++) { + hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) { + struct bpf_prog *p = aux->prog; + + if (p->bpf_func == bpf_func) + return p; + } + } + + return NULL; +} + +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, + struct bpf_attach_target_info *tgt_info) +{ + struct bpf_prog *shim_prog = NULL; + struct bpf_trampoline *tr; + bpf_func_t bpf_func; + u64 key; + int err; + + key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, + prog->aux->attach_btf_id); + + err = bpf_lsm_find_cgroup_shim(prog, &bpf_func); + if (err) + return err; + + tr = bpf_trampoline_get(key, tgt_info); + if (!tr) + return -ENOMEM; + + mutex_lock(&tr->mutex); + + shim_prog = cgroup_shim_find(tr, bpf_func); + if (shim_prog) { + /* Reusing existing shim attached by the other program. */ + bpf_prog_inc(shim_prog); + mutex_unlock(&tr->mutex); + return 0; + } + + /* Allocate and install new shim. */ + + shim_prog = cgroup_shim_alloc(prog, bpf_func); + if (!shim_prog) { + err = -ENOMEM; + goto out; + } + + err = __bpf_trampoline_link_prog(shim_prog, tr); + if (err) + goto out; + + mutex_unlock(&tr->mutex); + + return 0; +out: + if (shim_prog) + bpf_prog_put(shim_prog); +
bpf_trampoline_get() was done earlier. Does it need to call bpf_trampoline_put(tr) in error cases ? More on tr refcnt later.
+ mutex_unlock(&tr->mutex);
+ return err;
+}
+
+void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
+{
+ struct bpf_prog *shim_prog;
+ struct bpf_trampoline *tr;
+ bpf_func_t bpf_func;
+ u64 key;
+ int err;
+
+ key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
+ prog->aux->attach_btf_id);
+
+ err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
+ if (err)
+ return;
+
+ tr = bpf_trampoline_lookup(key);
+ if (!tr)
+ return;
+
+ mutex_lock(&tr->mutex);
+
+ shim_prog = cgroup_shim_find(tr, bpf_func);
+ if (shim_prog) {
+ /* We use shim_prog refcnt for tracking whether to
+ * remove the shim program from the trampoline.
+ * Trampoline's mutex is held while refcnt is
+ * added/subtracted so we don't need to care about
+ * potential races.
+ */
+
+ if (atomic64_read(&shim_prog->aux->refcnt) == 1)
+ WARN_ON_ONCE(__bpf_trampoline_unlink_prog(shim_prog, tr));
+
+ bpf_prog_put(shim_prog);
+ }
+
+ mutex_unlock(&tr->mutex);
+
+ bpf_trampoline_put(tr); /* bpf_trampoline_lookup */
+
+ if (shim_prog)
+ bpf_trampoline_put(tr);While looking at the bpf_trampoline_{link,unlink}_cgroup_shim() again,
which prog (cgroup lsm progs or shim_prog) actually owns the tr.
It should be the shim_prog ?
How about storing tr in "shim_prog->aux->dst_trampoline = tr;"
Then the earlier bpf_prog_put(shim_prog) in this function will take care
of the bpf_trampoline_put(shim_prog->aux->dst_trampoline)
instead of each cgroup lsm prog owns a refcnt of the tr
and then this individual bpf_trampoline_put(tr) here can also
go away.
quoted hunk ↗ jump to hunk
+} +#endif + struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info) {@@ -609,6 +752,24 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) rcu_read_unlock(); } +u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog) + __acquires(RCU) +{ + /* Runtime stats are exported via actual BPF_LSM_CGROUP + * programs, not the shims. + */ + rcu_read_lock(); + migrate_disable(); + return NO_START_TIME; +} + +void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start) + __releases(RCU) +{ + migrate_enable(); + rcu_read_unlock(); +} + u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog) { rcu_read_lock_trace();diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 813f6ee80419..99703d96c579 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c@@ -14455,6 +14455,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, fallthrough; case BPF_MODIFY_RETURN: case BPF_LSM_MAC: + case BPF_LSM_CGROUP: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: if (!btf_type_is_func(t)) {@@ -14633,6 +14634,33 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) return 0; } +static int check_used_maps(struct bpf_verifier_env *env) +{ + int i; + + for (i = 0; i < env->used_map_cnt; i++) { + struct bpf_map *map = env->used_maps[i]; + + switch (env->prog->expected_attach_type) { + case BPF_LSM_CGROUP: + if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && + map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) + break; + + if (map->key_size != sizeof(__u64)) {
Follow on my very last comment in v5. I think we should not limit the bpf_cgroup_storage_key and should allow using both cgroup_inode_id and attach_type as the key to avoid inconsistent surprise (some keys are not allowed in a specific attach_type), so this check_used_maps() function can also be avoided.
quoted hunk ↗ jump to hunk
+ verbose(env, "only global cgroup local storage is supported for BPF_LSM_CGROUP\n"); + return -EINVAL; + } + + break; + default: + break; + } + } + + return 0; +} + struct btf *bpf_get_btf_vmlinux(void) { if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {@@ -14854,6 +14882,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) convert_pseudo_ld_imm64(env); } + ret = check_used_maps(env); + if (ret < 0) + goto err_release_maps; + adjust_btf_func(env); err_release_maps:diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 444fe6f1cf35..112e396bbe65 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h@@ -998,6 +998,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, BPF_TRACE_KPROBE_MULTI, + BPF_LSM_CGROUP, __MAX_BPF_ATTACH_TYPE };-- 2.36.0.464.gb9c8b46e94-goog