Thread (40 messages) 40 messages, 7 authors, 2021-09-14

Re: [PATCH v3 bpf-next 02/14] bpf: refactor BPF_PROG_RUN_ARRAY family of macros into functions

From: Andrii Nakryiko <hidden>
Date: 2021-08-10 00:36:20

On Mon, Aug 9, 2021 at 4:00 PM Daniel Borkmann [off-list ref] wrote:
On 7/30/21 7:34 AM, Andrii Nakryiko wrote:
quoted
Similar to BPF_PROG_RUN, turn BPF_PROG_RUN_ARRAY macros into proper functions
with all the same readability and maintainability benefits. Making them into
functions required shuffling around bpf_set_run_ctx/bpf_reset_run_ctx
functions. Also, explicitly specifying the type of the BPF prog run callback
required adjusting __bpf_prog_run_save_cb() to accept const void *, casted
internally to const struct sk_buff.

Further, split out a cgroup-specific BPF_PROG_RUN_ARRAY_CG and
BPF_PROG_RUN_ARRAY_CG_FLAGS from the more generic BPF_PROG_RUN_ARRAY due to
the differences in bpf_run_ctx used for those two different use cases.

I think BPF_PROG_RUN_ARRAY_CG would benefit from further refactoring to accept
struct cgroup and enum bpf_attach_type instead of bpf_prog_array, fetching
cgrp->bpf.effective[type] and RCU-dereferencing it internally. But that
required including include/linux/cgroup-defs.h, which I wasn't sure is ok with
everyone.

The remaining generic BPF_PROG_RUN_ARRAY function will be extended to
pass-through user-provided context value in the next patch.

Acked-by: Yonghong Song <redacted>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
  include/linux/bpf.h      | 187 +++++++++++++++++++++++----------------
  include/linux/filter.h   |   5 +-
  kernel/bpf/cgroup.c      |  32 +++----
  kernel/trace/bpf_trace.c |   2 +-
  4 files changed, 132 insertions(+), 94 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c8cc09013210..9c44b56b698f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1146,67 +1146,124 @@ struct bpf_run_ctx {};

  struct bpf_cg_run_ctx {
      struct bpf_run_ctx run_ctx;
-     struct bpf_prog_array_item *prog_item;
+     const struct bpf_prog_array_item *prog_item;
  };

+#ifdef CONFIG_BPF_SYSCALL
+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+     struct bpf_run_ctx *old_ctx;
+
+     old_ctx = current->bpf_ctx;
+     current->bpf_ctx = new_ctx;
+     return old_ctx;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+     current->bpf_ctx = old_ctx;
+}
+#else /* CONFIG_BPF_SYSCALL */
+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+     return NULL;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+}
+#endif /* CONFIG_BPF_SYSCALL */
nit, but either is fine..:

static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
{
        struct bpf_run_ctx *old_ctx = NULL;

#ifdef CONFIG_BPF_SYSCALL
        old_ctx = current->bpf_ctx;
        current->bpf_ctx = new_ctx;
#endif
        return old_ctx;
}

static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
{
#ifdef CONFIG_BPF_SYSCALL
        current->bpf_ctx = old_ctx;
#endif
}
sure, I don't mind
quoted
  /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
  #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE                        (1 << 0)
  /* BPF program asks to set CN on the packet. */
  #define BPF_RET_SET_CN                                              (1 << 0)

-#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)                \
-     ({                                                              \
-             struct bpf_prog_array_item *_item;                      \
-             struct bpf_prog *_prog;                                 \
-             struct bpf_prog_array *_array;                          \
-             struct bpf_run_ctx *old_run_ctx;                        \
-             struct bpf_cg_run_ctx run_ctx;                          \
-             u32 _ret = 1;                                           \
-             u32 func_ret;                                           \
-             migrate_disable();                                      \
-             rcu_read_lock();                                        \
-             _array = rcu_dereference(array);                        \
-             _item = &_array->items[0];                              \
-             old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);        \
-             while ((_prog = READ_ONCE(_item->prog))) {              \
-                     run_ctx.prog_item = _item;                      \
-                     func_ret = func(_prog, ctx);                    \
-                     _ret &= (func_ret & 1);                         \
-                     *(ret_flags) |= (func_ret >> 1);                \
-                     _item++;                                        \
-             }                                                       \
-             bpf_reset_run_ctx(old_run_ctx);                         \
-             rcu_read_unlock();                                      \
-             migrate_enable();                                       \
-             _ret;                                                   \
-      })
-
-#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage)       \
-     ({                                              \
-             struct bpf_prog_array_item *_item;      \
-             struct bpf_prog *_prog;                 \
-             struct bpf_prog_array *_array;          \
-             struct bpf_run_ctx *old_run_ctx;        \
-             struct bpf_cg_run_ctx run_ctx;          \
-             u32 _ret = 1;                           \
-             migrate_disable();                      \
-             rcu_read_lock();                        \
-             _array = rcu_dereference(array);        \
-             if (unlikely(check_non_null && !_array))\
-                     goto _out;                      \
-             _item = &_array->items[0];              \
-             old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
-             while ((_prog = READ_ONCE(_item->prog))) {      \
-                     run_ctx.prog_item = _item;      \
-                     _ret &= func(_prog, ctx);       \
-                     _item++;                        \
-             }                                       \
-             bpf_reset_run_ctx(old_run_ctx);         \
-_out:                                                        \
-             rcu_read_unlock();                      \
-             migrate_enable();                       \
-             _ret;                                   \
-      })
+typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
+                         const void *ctx, bpf_prog_run_fn run_prog,
+                         u32 *ret_flags)
+{
+     const struct bpf_prog_array_item *item;
+     const struct bpf_prog *prog;
+     const struct bpf_prog_array *array;
+     struct bpf_run_ctx *old_run_ctx;
+     struct bpf_cg_run_ctx run_ctx;
+     u32 ret = 1;
+     u32 func_ret;
+
+     migrate_disable();
+     rcu_read_lock();
+     array = rcu_dereference(array_rcu);
+     item = &array->items[0];
+     old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+     while ((prog = READ_ONCE(item->prog))) {
+             run_ctx.prog_item = item;
+             func_ret = run_prog(prog, ctx);
+             ret &= (func_ret & 1);
+             *(ret_flags) |= (func_ret >> 1);
+             item++;
+     }
+     bpf_reset_run_ctx(old_run_ctx);
+     rcu_read_unlock();
+     migrate_enable();
+     return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
+                   const void *ctx, bpf_prog_run_fn run_prog)
+{
+     const struct bpf_prog_array_item *item;
+     const struct bpf_prog *prog;
+     const struct bpf_prog_array *array;
+     struct bpf_run_ctx *old_run_ctx;
+     struct bpf_cg_run_ctx run_ctx;
+     u32 ret = 1;
+
+     migrate_disable();
+     rcu_read_lock();
+     array = rcu_dereference(array_rcu);
+     item = &array->items[0];
+     old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+     while ((prog = READ_ONCE(item->prog))) {
+             run_ctx.prog_item = item;
+             ret &= run_prog(prog, ctx);
+             item++;
+     }
+     bpf_reset_run_ctx(old_run_ctx);
+     rcu_read_unlock();
+     migrate_enable();
+     return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+                const void *ctx, bpf_prog_run_fn run_prog)
+{
+     const struct bpf_prog_array_item *item;
+     const struct bpf_prog *prog;
+     const struct bpf_prog_array *array;
+     u32 ret = 1;
+
+     migrate_disable();
+     rcu_read_lock();
+     array = rcu_dereference(array_rcu);
+     if (unlikely(!array))
+             goto out;
+     item = &array->items[0];
+     while ((prog = READ_ONCE(item->prog))) {
+             ret &= run_prog(prog, ctx);
+             item++;
+     }
+out:
+     rcu_read_unlock();
+     migrate_enable();
+     return ret;
+}
Is there any way we could consolidate the above somewhat further and have things
optimized out at compilation time, e.g. when const args are null/non-null? :/
Do you mean like passing "bool check_for_null" as an extra argument and then do

if (check_for_null && unlikely(!array))
    goto out;

?

I feel like that actually makes a bigger mess and just unnecessarily
obfuscates code, while saving just a few lines of straightforward
code. We didn't even do that for BPF_PROG_RUN_ARRAY_FLAGS vs
__BPF_PROG_RUN_ARRAY before, even though there are about 2 lines of
code differences.

But also in one of the next patches I'm adding perf-specific
bpf_perf_run_ctx (different from bpf_cg_run_ctx), so it will make
sharing the logic within these BPF_PROG_RUN_ARRAY*  even harder and
more convoluted.

Or were you talking about some other aspect here?
quoted
  /* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
   * so BPF programs can request cwr for TCP packets.
@@ -1235,7 +1292,7 @@ _out:                                                   \
              u32 _flags = 0;                         \
[...]
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help