Re: [RFC bpf-next 01/10] uprobe: Add session callbacks to uprobe_consumer
From: Jiri Olsa <hidden>
Date: 2024-06-10 11:06:40
Also in:
bpf, lkml
Subsystem:
bpf [general] (safe dynamic programs and tools), bpf [security & lsm] (security audit and enforcement using bpf), bpf [tracing], performance events subsystem, the rest, tracing, uprobes · Maintainers:
Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman, Kumar Kartikeya Dwivedi, KP Singh, Matt Bobrowski, Song Liu, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Linus Torvalds, Steven Rostedt, Masami Hiramatsu, Oleg Nesterov
On Thu, Jun 06, 2024 at 09:52:39AM -0700, Andrii Nakryiko wrote:
On Thu, Jun 6, 2024 at 9:46 AM Jiri Olsa [off-list ref] wrote:quoted
On Wed, Jun 05, 2024 at 10:50:11PM +0200, Jiri Olsa wrote:quoted
On Wed, Jun 05, 2024 at 07:56:19PM +0200, Oleg Nesterov wrote:quoted
On 06/05, Andrii Nakryiko wrote:quoted
so any such limitations will cause problems, issue reports, investigation, etc.Agreed...quoted
As one possible solution, what if we do struct return_instance { ... u64 session_cookies[]; }; and allocate sizeof(struct return_instance) + 8 * <num-of-session-consumers> and then at runtime pass &session_cookies[i] as data pointer to session-aware callbacks?I too thought about this, but I guess it is not that simple. Just for example. Suppose we have 2 session-consumers C1 and C2. What if uprobe_unregister(C1) comes before the probed function returns? We need something like map_cookie_to_consumer().I guess we could have hash table in return_instance that gets 'consumer -> cookie' ?ok, hash table is probably too big for this.. I guess some solution that would iterate consumers and cookies made sure it matches would be fineYes, I was hoping to avoid hash tables for this, and in the common case have no added overhead.
hi,
here's first stab on that.. the change below:
- extends current handlers with extra argument rather than adding new
set of handlers
- store session consumers objects within return_instance object and
- iterate these objects ^^^ in handle_uretprobe_chain
I guess it could be still polished, but I wonder if this could
be the right direction to do this.. thoughts? ;-)
thanks,
jirka
---diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index f46e0ca0169c..4e40e8352eac 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h@@ -34,15 +34,19 @@ enum uprobe_filter_ctx { }; struct uprobe_consumer { - int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); + int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, + unsigned long *data); int (*ret_handler)(struct uprobe_consumer *self, unsigned long func, - struct pt_regs *regs); + struct pt_regs *regs, + unsigned long *data); bool (*filter)(struct uprobe_consumer *self, enum uprobe_filter_ctx ctx, struct mm_struct *mm); struct uprobe_consumer *next; + bool is_session; + unsigned int id; }; #ifdef CONFIG_UPROBES
@@ -80,6 +84,12 @@ struct uprobe_task { unsigned int depth; }; +struct session_consumer { + long cookie; + unsigned int id; + int rc; +}; + struct return_instance { struct uprobe *uprobe; unsigned long func;
@@ -88,6 +98,8 @@ struct return_instance { bool chained; /* true, if instance is nested */ struct return_instance *next; /* keep as stack */ + int session_cnt; + struct session_consumer sc[1]; /* 1 for zero item marking the end */ }; enum rp_check {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2c83ba776fc7..cbd71dc06ef0 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c@@ -63,6 +63,8 @@ struct uprobe { loff_t ref_ctr_offset; unsigned long flags; + unsigned int session_cnt; + /* * The generic code assumes that it has two members of unknown type * owned by the arch-specific code:
@@ -750,11 +752,30 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, return uprobe; } +static void +uprobe_consumer_account(struct uprobe *uprobe, struct uprobe_consumer *uc) +{ + static unsigned int session_id; + + if (uc->is_session) { + uprobe->session_cnt++; + uc->id = ++session_id ?: ++session_id; + } +} + +static void +uprobe_consumer_unaccount(struct uprobe *uprobe, struct uprobe_consumer *uc) +{ + if (uc->is_session) + uprobe->session_cnt--; +} + static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) { down_write(&uprobe->consumer_rwsem); uc->next = uprobe->consumers; uprobe->consumers = uc; + uprobe_consumer_account(uprobe, uc); up_write(&uprobe->consumer_rwsem); }
@@ -773,6 +794,7 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) if (*con == uc) { *con = uc->next; ret = true; + uprobe_consumer_unaccount(uprobe, uc); break; } }
@@ -1744,6 +1766,23 @@ static struct uprobe_task *get_utask(void) return current->utask; } +static size_t ri_size(int session_cnt) +{ + struct return_instance *ri __maybe_unused; + + return sizeof(*ri) + session_cnt * sizeof(ri->sc[0]); +} + +static struct return_instance *alloc_return_instance(int session_cnt) +{ + struct return_instance *ri; + + ri = kzalloc(ri_size(session_cnt), GFP_KERNEL); + if (ri) + ri->session_cnt = session_cnt; + return ri; +} + static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) { struct uprobe_task *n_utask;
@@ -1756,11 +1795,11 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) p = &n_utask->return_instances; for (o = o_utask->return_instances; o; o = o->next) { - n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); + n = alloc_return_instance(o->session_cnt); if (!n) return -ENOMEM; - *n = *o; + memcpy(n, o, ri_size(o->session_cnt)); get_uprobe(n->uprobe); n->next = NULL;
@@ -1853,35 +1892,38 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained, utask->return_instances = ri; } -static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) +static struct return_instance * +prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs, + struct return_instance *ri, int session_cnt) { - struct return_instance *ri; struct uprobe_task *utask; unsigned long orig_ret_vaddr, trampoline_vaddr; bool chained; if (!get_xol_area()) - return; + return ri; utask = get_utask(); if (!utask) - return; + return ri; if (utask->depth >= MAX_URETPROBE_DEPTH) { printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to" " nestedness limit pid/tgid=%d/%d\n", current->pid, current->tgid); - return; + return ri; } - ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL); - if (!ri) - return; + if (!ri) { + ri = alloc_return_instance(session_cnt); + if (!ri) + return NULL; + } trampoline_vaddr = get_trampoline_vaddr(); orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); if (orig_ret_vaddr == -1) - goto fail; + return ri; /* drop the entries invalidated by longjmp() */ chained = (orig_ret_vaddr == trampoline_vaddr);
@@ -1899,7 +1941,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) * attack from user-space. */ uprobe_warn(current, "handle tail call"); - goto fail; + return ri; } orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; }
@@ -1914,9 +1956,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) ri->next = utask->return_instances; utask->return_instances = ri; - return; - fail: - kfree(ri); + return NULL; } /* Prepare to single-step probed instruction out of line. */
@@ -2069,44 +2109,90 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) { struct uprobe_consumer *uc; int remove = UPROBE_HANDLER_REMOVE; + struct session_consumer *sc = NULL; + struct return_instance *ri = NULL; bool need_prep = false; /* prepare return uprobe, when needed */ down_read(&uprobe->register_rwsem); - for (uc = uprobe->consumers; uc; uc = uc->next) { + if (uprobe->session_cnt) { + ri = alloc_return_instance(uprobe->session_cnt); + if (!ri) + goto out; + } + for (uc = uprobe->consumers, sc = &ri->sc[0]; uc; uc = uc->next) { int rc = 0; if (uc->handler) { - rc = uc->handler(uc, regs); + rc = uc->handler(uc, regs, uc->is_session ? &sc->cookie : NULL); WARN(rc & ~UPROBE_HANDLER_MASK, "bad rc=0x%x from %ps()\n", rc, uc->handler); } - if (uc->ret_handler) + if (uc->is_session) { + need_prep |= !rc; + remove = 0; + sc->id = uc->id; + sc->rc = rc; + sc++; + } else if (uc->ret_handler) { need_prep = true; + } remove &= rc; } if (need_prep && !remove) - prepare_uretprobe(uprobe, regs); /* put bp at return */ + ri = prepare_uretprobe(uprobe, regs, ri, uprobe->session_cnt); /* put bp at return */ + kfree(ri); if (remove && uprobe->consumers) { WARN_ON(!uprobe_is_active(uprobe)); unapply_uprobe(uprobe, current->mm); } + out: up_read(&uprobe->register_rwsem); } +static struct session_consumer * +consumer_find(struct session_consumer *sc, struct uprobe_consumer *uc) +{ + for (; sc && sc->id; sc++) { + if (sc->id == uc->id) + return sc; + } + return NULL; +} + static void handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs) { struct uprobe *uprobe = ri->uprobe; + struct session_consumer *sc, *tmp; struct uprobe_consumer *uc; down_read(&uprobe->register_rwsem); - for (uc = uprobe->consumers; uc; uc = uc->next) { - if (uc->ret_handler) - uc->ret_handler(uc, ri->func, regs); + for (uc = uprobe->consumers, sc = &ri->sc[0]; uc; uc = uc->next) { + long *cookie = NULL; + int rc = 0; + + if (uc->is_session) { + /* + * session_consumers are in order with uprobe_consumers, + * we just need to reflect that any uprobe_consumer could + * be removed or added + */ + tmp = consumer_find(sc, uc); + if (tmp) { + rc = tmp->rc; + cookie = &tmp->cookie; + sc = tmp + 1; + } else { + rc = 1; + } + } + + if (!rc && uc->ret_handler) + uc->ret_handler(uc, ri->func, regs, cookie); } up_read(&uprobe->register_rwsem); }
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f5154c051d2c..ae7c35379e4a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c@@ -3329,7 +3329,8 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx } static int -uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) +uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs, + unsigned long *data) { struct bpf_uprobe *uprobe;
@@ -3338,7 +3339,8 @@ uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) } static int -uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) +uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs, + unsigned long *data) { struct bpf_uprobe *uprobe;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 8541fa1494ae..f7b17f08344c 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c@@ -88,9 +88,11 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev) static int register_uprobe_event(struct trace_uprobe *tu); static int unregister_uprobe_event(struct trace_uprobe *tu); -static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs, + unsigned long *data); static int uretprobe_dispatcher(struct uprobe_consumer *con, - unsigned long func, struct pt_regs *regs); + unsigned long func, struct pt_regs *regs, + unsigned long *data); #ifdef CONFIG_STACK_GROWSUP static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
@@ -1500,7 +1502,8 @@ trace_uprobe_register(struct trace_event_call *event, enum trace_reg type, } } -static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs, + unsigned long *data) { struct trace_uprobe *tu; struct uprobe_dispatch_data udd;
@@ -1530,7 +1533,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) } static int uretprobe_dispatcher(struct uprobe_consumer *con, - unsigned long func, struct pt_regs *regs) + unsigned long func, struct pt_regs *regs, + unsigned long *data) { struct trace_uprobe *tu; struct uprobe_dispatch_data udd;