Re: [PATCH v10 06/14] unwind_user/deferred: Add deferred unwinding interface
From: Peter Zijlstra <peterz@infradead.org>
Date: 2025-06-18 18:46:31
Also in:
bpf, lkml
+struct unwind_work;
+
+typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stacktrace *trace, u64 timestamp);
+
+struct unwind_work {
+ struct list_head list;Does this really need to be a list? Single linked list like callback_head not good enough?
quoted hunk ↗ jump to hunk
+ unwind_callback_t func; +}; + #ifdef CONFIG_UNWIND_USER void unwind_task_init(struct task_struct *task);@@ -12,10 +22,15 @@ void unwind_task_free(struct task_struct *task); int unwind_deferred_trace(struct unwind_stacktrace *trace); +int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func); +int unwind_deferred_request(struct unwind_work *work, u64 *timestamp); +void unwind_deferred_cancel(struct unwind_work *work); + static __always_inline void unwind_exit_to_user_mode(void) { if (unlikely(current->unwind_info.cache)) current->unwind_info.cache->nr_entries = 0; + current->unwind_info.timestamp = 0;
Surely clearing that timestamp is only relevant when there is a cache around? Better to not add this unconditional write to the exit path.
quoted hunk ↗ jump to hunk
} #else /* !CONFIG_UNWIND_USER */@@ -24,6 +39,9 @@ static inline void unwind_task_init(struct task_struct *task) {} static inline void unwind_task_free(struct task_struct *task) {} static inline int unwind_deferred_trace(struct unwind_stacktrace *trace) { return -ENOSYS; } +static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) { return -ENOSYS; } +static inline int unwind_deferred_request(struct unwind_work *work, u64 *timestamp) { return -ENOSYS; } +static inline void unwind_deferred_cancel(struct unwind_work *work) {} static inline void unwind_exit_to_user_mode(void) {}diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h index db5b54b18828..5df264cf81ad 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h@@ -9,6 +9,9 @@ struct unwind_cache { struct unwind_task_info { struct unwind_cache *cache; + struct callback_head work; + u64 timestamp; + int pending; }; #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index e3913781c8c6..b76c704ddc6d 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c@@ -2,13 +2,35 @@ /* * Deferred user space unwinding */ +#include <linux/sched/task_stack.h> +#include <linux/unwind_deferred.h> +#include <linux/sched/clock.h> +#include <linux/task_work.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/slab.h> -#include <linux/unwind_deferred.h> +#include <linux/mm.h> #define UNWIND_MAX_ENTRIES 512 +/* Guards adding to and reading the list of callbacks */ +static DEFINE_MUTEX(callback_mutex); +static LIST_HEAD(callbacks);
Global state.. smells like failure.
quoted hunk ↗ jump to hunk
+/* + * Read the task context timestamp, if this is the first caller then + * it will set the timestamp. + */ +static u64 get_timestamp(struct unwind_task_info *info) +{ + lockdep_assert_irqs_disabled(); + + if (!info->timestamp) + info->timestamp = local_clock(); + + return info->timestamp; +} + /** * unwind_deferred_trace - Produce a user stacktrace in faultable context * @trace: The descriptor that will store the user stacktrace@@ -59,11 +81,117 @@ int unwind_deferred_trace(struct unwind_stacktrace *trace) return 0; } +static void unwind_deferred_task_work(struct callback_head *head) +{ + struct unwind_task_info *info = container_of(head, struct unwind_task_info, work); + struct unwind_stacktrace trace; + struct unwind_work *work; + u64 timestamp; + + if (WARN_ON_ONCE(!info->pending)) + return; + + /* Allow work to come in again */ + WRITE_ONCE(info->pending, 0); + + /* + * From here on out, the callback must always be called, even if it's + * just an empty trace. + */ + trace.nr = 0; + trace.entries = NULL; + + unwind_deferred_trace(&trace); + + timestamp = info->timestamp; + + guard(mutex)(&callback_mutex); + list_for_each_entry(work, &callbacks, list) { + work->func(work, &trace, timestamp); + }
So now you're globally serializing all return-to-user instances. How is that not a problem?
+}