Thread (79 messages) 79 messages, 6 authors, 2025-06-24

Re: [PATCH v10 06/14] unwind_user/deferred: Add deferred unwinding interface

From: Peter Zijlstra <peterz@infradead.org>
Date: 2025-06-18 18:46:31
Also in: bpf, lkml

+struct unwind_work;
+
+typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stacktrace *trace, u64 timestamp);
+
+struct unwind_work {
+	struct list_head		list;
Does this really need to be a list? Single linked list like
callback_head not good enough?
quoted hunk ↗ jump to hunk
+	unwind_callback_t		func;
+};
+
 #ifdef CONFIG_UNWIND_USER
 
 void unwind_task_init(struct task_struct *task);
@@ -12,10 +22,15 @@ void unwind_task_free(struct task_struct *task);
 
 int unwind_deferred_trace(struct unwind_stacktrace *trace);
 
+int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func);
+int unwind_deferred_request(struct unwind_work *work, u64 *timestamp);
+void unwind_deferred_cancel(struct unwind_work *work);
+
 static __always_inline void unwind_exit_to_user_mode(void)
 {
 	if (unlikely(current->unwind_info.cache))
 		current->unwind_info.cache->nr_entries = 0;
+	current->unwind_info.timestamp = 0;
Surely clearing that timestamp is only relevant when there is a cache
around? Better to not add this unconditional write to the exit path.
quoted hunk ↗ jump to hunk
 }
 
 #else /* !CONFIG_UNWIND_USER */
@@ -24,6 +39,9 @@ static inline void unwind_task_init(struct task_struct *task) {}
 static inline void unwind_task_free(struct task_struct *task) {}
 
 static inline int unwind_deferred_trace(struct unwind_stacktrace *trace) { return -ENOSYS; }
+static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) { return -ENOSYS; }
+static inline int unwind_deferred_request(struct unwind_work *work, u64 *timestamp) { return -ENOSYS; }
+static inline void unwind_deferred_cancel(struct unwind_work *work) {}
 
 static inline void unwind_exit_to_user_mode(void) {}
 
diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
index db5b54b18828..5df264cf81ad 100644
--- a/include/linux/unwind_deferred_types.h
+++ b/include/linux/unwind_deferred_types.h
@@ -9,6 +9,9 @@ struct unwind_cache {
 
 struct unwind_task_info {
 	struct unwind_cache	*cache;
+	struct callback_head	work;
+	u64			timestamp;
+	int			pending;
 };
 
 #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */
diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
index e3913781c8c6..b76c704ddc6d 100644
--- a/kernel/unwind/deferred.c
+++ b/kernel/unwind/deferred.c
@@ -2,13 +2,35 @@
 /*
  * Deferred user space unwinding
  */
+#include <linux/sched/task_stack.h>
+#include <linux/unwind_deferred.h>
+#include <linux/sched/clock.h>
+#include <linux/task_work.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/unwind_deferred.h>
+#include <linux/mm.h>
 
 #define UNWIND_MAX_ENTRIES 512
 
+/* Guards adding to and reading the list of callbacks */
+static DEFINE_MUTEX(callback_mutex);
+static LIST_HEAD(callbacks);
Global state.. smells like failure.
quoted hunk ↗ jump to hunk
+/*
+ * Read the task context timestamp, if this is the first caller then
+ * it will set the timestamp.
+ */
+static u64 get_timestamp(struct unwind_task_info *info)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (!info->timestamp)
+		info->timestamp = local_clock();
+
+	return info->timestamp;
+}
+
 /**
  * unwind_deferred_trace - Produce a user stacktrace in faultable context
  * @trace: The descriptor that will store the user stacktrace
@@ -59,11 +81,117 @@ int unwind_deferred_trace(struct unwind_stacktrace *trace)
 	return 0;
 }
 
+static void unwind_deferred_task_work(struct callback_head *head)
+{
+	struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
+	struct unwind_stacktrace trace;
+	struct unwind_work *work;
+	u64 timestamp;
+
+	if (WARN_ON_ONCE(!info->pending))
+		return;
+
+	/* Allow work to come in again */
+	WRITE_ONCE(info->pending, 0);
+
+	/*
+	 * From here on out, the callback must always be called, even if it's
+	 * just an empty trace.
+	 */
+	trace.nr = 0;
+	trace.entries = NULL;
+
+	unwind_deferred_trace(&trace);
+
+	timestamp = info->timestamp;
+
+	guard(mutex)(&callback_mutex);
+	list_for_each_entry(work, &callbacks, list) {
+		work->func(work, &trace, timestamp);
+	}
So now you're globally serializing all return-to-user instances. How is
that not a problem?
+}
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help