--- v2
+++ v4
@@ -1,289 +1,118 @@
-For live patching and possibly other use cases, a stack trace is only
-useful if it can be assured that it's completely reliable. Add a new
-save_stack_trace_tsk_reliable() function to achieve that.
+From: Miroslav Benes <mbenes@suse.cz>
-Scenarios which indicate that a stack trace may be unreliable:
+Update a task's patch state when returning from a system call or user
+space interrupt, or after handling a signal.
-- running tasks
-- interrupt stacks
-- preemption
-- corrupted stack data
-- the stack grows the wrong way
-- the stack walk doesn't reach the bottom
-- the user didn't provide a large enough entries array
+This greatly increases the chances of a patch operation succeeding. If
+a task is I/O bound, it can be patched when returning from a system
+call. If a task is CPU bound, it can be patched when returning from an
+interrupt. If a task is sleeping on a to-be-patched function, the user
+can send SIGSTOP and SIGCONT to force it to switch.
-Also add a config option so arch-independent code can determine at build
-time whether the function is implemented.
+Since there are two ways the syscall can be restarted on return from
+signal handling, it is important to clear the TIF_PATCH_PENDING flag
+before do_signal() is called. Otherwise we could miss the migration when
+the SIGSTOP/SIGCONT procedure or a fake signal is used to migrate tasks
+that are blocking the patching. If we place our hook at the sysc_work
+label in entry.S, before TIF_SIGPENDING is evaluated, we kill two birds
+with one stone. The task is correctly migrated in all return paths from
+a syscall.
+Signed-off-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
- arch/Kconfig | 6 ++++
- arch/x86/Kconfig | 1 +
- arch/x86/kernel/dumpstack.c | 77 ++++++++++++++++++++++++++++++++++++++++++++
- arch/x86/kernel/stacktrace.c | 24 ++++++++++++++
- include/linux/kernel.h | 1 +
- include/linux/stacktrace.h | 20 +++++++++---
- kernel/extable.c | 2 +-
- kernel/stacktrace.c | 4 +--
- lib/Kconfig.debug | 6 ++++
- 9 files changed, 134 insertions(+), 7 deletions(-)
+ arch/s390/include/asm/thread_info.h | 2 ++
+ arch/s390/kernel/entry.S | 31 ++++++++++++++++++++++++++++++-
+ 2 files changed, 32 insertions(+), 1 deletion(-)
-diff --git a/arch/Kconfig b/arch/Kconfig
-index 8f84fd2..ec4d480 100644
---- a/arch/Kconfig
-+++ b/arch/Kconfig
-@@ -598,6 +598,12 @@ config HAVE_STACK_VALIDATION
- Architecture supports the 'objtool check' host tool command, which
- performs compile-time stack metadata validation.
+diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
+index 4977668..646845e 100644
+--- a/arch/s390/include/asm/thread_info.h
++++ b/arch/s390/include/asm/thread_info.h
+@@ -56,6 +56,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+ #define TIF_SIGPENDING 1 /* signal pending */
+ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
+ #define TIF_UPROBE 3 /* breakpointed or single-stepping */
++#define TIF_PATCH_PENDING 4 /* pending live patching update */
-+config HAVE_RELIABLE_STACKTRACE
-+ bool
-+ help
-+ Architecture has a save_stack_trace_tsk_reliable() function which
-+ only returns a stack trace if it can guarantee the trace is reliable.
-+
- #
- # ABI hall of shame
- #
-diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index 0b128b4..78c4e00 100644
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -140,6 +140,7 @@ config X86
- select HAVE_PERF_REGS
- select HAVE_PERF_USER_STACK_DUMP
- select HAVE_REGS_AND_STACK_ACCESS_API
-+ select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER
- select HAVE_SYSCALL_TRACEPOINTS
- select HAVE_UID16 if X86_32 || IA32_EMULATION
- select HAVE_UNSTABLE_SCHED_CLOCK
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index 13d240c..70d0013 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -145,6 +145,83 @@ int print_context_stack_bp(struct thread_info *tinfo,
- }
- EXPORT_SYMBOL_GPL(print_context_stack_bp);
+ #define TIF_31BIT 16 /* 32bit process */
+ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */
+@@ -74,6 +75,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+ #define _TIF_SIGPENDING _BITUL(TIF_SIGPENDING)
+ #define _TIF_NEED_RESCHED _BITUL(TIF_NEED_RESCHED)
+ #define _TIF_UPROBE _BITUL(TIF_UPROBE)
++#define _TIF_PATCH_PENDING _BITUL(TIF_PATCH_PENDING)
-+#ifdef CONFIG_RELIABLE_STACKTRACE
-+/*
-+ * Only succeeds if the stack trace is deemed reliable. This relies on the
-+ * fact that frame pointers are reliable thanks to CONFIG_STACK_VALIDATION.
-+ *
-+ * The caller must ensure that the task is either sleeping or is the current
-+ * task.
-+ */
-+int print_context_stack_reliable(struct thread_info *tinfo,
-+ unsigned long *stack, unsigned long *bp,
-+ const struct stacktrace_ops *ops,
-+ void *data, unsigned long *end, int *graph)
-+{
-+ struct stack_frame *frame = (struct stack_frame *)*bp;
-+ struct stack_frame *last_frame = NULL;
-+ unsigned long *ret_addr = &frame->return_address;
-+
-+ /*
-+ * If the kernel was preempted by an IRQ, we can't trust the stack
-+ * because the preempted function might not have gotten the chance to
-+ * save the frame pointer on the stack before it was interrupted.
-+ */
-+ if (tinfo->task->flags & PF_PREEMPT_IRQ)
-+ return -EINVAL;
-+
-+ /*
-+ * A freshly forked task has an empty stack trace. We can consider
-+ * that to be reliable.
-+ */
-+ if (test_ti_thread_flag(tinfo, TIF_FORK))
-+ return 0;
-+
-+ while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) {
-+ unsigned long addr = *ret_addr;
-+
-+ /*
-+ * Make sure the stack only grows down.
-+ */
-+ if (frame <= last_frame)
-+ return -EINVAL;
-+
-+ /*
-+ * Make sure the frame refers to a valid kernel function.
-+ */
-+ if (!core_kernel_text(addr) && !init_kernel_text(addr) &&
-+ !is_module_text_address(addr))
-+ return -EINVAL;
-+
-+ /*
-+ * Save the kernel text address and make sure the entries array
-+ * isn't full.
-+ */
-+ if (ops->address(data, addr, 1))
-+ return -EINVAL;
-+
-+ /*
-+ * If the function graph tracer is in effect, save the real
-+ * function address.
-+ */
-+ print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
-+
-+ last_frame = frame;
-+ frame = frame->next_frame;
-+ ret_addr = &frame->return_address;
-+ }
-+
-+ /*
-+ * Make sure we reached the bottom of the stack.
-+ */
-+ if (last_frame + 1 != (void *)task_pt_regs(tinfo->task))
-+ return -EINVAL;
-+
-+ *bp = (unsigned long)frame;
-+ return 0;
-+}
-+#endif /* CONFIG_RELIABLE_STACKTRACE */
-+
- static int print_trace_stack(void *data, char *name)
- {
- printk("%s <%s> ", (char *)data, name);
-diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
-index 9ee98ee..10882e4 100644
---- a/arch/x86/kernel/stacktrace.c
-+++ b/arch/x86/kernel/stacktrace.c
-@@ -148,3 +148,27 @@ void save_stack_trace_user(struct stack_trace *trace)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
- }
+ #define _TIF_31BIT _BITUL(TIF_31BIT)
+ #define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
+diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
+index 97298c5..2084848 100644
+--- a/arch/s390/kernel/entry.S
++++ b/arch/s390/kernel/entry.S
+@@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT
+ STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
-+#ifdef CONFIG_RELIABLE_STACKTRACE
-+
-+static int save_stack_stack_reliable(void *data, char *name)
-+{
-+ return -EINVAL;
-+}
-+
-+static const struct stacktrace_ops save_stack_ops_reliable = {
-+ .stack = save_stack_stack_reliable,
-+ .address = save_stack_address,
-+ .walk_stack = print_context_stack_reliable,
-+};
-+
-+/*
-+ * Returns 0 if the stack trace is deemed reliable. The caller must ensure
-+ * that the task is either sleeping or is the current task.
-+ */
-+int save_stack_trace_tsk_reliable(struct task_struct *tsk,
-+ struct stack_trace *trace)
-+{
-+ return dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_reliable, trace);
-+}
-+
-+#endif /* CONFIG_RELIABLE_STACKTRACE */
-diff --git a/include/linux/kernel.h b/include/linux/kernel.h
-index cc73982..6be1e82 100644
---- a/include/linux/kernel.h
-+++ b/include/linux/kernel.h
-@@ -429,6 +429,7 @@ extern char *get_options(const char *str, int nints, int *ints);
- extern unsigned long long memparse(const char *ptr, char **retptr);
- extern bool parse_option_str(const char *str, const char *option);
-
-+extern int init_kernel_text(unsigned long addr);
- extern int core_kernel_text(unsigned long addr);
- extern int core_kernel_data(unsigned long addr);
- extern int __kernel_text_address(unsigned long addr);
-diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
-index 0a34489..527e4cc 100644
---- a/include/linux/stacktrace.h
-+++ b/include/linux/stacktrace.h
-@@ -2,17 +2,18 @@
- #define __LINUX_STACKTRACE_H
-
- #include <linux/types.h>
-+#include <linux/errno.h>
-
- struct task_struct;
- struct pt_regs;
-
--#ifdef CONFIG_STACKTRACE
- struct stack_trace {
- unsigned int nr_entries, max_entries;
- unsigned long *entries;
- int skip; /* input argument: How many entries to skip */
- };
-
-+#ifdef CONFIG_STACKTRACE
- extern void save_stack_trace(struct stack_trace *trace);
- extern void save_stack_trace_regs(struct pt_regs *regs,
- struct stack_trace *trace);
-@@ -29,12 +30,23 @@ extern void save_stack_trace_user(struct stack_trace *trace);
- # define save_stack_trace_user(trace) do { } while (0)
+ _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
+- _TIF_UPROBE)
++ _TIF_UPROBE | _TIF_PATCH_PENDING)
+ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+ _TIF_SYSCALL_TRACEPOINT)
+ _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE | _CIF_FPU)
+@@ -352,6 +352,11 @@ ENTRY(system_call)
+ #endif
+ TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP
+ jo .Lsysc_singlestep
++#ifdef CONFIG_LIVEPATCH
++ TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING
++ jo .Lsysc_patch_pending # handle live patching just before
++ # signals and possible syscall restart
++#endif
+ TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
+ jo .Lsysc_sigpending
+ TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
+@@ -426,6 +431,16 @@ ENTRY(system_call)
#endif
--#else
-+#else /* !CONFIG_STACKTRACE */
- # define save_stack_trace(trace) do { } while (0)
- # define save_stack_trace_tsk(tsk, trace) do { } while (0)
- # define save_stack_trace_user(trace) do { } while (0)
- # define print_stack_trace(trace, spaces) do { } while (0)
- # define snprint_stack_trace(buf, size, trace, spaces) do { } while (0)
--#endif
-+#endif /* CONFIG_STACKTRACE */
+ #
++# _TIF_PATCH_PENDING is set, call klp_update_patch_state
++#
++#ifdef CONFIG_LIVEPATCH
++.Lsysc_patch_pending:
++ lg %r2,__LC_CURRENT # pass pointer to task struct
++ larl %r14,.Lsysc_return
++ jg klp_update_patch_state
++#endif
++
++#
+ # _PIF_PER_TRAP is set, call do_per_trap
+ #
+ .Lsysc_singlestep:
+@@ -674,6 +689,10 @@ ENTRY(io_int_handler)
+ jo .Lio_mcck_pending
+ TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
+ jo .Lio_reschedule
++#ifdef CONFIG_LIVEPATCH
++ TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING
++ jo .Lio_patch_pending
++#endif
+ TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
+ jo .Lio_sigpending
+ TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
+@@ -720,6 +739,16 @@ ENTRY(io_int_handler)
+ j .Lio_return
--#endif
-+#ifdef CONFIG_RELIABLE_STACKTRACE
-+extern int save_stack_trace_tsk_reliable(struct task_struct *tsk,
-+ struct stack_trace *trace);
-+#else
-+static inline int save_stack_trace_tsk_reliable(struct task_struct *tsk,
-+ struct stack_trace *trace)
-+{
-+ return -ENOSYS;
-+}
-+#endif /* CONFIG_RELIABLE_STACKTRACE */
+ #
++# _TIF_PATCH_PENDING is set, call klp_update_patch_state
++#
++#ifdef CONFIG_LIVEPATCH
++.Lio_patch_pending:
++ lg %r2,__LC_CURRENT # pass pointer to task struct
++ larl %r14,.Lio_return
++ jg klp_update_patch_state
++#endif
+
-+#endif /* __LINUX_STACKTRACE_H */
-diff --git a/kernel/extable.c b/kernel/extable.c
-index e820cce..c085844 100644
---- a/kernel/extable.c
-+++ b/kernel/extable.c
-@@ -58,7 +58,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
- return e;
- }
-
--static inline int init_kernel_text(unsigned long addr)
-+int init_kernel_text(unsigned long addr)
- {
- if (addr >= (unsigned long)_sinittext &&
- addr < (unsigned long)_einittext)
-diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
-index b6e4c16..f35bc5d 100644
---- a/kernel/stacktrace.c
-+++ b/kernel/stacktrace.c
-@@ -58,8 +58,8 @@ int snprint_stack_trace(char *buf, size_t size,
- EXPORT_SYMBOL_GPL(snprint_stack_trace);
-
- /*
-- * Architectures that do not implement save_stack_trace_tsk or
-- * save_stack_trace_regs get this weak alias and a once-per-bootup warning
-+ * Architectures that do not implement save_stack_trace_*()
-+ * get this weak alias and a once-per-bootup warning
- * (whenever this facility is utilized - for example by procfs):
- */
- __weak void
-diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
-index 5d57177..189a2d7 100644
---- a/lib/Kconfig.debug
-+++ b/lib/Kconfig.debug
-@@ -1164,6 +1164,12 @@ config STACKTRACE
- It is also used by various kernel debugging features that require
- stack trace generation.
-
-+config RELIABLE_STACKTRACE
-+ def_bool y
-+ depends on HAVE_RELIABLE_STACKTRACE
-+ depends on STACKTRACE
-+ depends on STACK_VALIDATION
-+
- config DEBUG_KOBJECT
- bool "kobject debugging"
- depends on DEBUG_KERNEL
++#
+ # _TIF_SIGPENDING or is set, call do_signal
+ #
+ .Lio_sigpending:
--
-2.4.11
+2.7.4