Inter-revision diff: patch 7

Comparing v2 (message) to v4 (message)

--- v2
+++ v4
@@ -1,289 +1,118 @@
-For live patching and possibly other use cases, a stack trace is only
-useful if it can be assured that it's completely reliable.  Add a new
-save_stack_trace_tsk_reliable() function to achieve that.
+From: Miroslav Benes <mbenes@suse.cz>
 
-Scenarios which indicate that a stack trace may be unreliable:
+Update a task's patch state when returning from a system call or user
+space interrupt, or after handling a signal.
 
-- running tasks
-- interrupt stacks
-- preemption
-- corrupted stack data
-- the stack grows the wrong way
-- the stack walk doesn't reach the bottom
-- the user didn't provide a large enough entries array
+This greatly increases the chances of a patch operation succeeding.  If
+a task is I/O bound, it can be patched when returning from a system
+call.  If a task is CPU bound, it can be patched when returning from an
+interrupt.  If a task is sleeping on a to-be-patched function, the user
+can send SIGSTOP and SIGCONT to force it to switch.
 
-Also add a config option so arch-independent code can determine at build
-time whether the function is implemented.
+Since there are two ways the syscall can be restarted on return from
+signal handling, it is important to clear the flag before do_signal()
+is called. Otherwise we could miss the migration if we used the
+SIGSTOP/SIGCONT procedure or a fake signal to migrate tasks that block
+patching. If we place our hook at the sysc_work label in entry.S, before
+TIF_SIGPENDING is evaluated, we kill two birds with one stone: the task
+is correctly migrated in all return paths from a syscall.
 
+Signed-off-by: Miroslav Benes <mbenes@suse.cz>
 Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
 ---
- arch/Kconfig                 |  6 ++++
- arch/x86/Kconfig             |  1 +
- arch/x86/kernel/dumpstack.c  | 77 ++++++++++++++++++++++++++++++++++++++++++++
- arch/x86/kernel/stacktrace.c | 24 ++++++++++++++
- include/linux/kernel.h       |  1 +
- include/linux/stacktrace.h   | 20 +++++++++---
- kernel/extable.c             |  2 +-
- kernel/stacktrace.c          |  4 +--
- lib/Kconfig.debug            |  6 ++++
- 9 files changed, 134 insertions(+), 7 deletions(-)
+ arch/s390/include/asm/thread_info.h |  2 ++
+ arch/s390/kernel/entry.S            | 31 ++++++++++++++++++++++++++++++-
+ 2 files changed, 32 insertions(+), 1 deletion(-)
 
-diff --git a/arch/Kconfig b/arch/Kconfig
-index 8f84fd2..ec4d480 100644
---- a/arch/Kconfig
-+++ b/arch/Kconfig
-@@ -598,6 +598,12 @@ config HAVE_STACK_VALIDATION
- 	  Architecture supports the 'objtool check' host tool command, which
- 	  performs compile-time stack metadata validation.
+diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
+index 4977668..646845e 100644
+--- a/arch/s390/include/asm/thread_info.h
++++ b/arch/s390/include/asm/thread_info.h
+@@ -56,6 +56,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+ #define TIF_SIGPENDING		1	/* signal pending */
+ #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+ #define TIF_UPROBE		3	/* breakpointed or single-stepping */
++#define TIF_PATCH_PENDING	4	/* pending live patching update */
  
-+config HAVE_RELIABLE_STACKTRACE
-+	bool
-+	help
-+	  Architecture has a save_stack_trace_tsk_reliable() function which
-+	  only returns a stack trace if it can guarantee the trace is reliable.
-+
- #
- # ABI hall of shame
- #
-diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index 0b128b4..78c4e00 100644
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -140,6 +140,7 @@ config X86
- 	select HAVE_PERF_REGS
- 	select HAVE_PERF_USER_STACK_DUMP
- 	select HAVE_REGS_AND_STACK_ACCESS_API
-+	select HAVE_RELIABLE_STACKTRACE		if X86_64 && FRAME_POINTER
- 	select HAVE_SYSCALL_TRACEPOINTS
- 	select HAVE_UID16			if X86_32 || IA32_EMULATION
- 	select HAVE_UNSTABLE_SCHED_CLOCK
-diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index 13d240c..70d0013 100644
---- a/arch/x86/kernel/dumpstack.c
-+++ b/arch/x86/kernel/dumpstack.c
-@@ -145,6 +145,83 @@ int print_context_stack_bp(struct thread_info *tinfo,
- }
- EXPORT_SYMBOL_GPL(print_context_stack_bp);
+ #define TIF_31BIT		16	/* 32bit process */
+ #define TIF_MEMDIE		17	/* is terminating due to OOM killer */
+@@ -74,6 +75,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+ #define _TIF_SIGPENDING		_BITUL(TIF_SIGPENDING)
+ #define _TIF_NEED_RESCHED	_BITUL(TIF_NEED_RESCHED)
+ #define _TIF_UPROBE		_BITUL(TIF_UPROBE)
++#define _TIF_PATCH_PENDING	_BITUL(TIF_PATCH_PENDING)
  
-+#ifdef CONFIG_RELIABLE_STACKTRACE
-+/*
-+ * Only succeeds if the stack trace is deemed reliable.  This relies on the
-+ * fact that frame pointers are reliable thanks to CONFIG_STACK_VALIDATION.
-+ *
-+ * The caller must ensure that the task is either sleeping or is the current
-+ * task.
-+ */
-+int print_context_stack_reliable(struct thread_info *tinfo,
-+				 unsigned long *stack, unsigned long *bp,
-+				 const struct stacktrace_ops *ops,
-+				 void *data, unsigned long *end, int *graph)
-+{
-+	struct stack_frame *frame = (struct stack_frame *)*bp;
-+	struct stack_frame *last_frame = NULL;
-+	unsigned long *ret_addr = &frame->return_address;
-+
-+	/*
-+	 * If the kernel was preempted by an IRQ, we can't trust the stack
-+	 * because the preempted function might not have gotten the chance to
-+	 * save the frame pointer on the stack before it was interrupted.
-+	 */
-+	if (tinfo->task->flags & PF_PREEMPT_IRQ)
-+		return -EINVAL;
-+
-+	/*
-+	 * A freshly forked task has an empty stack trace.  We can consider
-+	 * that to be reliable.
-+	 */
-+	if (test_ti_thread_flag(tinfo, TIF_FORK))
-+		return 0;
-+
-+	while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) {
-+		unsigned long addr = *ret_addr;
-+
-+		/*
-+		 * Make sure the stack only grows down.
-+		 */
-+		if (frame <= last_frame)
-+			return -EINVAL;
-+
-+		/*
-+		 * Make sure the frame refers to a valid kernel function.
-+		 */
-+		if (!core_kernel_text(addr) && !init_kernel_text(addr) &&
-+		    !is_module_text_address(addr))
-+			return -EINVAL;
-+
-+		/*
-+		 * Save the kernel text address and make sure the entries array
-+		 * isn't full.
-+		 */
-+		if (ops->address(data, addr, 1))
-+			return -EINVAL;
-+
-+		/*
-+		 * If the function graph tracer is in effect, save the real
-+		 * function address.
-+		 */
-+		print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
-+
-+		last_frame = frame;
-+		frame = frame->next_frame;
-+		ret_addr = &frame->return_address;
-+	}
-+
-+	/*
-+	 * Make sure we reached the bottom of the stack.
-+	 */
-+	if (last_frame + 1 != (void *)task_pt_regs(tinfo->task))
-+		return -EINVAL;
-+
-+	*bp = (unsigned long)frame;
-+	return 0;
-+}
-+#endif /* CONFIG_RELIABLE_STACKTRACE */
-+
- static int print_trace_stack(void *data, char *name)
- {
- 	printk("%s <%s> ", (char *)data, name);
-diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
-index 9ee98ee..10882e4 100644
---- a/arch/x86/kernel/stacktrace.c
-+++ b/arch/x86/kernel/stacktrace.c
-@@ -148,3 +148,27 @@ void save_stack_trace_user(struct stack_trace *trace)
- 		trace->entries[trace->nr_entries++] = ULONG_MAX;
- }
+ #define _TIF_31BIT		_BITUL(TIF_31BIT)
+ #define _TIF_SINGLE_STEP	_BITUL(TIF_SINGLE_STEP)
+diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
+index 97298c5..2084848 100644
+--- a/arch/s390/kernel/entry.S
++++ b/arch/s390/kernel/entry.S
+@@ -47,7 +47,7 @@ STACK_SIZE  = 1 << STACK_SHIFT
+ STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
  
-+#ifdef CONFIG_RELIABLE_STACKTRACE
-+
-+static int save_stack_stack_reliable(void *data, char *name)
-+{
-+	return -EINVAL;
-+}
-+
-+static const struct stacktrace_ops save_stack_ops_reliable = {
-+	.stack		= save_stack_stack_reliable,
-+	.address	= save_stack_address,
-+	.walk_stack	= print_context_stack_reliable,
-+};
-+
-+/*
-+ * Returns 0 if the stack trace is deemed reliable.  The caller must ensure
-+ * that the task is either sleeping or is the current task.
-+ */
-+int save_stack_trace_tsk_reliable(struct task_struct *tsk,
-+				  struct stack_trace *trace)
-+{
-+	return dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_reliable, trace);
-+}
-+
-+#endif /* CONFIG_RELIABLE_STACKTRACE */
-diff --git a/include/linux/kernel.h b/include/linux/kernel.h
-index cc73982..6be1e82 100644
---- a/include/linux/kernel.h
-+++ b/include/linux/kernel.h
-@@ -429,6 +429,7 @@ extern char *get_options(const char *str, int nints, int *ints);
- extern unsigned long long memparse(const char *ptr, char **retptr);
- extern bool parse_option_str(const char *str, const char *option);
- 
-+extern int init_kernel_text(unsigned long addr);
- extern int core_kernel_text(unsigned long addr);
- extern int core_kernel_data(unsigned long addr);
- extern int __kernel_text_address(unsigned long addr);
-diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
-index 0a34489..527e4cc 100644
---- a/include/linux/stacktrace.h
-+++ b/include/linux/stacktrace.h
-@@ -2,17 +2,18 @@
- #define __LINUX_STACKTRACE_H
- 
- #include <linux/types.h>
-+#include <linux/errno.h>
- 
- struct task_struct;
- struct pt_regs;
- 
--#ifdef CONFIG_STACKTRACE
- struct stack_trace {
- 	unsigned int nr_entries, max_entries;
- 	unsigned long *entries;
- 	int skip;	/* input argument: How many entries to skip */
- };
- 
-+#ifdef CONFIG_STACKTRACE
- extern void save_stack_trace(struct stack_trace *trace);
- extern void save_stack_trace_regs(struct pt_regs *regs,
- 				  struct stack_trace *trace);
-@@ -29,12 +30,23 @@ extern void save_stack_trace_user(struct stack_trace *trace);
- # define save_stack_trace_user(trace)              do { } while (0)
+ _TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
+-		   _TIF_UPROBE)
++		   _TIF_UPROBE | _TIF_PATCH_PENDING)
+ _TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+ 		   _TIF_SYSCALL_TRACEPOINT)
+ _CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE | _CIF_FPU)
+@@ -352,6 +352,11 @@ ENTRY(system_call)
+ #endif
+ 	TSTMSK	__PT_FLAGS(%r11),_PIF_PER_TRAP
+ 	jo	.Lsysc_singlestep
++#ifdef CONFIG_LIVEPATCH
++	TSTMSK	__TI_flags(%r12),_TIF_PATCH_PENDING
++	jo	.Lsysc_patch_pending	# handle live patching just before
++					# signals and possible syscall restart
++#endif
+ 	TSTMSK	__TI_flags(%r12),_TIF_SIGPENDING
+ 	jo	.Lsysc_sigpending
+ 	TSTMSK	__TI_flags(%r12),_TIF_NOTIFY_RESUME
+@@ -426,6 +431,16 @@ ENTRY(system_call)
  #endif
  
--#else
-+#else /* !CONFIG_STACKTRACE */
- # define save_stack_trace(trace)			do { } while (0)
- # define save_stack_trace_tsk(tsk, trace)		do { } while (0)
- # define save_stack_trace_user(trace)			do { } while (0)
- # define print_stack_trace(trace, spaces)		do { } while (0)
- # define snprint_stack_trace(buf, size, trace, spaces)	do { } while (0)
--#endif
-+#endif /* CONFIG_STACKTRACE */
+ #
++# _TIF_PATCH_PENDING is set, call klp_update_patch_state
++#
++#ifdef CONFIG_LIVEPATCH
++.Lsysc_patch_pending:
++	lg	%r2,__LC_CURRENT	# pass pointer to task struct
++	larl	%r14,.Lsysc_return
++	jg	klp_update_patch_state
++#endif
++
++#
+ # _PIF_PER_TRAP is set, call do_per_trap
+ #
+ .Lsysc_singlestep:
+@@ -674,6 +689,10 @@ ENTRY(io_int_handler)
+ 	jo	.Lio_mcck_pending
+ 	TSTMSK	__TI_flags(%r12),_TIF_NEED_RESCHED
+ 	jo	.Lio_reschedule
++#ifdef CONFIG_LIVEPATCH
++	TSTMSK	__TI_flags(%r12),_TIF_PATCH_PENDING
++	jo	.Lio_patch_pending
++#endif
+ 	TSTMSK	__TI_flags(%r12),_TIF_SIGPENDING
+ 	jo	.Lio_sigpending
+ 	TSTMSK	__TI_flags(%r12),_TIF_NOTIFY_RESUME
+@@ -720,6 +739,16 @@ ENTRY(io_int_handler)
+ 	j	.Lio_return
  
--#endif
-+#ifdef CONFIG_RELIABLE_STACKTRACE
-+extern int save_stack_trace_tsk_reliable(struct task_struct *tsk,
-+					 struct stack_trace *trace);
-+#else
-+static inline int save_stack_trace_tsk_reliable(struct task_struct *tsk,
-+						struct stack_trace *trace)
-+{
-+	return -ENOSYS;
-+}
-+#endif /* CONFIG_RELIABLE_STACKTRACE */
+ #
++# _TIF_PATCH_PENDING is set, call klp_update_patch_state
++#
++#ifdef CONFIG_LIVEPATCH
++.Lio_patch_pending:
++	lg	%r2,__LC_CURRENT	# pass pointer to task struct
++	larl	%r14,.Lio_return
++	jg	klp_update_patch_state
++#endif
 +
-+#endif /* __LINUX_STACKTRACE_H */
-diff --git a/kernel/extable.c b/kernel/extable.c
-index e820cce..c085844 100644
---- a/kernel/extable.c
-+++ b/kernel/extable.c
-@@ -58,7 +58,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
- 	return e;
- }
- 
--static inline int init_kernel_text(unsigned long addr)
-+int init_kernel_text(unsigned long addr)
- {
- 	if (addr >= (unsigned long)_sinittext &&
- 	    addr < (unsigned long)_einittext)
-diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
-index b6e4c16..f35bc5d 100644
---- a/kernel/stacktrace.c
-+++ b/kernel/stacktrace.c
-@@ -58,8 +58,8 @@ int snprint_stack_trace(char *buf, size_t size,
- EXPORT_SYMBOL_GPL(snprint_stack_trace);
- 
- /*
-- * Architectures that do not implement save_stack_trace_tsk or
-- * save_stack_trace_regs get this weak alias and a once-per-bootup warning
-+ * Architectures that do not implement save_stack_trace_*()
-+ * get this weak alias and a once-per-bootup warning
-  * (whenever this facility is utilized - for example by procfs):
-  */
- __weak void
-diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
-index 5d57177..189a2d7 100644
---- a/lib/Kconfig.debug
-+++ b/lib/Kconfig.debug
-@@ -1164,6 +1164,12 @@ config STACKTRACE
- 	  It is also used by various kernel debugging features that require
- 	  stack trace generation.
- 
-+config RELIABLE_STACKTRACE
-+	def_bool y
-+	depends on HAVE_RELIABLE_STACKTRACE
-+	depends on STACKTRACE
-+	depends on STACK_VALIDATION
-+
- config DEBUG_KOBJECT
- 	bool "kobject debugging"
- 	depends on DEBUG_KERNEL
++#
+ # _TIF_SIGPENDING or is set, call do_signal
+ #
+ .Lio_sigpending:
 -- 
-2.4.11
+2.7.4
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help