Re: [PATCH] s390/livepatch: Implement reliable stack tracing for the consistency model
From: Josh Poimboeuf <hidden>
Date: 2019-07-28 20:45:08
Also in:
linux-s390, lkml
On Wed, Jul 10, 2019 at 12:59:18PM +0200, Miroslav Benes wrote:
The livepatch consistency model requires reliable stack tracing architecture support in order to work properly. In order to achieve this, two main issues have to be solved. First, reliable and consistent call chain backtracing has to be ensured. Second, the unwinder needs to be able to detect stack corruptions and return errors. The "zSeries ELF Application Binary Interface Supplement" says: "The stack pointer points to the first word of the lowest allocated stack frame. If the "back chain" is implemented this word will point to the previously allocated stack frame (towards higher addresses), except for the first stack frame, which shall have a back chain of zero (NULL). The stack shall grow downwards, in other words towards lower addresses." "back chain" is optional. GCC option -mbackchain enables it. Quoting Martin Schwidefsky [1]:
This reference footnote seems to be missing at the bottom of the patch description.
quoted hunk ↗ jump to hunk
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index d827b5b9a32c..1cc96c54169c 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h@@ -45,6 +45,25 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, bool unwind_next_frame(struct unwind_state *state); unsigned long unwind_get_return_address(struct unwind_state *state); +#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE +void __unwind_start_reliable(struct unwind_state *state, + struct task_struct *task, unsigned long sp); +bool unwind_next_frame_reliable(struct unwind_state *state); + +static inline void unwind_start_reliable(struct unwind_state *state, + struct task_struct *task) +{ + unsigned long sp; + + if (task == current) + sp = current_stack_pointer(); + else + sp = task->thread.ksp; + + __unwind_start_reliable(state, task, sp); +} +#endif +
(Ah, cool, I didn't realize s390 ported the x86 unwind interfaces. We should look at unifying them someday.) Why do you need _reliable() variants of the unwind interfaces? Can the error checking be integrated into unwind_start() and unwind_next_frame() like they are on x86?
+#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
+void __unwind_start_reliable(struct unwind_state *state,
+ struct task_struct *task, unsigned long sp)
+{
+ struct stack_info *info = &state->stack_info;
+ struct stack_frame *sf;
+ unsigned long ip;
+
+ memset(state, 0, sizeof(*state));
+ state->task = task;
+
+ /* Get current stack pointer and initialize stack info */
+ if (get_stack_info_reliable(sp, task, info) ||
+ !on_stack(info, sp, sizeof(struct stack_frame))) {
+ /* Something is wrong with the stack pointer */
+ info->type = STACK_TYPE_UNKNOWN;
+ state->error = true;
+ return;
+ }
+
+ /* Get the instruction pointer from the stack frame */
+ sf = (struct stack_frame *) sp;
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ /* Decode any ftrace redirection */
+ if (ip == (unsigned long) return_to_handler)
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ ip, NULL);
+#endifThe return_to_handler and ifdef checks aren't needed. Those are done already by the call. Also it seems a bit odd that the kretprobes check isn't done in this function next to the ftrace check.
+
+ /* Update unwind state */
+ state->sp = sp;
+ state->ip = ip;
+}
+
+bool unwind_next_frame_reliable(struct unwind_state *state)
+{
+ struct stack_info *info = &state->stack_info;
+ struct stack_frame *sf;
+ struct pt_regs *regs;
+ unsigned long sp, ip;
+
+ sf = (struct stack_frame *) state->sp;
+ sp = READ_ONCE_NOCHECK(sf->back_chain);
+ /*
+ * Idle tasks are special. The final back-chain points to nodat_stack.
+ * See CALL_ON_STACK() in smp_start_secondary() callback used in
+ * __cpu_up(). We just accept it, go to else branch and look for
+ * pt_regs.
+ */
+ if (likely(sp && !(is_idle_task(state->task) &&
+ outside_of_stack(state, sp)))) {
+ /* Non-zero back-chain points to the previous frame */
+ if (unlikely(outside_of_stack(state, sp)))
+ goto out_err;
+
+ sf = (struct stack_frame *) sp;
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
+ } else {
+ /* No back-chain, look for a pt_regs structure */
+ sp = state->sp + STACK_FRAME_OVERHEAD;
+ regs = (struct pt_regs *) sp;
+ if ((unsigned long)regs != info->end - sizeof(struct pt_regs))
+ goto out_err;
+ if (!(state->task->flags & (PF_KTHREAD | PF_IDLE)) &&
+ !user_mode(regs))
+ goto out_err;
+
+ state->regs = regs;
+ goto out_stop;
+ }
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ /* Decode any ftrace redirection */
+ if (ip == (unsigned long) return_to_handler)
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ ip, (void *) sp);
+#endif
+
+ /* Update unwind state */
+ state->sp = sp;
+ state->ip = ip;
+ return true;
+
+out_err:
+ state->error = true;
+out_stop:
+ state->stack_info.type = STACK_TYPE_UNKNOWN;
+ return false;
+}
+#endifFor the _reliable() variants of the unwind interfaces, there's a lot of code duplication with the non-reliable variants. It looks like it would be a lot cleaner (and easier to follow) if they were integrated. Overall it's looking good though. -- Josh