[PATCH v4 07/13] task_isolation: arch/x86: enable task isolation functionality
From: Alex Belits <hidden>
Date: 2020-07-22 14:55:01
Also in:
linux-arch, linux-arm-kernel, lkml, netdev
Subsystem:
the rest, x86 architecture (32-bit and 64-bit), x86 entry code, x86 mm, xen hypervisor x86 · Maintainers:
Linus Torvalds, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, Andy Lutomirski, Peter Zijlstra, Juergen Gross
In prepare_exit_to_usermode(), run cleanup for tasks that exited from isolation and call task_isolation_start() for tasks that entered TIF_TASK_ISOLATION. In syscall_trace_enter(), add the necessary support for reporting syscalls for task-isolation processes. Add task_isolation_interrupt() calls for the kernel exception types that do not result in signals, namely non-signalling page faults, and a task_isolation_remote() call when a reschedule IPI is sent to another CPU. Add task_isolation_kernel_enter() calls to interrupt and syscall entry handlers. This mechanism relies on calls to functions that call task_isolation_kernel_enter() early after entry into the kernel. Those functions are: enter_from_user_mode(), called from do_syscall_64(), do_int80_syscall_32(), do_fast_syscall_32(), idtentry_enter_user() and idtentry_enter_cond_rcu(); idtentry_enter_cond_rcu(), called from non-raw IDT macros and other entry points; idtentry_enter_user(); nmi_enter(); xen_call_function_interrupt(); xen_call_function_single_interrupt(); xen_irq_work_interrupt(). Signed-off-by: Chris Metcalf <redacted> [abelits@marvell.com: adapted for kernel 5.8] Signed-off-by: Alex Belits <redacted> --- arch/x86/Kconfig | 1 + arch/x86/entry/common.c | 20 +++++++++++++++++++- arch/x86/include/asm/barrier.h | 2 ++ arch/x86/include/asm/thread_info.h | 4 +++- arch/x86/kernel/apic/ipi.c | 2 ++ arch/x86/mm/fault.c | 4 ++++ arch/x86/xen/smp.c | 3 +++ arch/x86/xen/smp_pv.c | 2 ++ 8 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 883da0abf779..3a80142f85c8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig@@ -149,6 +149,7 @@ config X86 select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TASK_ISOLATION select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_STACKLEAK select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index f09288431f28..ab94d90a2bd5 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c@@ -26,6 +26,7 @@ #include <linux/livepatch.h> #include <linux/syscalls.h> #include <linux/uaccess.h> +#include <linux/isolation.h> #ifdef CONFIG_XEN_PV #include <xen/xen-ops.h>
@@ -86,6 +87,7 @@ static noinstr void enter_from_user_mode(void) { enum ctx_state state = ct_state(); + task_isolation_kernel_enter(); lockdep_hardirqs_off(CALLER_ADDR0); user_exit_irqoff();
@@ -97,6 +99,7 @@ static noinstr void enter_from_user_mode(void) #else static __always_inline void enter_from_user_mode(void) { + task_isolation_kernel_enter(); lockdep_hardirqs_off(CALLER_ADDR0); instrumentation_begin(); trace_hardirqs_off_finish();
@@ -161,6 +164,15 @@ static long syscall_trace_enter(struct pt_regs *regs) return -1L; } + /* + * In task isolation mode, we may prevent the syscall from + * running, and if so we also deliver a signal to the process. + */ + if (work & _TIF_TASK_ISOLATION) { + if (task_isolation_syscall(regs->orig_ax) == -1) + return -1L; + work &= ~_TIF_TASK_ISOLATION; + } #ifdef CONFIG_SECCOMP /* * Do seccomp after ptrace, to catch any tracer changes.
@@ -263,6 +275,8 @@ static void __prepare_exit_to_usermode(struct pt_regs *regs) lockdep_assert_irqs_disabled(); lockdep_sys_exit(); + task_isolation_check_run_cleanup(); + cached_flags = READ_ONCE(ti->flags); if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
@@ -278,6 +292,9 @@ static void __prepare_exit_to_usermode(struct pt_regs *regs) if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD)) switch_fpu_return(); + if (cached_flags & _TIF_TASK_ISOLATION) + task_isolation_start(); + #ifdef CONFIG_COMPAT /* * Compat syscalls set TS_COMPAT. Make sure we clear it before
@@ -597,7 +614,8 @@ bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) check_user_regs(regs); enter_from_user_mode(); return false; - } + } else + task_isolation_kernel_enter(); /* * If this entry hit the idle task invoke rcu_irq_enter() whether
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 7f828fe49797..5be6ca0519fc 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h@@ -4,6 +4,7 @@ #include <asm/alternative.h> #include <asm/nops.h> +#include <asm/processor.h> /* * Force strict CPU ordering.
@@ -53,6 +54,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define dma_rmb() barrier() #define dma_wmb() barrier() +#define instr_sync() sync_core() #ifdef CONFIG_X86_32 #define __smp_mb() asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc")
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8de8ceccb8bc..6dd1a5cc286d 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h@@ -93,6 +93,7 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_SLD 18 /* Restore split lock detection on context switch */ +#define TIF_TASK_ISOLATION 19 /* task isolation enabled for task */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -123,6 +124,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_SLD (1 << TIF_SLD) +#define _TIF_TASK_ISOLATION (1 << TIF_TASK_ISOLATION) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
@@ -136,7 +138,7 @@ struct thread_info { /* Work to do before invoking the actual syscall. */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | _TIF_TASK_ISOLATION) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 6ca0f91372fd..b4dfaad6a440 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c@@ -2,6 +2,7 @@ #include <linux/cpumask.h> #include <linux/smp.h> +#include <linux/isolation.h> #include "local.h"
@@ -67,6 +68,7 @@ void native_smp_send_reschedule(int cpu) WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", cpu); return; } + task_isolation_remote(cpu, "reschedule IPI"); apic->send_IPI(cpu, RESCHEDULE_VECTOR); }
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1ead568c0101..e16a4f5c7e57 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c@@ -18,6 +18,7 @@ #include <linux/uaccess.h> /* faulthandler_disabled() */ #include <linux/efi.h> /* efi_recover_from_page_fault()*/ #include <linux/mm_types.h> +#include <linux/isolation.h> /* task_isolation_interrupt */ #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/traps.h> /* dotraplinkage, ... */
@@ -1332,6 +1333,9 @@ void do_user_addr_fault(struct pt_regs *regs, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } + /* No signal was generated, but notify task-isolation tasks. */ + task_isolation_interrupt("page fault at %#lx", address); + check_v8086_mode(regs, address, tsk); } NOKPROBE_SYMBOL(do_user_addr_fault);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 2097fa0ebdb5..9a3a9bae7d06 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c@@ -4,6 +4,7 @@ #include <linux/slab.h> #include <linux/cpumask.h> #include <linux/percpu.h> +#include <linux/isolation.h> #include <xen/events.h>
@@ -265,6 +266,7 @@ void xen_send_IPI_allbutself(int vector) static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { + task_isolation_kernel_enter(); irq_enter(); generic_smp_call_function_interrupt(); inc_irq_stat(irq_call_count);
@@ -275,6 +277,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) { + task_isolation_kernel_enter(); irq_enter(); generic_smp_call_function_single_interrupt(); inc_irq_stat(irq_call_count);
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 171aff1b11f2..d71d3cc36c51 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c@@ -24,6 +24,7 @@ #include <linux/cpuhotplug.h> #include <linux/stackprotector.h> #include <linux/pgtable.h> +#include <linux/isolation.h> #include <asm/paravirt.h> #include <asm/idtentry.h>
@@ -482,6 +483,7 @@ static void xen_pv_stop_other_cpus(int wait) static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) { + task_isolation_kernel_enter(); irq_enter(); irq_work_run(); inc_irq_stat(apic_irq_work_irqs);
--
2.26.2