--- v2
+++ v4
@@ -1,104 +1,416 @@
-Enables CONFIG_HARDENED_USERCOPY checks on x86. This is done both in
-copy_*_user() and __copy_*_user() because copy_*_user() actually calls
-down to _copy_*_user() and not __copy_*_user().
+This is the start of porting PAX_USERCOPY into the mainline kernel. This
+is the first set of features, controlled by CONFIG_HARDENED_USERCOPY. The
+work is based on code by PaX Team and Brad Spengler, and an earlier port
+from Casey Schaufler. Additional non-slab page tests are from Rik van Riel.
-Based on code from PaX and grsecurity.
+This patch contains the logic for validating several conditions when
+performing copy_to_user() and copy_from_user() on the kernel object
+being copied to/from:
+- address range doesn't wrap around
+- address range isn't NULL or zero-allocated (with a non-zero copy size)
+- if on the slab allocator:
+ - copy size must be less than or equal to object size (when check is
+ implemented in the allocator, which appears in subsequent patches)
+- otherwise, object must not span page allocations (excepting Reserved
+ and CMA ranges)
+- if on the stack
+ - object must not extend before/after the current process stack
+ - object must be contained by a valid stack frame (when there is
+ arch/build support for identifying stack frames)
+- object must not overlap with kernel text
Signed-off-by: Kees Cook <keescook@chromium.org>
+Tested-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
+Tested-by: Michael Ellerman <mpe@ellerman.id.au>
---
- arch/x86/Kconfig | 2 ++
- arch/x86/include/asm/uaccess.h | 10 ++++++----
- arch/x86/include/asm/uaccess_32.h | 2 ++
- arch/x86/include/asm/uaccess_64.h | 2 ++
- 4 files changed, 12 insertions(+), 4 deletions(-)
+ include/linux/slab.h | 12 ++
+ include/linux/thread_info.h | 15 +++
+ mm/Makefile | 4 +
+ mm/usercopy.c | 268 ++++++++++++++++++++++++++++++++++++++++++++
+ security/Kconfig | 28 +++++
+ 5 files changed, 327 insertions(+)
+ create mode 100644 mm/usercopy.c
-diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index 4407f596b72c..39d89e058249 100644
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -80,11 +80,13 @@ config X86
- select HAVE_ALIGNED_STRUCT_PAGE if SLUB
- select HAVE_AOUT if X86_32
- select HAVE_ARCH_AUDITSYSCALL
-+ select HAVE_ARCH_HARDENED_USERCOPY
- select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
- select HAVE_ARCH_JUMP_LABEL
- select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
- select HAVE_ARCH_KGDB
- select HAVE_ARCH_KMEMCHECK
-+ select HAVE_ARCH_LINEAR_KERNEL_MAPPING if X86_64
- select HAVE_ARCH_MMAP_RND_BITS if MMU
- select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
- select HAVE_ARCH_SECCOMP_FILTER
-diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
-index 2982387ba817..aa9cc58409c6 100644
---- a/arch/x86/include/asm/uaccess.h
-+++ b/arch/x86/include/asm/uaccess.h
-@@ -742,9 +742,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
- * case, and do only runtime checking for non-constant sizes.
- */
+diff --git a/include/linux/slab.h b/include/linux/slab.h
+index aeb3e6d00a66..96a16a3fb7cb 100644
+--- a/include/linux/slab.h
++++ b/include/linux/slab.h
+@@ -155,6 +155,18 @@ void kfree(const void *);
+ void kzfree(const void *);
+ size_t ksize(const void *);
-- if (likely(sz < 0 || sz >= n))
-+ if (likely(sz < 0 || sz >= n)) {
-+ check_object_size(to, n, false);
- n = _copy_from_user(to, from, n);
-- else if(__builtin_constant_p(n))
-+ } else if(__builtin_constant_p(n))
- copy_from_user_overflow();
- else
- __copy_from_user_overflow(sz, n);
-@@ -762,9 +763,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
- might_fault();
++#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
++const char *__check_heap_object(const void *ptr, unsigned long n,
++ struct page *page);
++#else
++static inline const char *__check_heap_object(const void *ptr,
++ unsigned long n,
++ struct page *page)
++{
++ return NULL;
++}
++#endif
++
+ /*
+ * Some archs want to perform DMA into kmalloc caches and need a guaranteed
+ * alignment larger than the alignment of a 64-bit integer.
+diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
+index 3d5c80b4391d..f24b99eac969 100644
+--- a/include/linux/thread_info.h
++++ b/include/linux/thread_info.h
+@@ -155,6 +155,21 @@ static inline int arch_within_stack_frames(const void * const stack,
+ }
+ #endif
- /* See the comment in copy_from_user() above. */
-- if (likely(sz < 0 || sz >= n))
-+ if (likely(sz < 0 || sz >= n)) {
-+ check_object_size(from, n, true);
- n = _copy_to_user(to, from, n);
-- else if(__builtin_constant_p(n))
-+ } else if(__builtin_constant_p(n))
- copy_to_user_overflow();
- else
- __copy_to_user_overflow(sz, n);
-diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
-index 4b32da24faaf..7d3bdd1ed697 100644
---- a/arch/x86/include/asm/uaccess_32.h
-+++ b/arch/x86/include/asm/uaccess_32.h
-@@ -37,6 +37,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero
- static __always_inline unsigned long __must_check
- __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
- {
-+ check_object_size(from, n, true);
- return __copy_to_user_ll(to, from, n);
- }
++#ifdef CONFIG_HARDENED_USERCOPY
++extern void __check_object_size(const void *ptr, unsigned long n,
++ bool to_user);
++
++static inline void check_object_size(const void *ptr, unsigned long n,
++ bool to_user)
++{
++ __check_object_size(ptr, n, to_user);
++}
++#else
++static inline void check_object_size(const void *ptr, unsigned long n,
++ bool to_user)
++{ }
++#endif /* CONFIG_HARDENED_USERCOPY */
++
+ #endif /* __KERNEL__ */
-@@ -95,6 +96,7 @@ static __always_inline unsigned long
- __copy_from_user(void *to, const void __user *from, unsigned long n)
- {
- might_fault();
-+ check_object_size(to, n, false);
- if (__builtin_constant_p(n)) {
- unsigned long ret;
+ #endif /* _LINUX_THREAD_INFO_H */
+diff --git a/mm/Makefile b/mm/Makefile
+index 78c6f7dedb83..32d37247c7e5 100644
+--- a/mm/Makefile
++++ b/mm/Makefile
+@@ -21,6 +21,9 @@ KCOV_INSTRUMENT_memcontrol.o := n
+ KCOV_INSTRUMENT_mmzone.o := n
+ KCOV_INSTRUMENT_vmstat.o := n
-diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
-index 2eac2aa3e37f..673059a109fe 100644
---- a/arch/x86/include/asm/uaccess_64.h
-+++ b/arch/x86/include/asm/uaccess_64.h
-@@ -54,6 +54,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
- {
- int ret = 0;
++# Since __builtin_frame_address does work as used, disable the warning.
++CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address)
++
+ mmu-y := nommu.o
+ mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
+ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
+@@ -99,3 +102,4 @@ obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
+ obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
+ obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
+ obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
++obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
+diff --git a/mm/usercopy.c b/mm/usercopy.c
+new file mode 100644
+index 000000000000..8ebae91a6b55
+--- /dev/null
++++ b/mm/usercopy.c
+@@ -0,0 +1,268 @@
++/*
++ * This implements the various checks for CONFIG_HARDENED_USERCOPY*,
++ * which are designed to protect kernel memory from needless exposure
++ * and overwrite under many unintended conditions. This code is based
++ * on PAX_USERCOPY, which is:
++ *
++ * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source
++ * Security Inc.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ */
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/mm.h>
++#include <linux/slab.h>
++#include <asm/sections.h>
++
++enum {
++ BAD_STACK = -1,
++ NOT_STACK = 0,
++ GOOD_FRAME,
++ GOOD_STACK,
++};
++
++/*
++ * Checks if a given pointer and length is contained by the current
++ * stack frame (if possible).
++ *
++ * Returns:
++ * NOT_STACK: not at all on the stack
++ * GOOD_FRAME: fully within a valid stack frame
++ * GOOD_STACK: fully on the stack (when can't do frame-checking)
++ * BAD_STACK: error condition (invalid stack position or bad stack frame)
++ */
++static noinline int check_stack_object(const void *obj, unsigned long len)
++{
++ const void * const stack = task_stack_page(current);
++ const void * const stackend = stack + THREAD_SIZE;
++ int ret;
++
++ /* Object is not on the stack at all. */
++ if (obj + len <= stack || stackend <= obj)
++ return NOT_STACK;
++
++ /*
++ * Reject: object partially overlaps the stack (passing the
++ * check above means at least one end is within the stack,
++ * so if this check fails, the other end is outside the stack).
++ */
++ if (obj < stack || stackend < obj + len)
++ return BAD_STACK;
++
++ /* Check if object is safely within a valid frame. */
++ ret = arch_within_stack_frames(stack, stackend, obj, len);
++ if (ret)
++ return ret;
++
++ return GOOD_STACK;
++}
++
++static void report_usercopy(const void *ptr, unsigned long len,
++ bool to_user, const char *type)
++{
++ pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n",
++ to_user ? "exposure" : "overwrite",
++ to_user ? "from" : "to", ptr, type ? : "unknown", len);
++ /*
++ * For greater effect, it would be nice to do do_group_exit(),
++ * but BUG() actually hooks all the lock-breaking and per-arch
++ * Oops code, so that is used here instead.
++ */
++ BUG();
++}
++
++/* Returns true if any portion of [ptr,ptr+n) overlaps with [low,high). */
++static bool overlaps(const void *ptr, unsigned long n, unsigned long low,
++ unsigned long high)
++{
++ unsigned long check_low = (uintptr_t)ptr;
++ unsigned long check_high = check_low + n;
++
++ /* Half-open ranges: ending exactly at low or starting at high is no overlap. */
++ if (check_low >= high || check_high <= low)
++ return false;
++
++ return true;
++}
++
++/* Is this address range in the kernel text area? */
++static inline const char *check_kernel_text_object(const void *ptr,
++ unsigned long n)
++{
++ unsigned long textlow = (unsigned long)_stext;
++ unsigned long texthigh = (unsigned long)_etext;
++ unsigned long textlow_linear, texthigh_linear;
++
++ if (overlaps(ptr, n, textlow, texthigh))
++ return "<kernel text>";
++
++ /*
++ * Some architectures have virtual memory mappings with a secondary
++ * mapping of the kernel text, i.e. there is more than one virtual
++ * kernel address that points to the kernel image. It is usually
++ * when there is a separate linear physical memory mapping, in that
++ * __pa() is not just the reverse of __va(). This can be detected
++ * and checked:
++ */
++ textlow_linear = (unsigned long)__va(__pa(textlow));
++ /* No different mapping: we're done. */
++ if (textlow_linear == textlow)
++ return NULL;
++
++ /* Check the secondary mapping... */
++ texthigh_linear = (unsigned long)__va(__pa(texthigh));
++ if (overlaps(ptr, n, textlow_linear, texthigh_linear))
++ return "<linear kernel text>";
++
++ return NULL;
++}
++
++static inline const char *check_bogus_address(const void *ptr, unsigned long n)
++{
++ /* Reject if object wraps past end of memory. */
++ if (ptr + n < ptr)
++ return "<wrapped address>";
++
++ /* Reject if NULL or ZERO-allocation. */
++ if (ZERO_OR_NULL_PTR(ptr))
++ return "<null>";
++
++ return NULL;
++}
++
++static inline const char *check_heap_object(const void *ptr, unsigned long n,
++ bool to_user)
++{
++ struct page *page, *endpage;
++ const void *end = ptr + n - 1;
++ bool is_reserved, is_cma;
++
++ /*
++ * Some architectures (arm64) return true for virt_addr_valid() on
++ * vmalloced addresses. Work around this by checking for vmalloc
++ * first.
++ */
++ if (is_vmalloc_addr(ptr))
++ return NULL;
++
++ if (!virt_addr_valid(ptr))
++ return NULL;
++
++ page = virt_to_head_page(ptr);
++
++ /* Check slab allocator for flags and size. */
++ if (PageSlab(page))
++ return __check_heap_object(ptr, n, page);
++
++ /*
++ * Sometimes the kernel data regions are not marked Reserved (see
++ * check below). And sometimes [_sdata,_edata) does not cover
++ * rodata and/or bss, so check each range explicitly.
++ */
++
++ /* Allow reads of kernel rodata region (if not marked as Reserved). */
++ if (ptr >= (const void *)__start_rodata &&
++ end <= (const void *)__end_rodata) {
++ if (!to_user)
++ return "<rodata>";
++ return NULL;
++ }
++
++ /* Allow kernel data region (if not marked as Reserved). */
++ if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
++ return NULL;
++
++ /* Allow kernel bss region (if not marked as Reserved). */
++ if (ptr >= (const void *)__bss_start &&
++ end <= (const void *)__bss_stop)
++ return NULL;
++
++ /* Is the object wholly within one base page? */
++ if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
++ ((unsigned long)end & (unsigned long)PAGE_MASK)))
++ return NULL;
++
++ /* Allow if start and end are inside the same compound page. */
++ endpage = virt_to_head_page(end);
++ if (likely(endpage == page))
++ return NULL;
++
++ /*
++ * Allow if range is entirely either Reserved (i.e. special or
++ * device memory), or CMA. Otherwise, reject since the object spans
++ * several independently allocated pages.
++ */
++ is_reserved = PageReserved(page);
++ is_cma = is_migrate_cma_page(page);
++ if (!is_reserved && !is_cma)
++ goto reject;
++
++ for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
++ page = virt_to_head_page(ptr);
++ if (is_reserved && !PageReserved(page))
++ goto reject;
++ if (is_cma && !is_migrate_cma_page(page))
++ goto reject;
++ }
++
++ return NULL;
++
++reject:
++ return "<spans multiple pages>";
++}
++
++/*
++ * Validates that the given object is:
++ * - not bogus address
++ * - known-safe heap or stack object
++ * - not in kernel text
++ */
++void __check_object_size(const void *ptr, unsigned long n, bool to_user)
++{
++ const char *err;
++
++ /* Skip all tests if size is zero. */
++ if (!n)
++ return;
++
++ /* Check for invalid addresses. */
++ err = check_bogus_address(ptr, n);
++ if (err)
++ goto report;
++
++ /* Check for bad heap object. */
++ err = check_heap_object(ptr, n, to_user);
++ if (err)
++ goto report;
++
++ /* Check for bad stack object. */
++ switch (check_stack_object(ptr, n)) {
++ case NOT_STACK:
++ /* Object is not touching the current process stack. */
++ break;
++ case GOOD_FRAME:
++ case GOOD_STACK:
++ /*
++ * Object is either in the correct frame (when it
++ * is possible to check) or just generally on the
++ * process stack (when frame checking not available).
++ */
++ return;
++ default:
++ err = "<process stack>";
++ goto report;
++ }
++
++ /* Check for object in kernel to avoid text exposure. */
++ err = check_kernel_text_object(ptr, n);
++ if (!err)
++ return;
++
++report:
++ report_usercopy(ptr, n, to_user, err);
++}
++EXPORT_SYMBOL(__check_object_size);
+diff --git a/security/Kconfig b/security/Kconfig
+index 176758cdfa57..df28f2b6f3e1 100644
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -118,6 +118,34 @@ config LSM_MMAP_MIN_ADDR
+ this low address space will need the permission specific to the
+ systems running LSM.
-+ check_object_size(dst, size, false);
- if (!__builtin_constant_p(size))
- return copy_user_generic(dst, (__force void *)src, size);
- switch (size) {
-@@ -119,6 +120,7 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
- {
- int ret = 0;
-
-+ check_object_size(src, size, true);
- if (!__builtin_constant_p(size))
- return copy_user_generic((__force void *)dst, src, size);
- switch (size) {
++config HAVE_HARDENED_USERCOPY_ALLOCATOR
++ bool
++ help
++ The heap allocator implements __check_heap_object() for
++ validating memory ranges against heap object sizes in
++ support of CONFIG_HARDENED_USERCOPY.
++
++config HAVE_ARCH_HARDENED_USERCOPY
++ bool
++ help
++ The architecture supports CONFIG_HARDENED_USERCOPY by
++ calling check_object_size() just before performing the
++ userspace copies in the low level implementation of
++ copy_to_user() and copy_from_user().
++
++config HARDENED_USERCOPY
++ bool "Harden memory copies between kernel and userspace"
++ depends on HAVE_ARCH_HARDENED_USERCOPY
++ select BUG
++ help
++ This option checks for obviously wrong memory regions when
++ copying memory to/from the kernel (via copy_to_user() and
++ copy_from_user() functions) by rejecting memory ranges that
++ are larger than the specified heap object, span multiple
++ separately allocated pages, are not on the process stack,
++ or are part of the kernel text. This kills entire classes
++ of heap overflow exploits and similar kernel memory exposures.
++
+ source security/selinux/Kconfig
+ source security/smack/Kconfig
+ source security/tomoyo/Kconfig
--
2.7.4