Inter-revision diff: patch 17

Comparing v3 (message) to v8 (message)

--- v3
+++ v8
@@ -5,173 +5,40 @@
 unusual properties, which requires some core mm changes to function
 properly.
 
-With the introduction of shadow stack memory there are two ways a pte can
-be writable: regular writable memory and shadow stack memory.
+Future patches will introduce a new VM flag VM_SHADOW_STACK that will be
+VM_HIGH_ARCH_BIT_5. VM_HIGH_ARCH_BIT_0 through VM_HIGH_ARCH_BIT_4 are
+bits 32-36, and bit 37 is the unrelated VM_UFFD_MINOR_BIT. For the sake
+of order, make all VM_HIGH_ARCH_BITs stay together by moving
+VM_UFFD_MINOR_BIT from 37 to 38. This will allow VM_SHADOW_STACK to be
+introduced as 37.
 
-In past patches, maybe_mkwrite() has been updated to apply pte_mkwrite()
-or pte_mkwrite_shstk() depending on the VMA flag. This covers most cases
-where a PTE is made writable. However, there are places where pte_mkwrite()
-is called directly and the logic should now also create a shadow stack PTE
-in the case of a shadow stack VMA.
-
-- do_anonymous_page() and migrate_vma_insert_page() check VM_WRITE
-  directly and call pte_mkwrite(). Teach them about pte_mkwrite_shstk()
-
-- When userfaultfd is creating a PTE after userspace handles the fault
-  it calls pte_mkwrite() directly. Teach it about pte_mkwrite_shstk()
-
-To make the code cleaner, introduce is_shstk_write() which simplifies
-checking for VM_WRITE | VM_SHADOW_STACK together.
-
-In other cases where pte_mkwrite() is called directly, the VMA will not
-be VM_SHADOW_STACK, and so shadow stack memory should not be created.
- - In the case of pte_savedwrite(), shadow stack VMA's are excluded.
- - In the case of the "dirty_accountable" optimization in mprotect(),
-   shadow stack VMA's won't be VM_SHARED, so it is not necessary.
-
+Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
+Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
+Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
+Acked-by: Peter Xu <peterx@redhat.com>
 Tested-by: Pengfei Xu <pengfei.xu@intel.com>
 Tested-by: John Allen <john.allen@amd.com>
-Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
-Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
-Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
-Cc: Kees Cook <keescook@chromium.org>
+Tested-by: Kees Cook <keescook@chromium.org>
+---
+ include/linux/mm.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
 
----
-
-v3:
- - Restore do_anonymous_page() that accidentally moved commits (Kirill)
- - Open code maybe_mkwrite() cases from v2, so the behavior doesn't change
-   to mark that non-writable PTEs dirty. (Nadav)
-
-v2:
- - Updated commit log with comments from Dave Hansen
- - Dave also suggested (I understood) to maybe tweak vm_get_page_prot()
-   to avoid having to call maybe_mkwrite(). After playing around with
-   this I opted to *not* do this. Shadow stack memory is
-   effectively writable, so having the default permissions be writable
-   ended up mapping the zero page as writable and other surprises. So
-   creating shadow stack memory needs to be done with manual logic
-   like pte_mkwrite().
- - Drop change in change_pte_range() because it couldn't actually trigger
-   for shadow stack VMAs.
- - Clarify reasoning for skipped cases of pte_mkwrite().
-
-Yu-cheng v25:
- - Apply same changes to do_huge_pmd_numa_page() as to do_numa_page().
-
- arch/x86/include/asm/pgtable.h |  3 +++
- arch/x86/mm/pgtable.c          |  6 ++++++
- include/linux/pgtable.h        |  7 +++++++
- mm/memory.c                    |  5 ++++-
- mm/migrate_device.c            |  4 +++-
- mm/userfaultfd.c               | 10 +++++++---
- 6 files changed, 30 insertions(+), 5 deletions(-)
-
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index df67bcf9f69e..d57dc1b2d3e8 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -919,6 +919,9 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
- }
- #endif  /* CONFIG_PAGE_TABLE_ISOLATION */
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index af652444fbba..a1b31caae013 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -377,7 +377,7 @@ extern unsigned int kobjsize(const void *objp);
+ #endif
  
-+#define is_shstk_write is_shstk_write
-+extern bool is_shstk_write(unsigned long vm_flags);
-+
- #endif	/* __ASSEMBLY__ */
- 
- 
-diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
-index 8525f2876fb4..f0e536bea3ca 100644
---- a/arch/x86/mm/pgtable.c
-+++ b/arch/x86/mm/pgtable.c
-@@ -876,3 +876,9 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
- 
- #endif /* CONFIG_X86_64 */
- #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
-+
-+bool is_shstk_write(unsigned long vm_flags)
-+{
-+	return (vm_flags & (VM_SHADOW_STACK | VM_WRITE)) ==
-+	       (VM_SHADOW_STACK | VM_WRITE);
-+}
-diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
-index 5ce6732a6b65..36926a207b6d 100644
---- a/include/linux/pgtable.h
-+++ b/include/linux/pgtable.h
-@@ -1567,6 +1567,13 @@ static inline bool arch_has_pfn_modify_check(void)
- }
- #endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
- 
-+#ifndef is_shstk_write
-+static inline bool is_shstk_write(unsigned long vm_flags)
-+{
-+	return false;
-+}
-+#endif
-+
- /*
-  * Architecture PAGE_KERNEL_* fallbacks
-  *
-diff --git a/mm/memory.c b/mm/memory.c
-index f88c351aecd4..b9bee283aad3 100644
---- a/mm/memory.c
-+++ b/mm/memory.c
-@@ -4128,7 +4128,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
- 
- 	entry = mk_pte(page, vma->vm_page_prot);
- 	entry = pte_sw_mkyoung(entry);
--	if (vma->vm_flags & VM_WRITE)
-+
-+	if (is_shstk_write(vma->vm_flags))
-+		entry = pte_mkwrite_shstk(pte_mkdirty(entry));
-+	else if (vma->vm_flags & VM_WRITE)
- 		entry = pte_mkwrite(pte_mkdirty(entry));
- 
- 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-diff --git a/mm/migrate_device.c b/mm/migrate_device.c
-index 6fa682eef7a0..4c21c600bf46 100644
---- a/mm/migrate_device.c
-+++ b/mm/migrate_device.c
-@@ -641,7 +641,9 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
- 			goto abort;
- 		}
- 		entry = mk_pte(page, vma->vm_page_prot);
--		if (vma->vm_flags & VM_WRITE)
-+		if (is_shstk_write(vma->vm_flags))
-+			entry = pte_mkwrite_shstk(pte_mkdirty(entry));
-+		else if (vma->vm_flags & VM_WRITE)
- 			entry = pte_mkwrite(pte_mkdirty(entry));
- 	}
- 
-diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
-index 3d0fef3980b3..503135b079b6 100644
---- a/mm/userfaultfd.c
-+++ b/mm/userfaultfd.c
-@@ -63,6 +63,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
- 	int ret;
- 	pte_t _dst_pte, *dst_pte;
- 	bool writable = dst_vma->vm_flags & VM_WRITE;
-+	bool shstk = dst_vma->vm_flags & VM_SHADOW_STACK;
- 	bool vm_shared = dst_vma->vm_flags & VM_SHARED;
- 	bool page_in_cache = page->mapping;
- 	spinlock_t *ptl;
-@@ -83,9 +84,12 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
- 		writable = false;
- 	}
- 
--	if (writable)
--		_dst_pte = pte_mkwrite(_dst_pte);
--	else
-+	if (writable) {
-+		if (shstk)
-+			_dst_pte = pte_mkwrite_shstk(_dst_pte);
-+		else
-+			_dst_pte = pte_mkwrite(_dst_pte);
-+	} else
- 		/*
- 		 * We need this to make sure write bit removed; as mk_pte()
- 		 * could return a pte with write bit set.
+ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+-# define VM_UFFD_MINOR_BIT	37
++# define VM_UFFD_MINOR_BIT	38
+ # define VM_UFFD_MINOR		BIT(VM_UFFD_MINOR_BIT)	/* UFFD minor faults */
+ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+ # define VM_UFFD_MINOR		VM_NONE
 -- 
 2.17.1
 
Keyboard shortcuts
h    back out one level
j    next message in thread
k    previous message in thread
l    drill in
Esc  close help / fold thread tree
?    toggle this help