--- v4
+++ v8
@@ -1,146 +1,44 @@
From: Yu-cheng Yu <yu-cheng.yu@intel.com>
-When serving a page fault, maybe_mkwrite() makes a PTE writable if there is
-a write access to it, and its vma has VM_WRITE. Shadow stack accesses to
-shadow stack vma's are also treated as write accesses by the fault handler.
-This is because setting shadow stack memory makes it writable via some
-instructions, so COW has to happen even for shadow stack reads.
+The x86 Control-flow Enforcement Technology (CET) feature includes a new
+type of memory called shadow stack. This shadow stack memory has some
+unusual properties, which require some core mm changes to function
+properly.
-So maybe_mkwrite() should continue to set VM_WRITE vma's as normally
-writable, but also set VM_WRITE|VM_SHADOW_STACK vma's as shadow stack.
+Future patches will introduce a new VM flag VM_SHADOW_STACK that will be
+VM_HIGH_ARCH_BIT_5. VM_HIGH_ARCH_BIT_0 through VM_HIGH_ARCH_BIT_4 are
+bits 32-36, and bit 37 is the unrelated VM_UFFD_MINOR_BIT. For the sake
+of order, make all VM_HIGH_ARCH_BITs stay together by moving
+VM_UFFD_MINOR_BIT from 37 to 38. This will allow VM_SHADOW_STACK to be
+introduced as 37.
-Do this by adding a pte_mkwrite_shstk() and a cross-arch stub. Check for
-VM_SHADOW_STACK in maybe_mkwrite() and call pte_mkwrite_shstk()
-accordingly.
-
-Apply the same changes to maybe_pmd_mkwrite().
-
+Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
+Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
+Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
+Acked-by: Peter Xu <peterx@redhat.com>
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: John Allen <john.allen@amd.com>
-Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
-Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
-Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
-Cc: Kees Cook <keescook@chromium.org>
+Tested-by: Kees Cook <keescook@chromium.org>
---
+ include/linux/mm.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
-v3:
- - Remove unneeded define for maybe_mkwrite (Peterz)
- - Switch to cleaner version of maybe_mkwrite() (Peterz)
-
-v2:
- - Change to handle shadow stacks that are VM_WRITE|VM_SHADOW_STACK
- - Ditch arch specific maybe_mkwrite(), and make the code generic
- - Move do_anonymous_page() to next patch (Kirill)
-
-Yu-cheng v29:
- - Remove likely()'s.
-
- arch/x86/include/asm/pgtable.h | 2 ++
- include/linux/mm.h | 13 ++++++++++---
- include/linux/pgtable.h | 14 ++++++++++++++
- mm/huge_memory.c | 10 +++++++---
- 4 files changed, 33 insertions(+), 6 deletions(-)
-
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index 4a149cec0c07..e4530b39f378 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -420,6 +420,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
- return pte_set_flags(pte, dirty | _PAGE_SOFT_DIRTY);
- }
-
-+#define pte_mkwrite_shstk pte_mkwrite_shstk
- static inline pte_t pte_mkwrite_shstk(pte_t pte)
- {
- /* pte_clear_cow() also sets Dirty=1 */
-@@ -556,6 +557,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
- return pmd_set_flags(pmd, dirty | _PAGE_SOFT_DIRTY);
- }
-
-+#define pmd_mkwrite_shstk pmd_mkwrite_shstk
- static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd)
- {
- return pmd_clear_cow(pmd);
diff --git a/include/linux/mm.h b/include/linux/mm.h
-index b982c2749e7b..f10797a1b236 100644
+index af652444fbba..a1b31caae013 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
-@@ -1004,12 +1004,19 @@ void free_compound_page(struct page *page);
- * servicing faults for write access. In the normal case, do always want
- * pte_mkwrite. But get_user_pages can cause write faults for mappings
- * that do not have writing enabled, when used by access_process_vm.
-+ *
-+ * If a vma is shadow stack (a type of writable memory), mark the pte shadow
-+ * stack.
- */
- static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
- {
-- if (likely(vma->vm_flags & VM_WRITE))
-- pte = pte_mkwrite(pte);
-- return pte;
-+ if (!(vma->vm_flags & VM_WRITE))
-+ return pte;
-+
-+ if (vma->vm_flags & VM_SHADOW_STACK)
-+ return pte_mkwrite_shstk(pte);
-+
-+ return pte_mkwrite(pte);
- }
-
- vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
-diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
-index 70e2a7e06a76..d8096578610a 100644
---- a/include/linux/pgtable.h
-+++ b/include/linux/pgtable.h
-@@ -524,6 +524,13 @@ static inline pte_t pte_sw_mkyoung(pte_t pte)
- #define pte_mk_savedwrite pte_mkwrite
+@@ -377,7 +377,7 @@ extern unsigned int kobjsize(const void *objp);
#endif
-+#ifndef pte_mkwrite_shstk
-+static inline pte_t pte_mkwrite_shstk(pte_t pte)
-+{
-+ return pte;
-+}
-+#endif
-+
- #ifndef pte_clear_savedwrite
- #define pte_clear_savedwrite pte_wrprotect
- #endif
-@@ -532,6 +539,13 @@ static inline pte_t pte_sw_mkyoung(pte_t pte)
- #define pmd_savedwrite pmd_write
- #endif
-
-+#ifndef pmd_mkwrite_shstk
-+static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd)
-+{
-+ return pmd;
-+}
-+#endif
-+
- #ifndef pmd_mk_savedwrite
- #define pmd_mk_savedwrite pmd_mkwrite
- #endif
-diff --git a/mm/huge_memory.c b/mm/huge_memory.c
-index 811d19b5c4f6..60451e588955 100644
---- a/mm/huge_memory.c
-+++ b/mm/huge_memory.c
-@@ -553,9 +553,13 @@ __setup("transparent_hugepage=", setup_transparent_hugepage);
-
- pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
- {
-- if (likely(vma->vm_flags & VM_WRITE))
-- pmd = pmd_mkwrite(pmd);
-- return pmd;
-+ if (!(vma->vm_flags & VM_WRITE))
-+ return pmd;
-+
-+ if (vma->vm_flags & VM_SHADOW_STACK)
-+ return pmd_mkwrite_shstk(pmd);
-+
-+ return pmd_mkwrite(pmd);
- }
-
- #ifdef CONFIG_MEMCG
+ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+-# define VM_UFFD_MINOR_BIT 37
++# define VM_UFFD_MINOR_BIT 38
+ # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */
+ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+ # define VM_UFFD_MINOR VM_NONE
--
2.17.1