--- v5
+++ v8
@@ -1,140 +1,44 @@
From: Yu-cheng Yu <yu-cheng.yu@intel.com>
-When serving a page fault, maybe_mkwrite() makes a PTE writable if there is
-a write access to it, and its vma has VM_WRITE. Shadow stack accesses to
-shadow stack vma's are also treated as write accesses by the fault handler.
-This is because setting shadow stack memory makes it writable via some
-instructions, so COW has to happen even for shadow stack reads.
+The x86 Control-flow Enforcement Technology (CET) feature includes a new
+type of memory called shadow stack. This shadow stack memory has some
+unusual properties, which require some core mm changes to function
+properly.
-So maybe_mkwrite() should continue to set VM_WRITE vma's as normally
-writable, but also set VM_WRITE|VM_SHADOW_STACK vma's as shadow stack.
+Future patches will introduce a new VM flag VM_SHADOW_STACK that will be
+VM_HIGH_ARCH_BIT_5. VM_HIGH_ARCH_BIT_0 through VM_HIGH_ARCH_BIT_4 are
+bits 32-36, and bit 37 is the unrelated VM_UFFD_MINOR_BIT. For the sake
+of order, make all VM_HIGH_ARCH_BITs stay together by moving
+VM_UFFD_MINOR_BIT from 37 to 38. This will allow VM_SHADOW_STACK to be
+introduced as 37.
-Do this by adding a pte_mkwrite_shstk() and a cross-arch stub. Check for
-VM_SHADOW_STACK in maybe_mkwrite() and call pte_mkwrite_shstk()
-accordingly.
-
-Apply the same changes to maybe_pmd_mkwrite().
-
+Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
+Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
+Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
+Acked-by: Peter Xu <peterx@redhat.com>
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: John Allen <john.allen@amd.com>
-Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
-Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
-Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
-Cc: Kees Cook <keescook@chromium.org>
+Tested-by: Kees Cook <keescook@chromium.org>
---
+ include/linux/mm.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
-v3:
- - Remove unneeded define for maybe_mkwrite (Peterz)
- - Switch to cleaner version of maybe_mkwrite() (Peterz)
-
-v2:
- - Change to handle shadow stacks that are VM_WRITE|VM_SHADOW_STACK
- - Ditch arch specific maybe_mkwrite(), and make the code generic
- - Move do_anonymous_page() to next patch (Kirill)
-
-Yu-cheng v29:
- - Remove likely()'s.
-
- arch/x86/include/asm/pgtable.h | 2 ++
- include/linux/mm.h | 13 ++++++++++---
- include/linux/pgtable.h | 14 ++++++++++++++
- mm/huge_memory.c | 10 +++++++---
- 4 files changed, 33 insertions(+), 6 deletions(-)
-
-diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index e96558abc8ec..45b1a8f058fe 100644
---- a/arch/x86/include/asm/pgtable.h
-+++ b/arch/x86/include/asm/pgtable.h
-@@ -445,6 +445,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
- return __pte_mkdirty(pte, true);
- }
-
-+#define pte_mkwrite_shstk pte_mkwrite_shstk
- static inline pte_t pte_mkwrite_shstk(pte_t pte)
- {
- /* pte_clear_cow() also sets Dirty=1 */
-@@ -589,6 +590,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
- return __pmd_mkdirty(pmd, true);
- }
-
-+#define pmd_mkwrite_shstk pmd_mkwrite_shstk
- static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd)
- {
- return pmd_clear_cow(pmd);
diff --git a/include/linux/mm.h b/include/linux/mm.h
-index 824e730b21af..e15d2fc04007 100644
+index af652444fbba..a1b31caae013 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
-@@ -1106,12 +1106,19 @@ void free_compound_page(struct page *page);
- * servicing faults for write access. In the normal case, do always want
- * pte_mkwrite. But get_user_pages can cause write faults for mappings
- * that do not have writing enabled, when used by access_process_vm.
-+ *
-+ * If a vma is shadow stack (a type of writable memory), mark the pte shadow
-+ * stack.
- */
- static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
- {
-- if (likely(vma->vm_flags & VM_WRITE))
-- pte = pte_mkwrite(pte);
-- return pte;
-+ if (!(vma->vm_flags & VM_WRITE))
-+ return pte;
-+
-+ if (vma->vm_flags & VM_SHADOW_STACK)
-+ return pte_mkwrite_shstk(pte);
-+
-+ return pte_mkwrite(pte);
- }
-
- vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
-diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
-index 1159b25b0542..14a820a45a37 100644
---- a/include/linux/pgtable.h
-+++ b/include/linux/pgtable.h
-@@ -532,6 +532,20 @@ static inline pte_t pte_sw_mkyoung(pte_t pte)
- #define pte_sw_mkyoung pte_sw_mkyoung
+@@ -377,7 +377,7 @@ extern unsigned int kobjsize(const void *objp);
#endif
-+#ifndef pte_mkwrite_shstk
-+static inline pte_t pte_mkwrite_shstk(pte_t pte)
-+{
-+ return pte;
-+}
-+#endif
-+
-+#ifndef pmd_mkwrite_shstk
-+static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd)
-+{
-+ return pmd;
-+}
-+#endif
-+
- #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
- #ifdef CONFIG_TRANSPARENT_HUGEPAGE
- static inline void pmdp_set_wrprotect(struct mm_struct *mm,
-diff --git a/mm/huge_memory.c b/mm/huge_memory.c
-index abe6cfd92ffa..fbb8beb9265e 100644
---- a/mm/huge_memory.c
-+++ b/mm/huge_memory.c
-@@ -553,9 +553,13 @@ __setup("transparent_hugepage=", setup_transparent_hugepage);
-
- pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
- {
-- if (likely(vma->vm_flags & VM_WRITE))
-- pmd = pmd_mkwrite(pmd);
-- return pmd;
-+ if (!(vma->vm_flags & VM_WRITE))
-+ return pmd;
-+
-+ if (vma->vm_flags & VM_SHADOW_STACK)
-+ return pmd_mkwrite_shstk(pmd);
-+
-+ return pmd_mkwrite(pmd);
- }
-
- #ifdef CONFIG_MEMCG
+ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+-# define VM_UFFD_MINOR_BIT 37
++# define VM_UFFD_MINOR_BIT 38
+ # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */
+ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+ # define VM_UFFD_MINOR VM_NONE
--
2.17.1