--- v29
+++ v21
@@ -1,86 +1,81 @@
-The read-only and Dirty PTE has been used to indicate copy-on-write pages.
-However, newer x86 processors also regard a read-only and Dirty PTE as a
-shadow stack page. In order to separate the two, the software-defined
-_PAGE_COW is created to replace _PAGE_DIRTY for the copy-on-write case, and
-pte_*() are updated.
+When Shadow Stack is introduced, [R/O + _PAGE_DIRTY] PTE is reserved for
+shadow stack. Copy-on-write PTEs have [R/O + _PAGE_COW].
-Pte_modify() changes a PTE to 'newprot', but it doesn't use the pte_*().
-Introduce fixup_dirty_pte(), which sets a dirty PTE, based on _PAGE_RW,
-to either _PAGE_DIRTY or _PAGE_COW.
+When a PTE goes from [R/W + _PAGE_DIRTY] to [R/O + _PAGE_COW], it could
+become a transient shadow stack PTE in two cases:
-Apply the same changes to pmd_modify().
+The first case is that some processors can start a write but end up seeing
+a read-only PTE by the time they get to the Dirty bit, creating a transient
+shadow stack PTE. However, this will not occur on processors supporting
+Shadow Stack, and a TLB flush is not necessary.
+
+The second case is that when _PAGE_DIRTY is replaced with _PAGE_COW non-
+atomically, a transient shadow stack PTE can be created as a result.
+Thus, prevent that with cmpxchg.
+
+Dave Hansen, Jann Horn, Andy Lutomirski, and Peter Zijlstra provided many
+insights to the issue. Jann Horn provided the cmpxchg solution.
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
-Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
---
- arch/x86/include/asm/pgtable.h | 37 ++++++++++++++++++++++++++++++++++
- 1 file changed, 37 insertions(+)
+ arch/x86/include/asm/pgtable.h | 36 ++++++++++++++++++++++++++++++++++
+ 1 file changed, 36 insertions(+)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
-index 9f1ba76ed79a..cf7316e968df 100644
+index 7a7f6af01ab5..71cc4abec704 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
-@@ -771,6 +771,23 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
-
- static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
-
-+static inline pteval_t fixup_dirty_pte(pteval_t pteval)
-+{
-+ pte_t pte = __pte(pteval);
-+
+@@ -1280,6 +1280,24 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+ static inline void ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+ {
+ /*
-+ * Fix up potential shadow stack page flags because the RO, Dirty
-+ * PTE is special.
++ * If Shadow Stack is enabled, pte_wrprotect() moves _PAGE_DIRTY
++ * to _PAGE_COW (see comments at pte_wrprotect()).
++ * When a thread reads a RW=1, Dirty=0 PTE and before changing it
++ * to RW=0, Dirty=0, another thread could have written to the page
++ * and the PTE is RW=1, Dirty=1 now. Use try_cmpxchg() to detect
++ * PTE changes and update old_pte, then try again.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_SHSTK)) {
-+ if (pte_dirty(pte)) {
-+ pte = pte_mkclean(pte);
-+ pte = pte_mkdirty(pte);
-+ }
++ pte_t old_pte, new_pte;
++
++ old_pte = READ_ONCE(*ptep);
++ do {
++ new_pte = pte_wrprotect(old_pte);
++ } while (!try_cmpxchg(&ptep->pte, &old_pte.pte, new_pte.pte));
++
++ return;
+ }
-+ return pte_val(pte);
-+}
-+
- static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
- {
- pteval_t val = pte_val(pte), oldval = val;
-@@ -781,16 +798,36 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
- */
- val &= _PAGE_CHG_MASK;
- val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
-+ val = fixup_dirty_pte(val);
- val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
- return __pte(val);
+ clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
}
-+static inline int pmd_write(pmd_t pmd);
-+static inline pmdval_t fixup_dirty_pmd(pmdval_t pmdval)
-+{
-+ pmd_t pmd = __pmd(pmdval);
-+
+@@ -1330,6 +1348,24 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+ {
+ /*
-+ * Fix up potential shadow stack page flags because the RO, Dirty
-+ * PMD is special.
++ * If Shadow Stack is enabled, pmd_wrprotect() moves _PAGE_DIRTY
++ * to _PAGE_COW (see comments at pmd_wrprotect()).
++ * When a thread reads a RW=1, Dirty=0 PMD and before changing it
++ * to RW=0, Dirty=0, another thread could have written to the page
++ * and the PMD is RW=1, Dirty=1 now. Use try_cmpxchg() to detect
++ * PMD changes and update old_pmd, then try again.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_SHSTK)) {
-+ if (pmd_dirty(pmd)) {
-+ pmd = pmd_mkclean(pmd);
-+ pmd = pmd_mkdirty(pmd);
-+ }
++ pmd_t old_pmd, new_pmd;
++
++ old_pmd = READ_ONCE(*pmdp);
++ do {
++ new_pmd = pmd_wrprotect(old_pmd);
++ } while (!try_cmpxchg((pmdval_t *)pmdp, (pmdval_t *)&old_pmd, pmd_val(new_pmd)));
++
++ return;
+ }
-+ return pmd_val(pmd);
-+}
-+
- static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
- {
- pmdval_t val = pmd_val(pmd), oldval = val;
+ clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
+ }
- val &= _HPAGE_CHG_MASK;
- val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
-+ val = fixup_dirty_pmd(val);
- val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
- return __pmd(val);
- }
--
2.21.0