--- v6
+++ v10
@@ -13,33 +13,24 @@
[Port to 4.12 kernel]
[Remove the comment about the fault_env structure which has been
implemented as the vm_fault structure in the kernel]
+[move pte_map_lock()'s definition higher up in the file]
+[move the definition of FAULT_FLAG_SPECULATIVE later in the series]
+[review error path in do_swap_page(), do_anonymous_page() and
+ wp_page_copy()]
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
---
- include/linux/mm.h | 1 +
- mm/memory.c | 56 ++++++++++++++++++++++++++++++++++++++----------------
- 2 files changed, 41 insertions(+), 16 deletions(-)
-
-diff --git a/include/linux/mm.h b/include/linux/mm.h
-index 63f7ba111f64..ad299ed7b85c 100644
---- a/include/linux/mm.h
-+++ b/include/linux/mm.h
-@@ -302,6 +302,7 @@ extern pgprot_t protection_map[16];
- #define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */
- #define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */
- #define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */
-+#define FAULT_FLAG_SPECULATIVE 0x200 /* Speculative fault, not holding mmap_sem */
-
- #define FAULT_FLAG_TRACE \
- { FAULT_FLAG_WRITE, "WRITE" }, \
+ mm/memory.c | 87 ++++++++++++++++++++++++++++++++++++++++---------------------
+ 1 file changed, 58 insertions(+), 29 deletions(-)
+
diff --git a/mm/memory.c b/mm/memory.c
-index 259f621345b2..868424ab850c 100644
+index a1f990e33e38..4528bd584b7a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
-@@ -2438,6 +2438,13 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
- pte_unmap_unlock(vmf->pte, vmf->ptl);
+@@ -2288,6 +2288,13 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
}
-
-+static bool pte_map_lock(struct vm_fault *vmf)
+ EXPORT_SYMBOL_GPL(apply_to_page_range);
+
++static inline bool pte_map_lock(struct vm_fault *vmf)
+{
+ vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
+ vmf->address, &vmf->ptl);
@@ -47,31 +38,62 @@
+}
+
/*
- * Handle the case of a page which we actually need to copy to a new page.
- *
-@@ -2465,6 +2472,7 @@ static int wp_page_copy(struct vm_fault *vmf)
+ * handle_pte_fault chooses page fault handler according to an entry which was
+ * read non-atomically. Before making any commitment, on those architectures
+@@ -2477,25 +2484,26 @@ static int wp_page_copy(struct vm_fault *vmf)
const unsigned long mmun_start = vmf->address & PAGE_MASK;
const unsigned long mmun_end = mmun_start + PAGE_SIZE;
struct mem_cgroup *memcg;
+ int ret = VM_FAULT_OOM;
if (unlikely(anon_vma_prepare(vma)))
- goto oom;
-@@ -2492,7 +2500,11 @@ static int wp_page_copy(struct vm_fault *vmf)
+- goto oom;
++ goto out;
+
+ if (is_zero_pfn(pte_pfn(vmf->orig_pte))) {
+ new_page = alloc_zeroed_user_highpage_movable(vma,
+ vmf->address);
+ if (!new_page)
+- goto oom;
++ goto out;
+ } else {
+ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
+ vmf->address);
+ if (!new_page)
+- goto oom;
++ goto out;
+ cow_user_page(new_page, old_page, vmf->address, vma);
+ }
+
+ if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
+- goto oom_free_new;
++ goto out_free_new;
+
+ __SetPageUptodate(new_page);
+
+@@ -2504,7 +2512,10 @@ static int wp_page_copy(struct vm_fault *vmf)
/*
* Re-check the pte - we dropped the lock
*/
- vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl);
+ if (!pte_map_lock(vmf)) {
-+ mem_cgroup_cancel_charge(new_page, memcg, false);
+ ret = VM_FAULT_RETRY;
-+ goto oom_free_new;
++ goto out_uncharge;
+ }
if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
if (old_page) {
if (!PageAnon(old_page)) {
-@@ -2584,7 +2596,7 @@ static int wp_page_copy(struct vm_fault *vmf)
- oom:
+@@ -2591,12 +2602,14 @@ static int wp_page_copy(struct vm_fault *vmf)
+ put_page(old_page);
+ }
+ return page_copied ? VM_FAULT_WRITE : 0;
+-oom_free_new:
++out_uncharge:
++ mem_cgroup_cancel_charge(new_page, memcg, false);
++out_free_new:
+ put_page(new_page);
+-oom:
++out:
if (old_page)
put_page(old_page);
- return VM_FAULT_OOM;
@@ -79,7 +101,7 @@
}
/**
-@@ -2605,8 +2617,8 @@ static int wp_page_copy(struct vm_fault *vmf)
+@@ -2617,8 +2630,8 @@ static int wp_page_copy(struct vm_fault *vmf)
int finish_mkwrite_fault(struct vm_fault *vmf)
{
WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
@@ -90,7 +112,7 @@
/*
* We might have raced with another page fault while we released the
* pte_offset_map_lock.
-@@ -2724,8 +2736,11 @@ static int do_wp_page(struct vm_fault *vmf)
+@@ -2736,8 +2749,11 @@ static int do_wp_page(struct vm_fault *vmf)
get_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
lock_page(vmf->page);
@@ -104,34 +126,55 @@
if (!pte_same(*vmf->pte, vmf->orig_pte)) {
unlock_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
-@@ -2953,8 +2968,10 @@ int do_swap_page(struct vm_fault *vmf)
- * Back out if somebody else faulted in this pte
- * while we released the pte lock.
+@@ -2944,11 +2960,15 @@ int do_swap_page(struct vm_fault *vmf)
+
+ if (!page) {
+ /*
+- * Back out if somebody else faulted in this pte
+- * while we released the pte lock.
++ * Back out if the VMA has changed behind our back
++ * during a speculative page fault or if somebody else
++ * faulted in this pte while we released the pte lock.
*/
- vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
- vmf->address, &vmf->ptl);
+ if (!pte_map_lock(vmf)) {
+ delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
-+ return VM_FAULT_RETRY;
++ ret = VM_FAULT_RETRY;
++ goto out;
+ }
if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
ret = VM_FAULT_OOM;
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
-@@ -3010,8 +3027,11 @@ int do_swap_page(struct vm_fault *vmf)
+@@ -3001,10 +3021,13 @@ int do_swap_page(struct vm_fault *vmf)
+ }
+
/*
- * Back out if somebody else already faulted in this pte.
+- * Back out if somebody else already faulted in this pte.
++ * Back out if the VMA has changed behind our back during a speculative
++ * page fault or if somebody else already faulted in this pte.
*/
- vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
- &vmf->ptl);
+ if (!pte_map_lock(vmf)) {
+ ret = VM_FAULT_RETRY;
-+ mem_cgroup_cancel_charge(page, memcg, false);
-+ goto out_page;
++ goto out_cancel_cgroup;
+ }
if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte)))
goto out_nomap;
-@@ -3140,8 +3160,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
+@@ -3082,8 +3105,9 @@ int do_swap_page(struct vm_fault *vmf)
+ out:
+ return ret;
+ out_nomap:
+- mem_cgroup_cancel_charge(page, memcg, false);
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
++out_cancel_cgroup:
++ mem_cgroup_cancel_charge(page, memcg, false);
+ out_page:
+ unlock_page(page);
+ out_release:
+@@ -3134,8 +3158,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
!mm_forbids_zeropage(vma->vm_mm)) {
entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
vma->vm_page_prot));
@@ -142,21 +185,42 @@
if (!pte_none(*vmf->pte))
goto unlock;
ret = check_stable_address_space(vma->vm_mm);
-@@ -3176,8 +3196,11 @@ static int do_anonymous_page(struct vm_fault *vmf)
+@@ -3170,14 +3194,16 @@ static int do_anonymous_page(struct vm_fault *vmf)
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
- vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
- &vmf->ptl);
+- if (!pte_none(*vmf->pte))
+ if (!pte_map_lock(vmf)) {
-+ mem_cgroup_cancel_charge(page, memcg, false);
-+ put_page(page);
-+ return VM_FAULT_RETRY;
++ ret = VM_FAULT_RETRY;
+ goto release;
+ }
- if (!pte_none(*vmf->pte))
- goto release;
-
-@@ -3301,8 +3324,9 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
++ if (!pte_none(*vmf->pte))
++ goto unlock_and_release;
+
+ ret = check_stable_address_space(vma->vm_mm);
+ if (ret)
+- goto release;
++ goto unlock_and_release;
+
+ /* Deliver the page fault to userland, check inside PT lock */
+ if (userfaultfd_missing(vma)) {
+@@ -3199,10 +3225,12 @@ static int do_anonymous_page(struct vm_fault *vmf)
+ unlock:
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ return ret;
++unlock_and_release:
++ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ release:
+ mem_cgroup_cancel_charge(page, memcg, false);
+ put_page(page);
+- goto unlock;
++ return ret;
+ oom_free_page:
+ put_page(page);
+ oom:
+@@ -3295,8 +3323,9 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
* pte_none() under vmf->ptl protection when we return to
* alloc_set_pte().
*/