[RFC PATCH RESEND 06/28] mm: mark VMA as locked whenever vma->vm_flags are modified
From: Suren Baghdasaryan <surenb@google.com>
Date: 2022-09-01 17:36:45
Also in:
linux-arm-kernel, linux-mm, lkml
Subsystem:
filesystems (vfs and infrastructure), memory management, memory management - userfaultfd, memory mapping, memory mapping - madvise (memory advice), proc filesystem, the rest · Maintainers:
Alexander Viro, Christian Brauner, Andrew Morton, Mike Rapoport, Liam R. Howlett, Lorenzo Stoakes, David Hildenbrand, Linus Torvalds
VMA flag modifications should be done under VMA lock to prevent concurrent
page fault handling in that area.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 fs/proc/task_mmu.c | 1 +
 fs/userfaultfd.c   | 6 ++++++
 mm/madvise.c       | 1 +
 mm/mlock.c         | 2 ++
 mm/mmap.c          | 1 +
 mm/mprotect.c      | 1 +
 6 files changed, 12 insertions(+)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4e0023643f8b..ceffa5c2c650 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c@@ -1285,6 +1285,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; + vma_mark_locked(vma); vma->vm_flags &= ~VM_SOFTDIRTY; vma_set_page_prot(vma); }
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 175de70e3adf..fe557b3d1c07 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c@@ -620,6 +620,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, mmap_write_lock(mm); for (vma = mm->mmap; vma; vma = vma->vm_next) if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { + vma_mark_locked(vma); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; vma->vm_flags &= ~__VM_UFFD_FLAGS; }
@@ -653,6 +654,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) octx = vma->vm_userfaultfd_ctx.ctx; if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) { + vma_mark_locked(vma); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; vma->vm_flags &= ~__VM_UFFD_FLAGS; return 0;
@@ -734,6 +736,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, atomic_inc(&ctx->mmap_changing); } else { /* Drop uffd context if remap feature not enabled */ + vma_mark_locked(vma); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; vma->vm_flags &= ~__VM_UFFD_FLAGS; }
@@ -891,6 +894,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) vma = prev; else prev = vma; + vma_mark_locked(vma); vma->vm_flags = new_flags; vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; }
@@ -1449,6 +1453,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, * the next vma was merged into the current one and * the current one has not been updated yet. */ + vma_mark_locked(vma); vma->vm_flags = new_flags; vma->vm_userfaultfd_ctx.ctx = ctx;
@@ -1630,6 +1635,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, * the next vma was merged into the current one and * the current one has not been updated yet. */ + vma_mark_locked(vma); vma->vm_flags = new_flags; vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
diff --git a/mm/madvise.c b/mm/madvise.c
index 5f0f0948a50e..a173f0025abd 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c@@ -181,6 +181,7 @@ static int madvise_update_vma(struct vm_area_struct *vma, /* * vm_flags is protected by the mmap_lock held in write mode. */ + vma_mark_locked(vma); vma->vm_flags = new_flags; if (!vma->vm_file) { error = replace_anon_vma_name(vma, anon_name);
diff --git a/mm/mlock.c b/mm/mlock.c
index b14e929084cc..f62e1a4d05f2 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c@@ -380,6 +380,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma, */ if (newflags & VM_LOCKED) newflags |= VM_IO; + vma_mark_locked(vma); WRITE_ONCE(vma->vm_flags, newflags); lru_add_drain();
@@ -456,6 +457,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) { /* No work to do, and mlocking twice would be wrong */ + vma_mark_locked(vma); vma->vm_flags = newflags; } else { mlock_vma_pages_range(vma, start, end, newflags);
diff --git a/mm/mmap.c b/mm/mmap.c
index 693e6776be39..f89c9b058105 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c@@ -1818,6 +1818,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, out: perf_event_mmap(vma); + vma_mark_locked(vma); vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
diff --git a/mm/mprotect.c b/mm/mprotect.c
index bc6bddd156ca..df47fc21b0e4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c@@ -621,6 +621,7 @@ mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, * vm_flags and vm_page_prot are protected by the mmap_lock * held in write mode. */ + vma_mark_locked(vma); vma->vm_flags = newflags; /* * We want to check manually if we can change individual PTEs writable
--
2.37.2.789.g6183377224-goog