[RFC PATCH 1/2] mm: make lazy MMU mode context-aware
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: 2026-03-25 07:41:38
Also in:
linux-s390, lkml
Subsystem:
filesystems (vfs and infrastructure), memory management, memory management - core, memory mapping, memory mapping - madvise (memory advice), proc filesystem, the rest, vmalloc · Maintainers:
Alexander Viro, Christian Brauner, Andrew Morton, David Hildenbrand, Liam R. Howlett, Lorenzo Stoakes, Linus Torvalds, Uladzislau Rezki
Lazy MMU mode is assumed to be context-independent, in the sense that it does not need any additional information while operating. However, the s390 architecture benefits from knowing the exact page table entries being modified. Introduce lazy_mmu_mode_enable_pte(), which is provided with the process address space and the page table being operated on. This information is required to enable s390-specific optimizations. The function takes parameters that are typically passed to page-table level walkers, which implies that the span of PTE entries never crosses a page table boundary. Architectures that do not require such information simply do not need to define the arch_enter_lazy_mmu_mode_pte() callback. Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com> --- fs/proc/task_mmu.c | 2 +- include/linux/pgtable.h | 42 +++++++++++++++++++++++++++++++++++++++++ mm/madvise.c | 8 ++++---- mm/memory.c | 8 ++++---- mm/mprotect.c | 2 +- mm/mremap.c | 2 +- mm/vmalloc.c | 6 +++--- 7 files changed, 56 insertions(+), 14 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e091931d7ca1..4e3b1987874a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c@@ -2752,7 +2752,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, return 0; } - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(vma->vm_mm, start, end, start_pte); if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) { /* Fast path for performing exclusive WP */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index a50df42a893f..481b45954800 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h@@ -271,6 +271,44 @@ static inline void lazy_mmu_mode_enable(void) arch_enter_lazy_mmu_mode(); } +#ifndef arch_enter_lazy_mmu_mode_pte +static inline void arch_enter_lazy_mmu_mode_pte(struct mm_struct *mm, + unsigned long addr, + unsigned long end, + pte_t *ptep) +{ + arch_enter_lazy_mmu_mode(); +} +#endif + +/** + * lazy_mmu_mode_enable_pte() - Enable the lazy MMU mode with parameters + * + * Enters a new lazy MMU mode section; if the mode was not already enabled, + * enables it and calls arch_enter_lazy_mmu_mode_pte(). + * + * Must be paired with a call to lazy_mmu_mode_disable(). + * + * Has no effect if called: + * - While paused - see lazy_mmu_mode_pause() + * - In interrupt context + */ +static inline void lazy_mmu_mode_enable_pte(struct mm_struct *mm, + unsigned long addr, + unsigned long end, + pte_t *ptep) +{ + struct lazy_mmu_state *state = &current->lazy_mmu_state; + + if (in_interrupt() || state->pause_count > 0) + return; + + VM_WARN_ON_ONCE(state->enable_count == U8_MAX); + + if (state->enable_count++ == 0) + arch_enter_lazy_mmu_mode_pte(mm, addr, end, ptep); +} + /** * lazy_mmu_mode_disable() - Disable the lazy MMU mode. *
@@ -353,6 +391,10 @@ static inline void lazy_mmu_mode_resume(void) } #else static inline void lazy_mmu_mode_enable(void) {} +static inline void lazy_mmu_mode_enable_pte(struct mm_struct *mm, + unsigned long addr, + unsigned long end, + pte_t *ptep) {} static inline void lazy_mmu_mode_disable(void) {} static inline void lazy_mmu_mode_pause(void) {} static inline void lazy_mmu_mode_resume(void) {}
diff --git a/mm/madvise.c b/mm/madvise.c
index dbb69400786d..02edc80f678b 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c@@ -451,7 +451,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (!start_pte) return 0; flush_tlb_batched_pending(mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(mm, addr, end, start_pte); for (; addr < end; pte += nr, addr += nr * PAGE_SIZE) { nr = 1; ptent = ptep_get(pte);
@@ -506,7 +506,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (!start_pte) break; flush_tlb_batched_pending(mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(mm, addr, end, start_pte); if (!err) nr = 0; continue;
@@ -673,7 +673,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, if (!start_pte) return 0; flush_tlb_batched_pending(mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(mm, addr, end, start_pte); for (; addr != end; pte += nr, addr += PAGE_SIZE * nr) { nr = 1; ptent = ptep_get(pte);
@@ -733,7 +733,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, if (!start_pte) break; flush_tlb_batched_pending(mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(mm, addr, end, pte); if (!err) nr = 0; continue;
diff --git a/mm/memory.c b/mm/memory.c
index 2f815a34d924..43fa9965fb5f 100644
--- a/mm/memory.c
+++ b/mm/memory.c@@ -1269,7 +1269,7 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); orig_src_pte = src_pte; orig_dst_pte = dst_pte; - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(src_mm, addr, end, src_pte); do { nr = 1;
@@ -1917,7 +1917,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, return addr; flush_tlb_batched_pending(mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(mm, addr, end, start_pte); do { bool any_skipped = false;
@@ -2875,7 +2875,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(mm, addr, end, mapped_pte); do { BUG_ON(!pte_none(ptep_get(pte))); if (!pfn_modify_allowed(pfn, prot)) {
@@ -3235,7 +3235,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, return -EINVAL; } - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(mm, addr, end, mapped_pte); if (fn) { do {
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c0571445bef7..43a2a65b8caf 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c@@ -233,7 +233,7 @@ static long change_pte_range(struct mmu_gather *tlb, is_private_single_threaded = vma_is_single_threaded_private(vma); flush_tlb_batched_pending(vma->vm_mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(vma->vm_mm, addr, end, pte); do { nr_ptes = 1; oldpte = ptep_get(pte);
diff --git a/mm/mremap.c b/mm/mremap.c
index 2be876a70cc0..ac7f649f3aad 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c@@ -260,7 +260,7 @@ static int move_ptes(struct pagetable_move_control *pmc, if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); flush_tlb_batched_pending(vma->vm_mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(mm, old_addr, old_end, old_ptep); for (; old_addr < old_end; old_ptep += nr_ptes, old_addr += nr_ptes * PAGE_SIZE, new_ptep += nr_ptes, new_addr += nr_ptes * PAGE_SIZE) {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 61caa55a4402..5e702bcf03fd 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c@@ -108,7 +108,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (!pte) return -ENOMEM; - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(&init_mm, addr, end, pte); do { if (unlikely(!pte_none(ptep_get(pte)))) {
@@ -371,7 +371,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long size = PAGE_SIZE; pte = pte_offset_kernel(pmd, addr); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(&init_mm, addr, end, pte); do { #ifdef CONFIG_HUGETLB_PAGE
@@ -538,7 +538,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, if (!pte) return -ENOMEM; - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_pte(&init_mm, addr, end, pte); do { struct page *page = pages[*nr];
--
2.51.0