Re: [PATCH v4 3/7] mm/huge_memory: respect MADV_COLLAPSE with PR_THP_DISABLE_EXCEPT_ADVISED
From: Zi Yan <ziy@nvidia.com>
Date: 2025-08-14 15:15:01
Also in:
linux-fsdevel, linux-mm, lkml
On 13 Aug 2025, at 9:55, Usama Arif wrote:
quoted hunk ↗ jump to hunk
From: David Hildenbrand <redacted> Let's allow for making MADV_COLLAPSE succeed on areas that neither have VM_HUGEPAGE nor VM_NOHUGEPAGE when we have THP disabled unless explicitly advised (PR_THP_DISABLE_EXCEPT_ADVISED). MADV_COLLAPSE is a clear advice that we want to collapse. Note that we still respect the VM_NOHUGEPAGE flag, just like MADV_COLLAPSE always does. So consequently, MADV_COLLAPSE is now only refused on VM_NOHUGEPAGE with PR_THP_DISABLE_EXCEPT_ADVISED, including for shmem. Co-developed-by: Usama Arif <redacted> Signed-off-by: Usama Arif <redacted> Signed-off-by: David Hildenbrand <redacted> Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com> Reviewed-by: Lorenzo Stoakes <redacted> --- include/linux/huge_mm.h | 8 +++++++- include/uapi/linux/prctl.h | 2 +- mm/huge_memory.c | 5 +++-- mm/memory.c | 6 ++++-- mm/shmem.c | 2 +- 5 files changed, 16 insertions(+), 7 deletions(-)diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 92ea0b9771fae..1ac0d06fb3c1d 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h@@ -329,7 +329,7 @@ struct thpsize { * through madvise or prctl. */ static inline bool vma_thp_disabled(struct vm_area_struct *vma, - vm_flags_t vm_flags) + vm_flags_t vm_flags, bool forced_collapse) { /* Are THPs disabled for this VMA? */ if (vm_flags & VM_NOHUGEPAGE)@@ -343,6 +343,12 @@ static inline bool vma_thp_disabled(struct vm_area_struct *vma, */ if (vm_flags & VM_HUGEPAGE) return false; + /* + * Forcing a collapse (e.g., madv_collapse), is a clear advice to + * use THPs. + */ + if (forced_collapse) + return false; return mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, vma->vm_mm); }diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 150b6deebfb1e..51c4e8c82b1e9 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h@@ -185,7 +185,7 @@ struct prctl_mm_map { #define PR_SET_THP_DISABLE 41 /* * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE / - * VM_HUGEPAGE). + * VM_HUGEPAGE, MADV_COLLAPSE). */ # define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1) #define PR_GET_THP_DISABLE 42diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9c716be949cbf..1eca2d543449c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c@@ -104,7 +104,8 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, { const bool smaps = type == TVA_SMAPS; const bool in_pf = type == TVA_PAGEFAULT; - const bool enforce_sysfs = type != TVA_FORCED_COLLAPSE; + const bool forced_collapse = type == TVA_FORCED_COLLAPSE; + const bool enforce_sysfs = !forced_collapse; unsigned long supported_orders; /* Check the intersection of requested and supported orders. */@@ -122,7 +123,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, if (!vma->vm_mm) /* vdso */ return 0; - if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags)) + if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags, forced_collapse)) return 0; /* khugepaged doesn't collapse DAX vma, but page fault is fine. */diff --git a/mm/memory.c b/mm/memory.c index 7b1e8f137fa3f..e4f533655305a 100644 --- a/mm/memory.c +++ b/mm/memory.c@@ -5332,9 +5332,11 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *pa * It is too late to allocate a small folio, we already have a large * folio in the pagecache: especially s390 KVM cannot tolerate any * PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any - * PMD mappings if THPs are disabled. + * PMD mappings if THPs are disabled. As we already have a THP ... + * behave as if we are forcing a collapse.
What does the “...” mean here? Shouldn’t it be: As we already have a THP, behave as if we are forcing a collapse.
quoted hunk ↗ jump to hunk
*/ - if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags)) + if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags, + /* forced_collapse=*/ true)) return ret; if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))diff --git a/mm/shmem.c b/mm/shmem.c index e2c76a30802b6..d945de3a7f0e7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c@@ -1817,7 +1817,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, vm_flags_t vm_flags = vma ? vma->vm_flags : 0; unsigned int global_orders; - if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags))) + if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force))) return 0; global_orders = shmem_huge_global_enabled(inode, index, write_end,-- 2.47.3
Otherwise, LGTM. Reviewed-by: Zi Yan [off-list ref] Best Regards, Yan, Zi