Thread: 59 messages, 8 authors, 2025-06-12

Re: [PATCH 03/12] mm/pagewalk: Skip dax pages in pagewalk

From: Dan Williams <hidden>
Date: 2025-06-05 01:59:28
Also in: dri-devel, linux-arm-kernel, linux-cxl, linux-ext4, linux-fsdevel, linux-mm, linux-riscv, linux-xfs, lkml, loongarch, nvdimm
Subsystem: memory management, memory management - core, the rest · Maintainers: Andrew Morton, David Hildenbrand, Linus Torvalds

Alistair Popple wrote:
quoted hunk ↗ jump to hunk
Previously dax pages were skipped by the pagewalk code as pud_special() or
vm_normal_page{_pmd}() would be false for DAX pages. Now that dax pages are
refcounted normally that is no longer the case, so add explicit checks to
skip them.

Signed-off-by: Alistair Popple <apopple@nvidia.com>
---
 include/linux/memremap.h | 11 +++++++++++
 mm/pagewalk.c            | 12 ++++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 4aa1519..54e8b57 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -198,6 +198,17 @@ static inline bool folio_is_fsdax(const struct folio *folio)
 	return is_fsdax_page(&folio->page);
 }
 
+static inline bool is_devdax_page(const struct page *page)
+{
+	return is_zone_device_page(page) &&
+		page_pgmap(page)->type == MEMORY_DEVICE_GENERIC;
+}
+
+static inline bool folio_is_devdax(const struct folio *folio)
+{
+	return is_devdax_page(&folio->page);
+}
+
 #ifdef CONFIG_ZONE_DEVICE
 void zone_device_page_init(struct page *page);
 void *memremap_pages(struct dev_pagemap *pgmap, int nid);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index e478777..0dfb9c2 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -884,6 +884,12 @@ struct folio *folio_walk_start(struct folio_walk *fw,
 		 * support PUD mappings in VM_PFNMAP|VM_MIXEDMAP VMAs.
 		 */
 		page = pud_page(pud);
+
+		if (is_devdax_page(page)) {
+			spin_unlock(ptl);
+			goto not_found;
+		}
+
 		goto found;
 	}
 
@@ -911,7 +917,8 @@ struct folio *folio_walk_start(struct folio_walk *fw,
 			goto pte_table;
 		} else if (pmd_present(pmd)) {
 			page = vm_normal_page_pmd(vma, addr, pmd);
-			if (page) {
+			if (page && !is_devdax_page(page) &&
+			    !is_fsdax_page(page)) {
It just looks awkward to say "yup, normal page, but not *that*
'normal'".

What about something like the below? Either way you can add:

Reviewed-by: Dan Williams <redacted>
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 12d96659e8b4..4e549669166b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2471,6 +2471,27 @@ struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma,
 struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 				pmd_t pmd);
 
+/* return normal pages backed by the page allocator */
+static inline struct page *vm_normal_gfp_pmd(struct vm_area_struct *vma,
+					     unsigned long addr, pmd_t pmd)
+{
+	struct page *page = vm_normal_page_pmd(vma, addr, pmd);
+
+	if (!is_devdax_page(page) && !is_fsdax_page(page))
+		return page;
+	return NULL;
+}
+
+static inline struct page *vm_normal_gfp_pte(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t pte)
+{
+	struct page *page = vm_normal_page(vma, addr, pte);
+
+	if (!is_devdax_page(page) && !is_fsdax_page(page))
+		return page;
+	return NULL;
+}
+
 void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		  unsigned long size);
 void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index cca170fe5be5..54bfece05323 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -914,9 +914,8 @@ struct folio *folio_walk_start(struct folio_walk *fw,
 			spin_unlock(ptl);
 			goto pte_table;
 		} else if (pmd_present(pmd)) {
-			page = vm_normal_page_pmd(vma, addr, pmd);
-			if (page && !is_devdax_page(page) &&
-			    !is_fsdax_page(page)) {
+			page = vm_normal_gfp_pmd(vma, addr, pmd);
+			if (page) {
 				goto found;
 			} else if ((flags & FW_ZEROPAGE) &&
 				    is_huge_zero_pmd(pmd)) {
@@ -949,9 +948,8 @@ struct folio *folio_walk_start(struct folio_walk *fw,
 	fw->pte = pte;
 
 	if (pte_present(pte)) {
-		page = vm_normal_page(vma, addr, pte);
-		if (page && !is_devdax_page(page) &&
-		    !is_fsdax_page(page))
+		page = vm_normal_gfp_pte(vma, addr, pte);
+		if (page)
 			goto found;
 		if ((flags & FW_ZEROPAGE) &&
 		    is_zero_pfn(pte_pfn(pte))) {

Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help