--- v6
+++ v4
@@ -1,142 +1,46 @@
-With TRANSPARENT_HUGEPAGE_PUD enabled the kernel can find huge PUD entries.
-Add a helper to move huge PUD entries on mremap().
-
-This will be used by a later patch to optimize mremap of PUD_SIZE aligned
-level 4 PTE mapped addresses.
-
-This also makes sure we support mremap on huge PUD entries even with
-CONFIG_HAVE_MOVE_PUD disabled.
+pmd/pud_populate is the right interface to be used to set the respective
+page table entries. Some architectures like ppc64 do assume that set_pmd/pud_at
+can only be used to set a hugepage PTE. Since we are not setting up a hugepage
+PTE here, use the pmd/pud_populate interface.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
- mm/mremap.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++-----
- 1 file changed, 73 insertions(+), 7 deletions(-)
+ mm/mremap.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
-index ec8f840399ed..1d6fadbd4820 100644
+index ec8f840399ed..574287f9bb39 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
-@@ -324,10 +324,62 @@ static inline bool move_normal_pud(struct vm_area_struct *vma,
- }
- #endif
+@@ -26,6 +26,7 @@
-+
-+#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PUD
-+static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
-+ unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
-+{
-+ spinlock_t *old_ptl, *new_ptl;
-+ struct mm_struct *mm = vma->vm_mm;
-+ pud_t pud;
-+
-+ /*
-+ * The destination pud shouldn't be established, free_pgtables()
-+ * should have released it.
-+ */
-+ if (WARN_ON_ONCE(!pud_none(*new_pud)))
-+ return false;
-+
-+ /*
-+ * We don't have to worry about the ordering of src and dst
-+ * ptlocks because exclusive mmap_lock prevents deadlock.
-+ */
-+ old_ptl = pud_lock(vma->vm_mm, old_pud);
-+ new_ptl = pud_lockptr(mm, new_pud);
-+ if (new_ptl != old_ptl)
-+ spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
-+
-+ /* Clear the pud */
-+ pud = *old_pud;
-+ pud_clear(old_pud);
-+
-+ VM_BUG_ON(!pud_none(*new_pud));
-+
-+ /* Set the new pud */
-+ /* Mark soft_dirty when we add pud level soft dirty support. */
-+ set_pud_at(mm, new_addr, new_pud, pud);
-+ flush_pud_tlb_range(vma, old_addr, old_addr + HPAGE_PUD_SIZE);
-+ if (new_ptl != old_ptl)
-+ spin_unlock(new_ptl);
-+ spin_unlock(old_ptl);
-+
-+ return true;
-+}
-+#else
-+static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
-+ unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
-+{
-+ WARN_ON_ONCE(1);
-+ return false;
-+
-+}
-+#endif
-+
- enum pgt_entry {
- NORMAL_PMD,
- HPAGE_PMD,
- NORMAL_PUD,
-+ HPAGE_PUD,
- };
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
++#include <asm/pgalloc.h>
- /*
-@@ -347,6 +399,7 @@ static __always_inline unsigned long get_extent(enum pgt_entry entry,
- mask = PMD_MASK;
- size = PMD_SIZE;
- break;
-+ case HPAGE_PUD:
- case NORMAL_PUD:
- mask = PUD_MASK;
- size = PUD_SIZE;
-@@ -395,6 +448,11 @@ static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
- move_huge_pmd(vma, old_addr, new_addr, old_entry,
- new_entry);
- break;
-+ case HPAGE_PUD:
-+ moved = move_huge_pud(vma, old_addr, new_addr, old_entry,
-+ new_entry);
-+ break;
-+
- default:
- WARN_ON_ONCE(1);
- break;
-@@ -414,6 +472,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
- unsigned long extent, old_end;
- struct mmu_notifier_range range;
- pmd_t *old_pmd, *new_pmd;
-+ pud_t *old_pud, *new_pud;
+ #include "internal.h"
- old_end = old_addr + len;
- flush_cache_range(vma, old_addr, old_end);
-@@ -429,15 +488,22 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
- * PUD level if possible.
- */
- extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr);
-- if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
-- pud_t *old_pud, *new_pud;
+@@ -257,9 +258,8 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+ pmd_clear(old_pmd);
-- old_pud = get_old_pud(vma->vm_mm, old_addr);
-- if (!old_pud)
-+ old_pud = get_old_pud(vma->vm_mm, old_addr);
-+ if (!old_pud)
-+ continue;
-+ new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
-+ if (!new_pud)
-+ break;
-+ if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) {
-+ if (extent == HPAGE_PUD_SIZE) {
-+ move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr,
-+ old_pud, new_pud, need_rmap_locks);
-+ /* We ignore and continue on error? */
- continue;
-- new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
-- if (!new_pud)
-- break;
-+ }
-+ } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
-+
- if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
- old_pud, new_pud, need_rmap_locks))
- continue;
+ VM_BUG_ON(!pmd_none(*new_pmd));
++ pmd_populate(mm, new_pmd, (pgtable_t)pmd_page_vaddr(pmd));
+
+- /* Set the new pmd */
+- set_pmd_at(mm, new_addr, new_pmd, pmd);
+ flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
+@@ -306,8 +306,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
+
+ VM_BUG_ON(!pud_none(*new_pud));
+
+- /* Set the new pud */
+- set_pud_at(mm, new_addr, new_pud, pud);
++ pud_populate(mm, new_pud, (pmd_t *)pud_page_vaddr(pud));
+ flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
--
-2.31.1
+2.30.2