--- v6
+++ v3
@@ -1,225 +1,281 @@
-For a pte entry we will have _PAGE_PTE set. Our pte page
-address has a minimum alignment requirement of HUGEPD_SHIFT_MASK + 1.
-We use the lower 7 bits to indicate hugepd, i.e.
+W.r.t hugetlb, we support two formats for pmd. With book3s_64 and
+64K linux page size, we can have pte at the pmd level. Hence we
+don't need to support hugepd there. For everything else hugepd
+is supported and pmd_huge is (0).
-For pmd and pgd we can find:
-1) _PAGE_PTE set pte -> indicate PTE
-2) bits [2..6] non zero -> indicate hugepd.
- They also encode the size. We skip bit 1 (_PAGE_PRESENT).
-3) otherwise pointer to next table.
-
-Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
- arch/powerpc/include/asm/book3s/64/hash-4k.h | 9 ++++++---
- arch/powerpc/include/asm/book3s/64/hash-64k.h | 23 +++++++++--------------
- arch/powerpc/include/asm/book3s/64/hash.h | 13 +++++++------
- arch/powerpc/include/asm/book3s/64/pgtable.h | 3 +--
- arch/powerpc/include/asm/pte-common.h | 5 +++++
- arch/powerpc/mm/hugetlbpage.c | 4 ++--
- arch/powerpc/mm/pgtable.c | 4 ++++
- arch/powerpc/mm/pgtable_64.c | 7 +------
- 8 files changed, 35 insertions(+), 33 deletions(-)
+ arch/powerpc/include/asm/book3s/64/hash-4k.h | 31 +++++++++++++++
+ arch/powerpc/include/asm/book3s/64/hash-64k.h | 51 +++++++++++++++++++++++++
+ arch/powerpc/include/asm/nohash/pgtable.h | 25 +++++++++++++
+ arch/powerpc/include/asm/page.h | 42 ++-------------------
+ arch/powerpc/mm/hugetlbpage.c | 54 ---------------------------
+ 5 files changed, 111 insertions(+), 92 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
-index b4d25529d179..e59832c94609 100644
+index 75e8b9326e4b..b4d25529d179 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
-@@ -116,10 +116,13 @@ static inline int pgd_huge(pgd_t pgd)
- static inline int hugepd_ok(hugepd_t hpd)
- {
- /*
+@@ -93,6 +93,37 @@ extern struct page *pgd_page(pgd_t pgd);
+ #define remap_4k_pfn(vma, addr, pfn, prot) \
+ remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
+
++#ifdef CONFIG_HUGETLB_PAGE
++/*
++ * For 4k page size, we support explicit hugepage via hugepd
++ */
++static inline int pmd_huge(pmd_t pmd)
++{
++ return 0;
++}
++
++static inline int pud_huge(pud_t pud)
++{
++ return 0;
++}
++
++static inline int pgd_huge(pgd_t pgd)
++{
++ return 0;
++}
++#define pgd_huge pgd_huge
++
++static inline int hugepd_ok(hugepd_t hpd)
++{
++ /*
++ * hugepd pointer, bottom two bits == 00 and next 4 bits
++ * indicate size of table
++ */
++ return (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0));
++}
++#define is_hugepd(hpd) (hugepd_ok(hpd))
++#endif
++
+ #endif /* !__ASSEMBLY__ */
+
+ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */
+diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
+index f46fbd6cd837..20865ca7a179 100644
+--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
++++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
+@@ -119,6 +119,57 @@ static inline bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
+ #define pgd_pte(pgd) (pud_pte(((pud_t){ pgd })))
+ #define pte_pgd(pte) ((pgd_t)pte_pud(pte))
+
++#ifdef CONFIG_HUGETLB_PAGE
++/*
++ * We have PGD_INDEX_SIZE = 12 and PTE_INDEX_SIZE = 8, so that we can have
++ * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
++ *
++ * Defined in such a way that we can optimize away code block at build time
++ * if CONFIG_HUGETLB_PAGE=n.
++ */
++static inline int pmd_huge(pmd_t pmd)
++{
++ /*
++ * leaf pte for huge page, bottom two bits != 00
++ */
++ return ((pmd_val(pmd) & 0x3) != 0x0);
++}
++
++static inline int pud_huge(pud_t pud)
++{
++ /*
++ * leaf pte for huge page, bottom two bits != 00
++ */
++ return ((pud_val(pud) & 0x3) != 0x0);
++}
++
++static inline int pgd_huge(pgd_t pgd)
++{
++ /*
++ * leaf pte for huge page, bottom two bits != 00
++ */
++ return ((pgd_val(pgd) & 0x3) != 0x0);
++}
++#define pgd_huge pgd_huge
++
++#ifdef CONFIG_DEBUG_VM
++extern int hugepd_ok(hugepd_t hpd);
++#define is_hugepd(hpd) (hugepd_ok(hpd))
++#else
++/*
++ * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
++ * need to set up hugepage directory for them. Our pte and page directory formats
++ * enable us to have this enabled.
++ */
++static inline int hugepd_ok(hugepd_t hpd)
++{
++ return 0;
++}
++#define is_hugepd(pdep) 0
++#endif /* CONFIG_DEBUG_VM */
++
++#endif /* CONFIG_HUGETLB_PAGE */
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */
+diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
+index c0c41a2409d2..1263c22d60d8 100644
+--- a/arch/powerpc/include/asm/nohash/pgtable.h
++++ b/arch/powerpc/include/asm/nohash/pgtable.h
+@@ -223,5 +223,30 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t vma_prot);
+ #define __HAVE_PHYS_MEM_ACCESS_PROT
+
++#ifdef CONFIG_HUGETLB_PAGE
++static inline int hugepd_ok(hugepd_t hpd)
++{
++ return (hpd.pd > 0);
++}
++
++static inline int pmd_huge(pmd_t pmd)
++{
++ return 0;
++}
++
++static inline int pud_huge(pud_t pud)
++{
++ return 0;
++}
++
++static inline int pgd_huge(pgd_t pgd)
++{
++ return 0;
++}
++#define pgd_huge pgd_huge
++
++#define is_hugepd(hpd) (hugepd_ok(hpd))
++#endif
++
+ #endif /* __ASSEMBLY__ */
+ #endif
+diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
+index 9c3211eb487c..f63b2761cdd0 100644
+--- a/arch/powerpc/include/asm/page.h
++++ b/arch/powerpc/include/asm/page.h
+@@ -386,45 +386,11 @@ typedef unsigned long pgprot_t;
+
+ typedef struct { signed long pd; } hugepd_t;
+
+-#ifdef CONFIG_HUGETLB_PAGE
+-#ifdef CONFIG_PPC_BOOK3S_64
+-#ifdef CONFIG_PPC_64K_PAGES
+-/*
+- * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
+- * need to setup hugepage directory for them. Our pte and page directory format
+- * enable us to have this enabled. But to avoid errors when implementing new
+- * features disable hugepd for 64K. We enable a debug version here, So we catch
+- * wrong usage.
+- */
+-#ifdef CONFIG_DEBUG_VM
+-extern int hugepd_ok(hugepd_t hpd);
+-#else
+-#define hugepd_ok(x) (0)
+-#endif
+-#else
+-static inline int hugepd_ok(hugepd_t hpd)
+-{
+- /*
- * hugepd pointer, bottom two bits == 00 and next 4 bits
- * indicate size of table
-+ * if it is not a pte and have hugepd shift mask
-+ * set, then it is a hugepd directory pointer
- */
+- */
- return (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0));
-+ if (!(hpd.pd & _PAGE_PTE) &&
-+ ((hpd.pd & HUGEPD_SHIFT_MASK) != 0))
-+ return true;
-+ return false;
- }
- #define is_hugepd(hpd) (hugepd_ok(hpd))
- #endif
-diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
-index 7570677c11c3..52110d7af659 100644
---- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
-+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
-@@ -130,25 +130,25 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
- static inline int pmd_huge(pmd_t pmd)
- {
- /*
-- * leaf pte for huge page, bottom two bits != 00
-+ * leaf pte for huge page
- */
-- return ((pmd_val(pmd) & 0x3) != 0x0);
-+ return !!(pmd_val(pmd) & _PAGE_PTE);
- }
-
- static inline int pud_huge(pud_t pud)
- {
- /*
-- * leaf pte for huge page, bottom two bits != 00
-+ * leaf pte for huge page
- */
-- return ((pud_val(pud) & 0x3) != 0x0);
-+ return !!(pud_val(pud) & _PAGE_PTE);
- }
-
- static inline int pgd_huge(pgd_t pgd)
- {
- /*
-- * leaf pte for huge page, bottom two bits != 00
-+ * leaf pte for huge page
- */
-- return ((pgd_val(pgd) & 0x3) != 0x0);
-+ return !!(pgd_val(pgd) & _PAGE_PTE);
- }
- #define pgd_huge pgd_huge
-
-@@ -236,10 +236,8 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
- */
- static inline int pmd_trans_huge(pmd_t pmd)
- {
-- /*
-- * leaf pte for huge page, bottom two bits != 00
-- */
-- return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
-+ return !!((pmd_val(pmd) & (_PAGE_PTE | _PAGE_THP_HUGE)) ==
-+ (_PAGE_PTE | _PAGE_THP_HUGE));
- }
-
- static inline int pmd_trans_splitting(pmd_t pmd)
-@@ -251,10 +249,7 @@ static inline int pmd_trans_splitting(pmd_t pmd)
-
- static inline int pmd_large(pmd_t pmd)
- {
-- /*
-- * leaf pte for huge page, bottom two bits != 00
-- */
-- return ((pmd_val(pmd) & 0x3) != 0x0);
-+ return !!(pmd_val(pmd) & _PAGE_PTE);
- }
-
- static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
-index 42e1273adad1..8b929e531758 100644
---- a/arch/powerpc/include/asm/book3s/64/hash.h
-+++ b/arch/powerpc/include/asm/book3s/64/hash.h
-@@ -14,11 +14,12 @@
- * We could create separate kernel read-only if we used the 3 PP bits
- * combinations that newer processors provide but we currently don't.
- */
--#define _PAGE_PRESENT 0x00001 /* software: pte contains a translation */
--#define _PAGE_USER 0x00002 /* matches one of the PP bits */
-+#define _PAGE_PTE 0x00001
-+#define _PAGE_PRESENT 0x00002 /* software: pte contains a translation */
- #define _PAGE_BIT_SWAP_TYPE 2
--#define _PAGE_EXEC 0x00004 /* No execute on POWER4 and newer (we invert) */
--#define _PAGE_GUARDED 0x00008
-+#define _PAGE_USER 0x00004 /* matches one of the PP bits */
-+#define _PAGE_EXEC 0x00008 /* No execute on POWER4 and newer (we invert) */
-+#define _PAGE_GUARDED 0x00010
- /* We can derive Memory coherence from _PAGE_NO_CACHE */
- #define _PAGE_COHERENT 0x0
- #define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */
-@@ -49,7 +50,7 @@
- */
- #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \
- _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
-- _PAGE_THP_HUGE)
-+ _PAGE_THP_HUGE | _PAGE_PTE)
-
- #ifdef CONFIG_PPC_64K_PAGES
- #include <asm/book3s/64/hash-64k.h>
-@@ -135,7 +136,7 @@
- * pgprot changes
- */
- #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-- _PAGE_ACCESSED | _PAGE_SPECIAL)
-+ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE)
- /*
- * Mask of bits returned by pte_pgprot()
- */
-diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
-index f2ace2cac7bb..bb97b6a52b84 100644
---- a/arch/powerpc/include/asm/book3s/64/pgtable.h
-+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
-@@ -213,8 +213,7 @@ static inline int pmd_protnone(pmd_t pmd)
-
- static inline pmd_t pmd_mkhuge(pmd_t pmd)
- {
-- /* Do nothing, mk_pmd() does this part. */
-- return pmd;
-+ return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_THP_HUGE));
- }
-
- #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
-diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
-index 71537a319fc8..1ec67b043065 100644
---- a/arch/powerpc/include/asm/pte-common.h
-+++ b/arch/powerpc/include/asm/pte-common.h
-@@ -40,6 +40,11 @@
- #else
- #define _PAGE_RW 0
- #endif
-+
-+#ifndef _PAGE_PTE
-+#define _PAGE_PTE 0
-+#endif
-+
- #ifndef _PMD_PRESENT_MASK
- #define _PMD_PRESENT_MASK _PMD_PRESENT
- #endif
+-}
+-#endif
+-#else
+-static inline int hugepd_ok(hugepd_t hpd)
+-{
+- return (hpd.pd > 0);
+-}
+-#endif
+-
+-#define is_hugepd(hpd) (hugepd_ok(hpd))
+-#define pgd_huge pgd_huge
+-int pgd_huge(pgd_t pgd);
+-#else /* CONFIG_HUGETLB_PAGE */
+-#define is_hugepd(pdep) 0
+-#define pgd_huge(pgd) 0
++#ifndef CONFIG_HUGETLB_PAGE
++#define is_hugepd(pdep) (0)
++#define pgd_huge(pgd) (0)
+ #endif /* CONFIG_HUGETLB_PAGE */
++
+ #define __hugepd(x) ((hugepd_t) { (x) })
+
+ struct page;
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
-index bc72e542a83e..61b8b7ccea4f 100644
+index 9833fee493ec..f20c1a9065da 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
-@@ -894,8 +894,8 @@ void flush_dcache_icache_hugepage(struct page *page)
- * We have 4 cases for pgds and pmds:
- * (1) invalid (all zeroes)
- * (2) pointer to next table, as normal; bottom 6 bits == 0
-- * (3) leaf pte for huge page, bottom two bits != 00
-- * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table
-+ * (3) leaf pte for huge page _PAGE_PTE set
-+ * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
- *
- * So long as we atomically load page table pointers we are safe against teardown,
- * we can follow the address down to the the page and take a ref on it.
-diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
-index 83dfcb55ffef..83dfd7925c72 100644
---- a/arch/powerpc/mm/pgtable.c
-+++ b/arch/powerpc/mm/pgtable.c
-@@ -179,6 +179,10 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
- */
- VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
- (_PAGE_PRESENT | _PAGE_USER));
-+ /*
-+ * Add the pte bit when trying to set a pte
-+ */
-+ pte = __pte(pte_val(pte) | _PAGE_PTE);
-
- /* Note: mm->context.id might not yet have been assigned as
- * this context might not have been activated yet when this
-diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
-index d42dd289abfe..ea6bc31debb0 100644
---- a/arch/powerpc/mm/pgtable_64.c
-+++ b/arch/powerpc/mm/pgtable_64.c
-@@ -765,13 +765,8 @@ static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
- pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+@@ -53,43 +53,6 @@ static unsigned nr_gpages;
+
+ #define hugepd_none(hpd) ((hpd).pd == 0)
+
+-#ifdef CONFIG_PPC_BOOK3S_64
+-/*
+- * At this point we do the placement change only for BOOK3S 64. This would
+- * possibly work on other subarchs.
+- */
+-
+-/*
+- * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
+- * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
+- *
+- * Defined in such a way that we can optimize away code block at build time
+- * if CONFIG_HUGETLB_PAGE=n.
+- */
+-int pmd_huge(pmd_t pmd)
+-{
+- /*
+- * leaf pte for huge page, bottom two bits != 00
+- */
+- return ((pmd_val(pmd) & 0x3) != 0x0);
+-}
+-
+-int pud_huge(pud_t pud)
+-{
+- /*
+- * leaf pte for huge page, bottom two bits != 00
+- */
+- return ((pud_val(pud) & 0x3) != 0x0);
+-}
+-
+-int pgd_huge(pgd_t pgd)
+-{
+- /*
+- * leaf pte for huge page, bottom two bits != 00
+- */
+- return ((pgd_val(pgd) & 0x3) != 0x0);
+-}
+-
+ #if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_DEBUG_VM)
+ /*
+ * This enables us to catch the wrong page directory format
+@@ -108,23 +71,6 @@ int hugepd_ok(hugepd_t hpd)
+ }
+ #endif
+
+-#else
+-int pmd_huge(pmd_t pmd)
+-{
+- return 0;
+-}
+-
+-int pud_huge(pud_t pud)
+-{
+- return 0;
+-}
+-
+-int pgd_huge(pgd_t pgd)
+-{
+- return 0;
+-}
+-#endif
+-
+ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
- unsigned long pmdv;
-- /*
-- * For a valid pte, we would have _PAGE_PRESENT always
-- * set. We use this to check THP page at pmd level.
-- * leaf pte for huge page, bottom two bits != 00
-- */
-+
- pmdv = pfn << PTE_RPN_SHIFT;
-- pmdv |= _PAGE_THP_HUGE;
- return pmd_set_protbits(__pmd(pmdv), pgprot);
- }
-
+ /* Only called for hugetlbfs pages, hence can ignore THP */
--
2.5.0