[PATCH 3/3] powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP
From: Nicholas Piggin <npiggin@gmail.com>
Date: 2019-06-10 03:15:56
Subsystem:
documentation, linux for powerpc (32-bit and 64-bit), the rest · Maintainers:
Jonathan Corbet, Madhavan Srinivasan, Michael Ellerman, Linus Torvalds
This sets the HAVE_ARCH_HUGE_VMAP option, and defines the required
page table functions.
This enables huge (2MB and 1GB) ioremap mappings. I don't have a
benchmark for this change, but huge vmap will be used by a later core
kernel change to enable huge vmalloc memory mappings. This improves
cached `git diff` performance by about 5% on a 2-node POWER9 with 32MB
size dentry cache hash.
Profiling git diff dTLB misses with a vanilla kernel:
81.75% git [kernel.vmlinux] [k] __d_lookup_rcu
7.21% git [kernel.vmlinux] [k] strncpy_from_user
1.77% git [kernel.vmlinux] [k] find_get_entry
1.59% git [kernel.vmlinux] [k] kmem_cache_free
40,168 dTLB-miss
0.100342754 seconds time elapsed
With powerpc huge vmalloc:
2,987 dTLB-miss
0.095933138 seconds time elapsed
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
.../admin-guide/kernel-parameters.txt | 2 +-
arch/powerpc/Kconfig | 1 +
arch/powerpc/include/asm/book3s/64/pgtable.h | 8 ++
arch/powerpc/mm/book3s64/radix_pgtable.c | 100 ++++++++++++++++++
4 files changed, 110 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..a4c3538857e9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt@@ -2927,7 +2927,7 @@ register save and restore. The kernel will only save legacy floating-point registers on task switch. - nohugeiomap [KNL,x86] Disable kernel huge I/O mappings. + nohugeiomap [KNL,x86,PPC] Disable kernel huge I/O mappings. nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1.
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8c1c636308c8..f0e5b38d52e8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig@@ -167,6 +167,7 @@ config PPC select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if PPC32 select HAVE_ARCH_KGDB
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index ccf00a8b98c6..5faceeefd9f9 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h@@ -274,6 +274,14 @@ extern unsigned long __vmalloc_end; #define VMALLOC_START __vmalloc_start #define VMALLOC_END __vmalloc_end +static inline unsigned int ioremap_max_order(void) +{ + if (radix_enabled()) + return PUD_SHIFT; + return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */ +} +#define IOREMAP_MAX_ORDER ioremap_max_order() + extern unsigned long __kernel_virt_start; extern unsigned long __kernel_virt_size; extern unsigned long __kernel_io_start;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index db993bc1aef3..35cf04bbd50b 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c@@ -1124,6 +1124,106 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, set_pte_at(mm, addr, ptep, pte); } +int __init arch_ioremap_pud_supported(void) +{ + /* HPT does not cope with large pages in the vmalloc area */ + return radix_enabled(); +} + +int __init arch_ioremap_pmd_supported(void) +{ + return radix_enabled(); +} + +int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) +{ + return 0; +} + +int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) +{ + pte_t *ptep = (pte_t *)pud; + pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot); + + if (!radix_enabled()) + return 0; + + set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud); + + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (pud_huge(*pud)) { + pud_clear(pud); + return 1; + } + + return 0; +} + +int pud_free_pmd_page(pud_t *pud, unsigned long addr) +{ + pmd_t *pmd; + int i; + + pmd = (pmd_t *)pud_page_vaddr(*pud); + pud_clear(pud); + + flush_tlb_kernel_range(addr, addr + PUD_SIZE); + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(pmd[i])) { + pte_t *pte; + pte = (pte_t *)pmd_page_vaddr(pmd[i]); + + pte_free_kernel(&init_mm, pte); + } + } + + pmd_free(&init_mm, pmd); + + return 1; +} + +int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) +{ + pte_t *ptep = (pte_t *)pmd; + pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot); + + if (!radix_enabled()) + return 0; + + set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd); + + return 1; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (pmd_huge(*pmd)) { + pmd_clear(pmd); + return 1; + } + + return 0; +} + +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) +{ + pte_t *pte; + + pte = (pte_t *)pmd_page_vaddr(*pmd); + pmd_clear(pmd); + + flush_tlb_kernel_range(addr, addr + PMD_SIZE); + + pte_free_kernel(&init_mm, pte); + + return 1; +} + int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) {
--
2.20.1