From d909f9109c301f4e9e41678025c45d719ab8f7d7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 10 Jun 2019 13:08:18 +1000 Subject: [PATCH] powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP This sets the HAVE_ARCH_HUGE_VMAP option, and defines the required page table functions. This enables huge (2MB and 1GB) ioremap mappings. I don't have a benchmark for this change, but huge vmap will be used by a later core kernel change to enable huge vmalloc memory mappings. This improves cached `git diff` performance by about 5% on a 2-node POWER9 with 32MB size dentry cache hash. Profiling git diff dTLB misses with a vanilla kernel: 81.75% git [kernel.vmlinux] [k] __d_lookup_rcu 7.21% git [kernel.vmlinux] [k] strncpy_from_user 1.77% git [kernel.vmlinux] [k] find_get_entry 1.59% git [kernel.vmlinux] [k] kmem_cache_free 40,168 dTLB-miss 0.100342754 seconds time elapsed With powerpc huge vmalloc: 2,987 dTLB-miss 0.095933138 seconds time elapsed Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- Documentation/admin-guide/kernel-parameters.txt | 2 +- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/book3s/64/pgtable.h | 8 ++ arch/powerpc/mm/book3s64/radix_pgtable.c | 100 ++++++++++++++++++++++++ 4 files changed, 110 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 138f6664b2e2..a4c3538857e9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2927,7 +2927,7 @@ register save and restore. The kernel will only save legacy floating-point registers on task switch. - nohugeiomap [KNL,x86] Disable kernel huge I/O mappings. + nohugeiomap [KNL,x86,PPC] Disable kernel huge I/O mappings. nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8c1c636308c8..f0e5b38d52e8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -167,6 +167,7 @@ config PPC select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if PPC32 select HAVE_ARCH_KGDB diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 7dede2e34b70..ac6eb9816b64 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -274,6 +274,14 @@ extern unsigned long __vmalloc_end; #define VMALLOC_START __vmalloc_start #define VMALLOC_END __vmalloc_end +static inline unsigned int ioremap_max_order(void) +{ + if (radix_enabled()) + return PUD_SHIFT; + return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */ +} +#define IOREMAP_MAX_ORDER ioremap_max_order() + extern unsigned long __kernel_virt_start; extern unsigned long __kernel_virt_size; extern unsigned long __kernel_io_start; diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 06a5ff2ee626..8904aa1243d8 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1124,6 +1124,106 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, set_pte_at(mm, addr, ptep, pte); } +int __init arch_ioremap_pud_supported(void) +{ + /* HPT does not cope with large pages in the vmalloc area */ + return radix_enabled(); +} + +int __init arch_ioremap_pmd_supported(void) +{ + return radix_enabled(); +} + +int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) +{ + return 0; +} + +int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) +{ + pte_t *ptep = (pte_t *)pud; + pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot); + + if (!radix_enabled()) + return 0; + + set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud); + + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (pud_huge(*pud)) { + pud_clear(pud); + return 1; + } + + return 0; +} + +int pud_free_pmd_page(pud_t *pud, unsigned long addr) +{ + pmd_t *pmd; + int i; + + pmd = (pmd_t *)pud_page_vaddr(*pud); + pud_clear(pud); + + flush_tlb_kernel_range(addr, addr + PUD_SIZE); + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(pmd[i])) { + pte_t *pte; + pte = (pte_t *)pmd_page_vaddr(pmd[i]); + + pte_free_kernel(&init_mm, pte); + } + } + + pmd_free(&init_mm, pmd); + + return 1; +} + +int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) +{ + pte_t *ptep = (pte_t *)pmd; + pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot); + + if (!radix_enabled()) + return 0; + + set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd); + + return 1; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (pmd_huge(*pmd)) { + pmd_clear(pmd); + return 1; + } + + return 0; +} + +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) +{ + pte_t *pte; + + pte = (pte_t *)pmd_page_vaddr(*pmd); + pmd_clear(pmd); + + flush_tlb_kernel_range(addr, addr + PMD_SIZE); + + pte_free_kernel(&init_mm, pte); + + return 1; +} + int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) { -- 2.11.0