mm: hugetlb: optionally allocate gigantic hugepages using cma

author Roman Gushchin <guro@fb.com>

Fri, 10 Apr 2020 21:32:45 +0000 (14:32 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 10 Apr 2020 22:36:21 +0000 (15:36 -0700)
author Roman Gushchin <guro@fb.com>
Fri, 10 Apr 2020 21:32:45 +0000 (14:32 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 10 Apr 2020 22:36:21 +0000 (15:36 -0700)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt

index 86aae1f..d7df9a8 100644 (file)
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1475,6 +1475,14 @@
         hpet_mmap=      [X86, HPET_MMAP] Allow userspace to mmap HPET
                         registers.  Default set by CONFIG_HPET_MMAP_DEFAULT.
  
+       hugetlb_cma=    [HW] The size of a CMA area used for allocation
+                       of gigantic hugepages.
+                       Format: nn[KMGTPE]
+
+                       Reserve a CMA area of the given size and allocate
+                       gigantic hugepages using the CMA allocator. If enabled,
+                       the boot-time allocation of gigantic hugepages is skipped.
+
         hugepages=      [HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
         hugepagesz=     [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
                         On x86-64 and powerpc, this option can be specified
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c

index b65dffd..e42727e 100644 (file)
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -29,6 +29,7 @@
  #include <linux/mm.h>
  #include <linux/kexec.h>
  #include <linux/crash_dump.h>
+#include <linux/hugetlb.h>
  
  #include <asm/boot.h>
  #include <asm/fixmap.h>
@@ -457,6 +458,11 @@ void __init arm64_memblock_init(void)
         high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
  
         dma_contiguous_reserve(arm64_dma32_phys_limit);
+
+#ifdef CONFIG_ARM64_4K_PAGES
+       hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
+#endif
+
  }
  
  void __init bootmem_init(void)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c

index e6b5450..4b3fa6c 100644 (file)
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -16,6 +16,7 @@
  #include <linux/pci.h>
  #include <linux/root_dev.h>
  #include <linux/sfi.h>
+#include <linux/hugetlb.h>
  #include <linux/tboot.h>
  #include <linux/usb/xhci-dbgp.h>
  
@@ -1157,6 +1158,9 @@ void __init setup_arch(char **cmdline_p)
         initmem_init();
         dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
  
+       if (boot_cpu_has(X86_FEATURE_GBPAGES))
+               hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
+
         /*
          * Reserve memory for crash kernel after SRAT is parsed so that it
          * won't consume hotpluggable memory.
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index 5ea0587..43a1cef 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -895,4 +895,16 @@ static inline spinlock_t *huge_pte_lock(struct hstate *h,
         return ptl;
  }
  
+/* Hooks for the hugetlb_cma= option; stubbed out without HugeTLB or CMA. */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
+/* Reserve CMA areas for gigantic pages made of 2^order base pages. */
+extern void __init hugetlb_cma_reserve(int order);
+/* Warn if hugetlb_cma= was set but hugetlb_cma_reserve() was never called. */
+extern void __init hugetlb_cma_check(void);
+#else
+/* Empty stubs, so that callers need no #ifdef of their own. */
+static inline __init void hugetlb_cma_reserve(int order) { }
+static inline __init void hugetlb_cma_check(void) { }
+#endif
+
  #endif /* _LINUX_HUGETLB_H */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index f5fb53f..cd45915 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -28,6 +28,7 @@
  #include <linux/jhash.h>
  #include <linux/numa.h>
  #include <linux/llist.h>
+#include <linux/cma.h>
  
  #include <asm/page.h>
  #include <asm/pgtable.h>
@@ -44,6 +45,9 @@
  int hugetlb_max_hstate __read_mostly;
  unsigned int default_hstate_idx;
  struct hstate hstates[HUGE_MAX_HSTATE];
+
+static struct cma *hugetlb_cma[MAX_NUMNODES];
+
  /*
   * Minimum page order among possible hugepage sizes, set to a proper value
   * at boot time.
@@ -1228,6 +1232,14 @@ static void destroy_compound_gigantic_page(struct page *page,
  
  static void free_gigantic_page(struct page *page, unsigned int order)
  {
+       /*
+        * If the page isn't allocated using the cma allocator,
+        * cma_release() returns false.
+        */
+       if (IS_ENABLED(CONFIG_CMA) &&
+           cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
+               return;
+
         free_contig_range(page_to_pfn(page), 1 << order);
  }
  
@@ -1237,6 +1249,21 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
  {
         unsigned long nr_pages = 1UL << huge_page_order(h);
  
+       if (IS_ENABLED(CONFIG_CMA)) {
+               struct page *page;
+               int node;
+
+               for_each_node_mask(node, *nodemask) {
+                       if (!hugetlb_cma[node])
+                               continue;
+
+                       page = cma_alloc(hugetlb_cma[node], nr_pages,
+                                        huge_page_order(h), true);
+                       if (page)
+                               return page;
+               }
+       }
+
         return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
  }
  
@@ -1281,8 +1308,14 @@ static void update_and_free_page(struct hstate *h, struct page *page)
         set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
         set_page_refcounted(page);
         if (hstate_is_gigantic(h)) {
+               /*
+                * Temporarily drop the hugetlb_lock, because
+                * we might block in free_gigantic_page().
+                */
+               spin_unlock(&hugetlb_lock);
                 destroy_compound_gigantic_page(page, huge_page_order(h));
                 free_gigantic_page(page, huge_page_order(h));
+               spin_lock(&hugetlb_lock);
         } else {
                 __free_pages(page, huge_page_order(h));
         }
@@ -2539,6 +2572,10 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
  
         for (i = 0; i < h->max_huge_pages; ++i) {
                 if (hstate_is_gigantic(h)) {
+                       if (IS_ENABLED(CONFIG_CMA) && hugetlb_cma[0]) {
+                               pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n");
+                               break;
+                       }
                         if (!alloc_bootmem_huge_page(h))
                                 break;
                 } else if (!alloc_pool_huge_page(h,
@@ -3194,6 +3231,7 @@ static int __init hugetlb_init(void)
                         default_hstate.max_huge_pages = default_hstate_max_huge_pages;
         }
  
+       hugetlb_cma_check();
         hugetlb_init_hstates();
         gather_bootmem_prealloc();
         report_hugepages();
@@ -5506,3 +5544,74 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
                 spin_unlock(&hugetlb_lock);
         }
  }
+
+#ifdef CONFIG_CMA
+static unsigned long hugetlb_cma_size __initdata;
+static bool cma_reserve_called __initdata;
+
+/* Parse hugetlb_cma=nn[KMGTPE]; p is NULL when no "=value" was given. */
+static int __init cmdline_parse_hugetlb_cma(char *p)
+{
+       hugetlb_cma_size = p ? memparse(p, &p) : 0;
+       return p ? 0 : -EINVAL;
+}
+early_param("hugetlb_cma", cmdline_parse_hugetlb_cma);
+
+/*
+ * Split the hugetlb_cma= size across the online nodes and reserve one CMA
+ * area per node, sized and aligned to gigantic pages of 2^order base pages.
+ */
+void __init hugetlb_cma_reserve(int order)
+{
+       unsigned long gigantic_size = PAGE_SIZE << order;
+       unsigned long size, reserved = 0, per_node;
+       int nid, res;
+
+       cma_reserve_called = true;
+       if (!hugetlb_cma_size)
+               return;
+
+       if (hugetlb_cma_size < gigantic_size) {
+               pr_warn("hugetlb_cma: cma area should be at least %lu MiB\n",
+                       gigantic_size / SZ_1M);
+               return;
+       }
+
+       /*
+        * Each node's share is rounded up to whole gigantic pages, so 3 GB
+        * of 1 GB pages on 4 nodes lands on the first three nodes only.
+        */
+       per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes);
+       pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n",
+               hugetlb_cma_size / SZ_1M, per_node / SZ_1M);
+
+       for_each_node_state(nid, N_ONLINE) {
+               size = min(per_node, hugetlb_cma_size - reserved);
+               size = round_up(size, gigantic_size);
+
+               res = cma_declare_contiguous_nid(0, size, 0, gigantic_size, 0,
+                                                false, "hugetlb",
+                                                &hugetlb_cma[nid], nid);
+               if (res) {
+                       pr_warn("hugetlb_cma: reservation failed: err %d, node %d\n",
+                               res, nid);
+                       continue;
+               }
+
+               reserved += size;
+               pr_info("hugetlb_cma: reserved %lu MiB on node %d\n",
+                       size / SZ_1M, nid);
+               if (reserved >= hugetlb_cma_size)
+                       break;
+       }
+}
+
+/* Boot-time sanity check for the hugetlb_cma= option. */
+void __init hugetlb_cma_check(void)
+{
+       /* Size was requested, but no arch code called hugetlb_cma_reserve(). */
+       if (hugetlb_cma_size && !cma_reserve_called)
+               pr_warn("hugetlb_cma: the option isn't supported by current arch\n");
+}
+
+#endif /* CONFIG_CMA */
author	Roman Gushchin <guro@fb.com>
	Fri, 10 Apr 2020 21:32:45 +0000 (14:32 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 10 Apr 2020 22:36:21 +0000 (15:36 -0700)
Documentation/admin-guide/kernel-parameters.txt		patch \| blob \| history
arch/arm64/mm/init.c		patch \| blob \| history
arch/x86/kernel/setup.c		patch \| blob \| history
include/linux/hugetlb.h		patch \| blob \| history
mm/hugetlb.c		patch \| blob \| history