arm64: Memory hotplug support for arm64 platform

author Maciej Bielski <m.bielski@virtualopensystems.com>

Fri, 28 Apr 2017 08:44:14 +0000 (14:14 +0530)

committer Arun KS <arunks@codeaurora.org>

Wed, 22 Nov 2017 02:41:50 +0000 (08:11 +0530)
author Maciej Bielski <m.bielski@virtualopensystems.com>
Fri, 28 Apr 2017 08:44:14 +0000 (14:14 +0530)
committer Arun KS <arunks@codeaurora.org>
Wed, 22 Nov 2017 02:41:50 +0000 (08:11 +0530)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig

index 07090b4..104588d 100644 (file)
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -653,9 +653,7 @@ config HOTPLUG_CPU
           can be controlled through /sys/devices/system/cpu.
  
  config ARCH_ENABLE_MEMORY_HOTPLUG
-       def_bool y
-
-config ARCH_ENABLE_MEMORY_HOTREMOVE
+    depends on !NUMA
         def_bool y
  
  # The GPIO number here must be sorted by descending number. In case of
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h

index 990124a..0944bfc 100644 (file)
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -35,5 +35,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
                                unsigned long virt, phys_addr_t size,
                                pgprot_t prot);
  extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
+#ifdef CONFIG_MEMORY_HOTPLUG
+extern void hotplug_paging(phys_addr_t start, phys_addr_t size);
+#endif
  
  #endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c

index 1d4dcd5..bc45677 100644 (file)
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -503,37 +503,80 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
         struct zone *zone;
         unsigned long start_pfn = start >> PAGE_SHIFT;
         unsigned long nr_pages = size >> PAGE_SHIFT;
+       unsigned long end_pfn = start_pfn + nr_pages;
+       unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
+       unsigned long pfn;
         int ret;
  
+       if (end_pfn > max_sparsemem_pfn) {
+               pr_err("end_pfn too big");
+               return -1;
+       }
+       hotplug_paging(start, size);
+
+       /*
+        * Mark the first page in the range as unusable. This is needed
+        * because __add_section (within __add_pages) wants pfn_valid
+        * of it to be false, and in arm64 pfn_valid is implemented by
+        * just checking the nomap flag for existing blocks.
+        *
+        * A small trick here is that __add_section() requires only
+        * phys_start_pfn (that is, the first pfn of a section) to be
+        * invalid. Regardless of whether the function's author assumed
+        * that all pfns within a section are either all valid or all
+        * invalid, this lets us avoid looping twice through all pfns
+        * of the section (once here, and again when
+        * memblock_clear_nomap() is called) and modify only one pfn.
+        * As a consequence, in __add_zone() only this very first pfn is
+        * skipped and only its page is not flagged reserved. Therefore
+        * it is enough to correct this setup for that one page.
+        *
+        * When arch_add_memory() returns, walk_memory_range() is called
+        * with the online_memory_block() callback, whose execution
+        * finally reaches memory_block_action(), where again only the
+        * first pfn of a memory block is checked to be reserved. Above,
+        * it was the first pfn of a section; here it is the first pfn
+        * of a block, but
+        * (drivers/base/memory.c):
+        *     sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+        * (include/linux/memory.h):
+        *     #define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)
+        * so we can treat block and section as equivalent.
+        */
+       memblock_mark_nomap(start, 1<<PAGE_SHIFT);
+
         pgdat = NODE_DATA(nid);
  
         zone = pgdat->node_zones +
                 zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
         ret = __add_pages(nid, zone, start_pfn, nr_pages);
  
-       if (ret)
-               pr_warn("%s: Problem encountered in __add_pages() ret=%d\n",
-                       __func__, ret);
-
-       return ret;
-}
+       /*
+        * Make the first page usable again now that the range has been
+        * added. This will make pfn_valid return true for it.
+        */
+       memblock_clear_nomap(start, 1<<PAGE_SHIFT);
  
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
-{
-       unsigned long start_pfn = start >> PAGE_SHIFT;
-       unsigned long nr_pages = size >> PAGE_SHIFT;
-       struct zone *zone;
-       int ret;
+       /*
+        * This is a hack to avoid having to mix arch specific code
+        * into arch independent code. SetPageReserved is supposed
+        * to be called by __add_zone (within __add_section, within
+        * __add_pages). However, when it is called there, it assumes that
+        * pfn_valid returns true.  For the way pfn_valid is implemented
+        * in arm64 (a check on the nomap flag), the only way to make
+        * this evaluate true inside __add_zone is to clear the nomap
+        * flags of blocks in architecture independent code.
+        *
+        * To avoid this, we set the Reserved flag here after we cleared
+        * the nomap flag in the line above.
+        */
+       SetPageReserved(pfn_to_page(start_pfn));
  
-       zone = page_zone(pfn_to_page(start_pfn));
-       ret = __remove_pages(zone, start_pfn, nr_pages);
         if (ret)
-               pr_warn("%s: Problem encountered in __remove_pages() ret=%d\n",
+               pr_warn("%s: Problem encountered in __add_pages() ret=%d\n",
                         __func__, ret);
  
         return ret;
  }
  #endif
-#endif
  
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c

index 6c444d9..cdb9338 100644 (file)
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1,3 +1,4 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  /*
   * Based on arch/arm/mm/mmu.c
   *
@@ -130,6 +131,7 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
                 phys_addr_t pte_phys;
                 BUG_ON(!pgtable_alloc);
                 pte_phys = pgtable_alloc();
+               pr_debug("Allocating PTE at %pK\n", __va(pte_phys));
                 pte = pte_set_fixmap(pte_phys);
                 if (pmd_sect(*pmd))
                         split_pmd(pmd, pte);
@@ -194,6 +196,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
                 phys_addr_t pmd_phys;
                 BUG_ON(!pgtable_alloc);
                 pmd_phys = pgtable_alloc();
+               pr_debug("Allocating PMD at %pK\n", __va(pmd_phys));
                 pmd = pmd_set_fixmap(pmd_phys);
                 if (pud_sect(*pud)) {
                         /*
@@ -262,6 +265,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
                 phys_addr_t pud_phys;
                 BUG_ON(!pgtable_alloc);
                 pud_phys = pgtable_alloc();
+               pr_debug("Allocating PUD at %pK\n", __va(pud_phys));
                 __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
         }
         BUG_ON(pgd_bad(*pgd));
@@ -605,6 +609,47 @@ void __init paging_init(void)
         bootmem_init();
  }
  
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Allocate one page for use as a page-table page and return its physical
+ * address. There is no failure return: the kernel BUG()s if the page
+ * cannot be obtained or if pgtable_page_ctor() rejects it.
+ */
+static phys_addr_t pgd_pgtable_alloc(void)
+{
+        void *table = (void *)__get_free_page(PGALLOC_GFP);
+
+        /* Allocation failure: nothing sensible to fall back to */
+        if (table == NULL)
+                BUG();
+        /* Page-table page constructor failure is treated the same way */
+        if (!pgtable_page_ctor(virt_to_page(table)))
+                BUG();
+
+        /* Ensure the zeroed page is visible to the page table walker */
+        dsb(ishst);
+        return __pa(table);
+}
+
+/*
+ * hotplug_paging() is used by memory hotplug to build new page tables
+ * for hot added memory.
+ *
+ * @start: physical base address of the hot-added range
+ * @size:  size of the range, passed through unchanged to
+ *         __create_pgd_mapping()
+ *
+ * The range is mapped at __phys_to_virt(start) with PAGE_KERNEL
+ * protections. The new entries are built in a scratch copy of
+ * swapper_pg_dir; that copy is then written back over swapper_pg_dir
+ * between two cpu_replace_ttbr1() calls (first to the scratch copy, then
+ * back to swapper_pg_dir). The scratch pgd page is freed before
+ * returning; any tables __create_pgd_mapping() obtains through the
+ * pgd_pgtable_alloc callback are not freed here.
+ *
+ * Nothing is returned: pgd_pgtable_alloc() BUG()s on allocation failure.
+ */
+void hotplug_paging(phys_addr_t start, phys_addr_t size)
+{
+       /* Scratch pgd page, accessed through pgd_set_fixmap() while edited */
+       struct page *pg;
+       phys_addr_t pgd_phys = pgd_pgtable_alloc();
+       pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+       /* Seed the scratch page with a copy of swapper_pg_dir */
+       memcpy(pgd, swapper_pg_dir, PAGE_SIZE);
+       /* Add the mapping for the new range to the scratch copy only */
+       __create_pgd_mapping(pgd, start, __phys_to_virt(start), size,
+               PAGE_KERNEL, pgd_pgtable_alloc);
+       /*
+        * Switch to the scratch copy, copy it over swapper_pg_dir, then
+        * switch back to swapper_pg_dir.
+        * NOTE(review): presumably done so that swapper_pg_dir is not
+        * rewritten while it is the table in use - confirm.
+        */
+       cpu_replace_ttbr1(__va(pgd_phys));
+       memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+       cpu_replace_ttbr1(swapper_pg_dir);
+       /* Done editing through the fixmap alias of the scratch page */
+       pgd_clear_fixmap();
+       /* Undo pgtable_page_ctor() from pgd_pgtable_alloc() and free the page */
+       pg = phys_to_page(pgd_phys);
+       pgtable_page_dtor(pg);
+       __free_pages(pg, 0);
+}
+
+#endif
+
  /*
   * Check whether a kernel address is valid (derived from arch/x86/).
   */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h

index 2d79ec1..d3f41bf 100644 (file)
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -84,6 +84,7 @@ int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
  int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
  int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
  int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
+int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
  ulong choose_memblock_flags(void);
  unsigned long memblock_region_resize_late_begin(void);
  void memblock_region_resize_late_end(unsigned long);
diff --git a/mm/memblock.c b/mm/memblock.c

index 2412255..fb63a9c 100644 (file)
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -840,6 +840,16 @@ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
  }
  
  /**
+ * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * Passes the range to memblock_setclr_flag() with MEMBLOCK_NOMAP and a
+ * set/clear argument of 0.
+ * NOTE(review): presumably 0 selects "clear", making this the inverse of
+ * memblock_mark_nomap() - confirm against memblock_setclr_flag().
+ *
+ * Return: the value returned by memblock_setclr_flag().
+ */
+int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
+{
+       return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
+}
+
+/**
   * __next_reserved_mem_region - next function for for_each_reserved_region()
   * @idx: pointer to u64 loop variable
   * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL
author	Maciej Bielski <m.bielski@virtualopensystems.com>
	Fri, 28 Apr 2017 08:44:14 +0000 (14:14 +0530)
committer	Arun KS <arunks@codeaurora.org>
	Wed, 22 Nov 2017 02:41:50 +0000 (08:11 +0530)
arch/arm64/Kconfig		patch \| blob \| history
arch/arm64/include/asm/mmu.h		patch \| blob \| history
arch/arm64/mm/init.c		patch \| blob \| history
arch/arm64/mm/mmu.c		patch \| blob \| history
include/linux/memblock.h		patch \| blob \| history
mm/memblock.c		patch \| blob \| history