struct zone *zone;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ unsigned long end_pfn = start_pfn + nr_pages;
+ unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
+ unsigned long pfn;
int ret;
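+
+ /*
+ * Reject ranges that the sparse memory model cannot represent:
+ * there are no mem_section entries for pfns at or above
+ * 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT).
+ */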
+ if (end_pfn > max_sparsemem_pfn) {
+ pr_err("end_pfn %lx exceeds max_sparsemem_pfn %lx\n",
+ end_pfn, max_sparsemem_pfn);
+ return -EINVAL;
+ }
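+
+ /* Create the kernel linear mapping for the new range first */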
+ hotplug_paging(start, size);
+
+ /*
+ * Mark the first page in the range as unusable. This is needed
+ * because __add_section() (within __add_pages()) wants pfn_valid()
+ * to be false for it, and on arm64 pfn_valid() is implemented by
+ * simply checking the nomap flag of the existing memblocks.
+ *
+ * A small trick here is that __add_section() requires only
+ * phys_start_pfn (that is, the first pfn of a section) to be
+ * invalid. Whether or not the function's author assumed that all
+ * pfns within a section are either all valid or all invalid, this
+ * lets us avoid looping twice (once here, and once when
+ * memblock_clear_nomap() is called) over all pfns of the section,
+ * and instead modify only one pfn. As a consequence, in
+ * __add_zone() only this very first pfn is skipped and its
+ * corresponding page is not flagged reserved, so it is enough to
+ * correct this setup for that single pfn.
+ *
+ * When arch_add_memory() returns, walk_memory_range() is called
+ * with the online_memory_block() callback, whose execution
+ * finally reaches memory_block_action(), where again only the
+ * first pfn of a memory block is checked to be reserved. Above it
+ * was the first pfn of a section; here it is the first pfn of a
+ * block, but
+ * (drivers/base/memory.c):
+ * sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+ * (include/linux/memory.h):
+ * #define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS)
+ * so we can treat blocks and sections as equivalent here.
+ */
+ memblock_mark_nomap(start, PAGE_SIZE);
+
pgdat = NODE_DATA(nid);
zone = pgdat->node_zones +
zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
ret = __add_pages(nid, zone, start_pfn, nr_pages);
- if (ret)
- pr_warn("%s: Problem encountered in __add_pages() ret=%d\n",
- __func__, ret);
-
- return ret;
-}
+ /*
+ * Make the first page usable again now that __add_pages() has
+ * run: clearing the nomap flag makes pfn_valid() return true
+ * for it.
+ */
+ memblock_clear_nomap(start, PAGE_SIZE);
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
-{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
- int ret;
+ /*
+ * This is a hack to avoid having to mix arch-specific code into
+ * arch-independent code. SetPageReserved() is supposed to be
+ * called by __add_zone() (within __add_section(), within
+ * __add_pages()). However, when it is called there, it assumes
+ * that pfn_valid() returns true. Given how pfn_valid() is
+ * implemented on arm64 (a check on the nomap flag), the only way
+ * to make it evaluate to true inside __add_zone() would be to
+ * clear the nomap flag of the block from arch-independent code.
+ *
+ * To avoid that, we set the Reserved flag ourselves here, after
+ * clearing the nomap flag above.
+ */
+ SetPageReserved(pfn_to_page(start_pfn));
- zone = page_zone(pfn_to_page(start_pfn));
- ret = __remove_pages(zone, start_pfn, nr_pages);
if (ret)
- pr_warn("%s: Problem encountered in __remove_pages() ret=%d\n",
+ pr_warn("%s: Problem encountered in __add_pages() ret=%d\n",
__func__, ret);
return ret;
}
#endif
-#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
/*
* Based on arch/arm/mm/mmu.c
*
phys_addr_t pte_phys;
BUG_ON(!pgtable_alloc);
pte_phys = pgtable_alloc();
+ pr_debug("Allocating PTE at %pK\n", __va(pte_phys));
pte = pte_set_fixmap(pte_phys);
if (pmd_sect(*pmd))
split_pmd(pmd, pte);
phys_addr_t pmd_phys;
BUG_ON(!pgtable_alloc);
pmd_phys = pgtable_alloc();
+ pr_debug("Allocating PMD at %pK\n", __va(pmd_phys));
pmd = pmd_set_fixmap(pmd_phys);
if (pud_sect(*pud)) {
/*
phys_addr_t pud_phys;
BUG_ON(!pgtable_alloc);
pud_phys = pgtable_alloc();
+ pr_debug("Allocating PUD at %pK\n", __va(pud_phys));
__pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
}
BUG_ON(pgd_bad(*pgd));
bootmem_init();
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+static phys_addr_t pgd_pgtable_alloc(void)
+{
+ void *ptr = (void *)__get_free_page(PGALLOC_GFP);
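+
+ /*
+ * pgtable_page_ctor() initialises the split page-table lock and
+ * page accounting for the new page-table page.
+ */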
+ if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+ BUG();
+
+ /* Ensure the zeroed page is visible to the page table walker */
+ dsb(ishst);
+ return __pa(ptr);
+}
+
+/*
+ * hotplug_paging() is used by memory hotplug to build new page tables
+ * for hot-added memory.
+ */
+void hotplug_paging(phys_addr_t start, phys_addr_t size)
+{
+ struct page *pg;
+ phys_addr_t pgd_phys = pgd_pgtable_alloc();
+ pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+
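+ /* Start from a copy of the currently active kernel page tables */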
+ memcpy(pgd, swapper_pg_dir, PAGE_SIZE);
+
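+ /*
+ * Map the hot-added range in the temporary tables; swapper_pg_dir
+ * itself is not touched at this point.
+ */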
+ __create_pgd_mapping(pgd, start, __phys_to_virt(start), size,
+ PAGE_KERNEL, pgd_pgtable_alloc);
+
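+ /*
+ * swapper_pg_dir must not be modified while it is live, so
+ * switch the MMU over to the temporary tables, copy the updated
+ * entries back into swapper_pg_dir, then switch back to it.
+ */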
+ cpu_replace_ttbr1(__va(pgd_phys));
+ memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+ cpu_replace_ttbr1(swapper_pg_dir);
+
+ pgd_clear_fixmap();
+
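+ /* The temporary pgd is no longer needed */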
+ pg = phys_to_page(pgd_phys);
+ pgtable_page_dtor(pg);
+ __free_pages(pg, 0);
+}
+
+#endif
+
/*
* Check whether a kernel address is valid (derived from arch/x86/).
*/