OSDN Git Service

mm: meminit: initialise remaining struct pages in parallel with kswapd
[uclinux-h8/linux.git] / mm / page_alloc.c
index 5e6fa06..c30f5a0 100644 (file)
@@ -235,6 +235,77 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+       pgdat->first_deferred_pfn = ULONG_MAX;
+}
+
+/* Returns true if the struct page for the pfn is uninitialised */
+static inline bool __defermem_init early_page_uninitialised(unsigned long pfn)
+{
+       int nid = early_pfn_to_nid(pfn);
+
+       if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+               return true;
+
+       return false;
+}
+
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+       if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+               return true;
+
+       return false;
+}
+
+/*
+ * Returns false when the remaining initialisation should be deferred until
+ * later in the boot cycle when it can be parallelised.
+ */
+static inline bool update_defer_init(pg_data_t *pgdat,
+                               unsigned long pfn, unsigned long zone_end,
+                               unsigned long *nr_initialised)
+{
+       /* Always populate low zones for address-contrained allocations */
+       if (zone_end < pgdat_end_pfn(pgdat))
+               return true;
+
+       /* Initialise at least 2G of the highest zone */
+       (*nr_initialised)++;
+       if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
+           (pfn & (PAGES_PER_SECTION - 1)) == 0) {
+               pgdat->first_deferred_pfn = pfn;
+               return false;
+       }
+
+       return true;
+}
+#else
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+}
+
+static inline bool early_page_uninitialised(unsigned long pfn)
+{
+       return false;
+}
+
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+       return false;
+}
+
+static inline bool update_defer_init(pg_data_t *pgdat,
+                               unsigned long pfn, unsigned long zone_end,
+                               unsigned long *nr_initialised)
+{
+       return true;
+}
+#endif
+
+
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
        if (unlikely(page_group_by_mobility_disabled &&
@@ -764,6 +835,97 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
        return 0;
 }
 
+static void __meminit __init_single_page(struct page *page, unsigned long pfn,
+                               unsigned long zone, int nid)
+{
+       struct zone *z = &NODE_DATA(nid)->node_zones[zone];
+
+       set_page_links(page, zone, nid, pfn);
+       mminit_verify_page_links(page, zone, nid, pfn);
+       init_page_count(page);
+       page_mapcount_reset(page);
+       page_cpupid_reset_last(page);
+
+       /*
+        * Mark the block movable so that blocks are reserved for
+        * movable at startup. This will force kernel allocations
+        * to reserve their blocks rather than leaking throughout
+        * the address space during boot when many long-lived
+        * kernel allocations are made. Later some blocks near
+        * the start are marked MIGRATE_RESERVE by
+        * setup_zone_migrate_reserve()
+        *
+        * bitmap is created for zone's valid pfn range. but memmap
+        * can be created for invalid pages (for alignment)
+        * check here not to call set_pageblock_migratetype() against
+        * pfn out of zone.
+        */
+       if ((z->zone_start_pfn <= pfn)
+           && (pfn < zone_end_pfn(z))
+           && !(pfn & (pageblock_nr_pages - 1)))
+               set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+
+       INIT_LIST_HEAD(&page->lru);
+#ifdef WANT_PAGE_VIRTUAL
+       /* The shift won't overflow because ZONE_NORMAL is below 4G. */
+       if (!is_highmem_idx(zone))
+               set_page_address(page, __va(pfn << PAGE_SHIFT));
+#endif
+}
+
+static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
+                                       int nid)
+{
+       return __init_single_page(pfn_to_page(pfn), pfn, zone, nid);
+}
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static void init_reserved_page(unsigned long pfn)
+{
+       pg_data_t *pgdat;
+       int nid, zid;
+
+       if (!early_page_uninitialised(pfn))
+               return;
+
+       nid = early_pfn_to_nid(pfn);
+       pgdat = NODE_DATA(nid);
+
+       for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+               struct zone *zone = &pgdat->node_zones[zid];
+
+               if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
+                       break;
+       }
+       __init_single_pfn(pfn, zid, nid);
+}
+#else
+static inline void init_reserved_page(unsigned long pfn)
+{
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
+/*
+ * Initialised pages do not have PageReserved set. This function is
+ * called for each range allocated by the bootmem allocator and
+ * marks the pages PageReserved. The remaining valid pages are later
+ * sent to the buddy page allocator.
+ */
+void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
+{
+       unsigned long start_pfn = PFN_DOWN(start);
+       unsigned long end_pfn = PFN_UP(end);
+
+       for (; start_pfn < end_pfn; start_pfn++) {
+               if (pfn_valid(start_pfn)) {
+                       struct page *page = pfn_to_page(start_pfn);
+
+                       init_reserved_page(start_pfn);
+                       SetPageReserved(page);
+               }
+       }
+}
+
 static bool free_pages_prepare(struct page *page, unsigned int order)
 {
        bool compound = PageCompound(page);
@@ -818,7 +980,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
        local_irq_restore(flags);
 }
 
-void __init __free_pages_bootmem(struct page *page, unsigned int order)
+static void __defer_init __free_pages_boot_core(struct page *page,
+                                       unsigned long pfn, unsigned int order)
 {
        unsigned int nr_pages = 1 << order;
        struct page *p = page;
@@ -838,6 +1001,134 @@ void __init __free_pages_bootmem(struct page *page, unsigned int order)
        __free_pages(page, order);
 }
 
+#if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
+       defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
+/* Only safe to use early in boot when initialisation is single-threaded */
+static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
+
+int __meminit early_pfn_to_nid(unsigned long pfn)
+{
+       int nid;
+
+       /* The system will behave unpredictably otherwise */
+       BUG_ON(system_state != SYSTEM_BOOTING);
+
+       nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
+       if (nid >= 0)
+               return nid;
+       /* just returns 0 */
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_NODES_SPAN_OTHER_NODES
+static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
+                                       struct mminit_pfnnid_cache *state)
+{
+       int nid;
+
+       nid = __early_pfn_to_nid(pfn, state);
+       if (nid >= 0 && nid != node)
+               return false;
+       return true;
+}
+
+/* Only safe to use early in boot when initialisation is single-threaded */
+static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+       return meminit_pfn_in_nid(pfn, node, &early_pfnnid_cache);
+}
+
+#else
+
+static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+       return true;
+}
+static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
+                                       struct mminit_pfnnid_cache *state)
+{
+       return true;
+}
+#endif
+
+
+void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
+                                                       unsigned int order)
+{
+       if (early_page_uninitialised(pfn))
+               return;
+       return __free_pages_boot_core(page, pfn, order);
+}
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+/* Initialise remaining memory on a node */
+void __defermem_init deferred_init_memmap(int nid)
+{
+       struct mminit_pfnnid_cache nid_init_state = { };
+       unsigned long start = jiffies;
+       unsigned long nr_pages = 0;
+       unsigned long walk_start, walk_end;
+       int i, zid;
+       struct zone *zone;
+       pg_data_t *pgdat = NODE_DATA(nid);
+       unsigned long first_init_pfn = pgdat->first_deferred_pfn;
+
+       if (first_init_pfn == ULONG_MAX)
+               return;
+
+       /* Sanity check boundaries */
+       BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
+       BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
+       pgdat->first_deferred_pfn = ULONG_MAX;
+
+       /* Only the highest zone is deferred so find it */
+       for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+               zone = pgdat->node_zones + zid;
+               if (first_init_pfn < zone_end_pfn(zone))
+                       break;
+       }
+
+       for_each_mem_pfn_range(i, nid, &walk_start, &walk_end, NULL) {
+               unsigned long pfn, end_pfn;
+
+               end_pfn = min(walk_end, zone_end_pfn(zone));
+               pfn = first_init_pfn;
+               if (pfn < walk_start)
+                       pfn = walk_start;
+               if (pfn < zone->zone_start_pfn)
+                       pfn = zone->zone_start_pfn;
+
+               for (; pfn < end_pfn; pfn++) {
+                       struct page *page;
+
+                       if (!pfn_valid(pfn))
+                               continue;
+
+                       if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state))
+                               continue;
+
+                       if (page->flags) {
+                               VM_BUG_ON(page_zone(page) != zone);
+                               continue;
+                       }
+
+                       __init_single_page(page, pfn, zid, nid);
+                       __free_pages_boot_core(page, pfn, 0);
+                       nr_pages++;
+                       cond_resched();
+               }
+               first_init_pfn = max(end_pfn, first_init_pfn);
+       }
+
+       /* Sanity check that the next zone really is unpopulated */
+       WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+
+       pr_info("kswapd %d initialised %lu pages in %ums\n", nid, nr_pages,
+                                       jiffies_to_msecs(jiffies - start));
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
@@ -4150,6 +4441,9 @@ static void setup_zone_migrate_reserve(struct zone *zone)
        zone->nr_migrate_reserve_block = reserve;
 
        for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+               if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone)))
+                       return;
+
                if (!pfn_valid(pfn))
                        continue;
                page = pfn_to_page(pfn);
@@ -4212,15 +4506,16 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                unsigned long start_pfn, enum memmap_context context)
 {
-       struct page *page;
+       pg_data_t *pgdat = NODE_DATA(nid);
        unsigned long end_pfn = start_pfn + size;
        unsigned long pfn;
        struct zone *z;
+       unsigned long nr_initialised = 0;
 
        if (highest_memmap_pfn < end_pfn - 1)
                highest_memmap_pfn = end_pfn - 1;
 
-       z = &NODE_DATA(nid)->node_zones[zone];
+       z = &pgdat->node_zones[zone];
        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                /*
                 * There can be holes in boot-time mem_map[]s
@@ -4232,39 +4527,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                                continue;
                        if (!early_pfn_in_nid(pfn, nid))
                                continue;
+                       if (!update_defer_init(pgdat, pfn, end_pfn,
+                                               &nr_initialised))
+                               break;
                }
-               page = pfn_to_page(pfn);
-               set_page_links(page, zone, nid, pfn);
-               mminit_verify_page_links(page, zone, nid, pfn);
-               init_page_count(page);
-               page_mapcount_reset(page);
-               page_cpupid_reset_last(page);
-               SetPageReserved(page);
-               /*
-                * Mark the block movable so that blocks are reserved for
-                * movable at startup. This will force kernel allocations
-                * to reserve their blocks rather than leaking throughout
-                * the address space during boot when many long-lived
-                * kernel allocations are made. Later some blocks near
-                * the start are marked MIGRATE_RESERVE by
-                * setup_zone_migrate_reserve()
-                *
-                * bitmap is created for zone's valid pfn range. but memmap
-                * can be created for invalid pages (for alignment)
-                * check here not to call set_pageblock_migratetype() against
-                * pfn out of zone.
-                */
-               if ((z->zone_start_pfn <= pfn)
-                   && (pfn < zone_end_pfn(z))
-                   && !(pfn & (pageblock_nr_pages - 1)))
-                       set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-
-               INIT_LIST_HEAD(&page->lru);
-#ifdef WANT_PAGE_VIRTUAL
-               /* The shift won't overflow because ZONE_NORMAL is below 4G. */
-               if (!is_highmem_idx(zone))
-                       set_page_address(page, __va(pfn << PAGE_SHIFT));
-#endif
+               __init_single_pfn(pfn, zone, nid);
        }
 }
 
@@ -4522,57 +4789,30 @@ int __meminit init_currently_empty_zone(struct zone *zone,
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+
 /*
  * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
  */
-int __meminit __early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn,
+                                       struct mminit_pfnnid_cache *state)
 {
        unsigned long start_pfn, end_pfn;
        int nid;
-       /*
-        * NOTE: The following SMP-unsafe globals are only used early in boot
-        * when the kernel is running single-threaded.
-        */
-       static unsigned long __meminitdata last_start_pfn, last_end_pfn;
-       static int __meminitdata last_nid;
 
-       if (last_start_pfn <= pfn && pfn < last_end_pfn)
-               return last_nid;
+       if (state->last_start <= pfn && pfn < state->last_end)
+               return state->last_nid;
 
        nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
        if (nid != -1) {
-               last_start_pfn = start_pfn;
-               last_end_pfn = end_pfn;
-               last_nid = nid;
+               state->last_start = start_pfn;
+               state->last_end = end_pfn;
+               state->last_nid = nid;
        }
 
        return nid;
 }
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 
-int __meminit early_pfn_to_nid(unsigned long pfn)
-{
-       int nid;
-
-       nid = __early_pfn_to_nid(pfn);
-       if (nid >= 0)
-               return nid;
-       /* just returns 0 */
-       return 0;
-}
-
-#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
-{
-       int nid;
-
-       nid = __early_pfn_to_nid(pfn);
-       if (nid >= 0 && nid != node)
-               return false;
-       return true;
-}
-#endif
-
 /**
  * free_bootmem_with_active_regions - Call memblock_free_early_nid for each active range
  * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
@@ -5090,6 +5330,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
        /* pg_data_t should be reset to zero when it's allocated */
        WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
 
+       reset_deferred_meminit(pgdat);
        pgdat->node_id = nid;
        pgdat->node_start_pfn = node_start_pfn;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP