Merge branch 'asoc-4.19' into asoc-4.20 tas dependency
[uclinux-h8/linux.git] mm/page_alloc.c
index a790ef4..e75865d 100644
@@ -155,16 +155,17 @@ static inline void set_pcppage_migratetype(struct page *page, int migratetype)
  * The following functions are used by the suspend/hibernate code to temporarily
  * change gfp_allowed_mask in order to avoid using I/O during memory allocations
  * while devices are suspended.  To avoid races with the suspend/hibernate code,
- * they should always be called with pm_mutex held (gfp_allowed_mask also should
- * only be modified with pm_mutex held, unless the suspend/hibernate code is
- * guaranteed not to run in parallel with that modification).
+ * they should always be called with system_transition_mutex held
+ * (gfp_allowed_mask also should only be modified with system_transition_mutex
+ * held, unless the suspend/hibernate code is guaranteed not to run in parallel
+ * with that modification).
  */
 
 static gfp_t saved_gfp_mask;
 
 void pm_restore_gfp_mask(void)
 {
-       WARN_ON(!mutex_is_locked(&pm_mutex));
+       WARN_ON(!mutex_is_locked(&system_transition_mutex));
        if (saved_gfp_mask) {
                gfp_allowed_mask = saved_gfp_mask;
                saved_gfp_mask = 0;
@@ -173,7 +174,7 @@ void pm_restore_gfp_mask(void)
 
 void pm_restrict_gfp_mask(void)
 {
-       WARN_ON(!mutex_is_locked(&pm_mutex));
+       WARN_ON(!mutex_is_locked(&system_transition_mutex));
        WARN_ON(saved_gfp_mask);
        saved_gfp_mask = gfp_allowed_mask;
        gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
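
A minimal sketch of the calling pattern these two hunks assume: the suspend/hibernate side takes system_transition_mutex and brackets the low-power transition with the GFP-mask helpers. The do_example_suspend() and enter_low_power_state() names below are invented for illustration, not taken from this tree.

/* Illustrative only: pair the helpers under system_transition_mutex. */
static int do_example_suspend(void)
{
        int error;

        mutex_lock(&system_transition_mutex);
        pm_restrict_gfp_mask();                 /* drop __GFP_IO / __GFP_FS */
        error = enter_low_power_state();        /* hypothetical platform hook */
        pm_restore_gfp_mask();                  /* re-enable I/O allocations */
        mutex_unlock(&system_transition_mutex);

        return error;
}
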
@@ -2908,10 +2909,10 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
        if (!static_branch_likely(&vm_numa_stat_key))
                return;
 
-       if (z->node != numa_node_id())
+       if (zone_to_nid(z) != numa_node_id())
                local_stat = NUMA_OTHER;
 
-       if (z->node == preferred_zone->node)
+       if (zone_to_nid(z) == zone_to_nid(preferred_zone))
                __inc_numa_state(z, NUMA_HIT);
        else {
                __inc_numa_state(z, NUMA_MISS);
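
This hunk switches zone_statistics() from dereferencing zone->node to the zone_to_nid() accessor, which also compiles away cleanly when CONFIG_NUMA is off. For context, the helper is defined in include/linux/mmzone.h roughly as paraphrased below; check that header in this tree for the exact form.

static inline int zone_to_nid(struct zone *zone)
{
#ifdef CONFIG_NUMA
        return zone->node;
#else
        return 0;
#endif
}
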
@@ -4164,11 +4165,12 @@ retry:
                alloc_flags = reserve_flags;
 
        /*
-        * Reset the zonelist iterators if memory policies can be ignored.
-        * These allocations are high priority and system rather than user
-        * orientated.
+        * Reset the nodemask and zonelist iterators if memory policies can be
+        * ignored. These allocations are high priority and system rather than
+        * user oriented.
         */
        if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) {
+               ac->nodemask = NULL;
                ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
                                        ac->high_zoneidx, ac->nodemask);
        }
@@ -4402,19 +4404,15 @@ out:
 EXPORT_SYMBOL(__alloc_pages_nodemask);
 
 /*
- * Common helper functions.
+ * Common helper functions. Never use with __GFP_HIGHMEM because the returned
+ * address cannot represent highmem pages. Use alloc_pages and then kmap if
+ * you need to access high mem.
  */
 unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
 {
        struct page *page;
 
-       /*
-        * __get_free_pages() returns a virtual address, which cannot represent
-        * a highmem page
-        */
-       VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
-
-       page = alloc_pages(gfp_mask, order);
+       page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order);
        if (!page)
                return 0;
        return (unsigned long) page_address(page);
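
With the VM_BUG_ON() gone, __get_free_pages() now silently masks off __GFP_HIGHMEM, so callers that genuinely need highmem have to use the pattern the new comment points at. A short illustrative sketch of both patterns (the wrapper function is invented; error handling is trimmed to the essentials):

/* Illustrative only: lowmem via __get_free_pages(), highmem via kmap(). */
static int example_alloc_patterns(void)
{
        unsigned long addr;
        struct page *page;
        void *va;

        /* Low memory: a direct-mapped virtual address is fine. */
        addr = __get_free_pages(GFP_KERNEL, 0);
        if (!addr)
                return -ENOMEM;
        /* ... use (void *)addr ... */
        free_pages(addr, 0);

        /* High memory: allocate the page, then map it temporarily. */
        page = alloc_pages(GFP_HIGHUSER, 0);
        if (!page)
                return -ENOMEM;
        va = kmap(page);
        /* ... use va ... */
        kunmap(page);
        __free_pages(page, 0);

        return 0;
}
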
@@ -5280,7 +5278,7 @@ int local_memory_node(int node)
        z = first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
                                   gfp_zone(GFP_KERNEL),
                                   NULL);
-       return z->zone->node;
+       return zone_to_nid(z->zone);
 }
 #endif
 
@@ -5566,13 +5564,12 @@ static int zone_batchsize(struct zone *zone)
 
        /*
         * The per-cpu-pages pools are set to around 1000th of the
-        * size of the zone.  But no more than 1/2 of a meg.
-        *
-        * OK, so we don't know how big the cache is.  So guess.
+        * size of the zone.
         */
        batch = zone->managed_pages / 1024;
-       if (batch * PAGE_SIZE > 512 * 1024)
-               batch = (512 * 1024) / PAGE_SIZE;
+       /* But no more than a meg. */
+       if (batch * PAGE_SIZE > 1024 * 1024)
+               batch = (1024 * 1024) / PAGE_SIZE;
        batch /= 4;             /* We effectively *= 4 below */
        if (batch < 1)
                batch = 1;
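
A worked example of the new 1 MiB cap, assuming 4 KiB pages and a 2 GiB zone (plain userspace arithmetic, just to show the numbers; the kernel code above additionally rounds the result as its later comments describe):

#include <stdio.h>

int main(void)
{
        const unsigned long page_size = 4096;                   /* assumed 4 KiB pages */
        unsigned long managed_pages = (2UL << 30) / page_size;  /* 2 GiB zone */
        unsigned long batch = managed_pages / 1024;             /* ~1/1000th: 512 pages */

        if (batch * page_size > 1024 * 1024)                    /* cap is now 1 MiB */
                batch = (1024 * 1024) / page_size;              /* 256 pages */
        batch /= 4;                                             /* matches "*= 4 below" */

        printf("per-cpu batch: %lu pages\n", batch);            /* prints 64 */
        return 0;
}
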
@@ -6123,7 +6120,7 @@ static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned l
        return usemapsize / 8;
 }
 
-static void __init setup_usemap(struct pglist_data *pgdat,
+static void __ref setup_usemap(struct pglist_data *pgdat,
                                struct zone *zone,
                                unsigned long zone_start_pfn,
                                unsigned long zonesize)
@@ -6143,7 +6140,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 
 /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __paginginit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
        unsigned int order;
 
@@ -6171,14 +6168,14 @@ void __paginginit set_pageblock_order(void)
  * include/linux/pageblock-flags.h for the values of pageblock_order based on
  * the kernel config
  */
-void __paginginit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 }
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
-static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
-                                                  unsigned long present_pages)
+static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
+                                               unsigned long present_pages)
 {
        unsigned long pages = spanned_pages;
 
@@ -6197,39 +6194,99 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
        return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
 }
 
-/*
- * Set up the zone data structures:
- *   - mark all pages reserved
- *   - mark all memory queues empty
- *   - clear the memory bitmaps
- *
- * NOTE: pgdat should get zeroed by caller.
- */
-static void __paginginit free_area_init_core(struct pglist_data *pgdat)
-{
-       enum zone_type j;
-       int nid = pgdat->node_id;
-
-       pgdat_resize_init(pgdat);
 #ifdef CONFIG_NUMA_BALANCING
+static void pgdat_init_numabalancing(struct pglist_data *pgdat)
+{
        spin_lock_init(&pgdat->numabalancing_migrate_lock);
        pgdat->numabalancing_migrate_nr_pages = 0;
        pgdat->numabalancing_migrate_next_window = jiffies;
+}
+#else
+static void pgdat_init_numabalancing(struct pglist_data *pgdat) {}
 #endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pgdat_init_split_queue(struct pglist_data *pgdat)
+{
        spin_lock_init(&pgdat->split_queue_lock);
        INIT_LIST_HEAD(&pgdat->split_queue);
        pgdat->split_queue_len = 0;
+}
+#else
+static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
 #endif
-       init_waitqueue_head(&pgdat->kswapd_wait);
-       init_waitqueue_head(&pgdat->pfmemalloc_wait);
+
 #ifdef CONFIG_COMPACTION
+static void pgdat_init_kcompactd(struct pglist_data *pgdat)
+{
        init_waitqueue_head(&pgdat->kcompactd_wait);
+}
+#else
+static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 #endif
+
+static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
+{
+       pgdat_resize_init(pgdat);
+
+       pgdat_init_numabalancing(pgdat);
+       pgdat_init_split_queue(pgdat);
+       pgdat_init_kcompactd(pgdat);
+
+       init_waitqueue_head(&pgdat->kswapd_wait);
+       init_waitqueue_head(&pgdat->pfmemalloc_wait);
+
        pgdat_page_ext_init(pgdat);
        spin_lock_init(&pgdat->lru_lock);
        lruvec_init(node_lruvec(pgdat));
+}
+
+static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
+                                                       unsigned long remaining_pages)
+{
+       zone->managed_pages = remaining_pages;
+       zone_set_nid(zone, nid);
+       zone->name = zone_names[idx];
+       zone->zone_pgdat = NODE_DATA(nid);
+       spin_lock_init(&zone->lock);
+       zone_seqlock_init(zone);
+       zone_pcp_init(zone);
+}
+
+/*
+ * Set up the zone data structures
+ * - init pgdat internals
+ * - init all zones belonging to this node
+ *
+ * NOTE: this function is only called during memory hotplug
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __ref free_area_init_core_hotplug(int nid)
+{
+       enum zone_type z;
+       pg_data_t *pgdat = NODE_DATA(nid);
+
+       pgdat_init_internals(pgdat);
+       for (z = 0; z < MAX_NR_ZONES; z++)
+               zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+}
+#endif
+
+/*
+ * Set up the zone data structures:
+ *   - mark all pages reserved
+ *   - mark all memory queues empty
+ *   - clear the memory bitmaps
+ *
+ * NOTE: pgdat should get zeroed by caller.
+ * NOTE: this function is only called during early init.
+ */
+static void __init free_area_init_core(struct pglist_data *pgdat)
+{
+       enum zone_type j;
+       int nid = pgdat->node_id;
 
+       pgdat_init_internals(pgdat);
        pgdat->per_cpu_nodestats = &boot_nodestats;
 
        for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6277,15 +6334,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                 * when the bootmem allocator frees pages into the buddy system.
                 * And all highmem pages will be managed by the buddy system.
                 */
-               zone->managed_pages = freesize;
-#ifdef CONFIG_NUMA
-               zone->node = nid;
-#endif
-               zone->name = zone_names[j];
-               zone->zone_pgdat = pgdat;
-               spin_lock_init(&zone->lock);
-               zone_seqlock_init(zone);
-               zone_pcp_init(zone);
+               zone_init_internals(zone, j, nid, freesize);
 
                if (!size)
                        continue;
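
The refactoring in this hunk splits the bookkeeping into pgdat_init_internals() and zone_init_internals() so that memory hotplug can reinitialize a node's locks, waitqueues and per-zone fields without repeating the early-boot sizing. A hedged sketch of the kind of hot-add caller free_area_init_core_hotplug() is aimed at; example_hotadd_node() is invented for illustration, and the real hotplug path lives in mm/memory_hotplug.c.

/* Illustrative only: reinitialize a node's internals on hot-add. */
static pg_data_t *example_hotadd_node(int nid)
{
        pg_data_t *pgdat = NODE_DATA(nid);

        if (!pgdat)
                return NULL;            /* pgdat allocation handled elsewhere */

        /* Locks, waitqueues and per-zone fields only; sizes stay zero here. */
        free_area_init_core_hotplug(nid);

        return pgdat;
}
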
@@ -6345,8 +6394,24 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { }
 #endif /* CONFIG_FLAT_NODE_MEM_MAP */
 
-void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
-               unsigned long node_start_pfn, unsigned long *zholes_size)
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
+{
+       /*
+        * We start only with one section of pages, more pages are added as
+        * needed until the rest of deferred pages are initialized.
+        */
+       pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
+                                               pgdat->node_spanned_pages);
+       pgdat->first_deferred_pfn = ULONG_MAX;
+}
+#else
+static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
+#endif
+
+void __init free_area_init_node(int nid, unsigned long *zones_size,
+                                  unsigned long node_start_pfn,
+                                  unsigned long *zholes_size)
 {
        pg_data_t *pgdat = NODE_DATA(nid);
        unsigned long start_pfn = 0;
@@ -6370,16 +6435,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
                                  zones_size, zholes_size);
 
        alloc_node_mem_map(pgdat);
+       pgdat_set_deferred_range(pgdat);
 
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-       /*
-        * We start only with one section of pages, more pages are added as
-        * needed until the rest of deferred pages are initialized.
-        */
-       pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
-                                        pgdat->node_spanned_pages);
-       pgdat->first_deferred_pfn = ULONG_MAX;
-#endif
        free_area_init_core(pgdat);
 }
 
@@ -6391,7 +6448,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
  * may be accessed (for example page_to_pfn() on some configuration accesses
  * flags). We must explicitly zero those struct pages.
  */
-void __paginginit zero_resv_unavail(void)
+void __init zero_resv_unavail(void)
 {
        phys_addr_t start, end;
        unsigned long pfn;
@@ -6404,8 +6461,11 @@ void __paginginit zero_resv_unavail(void)
        pgcnt = 0;
        for_each_resv_unavail_range(i, &start, &end) {
                for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) {
-                       if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages)))
+                       if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
+                               pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
+                                       + pageblock_nr_pages - 1;
                                continue;
+                       }
                        mm_zero_struct_page(pfn_to_page(pfn));
                        pgcnt++;
                }
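
The new branch above stops re-testing every pfn inside an invalid pageblock: it bumps pfn to the last pfn of that block so the loop's pfn++ resumes at the next pageblock boundary. A small worked example of that arithmetic, assuming pageblock_nr_pages == 512 (userspace arithmetic only; ALIGN_DOWN is redefined here for the power-of-two case):

#include <stdio.h>

#define ALIGN_DOWN(x, a)        ((x) & ~((unsigned long)(a) - 1))

int main(void)
{
        const unsigned long pageblock_nr_pages = 512;   /* assumed for the example */
        unsigned long pfn = 1000;                       /* somewhere in an invalid block */

        pfn = ALIGN_DOWN(pfn, pageblock_nr_pages) + pageblock_nr_pages - 1;
        printf("loop resumes at pfn %lu\n", pfn + 1);   /* prints 1024 */
        return 0;
}
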
@@ -6939,9 +6999,21 @@ unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
        start = (void *)PAGE_ALIGN((unsigned long)start);
        end = (void *)((unsigned long)end & PAGE_MASK);
        for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
+               struct page *page = virt_to_page(pos);
+               void *direct_map_addr;
+
+               /*
+                * 'direct_map_addr' might be different from 'pos'
+                * because some architectures' virt_to_page()
+                * work with aliases.  Getting the direct map
+                * address ensures that we get a _writeable_
+                * alias for the memset().
+                */
+               direct_map_addr = page_address(page);
                if ((unsigned int)poison <= 0xFF)
-                       memset(pos, poison, PAGE_SIZE);
-               free_reserved_page(virt_to_page(pos));
+                       memset(direct_map_addr, poison, PAGE_SIZE);
+
+               free_reserved_page(page);
        }
 
        if (pages && s)
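
The hunk above only changes where the poison bytes are written (through the direct-map alias); callers of free_reserved_area() stay the same. For reference, a typical caller looks roughly like the sketch below, paraphrased from the free_initmem_default() helper in include/linux/mm.h, so treat the exact shape as an assumption rather than a quote.

/* Roughly how architectures hand back the .init sections after boot. */
static inline unsigned long example_free_initmem(int poison)
{
        extern char __init_begin[], __init_end[];

        return free_reserved_area(&__init_begin, &__init_end,
                                  poison, "unused kernel");
}
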
@@ -8024,3 +8096,33 @@ bool is_free_buddy_page(struct page *page)
 
        return order < MAX_ORDER;
 }
+
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Set PG_hwpoison flag if a given page is confirmed to be a free page.  This
+ * test is performed under the zone lock to prevent a race against page
+ * allocation.
+ */
+bool set_hwpoison_free_buddy_page(struct page *page)
+{
+       struct zone *zone = page_zone(page);
+       unsigned long pfn = page_to_pfn(page);
+       unsigned long flags;
+       unsigned int order;
+       bool hwpoisoned = false;
+
+       spin_lock_irqsave(&zone->lock, flags);
+       for (order = 0; order < MAX_ORDER; order++) {
+               struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+               if (PageBuddy(page_head) && page_order(page_head) >= order) {
+                       if (!TestSetPageHWPoison(page))
+                               hwpoisoned = true;
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&zone->lock, flags);
+
+       return hwpoisoned;
+}
+#endif
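
The page_head computation in set_hwpoison_free_buddy_page() masks the low order bits off the pfn to find the candidate buddy head at each order. A small worked example of that indexing, assuming the page of interest sits at pfn 0x1003 inside an order-2 free buddy (userspace arithmetic only):

#include <stdio.h>

int main(void)
{
        unsigned long pfn = 0x1003;     /* assumed pfn of the poisoned page */
        unsigned int order;

        for (order = 0; order < 4; order++) {
                unsigned long head_pfn = pfn - (pfn & ((1UL << order) - 1));

                /* order 0 -> 0x1003, order 1 -> 0x1002, orders 2 and 3 -> 0x1000 */
                printf("order %u: candidate head pfn 0x%lx\n", order, head_pfn);
        }
        return 0;
}
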