X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=mm%2Fpage_alloc.c;h=c2ee4ecad083f897d9ce181492502f48645e0bc3;hb=d70ddd7a5d9aa335f9b4b0c3d879e1e70ee1e4e3;hp=2fd31aebef30c4abfb27d8890787bc914e880356;hpb=1d36f46b4e61ae7bd2ce013317cb3ca0298bbb6e;p=uclinux-h8%2Flinux.git diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2fd31aebef30..c2ee4ecad083 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -380,20 +380,6 @@ void prep_compound_page(struct page *page, unsigned long order) } } -static inline void prep_zero_page(struct page *page, unsigned int order, - gfp_t gfp_flags) -{ - int i; - - /* - * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO - * and __GFP_HIGHMEM from hard or soft interrupt context. - */ - VM_BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt()); - for (i = 0; i < (1 << order); i++) - clear_highpage(page + i); -} - #ifdef CONFIG_DEBUG_PAGEALLOC unsigned int _debug_guardpage_minorder; bool _debug_pagealloc_enabled __read_mostly; @@ -778,6 +764,66 @@ static int free_tail_pages_check(struct page *head_page, struct page *page) return 0; } +static void __meminit __init_single_page(struct page *page, unsigned long pfn, + unsigned long zone, int nid) +{ + struct zone *z = &NODE_DATA(nid)->node_zones[zone]; + + set_page_links(page, zone, nid, pfn); + mminit_verify_page_links(page, zone, nid, pfn); + init_page_count(page); + page_mapcount_reset(page); + page_cpupid_reset_last(page); + + /* + * Mark the block movable so that blocks are reserved for + * movable at startup. This will force kernel allocations + * to reserve their blocks rather than leaking throughout + * the address space during boot when many long-lived + * kernel allocations are made. Later some blocks near + * the start are marked MIGRATE_RESERVE by + * setup_zone_migrate_reserve() + * + * bitmap is created for zone's valid pfn range. but memmap + * can be created for invalid pages (for alignment) + * check here not to call set_pageblock_migratetype() against + * pfn out of zone. + */ + if ((z->zone_start_pfn <= pfn) + && (pfn < zone_end_pfn(z)) + && !(pfn & (pageblock_nr_pages - 1))) + set_pageblock_migratetype(page, MIGRATE_MOVABLE); + + INIT_LIST_HEAD(&page->lru); +#ifdef WANT_PAGE_VIRTUAL + /* The shift won't overflow because ZONE_NORMAL is below 4G. */ + if (!is_highmem_idx(zone)) + set_page_address(page, __va(pfn << PAGE_SHIFT)); +#endif +} + +static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone, + int nid) +{ + return __init_single_page(pfn_to_page(pfn), pfn, zone, nid); +} + +/* + * Initialised pages do not have PageReserved set. This function is + * called for each range allocated by the bootmem allocator and + * marks the pages PageReserved. The remaining valid pages are later + * sent to the buddy page allocator. + */ +void reserve_bootmem_region(unsigned long start, unsigned long end) +{ + unsigned long start_pfn = PFN_DOWN(start); + unsigned long end_pfn = PFN_UP(end); + + for (; start_pfn < end_pfn; start_pfn++) + if (pfn_valid(start_pfn)) + SetPageReserved(pfn_to_page(start_pfn)); +} + static bool free_pages_prepare(struct page *page, unsigned int order) { bool compound = PageCompound(page); @@ -832,7 +878,8 @@ static void __free_pages_ok(struct page *page, unsigned int order) local_irq_restore(flags); } -void __init __free_pages_bootmem(struct page *page, unsigned int order) +void __init __free_pages_bootmem(struct page *page, unsigned long pfn, + unsigned int order) { unsigned int nr_pages = 1 << order; struct page *p = page; @@ -975,7 +1022,8 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, kasan_alloc_pages(page, order); if (gfp_flags & __GFP_ZERO) - prep_zero_page(page, order, gfp_flags); + for (i = 0; i < (1 << order); i++) + clear_highpage(page + i); if (order && (gfp_flags & __GFP_COMP)) prep_compound_page(page, order); @@ -2322,48 +2370,6 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) show_mem(filter); } -static inline int -should_alloc_retry(gfp_t gfp_mask, unsigned int order, - unsigned long did_some_progress, - unsigned long pages_reclaimed) -{ - /* Do not loop if specifically requested */ - if (gfp_mask & __GFP_NORETRY) - return 0; - - /* Always retry if specifically requested */ - if (gfp_mask & __GFP_NOFAIL) - return 1; - - /* - * Suspend converts GFP_KERNEL to __GFP_WAIT which can prevent reclaim - * making forward progress without invoking OOM. Suspend also disables - * storage devices so kswapd will not help. Bail if we are suspending. - */ - if (!did_some_progress && pm_suspended_storage()) - return 0; - - /* - * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER - * means __GFP_NOFAIL, but that may not be true in other - * implementations. - */ - if (order <= PAGE_ALLOC_COSTLY_ORDER) - return 1; - - /* - * For order > PAGE_ALLOC_COSTLY_ORDER, if __GFP_REPEAT is - * specified, then we retry until we no longer reclaim any pages - * (above), or we've reclaimed an order of pages at least as - * large as the allocation's order. In both cases, if the - * allocation still fails, we stop retrying. - */ - if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order)) - return 1; - - return 0; -} - static inline struct page * __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, const struct alloc_context *ac, unsigned long *did_some_progress) @@ -2373,10 +2379,10 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, *did_some_progress = 0; /* - * Acquire the per-zone oom lock for each zone. If that - * fails, somebody else is making progress for us. + * Acquire the oom lock. If that fails, somebody else is + * making progress for us. */ - if (!oom_zonelist_trylock(ac->zonelist, gfp_mask)) { + if (!mutex_trylock(&oom_lock)) { *did_some_progress = 1; schedule_timeout_uninterruptible(1); return NULL; @@ -2402,16 +2408,18 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, /* The OOM killer does not needlessly kill tasks for lowmem */ if (ac->high_zoneidx < ZONE_NORMAL) goto out; - /* The OOM killer does not compensate for light reclaim */ + /* The OOM killer does not compensate for IO-less reclaim */ if (!(gfp_mask & __GFP_FS)) { /* * XXX: Page reclaim didn't yield anything, * and the OOM killer can't be invoked, but - * keep looping as per should_alloc_retry(). + * keep looping as per tradition. */ *did_some_progress = 1; goto out; } + if (pm_suspended_storage()) + goto out; /* The OOM killer may not free memory on a specific node */ if (gfp_mask & __GFP_THISNODE) goto out; @@ -2421,7 +2429,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) *did_some_progress = 1; out: - oom_zonelist_unlock(ac->zonelist, gfp_mask); + mutex_unlock(&oom_lock); return page; } @@ -2794,40 +2802,40 @@ retry: if (page) goto got_pg; - /* Check if we should retry the allocation */ + /* Do not loop if specifically requested */ + if (gfp_mask & __GFP_NORETRY) + goto noretry; + + /* Keep reclaiming pages as long as there is reasonable progress */ pages_reclaimed += did_some_progress; - if (should_alloc_retry(gfp_mask, order, did_some_progress, - pages_reclaimed)) { - /* - * If we fail to make progress by freeing individual - * pages, but the allocation wants us to keep going, - * start OOM killing tasks. - */ - if (!did_some_progress) { - page = __alloc_pages_may_oom(gfp_mask, order, ac, - &did_some_progress); - if (page) - goto got_pg; - if (!did_some_progress) - goto nopage; - } + if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) || + ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) { /* Wait for some write requests to complete then retry */ wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, HZ/50); goto retry; - } else { - /* - * High-order allocations do not necessarily loop after - * direct reclaim and reclaim/compaction depends on compaction - * being called after reclaim so call directly if necessary - */ - page = __alloc_pages_direct_compact(gfp_mask, order, - alloc_flags, ac, migration_mode, - &contended_compaction, - &deferred_compaction); - if (page) - goto got_pg; } + /* Reclaim has failed us, start killing things */ + page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress); + if (page) + goto got_pg; + + /* Retry as long as the OOM killer is making progress */ + if (did_some_progress) + goto retry; + +noretry: + /* + * High-order allocations do not necessarily loop after + * direct reclaim and reclaim/compaction depends on compaction + * being called after reclaim so call directly if necessary + */ + page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, + ac, migration_mode, + &contended_compaction, + &deferred_compaction); + if (page) + goto got_pg; nopage: warn_alloc_failed(gfp_mask, order, NULL); got_pg: @@ -4265,7 +4273,6 @@ static void setup_zone_migrate_reserve(struct zone *zone) void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, unsigned long start_pfn, enum memmap_context context) { - struct page *page; unsigned long end_pfn = start_pfn + size; unsigned long pfn; struct zone *z; @@ -4286,38 +4293,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, if (!early_pfn_in_nid(pfn, nid)) continue; } - page = pfn_to_page(pfn); - set_page_links(page, zone, nid, pfn); - mminit_verify_page_links(page, zone, nid, pfn); - init_page_count(page); - page_mapcount_reset(page); - page_cpupid_reset_last(page); - SetPageReserved(page); - /* - * Mark the block movable so that blocks are reserved for - * movable at startup. This will force kernel allocations - * to reserve their blocks rather than leaking throughout - * the address space during boot when many long-lived - * kernel allocations are made. Later some blocks near - * the start are marked MIGRATE_RESERVE by - * setup_zone_migrate_reserve() - * - * bitmap is created for zone's valid pfn range. but memmap - * can be created for invalid pages (for alignment) - * check here not to call set_pageblock_migratetype() against - * pfn out of zone. - */ - if ((z->zone_start_pfn <= pfn) - && (pfn < zone_end_pfn(z)) - && !(pfn & (pageblock_nr_pages - 1))) - set_pageblock_migratetype(page, MIGRATE_MOVABLE); - - INIT_LIST_HEAD(&page->lru); -#ifdef WANT_PAGE_VIRTUAL - /* The shift won't overflow because ZONE_NORMAL is below 4G. */ - if (!is_highmem_idx(zone)) - set_page_address(page, __va(pfn << PAGE_SHIFT)); -#endif + __init_single_pfn(pfn, zone, nid); } } @@ -4867,22 +4843,28 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) { - unsigned long realtotalpages, totalpages = 0; + unsigned long realtotalpages = 0, totalpages = 0; enum zone_type i; - for (i = 0; i < MAX_NR_ZONES; i++) - totalpages += zone_spanned_pages_in_node(pgdat->node_id, i, - node_start_pfn, - node_end_pfn, - zones_size); - pgdat->node_spanned_pages = totalpages; - - realtotalpages = totalpages; - for (i = 0; i < MAX_NR_ZONES; i++) - realtotalpages -= - zone_absent_pages_in_node(pgdat->node_id, i, + for (i = 0; i < MAX_NR_ZONES; i++) { + struct zone *zone = pgdat->node_zones + i; + unsigned long size, real_size; + + size = zone_spanned_pages_in_node(pgdat->node_id, i, + node_start_pfn, + node_end_pfn, + zones_size); + real_size = size - zone_absent_pages_in_node(pgdat->node_id, i, node_start_pfn, node_end_pfn, zholes_size); + zone->spanned_pages = size; + zone->present_pages = real_size; + + totalpages += size; + realtotalpages += real_size; + } + + pgdat->node_spanned_pages = totalpages; pgdat->node_present_pages = realtotalpages; printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages); @@ -4992,8 +4974,7 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages, * NOTE: pgdat should get zeroed by caller. */ static void __paginginit free_area_init_core(struct pglist_data *pgdat, - unsigned long node_start_pfn, unsigned long node_end_pfn, - unsigned long *zones_size, unsigned long *zholes_size) + unsigned long node_start_pfn, unsigned long node_end_pfn) { enum zone_type j; int nid = pgdat->node_id; @@ -5014,12 +4995,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, freesize, memmap_pages; - size = zone_spanned_pages_in_node(nid, j, node_start_pfn, - node_end_pfn, zones_size); - realsize = freesize = size - zone_absent_pages_in_node(nid, j, - node_start_pfn, - node_end_pfn, - zholes_size); + size = zone->spanned_pages; + realsize = freesize = zone->present_pages; /* * Adjust freesize so that it accounts for how much memory @@ -5054,8 +5031,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, nr_kernel_pages -= memmap_pages; nr_all_pages += freesize; - zone->spanned_pages = size; - zone->present_pages = realsize; /* * Set an approximate value for lowmem here, it will be adjusted * when the bootmem allocator frees pages into the buddy system. @@ -5161,8 +5136,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, (unsigned long)pgdat->node_mem_map); #endif - free_area_init_core(pgdat, start_pfn, end_pfn, - zones_size, zholes_size); + free_area_init_core(pgdat, start_pfn, end_pfn); } #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP @@ -6111,9 +6085,9 @@ out: return ret; } +#ifdef CONFIG_NUMA int hashdist = HASHDIST_DEFAULT; -#ifdef CONFIG_NUMA static int __init set_hashdist(char *str) { if (!str)