X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fpage_alloc.c;h=20759803a64ad151ef7d49f85308ff99b2b5c073;hb=2f66a68f3fac2e94da360c342ff78ab45553f86c;hp=2f457a756d4678cadc46f012af8691db8da32826;hpb=fa5e084e43eb14c14942027e1e2e894aeed96097;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2f457a7..2075980 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,7 @@ unsigned long totalram_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; unsigned long highest_memmap_pfn __read_mostly; int percpu_pagelist_fraction; +gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE int pageblock_order __read_mostly; @@ -486,7 +488,6 @@ static inline void __free_one_page(struct page *page, */ static inline void free_page_mlock(struct page *page) { - __ClearPageMlocked(page); __dec_zone_page_state(page, NR_MLOCK); __count_vm_event(UNEVICTABLE_MLOCKFREED); } @@ -556,7 +557,9 @@ static void __free_pages_ok(struct page *page, unsigned int order) unsigned long flags; int i; int bad = 0; - int clearMlocked = PageMlocked(page); + int wasMlocked = __TestClearPageMlocked(page); + + kmemcheck_free_shadow(page, order); for (i = 0 ; i < (1 << order) ; ++i) bad += free_pages_check(page + i); @@ -572,7 +575,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) kernel_map_pages(page, 1 << order, 0); local_irq_save(flags); - if (unlikely(clearMlocked)) + if (unlikely(wasMlocked)) free_page_mlock(page); __count_vm_events(PGFREE, 1 << order); free_one_page(page_zone(page), page, order, @@ -780,6 +783,17 @@ static int move_freepages_block(struct zone *zone, struct page *page, return move_freepages(zone, start_page, end_page, migratetype); } +static void change_pageblock_range(struct page *pageblock_page, + int start_order, int migratetype) +{ + int nr_pageblocks = 1 << (start_order - pageblock_order); + + while (nr_pageblocks--) { + set_pageblock_migratetype(pageblock_page, migratetype); + pageblock_page += pageblock_nr_pages; + } +} + /* Remove an element from the buddy allocator from the fallback list */ static inline struct page * __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) @@ -814,13 +828,15 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) * agressive about taking ownership of free pages */ if (unlikely(current_order >= (pageblock_order >> 1)) || - start_migratetype == MIGRATE_RECLAIMABLE) { + start_migratetype == MIGRATE_RECLAIMABLE || + page_group_by_mobility_disabled) { unsigned long pages; pages = move_freepages_block(zone, page, start_migratetype); /* Claim the whole block if over half of it is free */ - if (pages >= (1 << (pageblock_order-1))) + if (pages >= (1 << (pageblock_order-1)) || + page_group_by_mobility_disabled) set_pageblock_migratetype(page, start_migratetype); @@ -831,8 +847,9 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) list_del(&page->lru); rmv_page_order(page); - if (current_order == pageblock_order) - set_pageblock_migratetype(page, + /* Take ownership for orders >= pageblock_order */ + if (current_order >= pageblock_order) + change_pageblock_range(page, current_order, start_migratetype); expand(zone, page, order, current_order, area, migratetype); @@ -879,7 +896,7 @@ retry_reserve: */ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, - int migratetype) + int migratetype, int cold) { int i; @@ -898,7 +915,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * merge IO requests if the physical pages are ordered * properly. */ - list_add(&page->lru, list); + if (likely(cold == 0)) + list_add(&page->lru, list); + else + list_add_tail(&page->lru, list); set_page_private(page, migratetype); list = &page->lru; } @@ -1018,7 +1038,9 @@ static void free_hot_cold_page(struct page *page, int cold) struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; unsigned long flags; - int clearMlocked = PageMlocked(page); + int wasMlocked = __TestClearPageMlocked(page); + + kmemcheck_free_shadow(page, 0); if (PageAnon(page)) page->mapping = NULL; @@ -1035,7 +1057,7 @@ static void free_hot_cold_page(struct page *page, int cold) pcp = &zone_pcp(zone, get_cpu())->pcp; set_page_private(page, get_pageblock_migratetype(page)); local_irq_save(flags); - if (unlikely(clearMlocked)) + if (unlikely(wasMlocked)) free_page_mlock(page); __count_vm_event(PGFREE); @@ -1076,6 +1098,16 @@ void split_page(struct page *page, unsigned int order) VM_BUG_ON(PageCompound(page)); VM_BUG_ON(!page_count(page)); + +#ifdef CONFIG_KMEMCHECK + /* + * Split shadow pages too, because free(page[0]) would + * otherwise free the whole shadow. + */ + if (kmemcheck_page_is_tracked(page)) + split_page(virt_to_page(page[0].shadow), order); +#endif + for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); } @@ -1104,7 +1136,8 @@ again: local_irq_save(flags); if (!pcp->count) { pcp->count = rmqueue_bulk(zone, 0, - pcp->batch, &pcp->list, migratetype); + pcp->batch, &pcp->list, + migratetype, cold); if (unlikely(!pcp->count)) goto failed; } @@ -1122,9 +1155,20 @@ again: /* Allocate more to the pcp list if necessary */ if (unlikely(&page->lru == &pcp->list)) { + int get_one_page = 0; + pcp->count += rmqueue_bulk(zone, 0, - pcp->batch, &pcp->list, migratetype); - page = list_entry(pcp->list.next, struct page, lru); + pcp->batch, &pcp->list, + migratetype, cold); + list_for_each_entry(page, &pcp->list, lru) { + if (get_pageblock_migratetype(page) != + MIGRATE_ISOLATE) { + get_one_page = 1; + break; + } + } + if (!get_one_page) + goto failed; } list_del(&page->lru); @@ -1138,10 +1182,10 @@ again: * properly detect and handle allocation failures. * * We most definitely don't want callers attempting to - * allocate greater than single-page units with + * allocate greater than order-1 page units with * __GFP_NOFAIL. */ - WARN_ON_ONCE(order > 0); + WARN_ON_ONCE(order > 1); } spin_lock_irqsave(&zone->lock, flags); page = __rmqueue(zone, order, migratetype); @@ -1605,10 +1649,6 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, /* We now go into synchronous reclaim */ cpuset_memory_pressure_bump(); - - /* - * The task's cpuset might have expanded its set of allowable nodes - */ p->flags |= PF_MEMALLOC; lockdep_set_current_reclaim_state(gfp_mask); reclaim_state.reclaimed_slab = 0; @@ -1651,7 +1691,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, preferred_zone, migratetype); if (!page && gfp_mask & __GFP_NOFAIL) - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); } while (!page && (gfp_mask & __GFP_NOFAIL)); return page; @@ -1725,8 +1765,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * be using allocators in order of preference for an area that is * too large. */ - if (WARN_ON_ONCE(order >= MAX_ORDER)) + if (order >= MAX_ORDER) { + WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); return NULL; + } /* * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and @@ -1741,6 +1783,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, wake_all_kswapd(order, zonelist, high_zoneidx); +restart: /* * OK, we're below the kswapd watermark and have kicked background * reclaim. Now things get more complex, so set up alloc_flags according @@ -1748,7 +1791,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, */ alloc_flags = gfp_to_alloc_flags(gfp_mask); -restart: /* This is the last chance, in general, before the goto nopage. */ page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, @@ -1774,6 +1816,10 @@ rebalance: if (p->flags & PF_MEMALLOC) goto nopage; + /* Avoid allocations with no watermarks from looping endlessly */ + if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) + goto nopage; + /* Try direct reclaim and then allocating */ page = __alloc_pages_direct_reclaim(gfp_mask, order, zonelist, high_zoneidx, @@ -1816,7 +1862,7 @@ rebalance: pages_reclaimed += did_some_progress; if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { /* Wait for some write requests to complete then retry */ - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); goto rebalance; } @@ -1828,7 +1874,10 @@ nopage: dump_stack(); show_mem(); } + return page; got_pg: + if (kmemcheck_enabled) + kmemcheck_pagealloc_alloc(page, order, gfp_mask); return page; } @@ -1845,6 +1894,8 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, struct page *page; int migratetype = allocflags_to_migratetype(gfp_mask); + gfp_mask &= gfp_allowed_mask; + lockdep_trace_alloc(gfp_mask); might_sleep_if(gfp_mask & __GFP_WAIT); @@ -1883,31 +1934,25 @@ EXPORT_SYMBOL(__alloc_pages_nodemask); */ unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) { - struct page * page; + struct page *page; + + /* + * __get_free_pages() returns a 32-bit address, which cannot represent + * a highmem page + */ + VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); + page = alloc_pages(gfp_mask, order); if (!page) return 0; return (unsigned long) page_address(page); } - EXPORT_SYMBOL(__get_free_pages); unsigned long get_zeroed_page(gfp_t gfp_mask) { - struct page * page; - - /* - * get_zeroed_page() returns a 32-bit address, which cannot represent - * a highmem page - */ - VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); - - page = alloc_pages(gfp_mask | __GFP_ZERO, 0); - if (page) - return (unsigned long) page_address(page); - return 0; + return __get_free_pages(gfp_mask | __GFP_ZERO, 0); } - EXPORT_SYMBOL(get_zeroed_page); void __pagevec_free(struct pagevec *pvec) @@ -1963,7 +2008,7 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) unsigned long alloc_end = addr + (PAGE_SIZE << order); unsigned long used = addr + PAGE_ALIGN(size); - split_page(virt_to_page(addr), order); + split_page(virt_to_page((void *)addr), order); while (used < alloc_end) { free_page(used); used += PAGE_SIZE; @@ -2095,23 +2140,28 @@ void show_free_areas(void) } } - printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n" - " inactive_file:%lu" + printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" + " active_file:%lu inactive_file:%lu isolated_file:%lu\n" " unevictable:%lu" - " dirty:%lu writeback:%lu unstable:%lu\n" - " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", + " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n" + " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" + " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n", global_page_state(NR_ACTIVE_ANON), - global_page_state(NR_ACTIVE_FILE), global_page_state(NR_INACTIVE_ANON), + global_page_state(NR_ISOLATED_ANON), + global_page_state(NR_ACTIVE_FILE), global_page_state(NR_INACTIVE_FILE), + global_page_state(NR_ISOLATED_FILE), global_page_state(NR_UNEVICTABLE), global_page_state(NR_FILE_DIRTY), global_page_state(NR_WRITEBACK), global_page_state(NR_UNSTABLE_NFS), + nr_blockdev_pages(), global_page_state(NR_FREE_PAGES), - global_page_state(NR_SLAB_RECLAIMABLE) + - global_page_state(NR_SLAB_UNRECLAIMABLE), + global_page_state(NR_SLAB_RECLAIMABLE), + global_page_state(NR_SLAB_UNRECLAIMABLE), global_page_state(NR_FILE_MAPPED), + global_page_state(NR_SHMEM), global_page_state(NR_PAGETABLE), global_page_state(NR_BOUNCE)); @@ -2129,7 +2179,21 @@ void show_free_areas(void) " active_file:%lukB" " inactive_file:%lukB" " unevictable:%lukB" + " isolated(anon):%lukB" + " isolated(file):%lukB" " present:%lukB" + " mlocked:%lukB" + " dirty:%lukB" + " writeback:%lukB" + " mapped:%lukB" + " shmem:%lukB" + " slab_reclaimable:%lukB" + " slab_unreclaimable:%lukB" + " kernel_stack:%lukB" + " pagetables:%lukB" + " unstable:%lukB" + " bounce:%lukB" + " writeback_tmp:%lukB" " pages_scanned:%lu" " all_unreclaimable? %s" "\n", @@ -2143,7 +2207,22 @@ void show_free_areas(void) K(zone_page_state(zone, NR_ACTIVE_FILE)), K(zone_page_state(zone, NR_INACTIVE_FILE)), K(zone_page_state(zone, NR_UNEVICTABLE)), + K(zone_page_state(zone, NR_ISOLATED_ANON)), + K(zone_page_state(zone, NR_ISOLATED_FILE)), K(zone->present_pages), + K(zone_page_state(zone, NR_MLOCK)), + K(zone_page_state(zone, NR_FILE_DIRTY)), + K(zone_page_state(zone, NR_WRITEBACK)), + K(zone_page_state(zone, NR_FILE_MAPPED)), + K(zone_page_state(zone, NR_SHMEM)), + K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)), + K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)), + zone_page_state(zone, NR_KERNEL_STACK) * + THREAD_SIZE / 1024, + K(zone_page_state(zone, NR_PAGETABLE)), + K(zone_page_state(zone, NR_UNSTABLE_NFS)), + K(zone_page_state(zone, NR_BOUNCE)), + K(zone_page_state(zone, NR_WRITEBACK_TEMP)), zone->pages_scanned, (zone_is_all_unreclaimable(zone) ? "yes" : "no") ); @@ -2513,7 +2592,6 @@ static void build_zonelists(pg_data_t *pgdat) prev_node = local_node; nodes_clear(used_mask); - memset(node_load, 0, sizeof(node_load)); memset(node_order, 0, sizeof(node_order)); j = 0; @@ -2622,6 +2700,9 @@ static int __build_all_zonelists(void *dummy) { int nid; +#ifdef CONFIG_NUMA + memset(node_load, 0, sizeof(node_load)); +#endif for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); @@ -3006,7 +3087,7 @@ bad: if (dzone == zone) break; kfree(zone_pcp(dzone, cpu)); - zone_pcp(dzone, cpu) = NULL; + zone_pcp(dzone, cpu) = &boot_pageset[cpu]; } return -ENOMEM; } @@ -3021,7 +3102,7 @@ static inline void free_zone_pagesets(int cpu) /* Free per_cpu_pageset if it is slab allocated */ if (pset != &boot_pageset[cpu]) kfree(pset); - zone_pcp(zone, cpu) = NULL; + zone_pcp(zone, cpu) = &boot_pageset[cpu]; } } @@ -3111,6 +3192,32 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) return 0; } +static int __zone_pcp_update(void *data) +{ + struct zone *zone = data; + int cpu; + unsigned long batch = zone_batchsize(zone), flags; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct per_cpu_pageset *pset; + struct per_cpu_pages *pcp; + + pset = zone_pcp(zone, cpu); + pcp = &pset->pcp; + + local_irq_save(flags); + free_pages_bulk(zone, pcp->count, &pcp->list, 0); + setup_pageset(pset, batch); + local_irq_restore(flags); + } + return 0; +} + +void zone_pcp_update(struct zone *zone) +{ + stop_machine(__zone_pcp_update, zone, NULL); +} + static __meminit void zone_pcp_init(struct zone *zone) { int cpu; @@ -4012,6 +4119,8 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn) int i, nid; unsigned long usable_startpfn; unsigned long kernelcore_node, kernelcore_remaining; + /* save the state before borrow the nodemask */ + nodemask_t saved_node_state = node_states[N_HIGH_MEMORY]; unsigned long totalpages = early_calculate_totalpages(); int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]); @@ -4039,7 +4148,7 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn) /* If kernelcore was not specified, there is no ZONE_MOVABLE */ if (!required_kernelcore) - return; + goto out; /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */ find_usable_zone_for_movable(); @@ -4138,6 +4247,10 @@ restart: for (nid = 0; nid < MAX_NUMNODES; nid++) zone_movable_pfn[nid] = roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); + +out: + /* restore the node_state */ + node_states[N_HIGH_MEMORY] = saved_node_state; } /* Any regular memory on that node ? */ @@ -4222,11 +4335,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) early_node_map[i].start_pfn, early_node_map[i].end_pfn); - /* - * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init - * that node_mask, clear it at first - */ - nodes_clear(node_states[N_HIGH_MEMORY]); /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); @@ -4473,7 +4581,7 @@ void setup_per_zone_wmarks(void) calculate_totalreserve_pages(); } -/** +/* * The inactive anon list should be small enough that the VM never has to * do too much work, but large enough that each inactive page has a chance * to be referenced again before it is swapped out. @@ -4639,7 +4747,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); if (!write || (ret == -EINVAL)) return ret; - for_each_zone(zone) { + for_each_populated_zone(zone) { for_each_online_cpu(cpu) { unsigned long high; high = zone->present_pages / percpu_pagelist_fraction; @@ -4724,8 +4832,10 @@ void *__init alloc_large_system_hash(const char *tablename, * some pages at the end of hash table which * alloc_pages_exact() automatically does */ - if (get_order(size) < MAX_ORDER) + if (get_order(size) < MAX_ORDER) { table = alloc_pages_exact(size, GFP_ATOMIC); + kmemleak_alloc(table, size, 1, GFP_ATOMIC); + } } } while (!table && size > PAGE_SIZE && --log2qty); @@ -4743,16 +4853,6 @@ void *__init alloc_large_system_hash(const char *tablename, if (_hash_mask) *_hash_mask = (1 << log2qty) - 1; - /* - * If hashdist is set, the table allocation is done with __vmalloc() - * which invokes the kmemleak_alloc() callback. This function may also - * be called before the slab and kmemleak are initialised when - * kmemleak simply buffers the request to be executed later - * (GFP_ATOMIC flag ignored in this case). - */ - if (!hashdist) - kmemleak_alloc(table, size, 1, GFP_ATOMIC); - return table; } @@ -4846,13 +4946,16 @@ int set_migratetype_isolate(struct page *page) struct zone *zone; unsigned long flags; int ret = -EBUSY; + int zone_idx; zone = page_zone(page); + zone_idx = zone_idx(zone); spin_lock_irqsave(&zone->lock, flags); /* * In future, more migrate types will be able to be isolation target. */ - if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE) + if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE && + zone_idx != ZONE_MOVABLE) goto out; set_pageblock_migratetype(page, MIGRATE_ISOLATE); move_freepages_block(zone, page, MIGRATE_ISOLATE);