X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fpage_alloc.c;h=bf2f6cff1d6aaeb78097e90efb78f5c6dfc61b32;hb=450efcfd2e1d941e302a8c89322fbfcef237be98;hp=75133e1dc4b1c624f2a813f9cfeb9e0305ecf407;hpb=9c7cd6877cf8db15269163deda69392263124c1e;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 75133e1..bf2f6cf 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -131,6 +132,10 @@ static unsigned long __initdata dma_reserve; int __initdata nr_nodemap_entries; unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE + unsigned long __initdata node_boundary_start_pfn[MAX_NUMNODES]; + unsigned long __initdata node_boundary_end_pfn[MAX_NUMNODES]; +#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ #ifdef CONFIG_DEBUG_VM @@ -491,17 +496,16 @@ static void __free_pages_ok(struct page *page, unsigned int order) int i; int reserved = 0; - arch_free_page(page, order); - if (!PageHighMem(page)) - debug_check_no_locks_freed(page_address(page), - PAGE_SIZE<mapping = NULL; if (free_pages_check(page)) return; + if (!PageHighMem(page)) + debug_check_no_locks_freed(page_address(page), PAGE_SIZE); + arch_free_page(page, 0); kernel_map_pages(page, 1, 0); pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; @@ -848,7 +853,7 @@ again: pcp = &zone_pcp(zone, cpu)->pcp[cold]; local_irq_save(flags); if (!pcp->count) { - pcp->count += rmqueue_bulk(zone, 0, + pcp->count = rmqueue_bulk(zone, 0, pcp->batch, &pcp->list); if (unlikely(!pcp->count)) goto failed; @@ -896,7 +901,8 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, int classzone_idx, int alloc_flags) { /* free_pages my go negative - that's OK */ - long min = mark, free_pages = z->free_pages - (1 << order) + 1; + unsigned long min = mark; + long free_pages = z->free_pages - (1 << order) + 1; int o; if (alloc_flags & ALLOC_HIGH) @@ -938,7 +944,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, */ do { zone = *z; - if (unlikely((gfp_mask & __GFP_THISNODE) && + if (unlikely(NUMA_BUILD && (gfp_mask & __GFP_THISNODE) && zone->zone_pgdat != zonelist->zones[0]->zone_pgdat)) break; if ((alloc_flags & ALLOC_CPUSET) && @@ -1045,7 +1051,7 @@ nofail_alloc: if (page) goto got_pg; if (gfp_mask & __GFP_NOFAIL) { - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto nofail_alloc; } } @@ -1108,7 +1114,7 @@ rebalance: do_retry = 1; } if (do_retry) { - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto rebalance; } @@ -1252,14 +1258,12 @@ unsigned int nr_free_pagecache_pages(void) { return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER)); } -#ifdef CONFIG_NUMA -static void show_node(struct zone *zone) + +static inline void show_node(struct zone *zone) { - printk("Node %ld ", zone_to_nid(zone)); + if (NUMA_BUILD) + printk("Node %ld ", zone_to_nid(zone)); } -#else -#define show_node(zone) do { } while (0) -#endif void si_meminfo(struct sysinfo *val) { @@ -1301,34 +1305,30 @@ void si_meminfo_node(struct sysinfo *val, int nid) */ void show_free_areas(void) { - int cpu, temperature; + int cpu; unsigned long active; unsigned long inactive; unsigned long free; struct zone *zone; for_each_zone(zone) { - show_node(zone); - printk("%s per-cpu:", zone->name); - - if (!populated_zone(zone)) { - printk(" empty\n"); + if (!populated_zone(zone)) continue; - } else - printk("\n"); + + show_node(zone); + printk("%s per-cpu:\n", zone->name); for_each_online_cpu(cpu) { struct per_cpu_pageset *pageset; pageset = zone_pcp(zone, cpu); - for (temperature = 0; temperature < 2; temperature++) - printk("cpu %d %s: high %d, batch %d used:%d\n", - cpu, - temperature ? "cold" : "hot", - pageset->pcp[temperature].high, - pageset->pcp[temperature].batch, - pageset->pcp[temperature].count); + printk("CPU %4d: Hot: hi:%5d, btch:%4d usd:%4d " + "Cold: hi:%5d, btch:%4d usd:%4d\n", + cpu, pageset->pcp[0].high, + pageset->pcp[0].batch, pageset->pcp[0].count, + pageset->pcp[1].high, pageset->pcp[1].batch, + pageset->pcp[1].count); } } @@ -1350,6 +1350,9 @@ void show_free_areas(void) for_each_zone(zone) { int i; + if (!populated_zone(zone)) + continue; + show_node(zone); printk("%s" " free:%lukB" @@ -1382,12 +1385,11 @@ void show_free_areas(void) for_each_zone(zone) { unsigned long nr[MAX_ORDER], flags, order, total = 0; + if (!populated_zone(zone)) + continue; + show_node(zone); printk("%s: ", zone->name); - if (!populated_zone(zone)) { - printk("empty\n"); - continue; - } spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { @@ -1591,7 +1593,7 @@ static int __meminit __build_all_zonelists(void *dummy) void __meminit build_all_zonelists(void) { if (system_state == SYSTEM_BOOTING) { - __build_all_zonelists(0); + __build_all_zonelists(NULL); cpuset_init_current_mems_allowed(); } else { /* we have to stop all cpus to guaranntee there is no user @@ -1687,6 +1689,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, for (pfn = start_pfn; pfn < end_pfn; pfn++) { if (!early_pfn_valid(pfn)) continue; + if (!early_pfn_in_nid(pfn, nid)) + continue; page = pfn_to_page(pfn); set_page_links(page, zone, nid, pfn); init_page_count(page); @@ -1829,6 +1833,9 @@ static int __cpuinit process_zones(int cpu) for_each_zone(zone) { + if (!populated_zone(zone)) + continue; + zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), GFP_KERNEL, cpu_to_node(cpu)); if (!zone_pcp(zone, cpu)) @@ -2047,8 +2054,8 @@ int __init early_pfn_to_nid(unsigned long pfn) /** * free_bootmem_with_active_regions - Call free_bootmem_node for each active range - * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed - * @max_low_pfn: The highest PFN that till be passed to free_bootmem_node + * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. + * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node * * If an architecture guarantees that all ranges registered with * add_active_ranges() contain no holes and may be freed, this @@ -2078,11 +2085,11 @@ void __init free_bootmem_with_active_regions(int nid, /** * sparse_memory_present_with_active_regions - Call memory_present for each active range - * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used + * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. * * If an architecture guarantees that all ranges registered with * add_active_ranges() contain no holes and may be freed, this - * this function may be used instead of calling memory_present() manually. + * function may be used instead of calling memory_present() manually. */ void __init sparse_memory_present_with_active_regions(int nid) { @@ -2095,15 +2102,71 @@ void __init sparse_memory_present_with_active_regions(int nid) } /** + * push_node_boundaries - Push node boundaries to at least the requested boundary + * @nid: The nid of the node to push the boundary for + * @start_pfn: The start pfn of the node + * @end_pfn: The end pfn of the node + * + * In reserve-based hot-add, mem_map is allocated that is unused until hotadd + * time. Specifically, on x86_64, SRAT will report ranges that can potentially + * be hotplugged even though no physical memory exists. This function allows + * an arch to push out the node boundaries so mem_map is allocated that can + * be used later. + */ +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE +void __init push_node_boundaries(unsigned int nid, + unsigned long start_pfn, unsigned long end_pfn) +{ + printk(KERN_DEBUG "Entering push_node_boundaries(%u, %lu, %lu)\n", + nid, start_pfn, end_pfn); + + /* Initialise the boundary for this node if necessary */ + if (node_boundary_end_pfn[nid] == 0) + node_boundary_start_pfn[nid] = -1UL; + + /* Update the boundaries */ + if (node_boundary_start_pfn[nid] > start_pfn) + node_boundary_start_pfn[nid] = start_pfn; + if (node_boundary_end_pfn[nid] < end_pfn) + node_boundary_end_pfn[nid] = end_pfn; +} + +/* If necessary, push the node boundary out for reserve hotadd */ +static void __init account_node_boundary(unsigned int nid, + unsigned long *start_pfn, unsigned long *end_pfn) +{ + printk(KERN_DEBUG "Entering account_node_boundary(%u, %lu, %lu)\n", + nid, *start_pfn, *end_pfn); + + /* Return if boundary information has not been provided */ + if (node_boundary_end_pfn[nid] == 0) + return; + + /* Check the boundaries and update if necessary */ + if (node_boundary_start_pfn[nid] < *start_pfn) + *start_pfn = node_boundary_start_pfn[nid]; + if (node_boundary_end_pfn[nid] > *end_pfn) + *end_pfn = node_boundary_end_pfn[nid]; +} +#else +void __init push_node_boundaries(unsigned int nid, + unsigned long start_pfn, unsigned long end_pfn) {} + +static void __init account_node_boundary(unsigned int nid, + unsigned long *start_pfn, unsigned long *end_pfn) {} +#endif + + +/** * get_pfn_range_for_nid - Return the start and end page frames for a node - * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned - * @start_pfn: Passed by reference. On return, it will have the node start_pfn - * @end_pfn: Passed by reference. On return, it will have the node end_pfn + * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned. + * @start_pfn: Passed by reference. On return, it will have the node start_pfn. + * @end_pfn: Passed by reference. On return, it will have the node end_pfn. * * It returns the start and end page frame of a node based on information * provided by an arch calling add_active_range(). If called for a node * with no available memory, a warning is printed and the start and end - * PFNs will be 0 + * PFNs will be 0. */ void __init get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn) @@ -2121,6 +2184,9 @@ void __init get_pfn_range_for_nid(unsigned int nid, printk(KERN_WARNING "Node %u active with no memory\n", nid); *start_pfn = 0; } + + /* Push the node boundaries out if requested */ + account_node_boundary(nid, start_pfn, end_pfn); } /* @@ -2153,7 +2219,7 @@ unsigned long __init zone_spanned_pages_in_node(int nid, /* * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, - * then all holes in the requested range will be accounted for + * then all holes in the requested range will be accounted for. */ unsigned long __init __absent_pages_in_range(int nid, unsigned long range_start_pfn, @@ -2195,7 +2261,7 @@ unsigned long __init __absent_pages_in_range(int nid, /* Account for ranges past physical memory on this node */ if (range_end_pfn > prev_end_pfn) - hole_pages = range_end_pfn - + hole_pages += range_end_pfn - max(range_start_pfn, prev_end_pfn); return hole_pages; @@ -2206,7 +2272,7 @@ unsigned long __init __absent_pages_in_range(int nid, * @start_pfn: The start PFN to start searching for holes * @end_pfn: The end PFN to stop searching for holes * - * It returns the number of pages frames in memory holes within a range + * It returns the number of pages frames in memory holes within a range. */ unsigned long __init absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn) @@ -2231,19 +2297,6 @@ unsigned long __init zone_absent_pages_in_node(int nid, return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); } -/* Return the zone index a PFN is in */ -int memmap_zone_idx(struct page *lmem_map) -{ - int i; - unsigned long phys_addr = virt_to_phys(lmem_map); - unsigned long pfn = phys_addr >> PAGE_SHIFT; - - for (i = 0; i < MAX_NR_ZONES; i++) - if (pfn < arch_zone_highest_possible_pfn[i]) - break; - - return i; -} #else static inline unsigned long zone_spanned_pages_in_node(int nid, unsigned long zone_type, @@ -2262,10 +2315,6 @@ static inline unsigned long zone_absent_pages_in_node(int nid, return zholes_size[zone_type]; } -static inline int memmap_zone_idx(struct page *lmem_map) -{ - return MAX_NR_ZONES; -} #endif static void __init calculate_node_totalpages(struct pglist_data *pgdat, @@ -2346,6 +2395,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->spanned_pages = size; zone->present_pages = realsize; #ifdef CONFIG_NUMA + zone->node = nid; zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) / 100; zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; @@ -2357,7 +2407,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->zone_pgdat = pgdat; zone->free_pages = 0; - zone->temp_priority = zone->prev_priority = DEF_PRIORITY; + zone->prev_priority = DEF_PRIORITY; zone_pcp_init(zone); INIT_LIST_HEAD(&zone->active_list); @@ -2519,14 +2569,19 @@ void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn, /** * remove_all_active_ranges - Remove all currently registered regions + * * During discovery, it may be found that a table like SRAT is invalid * and an alternative discovery method must be used. This function removes * all currently registered regions. */ -void __init remove_all_active_ranges() +void __init remove_all_active_ranges(void) { memset(early_node_map, 0, sizeof(early_node_map)); nr_nodemap_entries = 0; +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE + memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn)); + memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn)); +#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ } /* Compare two active node_active_regions */ @@ -2569,7 +2624,7 @@ unsigned long __init find_min_pfn_for_node(unsigned long nid) * find_min_pfn_with_active_regions - Find the minimum PFN registered * * It returns the minimum PFN based on information provided via - * add_active_range() + * add_active_range(). */ unsigned long __init find_min_pfn_with_active_regions(void) { @@ -2580,7 +2635,7 @@ unsigned long __init find_min_pfn_with_active_regions(void) * find_max_pfn_with_active_regions - Find the maximum PFN registered * * It returns the maximum PFN based on information provided via - * add_active_range() + * add_active_range(). */ unsigned long __init find_max_pfn_with_active_regions(void) { @@ -2595,10 +2650,7 @@ unsigned long __init find_max_pfn_with_active_regions(void) /** * free_area_init_nodes - Initialise all pg_data_t and zone data - * @arch_max_dma_pfn: The maximum PFN usable for ZONE_DMA - * @arch_max_dma32_pfn: The maximum PFN usable for ZONE_DMA32 - * @arch_max_low_pfn: The maximum PFN usable for ZONE_NORMAL - * @arch_max_high_pfn: The maximum PFN usable for ZONE_HIGHMEM + * @max_zone_pfn: an array of max PFNs for each zone * * This will call free_area_init_node() for each active node in the system. * Using the page ranges provided by add_active_range(), the size of each @@ -2656,14 +2708,15 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ /** - * set_dma_reserve - Account the specified number of pages reserved in ZONE_DMA - * @new_dma_reserve - The number of pages to mark reserved + * set_dma_reserve - set the specified number of pages reserved in the first zone + * @new_dma_reserve: The number of pages to mark reserved * * The per-cpu batchsize and zone watermarks are determined by present_pages. * In the DMA zone, a significant percentage may be consumed by kernel image * and other unfreeable allocations which can skew the watermarks badly. This - * function may optionally be used to account for unfreeable pages in - * ZONE_DMA. The effect will be lower watermarks and smaller per-cpu batchsize + * function may optionally be used to account for unfreeable pages in the + * first zone (e.g., ZONE_DMA). The effect will be lower watermarks and + * smaller per-cpu batchsize. */ void __init set_dma_reserve(unsigned long new_dma_reserve) { @@ -2776,10 +2829,11 @@ static void setup_per_zone_lowmem_reserve(void) calculate_totalreserve_pages(); } -/* - * setup_per_zone_pages_min - called when min_free_kbytes changes. Ensures - * that the pages_{min,low,high} values for each zone are set correctly - * with respect to min_free_kbytes. +/** + * setup_per_zone_pages_min - called when min_free_kbytes changes. + * + * Ensures that the pages_{min,low,high} values for each zone are set correctly + * with respect to min_free_kbytes. */ void setup_per_zone_pages_min(void) { @@ -3068,3 +3122,19 @@ unsigned long page_to_pfn(struct page *page) EXPORT_SYMBOL(pfn_to_page); EXPORT_SYMBOL(page_to_pfn); #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ + +#if MAX_NUMNODES > 1 +/* + * Find the highest possible node id. + */ +int highest_possible_node_id(void) +{ + unsigned int node; + unsigned int highest = 0; + + for_each_node_mask(node, node_possible_map) + highest = node; + return highest; +} +EXPORT_SYMBOL(highest_possible_node_id); +#endif