#include <linux/page-isolation.h>
#include <linux/page_cgroup.h>
#include <linux/debugobjects.h>
+#include <linux/kmemleak.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
unsigned long totalram_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
+unsigned long highest_memmap_pfn __read_mostly;
int percpu_pagelist_fraction;
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
static int __meminitdata nr_nodemap_entries;
static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
- static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
- static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
static unsigned long __initdata required_kernelcore;
static unsigned long __initdata required_movablecore;
static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
static void bad_page(struct page *page)
{
- printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
- "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
- current->comm, page, (int)(2*sizeof(unsigned long)),
- (unsigned long)page->flags, page->mapping,
- page_mapcount(page), page_count(page));
+ static unsigned long resume;
+ static unsigned long nr_shown;
+ static unsigned long nr_unshown;
+
+ /*
+ * Allow a burst of 60 reports, then keep quiet for that minute;
+ * or allow a steady drip of one report per second.
+ */
+ if (nr_shown == 60) {
+ if (time_before(jiffies, resume)) {
+ nr_unshown++;
+ goto out;
+ }
+ if (nr_unshown) {
+ printk(KERN_ALERT
+ "BUG: Bad page state: %lu messages suppressed\n",
+ nr_unshown);
+ nr_unshown = 0;
+ }
+ nr_shown = 0;
+ }
+ if (nr_shown++ == 0)
+ resume = jiffies + 60 * HZ;
+
+ printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n",
+ current->comm, page_to_pfn(page));
+ printk(KERN_ALERT
+ "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
+ page, (void *)page->flags, page_count(page),
+ page_mapcount(page), page->mapping, page->index);
- printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
- KERN_EMERG "Backtrace:\n");
dump_stack();
- set_page_count(page, 0);
- reset_page_mapcount(page);
- page->mapping = NULL;
+out:
+ /* Leave bad fields for debug, except PageBuddy could make trouble */
+ __ClearPageBuddy(page);
add_taint(TAINT_BAD_PAGE);
}
}
#endif
-static void destroy_compound_page(struct page *page, unsigned long order)
+static int destroy_compound_page(struct page *page, unsigned long order)
{
int i;
int nr_pages = 1 << order;
+ int bad = 0;
- if (unlikely(compound_order(page) != order))
+ if (unlikely(compound_order(page) != order) ||
+ unlikely(!PageHead(page))) {
bad_page(page);
+ bad++;
+ }
- if (unlikely(!PageHead(page)))
- bad_page(page);
__ClearPageHead(page);
+
for (i = 1; i < nr_pages; i++) {
struct page *p = page + i;
- if (unlikely(!PageTail(p) |
- (p->first_page != page)))
+ if (unlikely(!PageTail(p) || (p->first_page != page))) {
bad_page(page);
+ bad++;
+ }
__ClearPageTail(p);
}
+
+ return bad;
}
static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
int migratetype = get_pageblock_migratetype(page);
if (unlikely(PageCompound(page)))
- destroy_compound_page(page, order);
+ if (unlikely(destroy_compound_page(page, order)))
+ return;
page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
(page_count(page) != 0) |
- (page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
+ (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) {
bad_page(page);
- /*
- * For now, we report if PG_reserved was found set, but do not
- * clear it, and do not free the page. But we shall soon need
- * to do more, for when the ZERO_PAGE count wraps negative.
- */
- if (PageReserved(page))
return 1;
+ }
if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
return 0;
{
unsigned long flags;
int i;
- int reserved = 0;
+ int bad = 0;
for (i = 0 ; i < (1 << order) ; ++i)
- reserved += free_pages_check(page + i);
- if (reserved)
+ bad += free_pages_check(page + i);
+ if (bad)
return;
if (!PageHighMem(page)) {
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
(page_count(page) != 0) |
- (page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
+ (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) {
bad_page(page);
-
- /*
- * For now, we report if PG_reserved was found set, but do not
- * clear it, and do not allocate the page: as a safety net.
- */
- if (PageReserved(page))
return 1;
+ }
- page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
set_page_private(page, 0);
set_page_refcounted(page);
unsigned long flags;
struct zone *zone;
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
- if (!populated_zone(zone))
- continue;
-
pset = zone_pcp(zone, cpu);
pcp = &pset->pcp;
unsigned long did_some_progress;
unsigned long pages_reclaimed = 0;
+ lockdep_trace_alloc(gfp_mask);
+
might_sleep_if(wait);
if (should_fail_alloc_page(gfp_mask, order))
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
- /*
- * The task's cpuset might have expanded its set of allowable nodes
- */
- cpuset_update_task_memory_state();
+
p->flags |= PF_MEMALLOC;
+
+ lockdep_set_current_reclaim_state(gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
- did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
+ did_some_progress = try_to_free_pages(zonelist, order,
+ gfp_mask, nodemask);
p->reclaim_state = NULL;
+ lockdep_clear_current_reclaim_state();
p->flags &= ~PF_MEMALLOC;
cond_resched();
int cpu;
struct zone *zone;
- for_each_zone(zone) {
- if (!populated_zone(zone))
- continue;
-
+ for_each_populated_zone(zone) {
show_node(zone);
printk("%s per-cpu:\n", zone->name);
global_page_state(NR_PAGETABLE),
global_page_state(NR_BOUNCE));
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
int i;
- if (!populated_zone(zone))
- continue;
-
show_node(zone);
printk("%s"
" free:%lukB"
printk("\n");
}
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
unsigned long nr[MAX_ORDER], flags, order, total = 0;
- if (!populated_zone(zone))
- continue;
-
show_node(zone);
printk("%s: ", zone->name);
int n, val;
int min_val = INT_MAX;
int best_node = -1;
- node_to_cpumask_ptr(tmp, 0);
+ const struct cpumask *tmp = cpumask_of_node(0);
/* Use the local node if we haven't already */
if (!node_isset(node, *used_node_mask)) {
val += (n < node);
/* Give preference to headless and unused nodes */
- node_to_cpumask_ptr_next(tmp, n);
- if (!cpus_empty(*tmp))
+ tmp = cpumask_of_node(n);
+ if (!cpumask_empty(tmp))
val += PENALTY_FOR_NODE_WITH_CPUS;
/* Slight preference for less loaded node */
unsigned long pfn;
struct zone *z;
+ if (highest_memmap_pfn < end_pfn - 1)
+ highest_memmap_pfn = end_pfn - 1;
+
z = &NODE_DATA(nid)->node_zones[zone];
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
/*
static int zone_batchsize(struct zone *zone)
{
+#ifdef CONFIG_MMU
int batch;
/*
* of pages of one half of the possible page colors
* and the other with pages of the other colors.
*/
- batch = (1 << (fls(batch + batch/2)-1)) - 1;
+ batch = rounddown_pow_of_two(batch + batch/2) - 1;
return batch;
+
+#else
+ /* The deferral and batching of frees should be suppressed under NOMMU
+ * conditions.
+ *
+ * The problem is that NOMMU needs to be able to allocate large chunks
+ * of contiguous memory as there's no hardware page translation to
+ * assemble apparent contiguous memory from discontiguous pages.
+ *
+ * Queueing large contiguous runs of pages for batching, however,
+ * causes the pages to actually be freed in smaller chunks. As there
+ * can be a significant delay between the individual batches being
+ * recycled, this leads to the once large chunks of space being
+ * fragmented and becoming unavailable for high-order allocations.
+ */
+ return 0;
+#endif
}
static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
node_set_state(node, N_CPU); /* this node has a cpu */
- for_each_zone(zone) {
-
- if (!populated_zone(zone))
- continue;
-
+ for_each_populated_zone(zone) {
zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
GFP_KERNEL, node);
if (!zone_pcp(zone, cpu))
* was used and there are no special requirements, this is a convenient
* alternative
*/
-int __meminit early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn)
{
int i;
if (start_pfn <= pfn && pfn < end_pfn)
return early_node_map[i].nid;
}
+ /* This is a memory hole */
+ return -1;
+}
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+
+int __meminit early_pfn_to_nid(unsigned long pfn)
+{
+ int nid;
+ nid = __early_pfn_to_nid(pfn);
+ if (nid >= 0)
+ return nid;
+ /* just returns 0 */
return 0;
}
-#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+
+#ifdef CONFIG_NODES_SPAN_OTHER_NODES
+bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+ int nid;
+
+ nid = __early_pfn_to_nid(pfn);
+ if (nid >= 0 && nid != node)
+ return false;
+ return true;
+}
+#endif
/* Basic iterator support to walk early_node_map[] */
#define for_each_active_range_index_in_nid(i, nid) \
}
/**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
- unsigned long start_pfn, unsigned long end_pfn)
-{
- mminit_dprintk(MMINIT_TRACE, "zoneboundary",
- "Entering push_node_boundaries(%u, %lu, %lu)\n",
- nid, start_pfn, end_pfn);
-
- /* Initialise the boundary for this node if necessary */
- if (node_boundary_end_pfn[nid] == 0)
- node_boundary_start_pfn[nid] = -1UL;
-
- /* Update the boundaries */
- if (node_boundary_start_pfn[nid] > start_pfn)
- node_boundary_start_pfn[nid] = start_pfn;
- if (node_boundary_end_pfn[nid] < end_pfn)
- node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
- unsigned long *start_pfn, unsigned long *end_pfn)
-{
- mminit_dprintk(MMINIT_TRACE, "zoneboundary",
- "Entering account_node_boundary(%u, %lu, %lu)\n",
- nid, *start_pfn, *end_pfn);
-
- /* Return if boundary information has not been provided */
- if (node_boundary_end_pfn[nid] == 0)
- return;
-
- /* Check the boundaries and update if necessary */
- if (node_boundary_start_pfn[nid] < *start_pfn)
- *start_pfn = node_boundary_start_pfn[nid];
- if (node_boundary_end_pfn[nid] > *end_pfn)
- *end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
- unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
- unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
-/**
* get_pfn_range_for_nid - Return the start and end page frames for a node
* @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
* @start_pfn: Passed by reference. On return, it will have the node start_pfn.
if (*start_pfn == -1UL)
*start_pfn = 0;
-
- /* Push the node boundaries out if requested */
- account_node_boundary(nid, start_pfn, end_pfn);
}
/*
INIT_LIST_HEAD(&zone->lru[l].list);
zone->lru[l].nr_scan = 0;
}
- zone->recent_rotated[0] = 0;
- zone->recent_rotated[1] = 0;
- zone->recent_scanned[0] = 0;
- zone->recent_scanned[1] = 0;
+ zone->reclaim_stat.recent_rotated[0] = 0;
+ zone->reclaim_stat.recent_rotated[1] = 0;
+ zone->reclaim_stat.recent_scanned[0] = 0;
+ zone->reclaim_stat.recent_scanned[1] = 0;
zap_zone_vm_stats(zone);
zone->flags = 0;
if (!size)
{
memset(early_node_map, 0, sizeof(early_node_map));
nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
- memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
- memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
}
/* Compare two active node_active_regions */
if (_hash_mask)
*_hash_mask = (1 << log2qty) - 1;
- return table;
-}
+ /*
+ * If hashdist is set, the table allocation is done with __vmalloc()
+ * which invokes the kmemleak_alloc() callback. This function may also
+ * be called before the slab and kmemleak are initialised when
+ * kmemleak simply buffers the request to be executed later
+ * (GFP_ATOMIC flag ignored in this case).
+ */
+ if (!hashdist)
+ kmemleak_alloc(table, size, 1, GFP_ATOMIC);
-#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
-struct page *pfn_to_page(unsigned long pfn)
-{
- return __pfn_to_page(pfn);
-}
-unsigned long page_to_pfn(struct page *page)
-{
- return __page_to_pfn(page);
+ return table;
}
-EXPORT_SYMBOL(pfn_to_page);
-EXPORT_SYMBOL(page_to_pfn);
-#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
/* Return a pointer to the bitmap storing bits affecting a block of pages */
static inline unsigned long *get_pageblock_bitmap(struct zone *zone,