#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
#include <linux/page_cgroup.h>
#include <linux/debugobjects.h>
#include <linux/kmemleak.h>
+#include <linux/memory.h>
+#include <trace/events/kmem.h>
+#include <linux/ftrace_event.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
unsigned long totalram_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
-unsigned long highest_memmap_pfn __read_mostly;
int percpu_pagelist_fraction;
+gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * The following functions are used by the suspend/hibernate code to temporarily
+ * change gfp_allowed_mask in order to avoid using I/O during memory allocations
+ * while devices are suspended. To avoid races with the suspend/hibernate code,
+ * they should always be called with pm_mutex held (gfp_allowed_mask also should
+ * only be modified with pm_mutex held, unless the suspend/hibernate code is
+ * guaranteed not to run in parallel with that modification).
+ */
+void set_gfp_allowed_mask(gfp_t mask)
+{
+ WARN_ON(!mutex_is_locked(&pm_mutex));
+ gfp_allowed_mask = mask;
+}
+
+gfp_t clear_gfp_allowed_mask(gfp_t mask)
+{
+ gfp_t ret = gfp_allowed_mask;
+
+ WARN_ON(!mutex_is_locked(&pm_mutex));
+ gfp_allowed_mask &= ~mask;
+ return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
int pageblock_order __read_mostly;
int min_free_kbytes = 1024;
-unsigned long __meminitdata nr_kernel_pages;
-unsigned long __meminitdata nr_all_pages;
+static unsigned long __meminitdata nr_kernel_pages;
+static unsigned long __meminitdata nr_all_pages;
static unsigned long __meminitdata dma_reserve;
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
static unsigned long nr_shown;
static unsigned long nr_unshown;
+ /* Don't complain about poisoned pages */
+ if (PageHWPoison(page)) {
+ __ClearPageBuddy(page);
+ return;
+ }
+
/*
* Allow a burst of 60 reports, then keep quiet for that minute;
* or allow a steady drip of one report per second.
printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n",
current->comm, page_to_pfn(page));
- printk(KERN_ALERT
- "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
- page, (void *)page->flags, page_count(page),
- page_mapcount(page), page->mapping, page->index);
+ dump_page(page);
dump_stack();
out:
zone->free_area[order].nr_free++;
}
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
/*
* free_page_mlock() -- clean up attempts to free and mlocked() page.
* Page should not be on lru, so no need to fix that up.
*/
static inline void free_page_mlock(struct page *page)
{
- __ClearPageMlocked(page);
__dec_zone_page_state(page, NR_MLOCK);
__count_vm_event(UNEVICTABLE_MLOCKFREED);
}
-#else
-static void free_page_mlock(struct page *page) { }
-#endif
static inline int free_pages_check(struct page *page)
{
}
/*
- * Frees a list of pages.
+ * Frees a number of pages from the PCP lists
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
* And clear the zone's pages_scanned counter, to hold off the "all pages are
* pinned" detection logic.
*/
-static void free_pages_bulk(struct zone *zone, int count,
- struct list_head *list, int order)
+static void free_pcppages_bulk(struct zone *zone, int count,
+ struct per_cpu_pages *pcp)
{
+ int migratetype = 0;
+ int batch_free = 0;
+
spin_lock(&zone->lock);
- zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
+ zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
- __mod_zone_page_state(zone, NR_FREE_PAGES, count << order);
- while (count--) {
+ __mod_zone_page_state(zone, NR_FREE_PAGES, count);
+ while (count) {
struct page *page;
+ struct list_head *list;
- VM_BUG_ON(list_empty(list));
- page = list_entry(list->prev, struct page, lru);
- /* have to delete it as __free_one_page list manipulates */
- list_del(&page->lru);
- __free_one_page(page, zone, order, page_private(page));
+ /*
+ * Remove pages from lists in a round-robin fashion. A
+ * batch_free count is maintained that is incremented when an
+ * empty list is encountered. This is so more pages are freed
+ * off fuller lists instead of spinning excessively around empty
+ * lists
+ */
+ do {
+ batch_free++;
+ if (++migratetype == MIGRATE_PCPTYPES)
+ migratetype = 0;
+ list = &pcp->lists[migratetype];
+ } while (list_empty(list));
+
+ do {
+ page = list_entry(list->prev, struct page, lru);
+ /* must delete as __free_one_page list manipulates */
+ list_del(&page->lru);
+ /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+ __free_one_page(page, zone, 0, page_private(page));
+ trace_mm_page_pcpu_drain(page, 0, page_private(page));
+ } while (--count && --batch_free && !list_empty(list));
}
spin_unlock(&zone->lock);
}
int migratetype)
{
spin_lock(&zone->lock);
- zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
+ zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
unsigned long flags;
int i;
int bad = 0;
- int clearMlocked = PageMlocked(page);
+ int wasMlocked = __TestClearPageMlocked(page);
+
+ trace_mm_page_free_direct(page, order);
+ kmemcheck_free_shadow(page, order);
for (i = 0 ; i < (1 << order) ; ++i)
bad += free_pages_check(page + i);
kernel_map_pages(page, 1 << order, 0);
local_irq_save(flags);
- if (unlikely(clearMlocked))
+ if (unlikely(wasMlocked))
free_page_mlock(page);
__count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, order,
/*
* This page is about to be returned from the page allocator
*/
-static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+static inline int check_new_page(struct page *page)
{
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
bad_page(page);
return 1;
}
+ return 0;
+}
+
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+{
+ int i;
+
+ for (i = 0; i < (1 << order); i++) {
+ struct page *p = page + i;
+ if (unlikely(check_new_page(p)))
+ return 1;
+ }
set_page_private(page, 0);
set_page_refcounted(page);
return move_freepages(zone, start_page, end_page, migratetype);
}
+static void change_pageblock_range(struct page *pageblock_page,
+ int start_order, int migratetype)
+{
+ int nr_pageblocks = 1 << (start_order - pageblock_order);
+
+ while (nr_pageblocks--) {
+ set_pageblock_migratetype(pageblock_page, migratetype);
+ pageblock_page += pageblock_nr_pages;
+ }
+}
+
/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
* agressive about taking ownership of free pages
*/
if (unlikely(current_order >= (pageblock_order >> 1)) ||
- start_migratetype == MIGRATE_RECLAIMABLE) {
+ start_migratetype == MIGRATE_RECLAIMABLE ||
+ page_group_by_mobility_disabled) {
unsigned long pages;
pages = move_freepages_block(zone, page,
start_migratetype);
/* Claim the whole block if over half of it is free */
- if (pages >= (1 << (pageblock_order-1)))
+ if (pages >= (1 << (pageblock_order-1)) ||
+ page_group_by_mobility_disabled)
set_pageblock_migratetype(page,
start_migratetype);
list_del(&page->lru);
rmv_page_order(page);
- if (current_order == pageblock_order)
- set_pageblock_migratetype(page,
+ /* Take ownership for orders >= pageblock_order */
+ if (current_order >= pageblock_order)
+ change_pageblock_range(page, current_order,
start_migratetype);
expand(zone, page, order, current_order, area, migratetype);
+
+ trace_mm_page_alloc_extfrag(page, order, current_order,
+ start_migratetype, migratetype);
+
return page;
}
}
}
}
+ trace_mm_page_alloc_zone_locked(page, order, migratetype);
return page;
}
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
- int migratetype)
+ int migratetype, int cold)
{
int i;
* merge IO requests if the physical pages are ordered
* properly.
*/
- list_add(&page->lru, list);
+ if (likely(cold == 0))
+ list_add(&page->lru, list);
+ else
+ list_add_tail(&page->lru, list);
set_page_private(page, migratetype);
list = &page->lru;
}
to_drain = pcp->batch;
else
to_drain = pcp->count;
- free_pages_bulk(zone, to_drain, &pcp->list, 0);
+ free_pcppages_bulk(zone, to_drain, pcp);
pcp->count -= to_drain;
local_irq_restore(flags);
}
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
- pset = zone_pcp(zone, cpu);
+ local_irq_save(flags);
+ pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- local_irq_save(flags);
- free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+ free_pcppages_bulk(zone, pcp->count, pcp);
pcp->count = 0;
local_irq_restore(flags);
}
/*
* Free a 0-order page
+ * cold == 1 ? free a cold page : free a hot page
*/
-static void free_hot_cold_page(struct page *page, int cold)
+void free_hot_cold_page(struct page *page, int cold)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
unsigned long flags;
- int clearMlocked = PageMlocked(page);
+ int migratetype;
+ int wasMlocked = __TestClearPageMlocked(page);
+
+ trace_mm_page_free_direct(page, 0);
+ kmemcheck_free_shadow(page, 0);
if (PageAnon(page))
page->mapping = NULL;
arch_free_page(page, 0);
kernel_map_pages(page, 1, 0);
- pcp = &zone_pcp(zone, get_cpu())->pcp;
- set_page_private(page, get_pageblock_migratetype(page));
+ migratetype = get_pageblock_migratetype(page);
+ set_page_private(page, migratetype);
local_irq_save(flags);
- if (unlikely(clearMlocked))
+ if (unlikely(wasMlocked))
free_page_mlock(page);
__count_vm_event(PGFREE);
+ /*
+ * We only track unmovable, reclaimable and movable on pcp lists.
+ * Free ISOLATE pages back to the allocator because they are being
+ * offlined but treat RESERVE as movable pages so we can get those
+ * areas back if necessary. Otherwise, we may have to free
+ * excessively into the page allocator
+ */
+ if (migratetype >= MIGRATE_PCPTYPES) {
+ if (unlikely(migratetype == MIGRATE_ISOLATE)) {
+ free_one_page(zone, page, 0, migratetype);
+ goto out;
+ }
+ migratetype = MIGRATE_MOVABLE;
+ }
+
+ pcp = &this_cpu_ptr(zone->pageset)->pcp;
if (cold)
- list_add_tail(&page->lru, &pcp->list);
+ list_add_tail(&page->lru, &pcp->lists[migratetype]);
else
- list_add(&page->lru, &pcp->list);
+ list_add(&page->lru, &pcp->lists[migratetype]);
pcp->count++;
if (pcp->count >= pcp->high) {
- free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+ free_pcppages_bulk(zone, pcp->batch, pcp);
pcp->count -= pcp->batch;
}
- local_irq_restore(flags);
- put_cpu();
-}
-void free_hot_page(struct page *page)
-{
- free_hot_cold_page(page, 0);
-}
-
-void free_cold_page(struct page *page)
-{
- free_hot_cold_page(page, 1);
+out:
+ local_irq_restore(flags);
}
/*
VM_BUG_ON(PageCompound(page));
VM_BUG_ON(!page_count(page));
+
+#ifdef CONFIG_KMEMCHECK
+ /*
+ * Split shadow pages too, because free(page[0]) would
+ * otherwise free the whole shadow.
+ */
+ if (kmemcheck_page_is_tracked(page))
+ split_page(virt_to_page(page[0].shadow), order);
+#endif
+
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
}
unsigned long flags;
struct page *page;
int cold = !!(gfp_flags & __GFP_COLD);
- int cpu;
again:
- cpu = get_cpu();
if (likely(order == 0)) {
struct per_cpu_pages *pcp;
+ struct list_head *list;
- pcp = &zone_pcp(zone, cpu)->pcp;
local_irq_save(flags);
- if (!pcp->count) {
- pcp->count = rmqueue_bulk(zone, 0,
- pcp->batch, &pcp->list, migratetype);
- if (unlikely(!pcp->count))
+ pcp = &this_cpu_ptr(zone->pageset)->pcp;
+ list = &pcp->lists[migratetype];
+ if (list_empty(list)) {
+ pcp->count += rmqueue_bulk(zone, 0,
+ pcp->batch, list,
+ migratetype, cold);
+ if (unlikely(list_empty(list)))
goto failed;
}
- /* Find a page of the appropriate migrate type */
- if (cold) {
- list_for_each_entry_reverse(page, &pcp->list, lru)
- if (page_private(page) == migratetype)
- break;
- } else {
- list_for_each_entry(page, &pcp->list, lru)
- if (page_private(page) == migratetype)
- break;
- }
-
- /* Allocate more to the pcp list if necessary */
- if (unlikely(&page->lru == &pcp->list)) {
- pcp->count += rmqueue_bulk(zone, 0,
- pcp->batch, &pcp->list, migratetype);
- page = list_entry(pcp->list.next, struct page, lru);
- }
+ if (cold)
+ page = list_entry(list->prev, struct page, lru);
+ else
+ page = list_entry(list->next, struct page, lru);
list_del(&page->lru);
pcp->count--;
* properly detect and handle allocation failures.
*
* We most definitely don't want callers attempting to
- * allocate greater than single-page units with
+ * allocate greater than order-1 page units with
* __GFP_NOFAIL.
*/
- WARN_ON_ONCE(order > 0);
+ WARN_ON_ONCE(order > 1);
}
spin_lock_irqsave(&zone->lock, flags);
page = __rmqueue(zone, order, migratetype);
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
spin_unlock(&zone->lock);
if (!page)
goto failed;
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
}
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone);
local_irq_restore(flags);
- put_cpu();
VM_BUG_ON(bad_range(zone, page));
if (prep_new_page(page, order, gfp_flags))
failed:
local_irq_restore(flags);
- put_cpu();
return NULL;
}
BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
unsigned long mark;
+ int ret;
+
mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
- if (!zone_watermark_ok(zone, order, mark,
- classzone_idx, alloc_flags)) {
- if (!zone_reclaim_mode ||
- !zone_reclaim(zone, gfp_mask, order))
+ if (zone_watermark_ok(zone, order, mark,
+ classzone_idx, alloc_flags))
+ goto try_this_zone;
+
+ if (zone_reclaim_mode == 0)
+ goto this_zone_full;
+
+ ret = zone_reclaim(zone, gfp_mask, order);
+ switch (ret) {
+ case ZONE_RECLAIM_NOSCAN:
+ /* did not scan */
+ goto try_next_zone;
+ case ZONE_RECLAIM_FULL:
+ /* scanned but unreclaimable */
+ goto this_zone_full;
+ default:
+ /* did we reclaim enough */
+ if (!zone_watermark_ok(zone, order, mark,
+ classzone_idx, alloc_flags))
goto this_zone_full;
}
}
+try_this_zone:
page = buffered_rmqueue(preferred_zone, zone, order,
gfp_mask, migratetype);
if (page)
if (page)
goto out;
- /* The OOM killer will not help higher order allocs */
- if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL))
- goto out;
-
+ if (!(gfp_mask & __GFP_NOFAIL)) {
+ /* The OOM killer will not help higher order allocs */
+ if (order > PAGE_ALLOC_COSTLY_ORDER)
+ goto out;
+ /*
+ * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
+ * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
+ * The caller should handle page allocation failure by itself if
+ * it specifies __GFP_THISNODE.
+ * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
+ */
+ if (gfp_mask & __GFP_THISNODE)
+ goto out;
+ }
/* Exhausted what can be done so it's blamo time */
- out_of_memory(zonelist, gfp_mask, order);
+ out_of_memory(zonelist, gfp_mask, order, nodemask);
out:
clear_zonelist_oom(zonelist, gfp_mask);
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
-
- /*
- * The task's cpuset might have expanded its set of allowable nodes
- */
p->flags |= PF_MEMALLOC;
lockdep_set_current_reclaim_state(gfp_mask);
reclaim_state.reclaimed_slab = 0;
preferred_zone, migratetype);
if (!page && gfp_mask & __GFP_NOFAIL)
- congestion_wait(WRITE, HZ/50);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (!page && (gfp_mask & __GFP_NOFAIL));
return page;
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/
alloc_flags &= ~ALLOC_CPUSET;
- } else if (unlikely(rt_task(p)))
+ } else if (unlikely(rt_task(p)) && !in_interrupt())
alloc_flags |= ALLOC_HARDER;
if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
* be using allocators in order of preference for an area that is
* too large.
*/
- if (WARN_ON_ONCE(order >= MAX_ORDER))
+ if (order >= MAX_ORDER) {
+ WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
return NULL;
+ }
/*
* GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
goto nopage;
+restart:
wake_all_kswapd(order, zonelist, high_zoneidx);
/*
*/
alloc_flags = gfp_to_alloc_flags(gfp_mask);
-restart:
/* This is the last chance, in general, before the goto nopage. */
page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
if (p->flags & PF_MEMALLOC)
goto nopage;
+ /* Avoid allocations with no watermarks from looping endlessly */
+ if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
+ goto nopage;
+
/* Try direct reclaim and then allocating */
page = __alloc_pages_direct_reclaim(gfp_mask, order,
zonelist, high_zoneidx,
pages_reclaimed += did_some_progress;
if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
/* Wait for some write requests to complete then retry */
- congestion_wait(WRITE, HZ/50);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
goto rebalance;
}
dump_stack();
show_mem();
}
+ return page;
got_pg:
+ if (kmemcheck_enabled)
+ kmemcheck_pagealloc_alloc(page, order, gfp_mask);
return page;
}
struct page *page;
int migratetype = allocflags_to_migratetype(gfp_mask);
+ gfp_mask &= gfp_allowed_mask;
+
lockdep_trace_alloc(gfp_mask);
might_sleep_if(gfp_mask & __GFP_WAIT);
zonelist, high_zoneidx, nodemask,
preferred_zone, migratetype);
+ trace_mm_page_alloc(page, order, gfp_mask, migratetype);
return page;
}
EXPORT_SYMBOL(__alloc_pages_nodemask);
*/
unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
{
- struct page * page;
+ struct page *page;
+
+ /*
+ * __get_free_pages() returns a 32-bit address, which cannot represent
+ * a highmem page
+ */
+ VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
+
page = alloc_pages(gfp_mask, order);
if (!page)
return 0;
return (unsigned long) page_address(page);
}
-
EXPORT_SYMBOL(__get_free_pages);
unsigned long get_zeroed_page(gfp_t gfp_mask)
{
- struct page * page;
-
- /*
- * get_zeroed_page() returns a 32-bit address, which cannot represent
- * a highmem page
- */
- VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
-
- page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
- if (page)
- return (unsigned long) page_address(page);
- return 0;
+ return __get_free_pages(gfp_mask | __GFP_ZERO, 0);
}
-
EXPORT_SYMBOL(get_zeroed_page);
void __pagevec_free(struct pagevec *pvec)
{
int i = pagevec_count(pvec);
- while (--i >= 0)
+ while (--i >= 0) {
+ trace_mm_pagevec_free(pvec->pages[i], pvec->cold);
free_hot_cold_page(pvec->pages[i], pvec->cold);
+ }
}
void __free_pages(struct page *page, unsigned int order)
{
if (put_page_testzero(page)) {
if (order == 0)
- free_hot_page(page);
+ free_hot_cold_page(page, 0);
else
__free_pages_ok(page, order);
}
unsigned long alloc_end = addr + (PAGE_SIZE << order);
unsigned long used = addr + PAGE_ALIGN(size);
- split_page(virt_to_page(addr), order);
+ split_page(virt_to_page((void *)addr), order);
while (used < alloc_end) {
free_page(used);
used += PAGE_SIZE;
for_each_online_cpu(cpu) {
struct per_cpu_pageset *pageset;
- pageset = zone_pcp(zone, cpu);
+ pageset = per_cpu_ptr(zone->pageset, cpu);
printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
cpu, pageset->pcp.high,
}
}
- printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
- " inactive_file:%lu"
+ printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
+ " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
" unevictable:%lu"
" dirty:%lu writeback:%lu unstable:%lu\n"
- " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
+ " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
+ " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
global_page_state(NR_ACTIVE_ANON),
- global_page_state(NR_ACTIVE_FILE),
global_page_state(NR_INACTIVE_ANON),
+ global_page_state(NR_ISOLATED_ANON),
+ global_page_state(NR_ACTIVE_FILE),
global_page_state(NR_INACTIVE_FILE),
+ global_page_state(NR_ISOLATED_FILE),
global_page_state(NR_UNEVICTABLE),
global_page_state(NR_FILE_DIRTY),
global_page_state(NR_WRITEBACK),
global_page_state(NR_UNSTABLE_NFS),
global_page_state(NR_FREE_PAGES),
- global_page_state(NR_SLAB_RECLAIMABLE) +
- global_page_state(NR_SLAB_UNRECLAIMABLE),
+ global_page_state(NR_SLAB_RECLAIMABLE),
+ global_page_state(NR_SLAB_UNRECLAIMABLE),
global_page_state(NR_FILE_MAPPED),
+ global_page_state(NR_SHMEM),
global_page_state(NR_PAGETABLE),
global_page_state(NR_BOUNCE));
" active_file:%lukB"
" inactive_file:%lukB"
" unevictable:%lukB"
+ " isolated(anon):%lukB"
+ " isolated(file):%lukB"
" present:%lukB"
+ " mlocked:%lukB"
+ " dirty:%lukB"
+ " writeback:%lukB"
+ " mapped:%lukB"
+ " shmem:%lukB"
+ " slab_reclaimable:%lukB"
+ " slab_unreclaimable:%lukB"
+ " kernel_stack:%lukB"
+ " pagetables:%lukB"
+ " unstable:%lukB"
+ " bounce:%lukB"
+ " writeback_tmp:%lukB"
" pages_scanned:%lu"
" all_unreclaimable? %s"
"\n",
K(zone_page_state(zone, NR_ACTIVE_FILE)),
K(zone_page_state(zone, NR_INACTIVE_FILE)),
K(zone_page_state(zone, NR_UNEVICTABLE)),
+ K(zone_page_state(zone, NR_ISOLATED_ANON)),
+ K(zone_page_state(zone, NR_ISOLATED_FILE)),
K(zone->present_pages),
+ K(zone_page_state(zone, NR_MLOCK)),
+ K(zone_page_state(zone, NR_FILE_DIRTY)),
+ K(zone_page_state(zone, NR_WRITEBACK)),
+ K(zone_page_state(zone, NR_FILE_MAPPED)),
+ K(zone_page_state(zone, NR_SHMEM)),
+ K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
+ K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
+ zone_page_state(zone, NR_KERNEL_STACK) *
+ THREAD_SIZE / 1024,
+ K(zone_page_state(zone, NR_PAGETABLE)),
+ K(zone_page_state(zone, NR_UNSTABLE_NFS)),
+ K(zone_page_state(zone, NR_BOUNCE)),
+ K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
zone->pages_scanned,
- (zone_is_all_unreclaimable(zone) ? "yes" : "no")
+ (zone->all_unreclaimable ? "yes" : "no")
);
printk("lowmem_reserve[]:");
for (i = 0; i < MAX_NR_ZONES; i++)
* sysctl handler for numa_zonelist_order
*/
int numa_zonelist_order_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length,
+ void __user *buffer, size_t *length,
loff_t *ppos)
{
char saved_string[NUMA_ZONELIST_ORDER_LEN];
int ret;
+ static DEFINE_MUTEX(zl_order_mutex);
+ mutex_lock(&zl_order_mutex);
if (write)
- strncpy(saved_string, (char*)table->data,
- NUMA_ZONELIST_ORDER_LEN);
- ret = proc_dostring(table, write, file, buffer, length, ppos);
+ strcpy(saved_string, (char*)table->data);
+ ret = proc_dostring(table, write, buffer, length, ppos);
if (ret)
- return ret;
+ goto out;
if (write) {
int oldval = user_zonelist_order;
if (__parse_numa_zonelist_order((char*)table->data)) {
} else if (oldval != user_zonelist_order)
build_all_zonelists();
}
- return 0;
+out:
+ mutex_unlock(&zl_order_mutex);
+ return ret;
}
prev_node = local_node;
nodes_clear(used_mask);
- memset(node_load, 0, sizeof(node_load));
memset(node_order, 0, sizeof(node_order));
j = 0;
#endif /* CONFIG_NUMA */
+/*
+ * Boot pageset table. One per cpu which is going to be used for all
+ * zones and all nodes. The parameters will be set in such a way
+ * that an item put on a list will immediately be handed over to
+ * the buddy list. This is safe since pageset manipulation is done
+ * with interrupts disabled.
+ *
+ * The boot_pagesets must be kept even after bootup is complete for
+ * unused processors and/or zones. They do play a role for bootstrapping
+ * hotplugged processors.
+ *
+ * zoneinfo_show() and maybe other functions do
+ * not check if the processor is online before following the pageset pointer.
+ * Other parts of the kernel may not check if the zone is available.
+ */
+static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
+static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
+
/* return values int ....just for stop_machine() */
static int __build_all_zonelists(void *dummy)
{
int nid;
+ int cpu;
+#ifdef CONFIG_NUMA
+ memset(node_load, 0, sizeof(node_load));
+#endif
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
build_zonelists(pgdat);
build_zonelist_cache(pgdat);
}
+
+ /*
+ * Initialize the boot_pagesets that are going to be used
+ * for bootstrapping processors. The real pagesets for
+ * each zone will be allocated later when the per cpu
+ * allocator is available.
+ *
+ * boot_pagesets are used also for bootstrapping offline
+ * cpus if the system is already booted because the pagesets
+ * are needed to initialize allocators on a specific cpu too.
+ * F.e. the percpu allocator needs the page allocator which
+ * needs the percpu allocator in order to allocate its pagesets
+ * (a chicken-egg dilemma).
+ */
+ for_each_possible_cpu(cpu)
+ setup_pageset(&per_cpu(boot_pageset, cpu), 0);
+
return 0;
}
{
unsigned long start_pfn, pfn, end_pfn;
struct page *page;
- unsigned long reserve, block_migratetype;
+ unsigned long block_migratetype;
+ int reserve;
/* Get the start pfn, end pfn and the number of blocks to reserve */
start_pfn = zone->zone_start_pfn;
reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
pageblock_order;
+ /*
+ * Reserve blocks are generally in place to help high-order atomic
+ * allocations that are short-lived. A min_free_kbytes value that
+ * would result in more than 2 reserve blocks for atomic allocations
+ * is assumed to be in place to help anti-fragmentation for the
+ * future allocation of hugepages at runtime.
+ */
+ reserve = min(2, reserve);
+
for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
if (!pfn_valid(pfn))
continue;
static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
{
struct per_cpu_pages *pcp;
+ int migratetype;
memset(p, 0, sizeof(*p));
pcp->count = 0;
pcp->high = 6 * batch;
pcp->batch = max(1UL, 1 * batch);
- INIT_LIST_HEAD(&pcp->list);
+ for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
+ INIT_LIST_HEAD(&pcp->lists[migratetype]);
}
/*
pcp->batch = PAGE_SHIFT * 8;
}
-
-#ifdef CONFIG_NUMA
-/*
- * Boot pageset table. One per cpu which is going to be used for all
- * zones and all nodes. The parameters will be set in such a way
- * that an item put on a list will immediately be handed over to
- * the buddy list. This is safe since pageset manipulation is done
- * with interrupts disabled.
- *
- * Some NUMA counter updates may also be caught by the boot pagesets.
- *
- * The boot_pagesets must be kept even after bootup is complete for
- * unused processors and/or zones. They do play a role for bootstrapping
- * hotplugged processors.
- *
- * zoneinfo_show() and maybe other functions do
- * not check if the processor is online before following the pageset pointer.
- * Other parts of the kernel may not check if the zone is available.
- */
-static struct per_cpu_pageset boot_pageset[NR_CPUS];
-
/*
- * Dynamically allocate memory for the
- * per cpu pageset array in struct zone.
+ * Allocate per cpu pagesets and initialize them.
+ * Before this call only boot pagesets were available.
+ * Boot pagesets will no longer be used by this processorr
+ * after setup_per_cpu_pageset().
*/
-static int __cpuinit process_zones(int cpu)
+void __init setup_per_cpu_pageset(void)
{
- struct zone *zone, *dzone;
- int node = cpu_to_node(cpu);
-
- node_set_state(node, N_CPU); /* this node has a cpu */
+ struct zone *zone;
+ int cpu;
for_each_populated_zone(zone) {
- zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
- GFP_KERNEL, node);
- if (!zone_pcp(zone, cpu))
- goto bad;
-
- setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
-
- if (percpu_pagelist_fraction)
- setup_pagelist_highmark(zone_pcp(zone, cpu),
- (zone->present_pages / percpu_pagelist_fraction));
- }
-
- return 0;
-bad:
- for_each_zone(dzone) {
- if (!populated_zone(dzone))
- continue;
- if (dzone == zone)
- break;
- kfree(zone_pcp(dzone, cpu));
- zone_pcp(dzone, cpu) = NULL;
- }
- return -ENOMEM;
-}
+ zone->pageset = alloc_percpu(struct per_cpu_pageset);
-static inline void free_zone_pagesets(int cpu)
-{
- struct zone *zone;
-
- for_each_zone(zone) {
- struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
- /* Free per_cpu_pageset if it is slab allocated */
- if (pset != &boot_pageset[cpu])
- kfree(pset);
- zone_pcp(zone, cpu) = NULL;
- }
-}
+ setup_pageset(pcp, zone_batchsize(zone));
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
-{
- int cpu = (long)hcpu;
- int ret = NOTIFY_OK;
-
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- if (process_zones(cpu))
- ret = NOTIFY_BAD;
- break;
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- free_zone_pagesets(cpu);
- break;
- default:
- break;
+ if (percpu_pagelist_fraction)
+ setup_pagelist_highmark(pcp,
+ (zone->present_pages /
+ percpu_pagelist_fraction));
+ }
}
- return ret;
-}
-
-static struct notifier_block __cpuinitdata pageset_notifier =
- { &pageset_cpuup_callback, NULL, 0 };
-
-void __init setup_per_cpu_pageset(void)
-{
- int err;
-
- /* Initialize per_cpu_pageset for cpu 0.
- * A cpuup callback will do this for every cpu
- * as it comes online
- */
- err = process_zones(smp_processor_id());
- BUG_ON(err);
- register_cpu_notifier(&pageset_notifier);
}
-#endif
-
static noinline __init_refok
int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
{
return 0;
}
-static __meminit void zone_pcp_init(struct zone *zone)
+static int __zone_pcp_update(void *data)
{
+ struct zone *zone = data;
int cpu;
- unsigned long batch = zone_batchsize(zone);
+ unsigned long batch = zone_batchsize(zone), flags;
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
-#ifdef CONFIG_NUMA
- /* Early boot. Slab allocator not functional yet */
- zone_pcp(zone, cpu) = &boot_pageset[cpu];
- setup_pageset(&boot_pageset[cpu],0);
-#else
- setup_pageset(zone_pcp(zone,cpu), batch);
-#endif
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_pageset *pset;
+ struct per_cpu_pages *pcp;
+
+ pset = per_cpu_ptr(zone->pageset, cpu);
+ pcp = &pset->pcp;
+
+ local_irq_save(flags);
+ free_pcppages_bulk(zone, pcp->count, pcp);
+ setup_pageset(pset, batch);
+ local_irq_restore(flags);
}
+ return 0;
+}
+
+void zone_pcp_update(struct zone *zone)
+{
+ stop_machine(__zone_pcp_update, zone, NULL);
+}
+
+static __meminit void zone_pcp_init(struct zone *zone)
+{
+ /*
+ * per cpu subsystem is not up at this point. The following code
+ * relies on the ability of the linker to provide the
+ * offset of a (static) per cpu variable into the per cpu area.
+ */
+ zone->pageset = &boot_pageset;
+
if (zone->present_pages)
- printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
- zone->name, zone->present_pages, batch);
+ printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n",
+ zone->name, zone->present_pages,
+ zone_batchsize(zone));
}
__meminit int init_currently_empty_zone(struct zone *zone,
}
}
+int __init add_from_early_node_map(struct range *range, int az,
+ int nr_range, int nid)
+{
+ int i;
+ u64 start, end;
+
+ /* need to go over early_node_map to find out good range for node */
+ for_each_active_range_index_in_nid(i, nid) {
+ start = early_node_map[i].start_pfn;
+ end = early_node_map[i].end_pfn;
+ nr_range = add_range(range, az, nr_range, start, end);
+ }
+ return nr_range;
+}
+
+#ifdef CONFIG_NO_BOOTMEM
+void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+ u64 goal, u64 limit)
+{
+ int i;
+ void *ptr;
+
+ /* need to go over early_node_map to find out good range for node */
+ for_each_active_range_index_in_nid(i, nid) {
+ u64 addr;
+ u64 ei_start, ei_last;
+
+ ei_last = early_node_map[i].end_pfn;
+ ei_last <<= PAGE_SHIFT;
+ ei_start = early_node_map[i].start_pfn;
+ ei_start <<= PAGE_SHIFT;
+ addr = find_early_area(ei_start, ei_last,
+ goal, limit, size, align);
+
+ if (addr == -1ULL)
+ continue;
+
+#if 0
+ printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
+ nid,
+ ei_start, ei_last, goal, limit, size,
+ align, addr);
+#endif
+
+ ptr = phys_to_virt(addr);
+ memset(ptr, 0, size);
+ reserve_early_without_check(addr, addr + size, "BOOTMEM");
+ return ptr;
+ }
+
+ return NULL;
+}
+#endif
+
+
void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
{
int i;
* Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
* then all holes in the requested range will be accounted for.
*/
-static unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __meminit __absent_pages_in_range(int nid,
unsigned long range_start_pfn,
unsigned long range_end_pfn)
{
zone_pcp_init(zone);
for_each_lru(l) {
INIT_LIST_HEAD(&zone->lru[l].list);
- zone->lru[l].nr_saved_scan = 0;
+ zone->reclaim_stat.nr_saved_scan[l] = 0;
}
zone->reclaim_stat.recent_rotated[0] = 0;
zone->reclaim_stat.recent_rotated[1] = 0;
}
/* Merge backward if suitable */
- if (start_pfn < early_node_map[i].end_pfn &&
+ if (start_pfn < early_node_map[i].start_pfn &&
end_pfn >= early_node_map[i].start_pfn) {
early_node_map[i].start_pfn = start_pfn;
return;
}
/* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
+void __init sort_node_map(void)
{
sort(early_node_map, (size_t)nr_nodemap_entries,
sizeof(struct node_active_region),
int i, nid;
unsigned long usable_startpfn;
unsigned long kernelcore_node, kernelcore_remaining;
+ /* save the state before borrow the nodemask */
+ nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
unsigned long totalpages = early_calculate_totalpages();
int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
/* If kernelcore was not specified, there is no ZONE_MOVABLE */
if (!required_kernelcore)
- return;
+ goto out;
/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
find_usable_zone_for_movable();
for (nid = 0; nid < MAX_NUMNODES; nid++)
zone_movable_pfn[nid] =
roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
+
+out:
+ /* restore the node_state */
+ node_states[N_HIGH_MEMORY] = saved_node_state;
}
/* Any regular memory on that node ? */
for (i = 0; i < MAX_NR_ZONES; i++) {
if (i == ZONE_MOVABLE)
continue;
- printk(" %-8s %0#10lx -> %0#10lx\n",
- zone_names[i],
+ printk(" %-8s ", zone_names[i]);
+ if (arch_zone_lowest_possible_pfn[i] ==
+ arch_zone_highest_possible_pfn[i])
+ printk("empty\n");
+ else
+ printk("%0#10lx -> %0#10lx\n",
arch_zone_lowest_possible_pfn[i],
arch_zone_highest_possible_pfn[i]);
}
early_node_map[i].start_pfn,
early_node_map[i].end_pfn);
- /*
- * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init
- * that node_mask, clear it at first
- */
- nodes_clear(node_states[N_HIGH_MEMORY]);
/* Initialise every node */
mminit_verify_pageflags_layout();
setup_nr_node_ids();
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
-struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] };
+struct pglist_data __refdata contig_page_data = {
+#ifndef CONFIG_NO_BOOTMEM
+ .bdata = &bootmem_node_data[0]
+#endif
+ };
EXPORT_SYMBOL(contig_page_data);
#endif
calculate_totalreserve_pages();
}
-/**
+/*
* The inactive anon list should be small enough that the VM never has to
* do too much work, but large enough that each inactive page has a chance
* to be referenced again before it is swapped out.
* changes.
*/
int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec(table, write, file, buffer, length, ppos);
+ proc_dointvec(table, write, buffer, length, ppos);
if (write)
setup_per_zone_wmarks();
return 0;
#ifdef CONFIG_NUMA
int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
int rc;
- rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;
}
int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
int rc;
- rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;
* if in function of the boot time zone sizes.
*/
int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ proc_dointvec_minmax(table, write, buffer, length, ppos);
setup_per_zone_lowmem_reserve();
return 0;
}
*/
int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
- struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+ void __user *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
unsigned int cpu;
int ret;
- ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (!write || (ret == -EINVAL))
return ret;
- for_each_zone(zone) {
- for_each_online_cpu(cpu) {
+ for_each_populated_zone(zone) {
+ for_each_possible_cpu(cpu) {
unsigned long high;
high = zone->present_pages / percpu_pagelist_fraction;
- setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+ setup_pagelist_highmark(
+ per_cpu_ptr(zone->pageset, cpu), high);
}
}
return 0;
numentries <<= (PAGE_SHIFT - scale);
/* Make sure we've got at least a 0-order allocation.. */
- if (unlikely((numentries * bucketsize) < PAGE_SIZE))
+ if (unlikely(flags & HASH_SMALL)) {
+ /* Makes no sense without HASH_EARLY */
+ WARN_ON(!(flags & HASH_EARLY));
+ if (!(numentries >> *_hash_shift)) {
+ numentries = 1UL << *_hash_shift;
+ BUG_ON(!numentries);
+ }
+ } else if (unlikely((numentries * bucketsize) < PAGE_SIZE))
numentries = PAGE_SIZE / bucketsize;
}
numentries = roundup_pow_of_two(numentries);
* some pages at the end of hash table which
* alloc_pages_exact() automatically does
*/
- if (get_order(size) < MAX_ORDER)
+ if (get_order(size) < MAX_ORDER) {
table = alloc_pages_exact(size, GFP_ATOMIC);
+ kmemleak_alloc(table, size, 1, GFP_ATOMIC);
+ }
}
} while (!table && size > PAGE_SIZE && --log2qty);
if (_hash_mask)
*_hash_mask = (1 << log2qty) - 1;
- /*
- * If hashdist is set, the table allocation is done with __vmalloc()
- * which invokes the kmemleak_alloc() callback. This function may also
- * be called before the slab and kmemleak are initialised when
- * kmemleak simply buffers the request to be executed later
- * (GFP_ATOMIC flag ignored in this case).
- */
- if (!hashdist)
- kmemleak_alloc(table, size, 1, GFP_ATOMIC);
-
return table;
}
int set_migratetype_isolate(struct page *page)
{
struct zone *zone;
- unsigned long flags;
+ struct page *curr_page;
+ unsigned long flags, pfn, iter;
+ unsigned long immobile = 0;
+ struct memory_isolate_notify arg;
+ int notifier_ret;
int ret = -EBUSY;
+ int zone_idx;
zone = page_zone(page);
+ zone_idx = zone_idx(zone);
+
spin_lock_irqsave(&zone->lock, flags);
+ if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+ zone_idx == ZONE_MOVABLE) {
+ ret = 0;
+ goto out;
+ }
+
+ pfn = page_to_pfn(page);
+ arg.start_pfn = pfn;
+ arg.nr_pages = pageblock_nr_pages;
+ arg.pages_found = 0;
+
/*
- * In future, more migrate types will be able to be isolation target.
+ * It may be possible to isolate a pageblock even if the
+ * migratetype is not MIGRATE_MOVABLE. The memory isolation
+ * notifier chain is used by balloon drivers to return the
+ * number of pages in a range that are held by the balloon
+ * driver to shrink memory. If all the pages are accounted for
+ * by balloons, are free, or on the LRU, isolation can continue.
+ * Later, for example, when memory hotplug notifier runs, these
+ * pages reported as "can be isolated" should be isolated(freed)
+ * by the balloon driver through the memory notifier chain.
*/
- if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
+ notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+ notifier_ret = notifier_to_errno(notifier_ret);
+ if (notifier_ret || !arg.pages_found)
goto out;
- set_pageblock_migratetype(page, MIGRATE_ISOLATE);
- move_freepages_block(zone, page, MIGRATE_ISOLATE);
- ret = 0;
+
+ for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+ if (!pfn_valid_within(pfn))
+ continue;
+
+ curr_page = pfn_to_page(iter);
+ if (!page_count(curr_page) || PageLRU(curr_page))
+ continue;
+
+ immobile++;
+ }
+
+ if (arg.pages_found == immobile)
+ ret = 0;
+
out:
+ if (!ret) {
+ set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+ move_freepages_block(zone, page, MIGRATE_ISOLATE);
+ }
+
spin_unlock_irqrestore(&zone->lock, flags);
if (!ret)
drain_all_pages();
spin_unlock_irqrestore(&zone->lock, flags);
}
#endif
+
+#ifdef CONFIG_MEMORY_FAILURE
+bool is_free_buddy_page(struct page *page)
+{
+ struct zone *zone = page_zone(page);
+ unsigned long pfn = page_to_pfn(page);
+ unsigned long flags;
+ int order;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ for (order = 0; order < MAX_ORDER; order++) {
+ struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+ if (PageBuddy(page_head) && page_order(page_head) >= order)
+ break;
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+
+ return order < MAX_ORDER;
+}
+#endif
+
+static struct trace_print_flags pageflag_names[] = {
+ {1UL << PG_locked, "locked" },
+ {1UL << PG_error, "error" },
+ {1UL << PG_referenced, "referenced" },
+ {1UL << PG_uptodate, "uptodate" },
+ {1UL << PG_dirty, "dirty" },
+ {1UL << PG_lru, "lru" },
+ {1UL << PG_active, "active" },
+ {1UL << PG_slab, "slab" },
+ {1UL << PG_owner_priv_1, "owner_priv_1" },
+ {1UL << PG_arch_1, "arch_1" },
+ {1UL << PG_reserved, "reserved" },
+ {1UL << PG_private, "private" },
+ {1UL << PG_private_2, "private_2" },
+ {1UL << PG_writeback, "writeback" },
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+ {1UL << PG_head, "head" },
+ {1UL << PG_tail, "tail" },
+#else
+ {1UL << PG_compound, "compound" },
+#endif
+ {1UL << PG_swapcache, "swapcache" },
+ {1UL << PG_mappedtodisk, "mappedtodisk" },
+ {1UL << PG_reclaim, "reclaim" },
+ {1UL << PG_buddy, "buddy" },
+ {1UL << PG_swapbacked, "swapbacked" },
+ {1UL << PG_unevictable, "unevictable" },
+#ifdef CONFIG_MMU
+ {1UL << PG_mlocked, "mlocked" },
+#endif
+#ifdef CONFIG_ARCH_USES_PG_UNCACHED
+ {1UL << PG_uncached, "uncached" },
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+ {1UL << PG_hwpoison, "hwpoison" },
+#endif
+ {-1UL, NULL },
+};
+
+static void dump_page_flags(unsigned long flags)
+{
+ const char *delim = "";
+ unsigned long mask;
+ int i;
+
+ printk(KERN_ALERT "page flags: %#lx(", flags);
+
+ /* remove zone id */
+ flags &= (1UL << NR_PAGEFLAGS) - 1;
+
+ for (i = 0; pageflag_names[i].name && flags; i++) {
+
+ mask = pageflag_names[i].mask;
+ if ((flags & mask) != mask)
+ continue;
+
+ flags &= ~mask;
+ printk("%s%s", delim, pageflag_names[i].name);
+ delim = "|";
+ }
+
+ /* check for left over flags */
+ if (flags)
+ printk("%s%#lx", delim, flags);
+
+ printk(")\n");
+}
+
+void dump_page(struct page *page)
+{
+ printk(KERN_ALERT
+ "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
+ page, page_count(page), page_mapcount(page),
+ page->mapping, page->index);
+ dump_page_flags(page->flags);
+}