badpage: replace page_remove_rmap Eeek and BUG
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4125230..3acb216 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -44,7 +44,7 @@
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
-#include <linux/memcontrol.h>
+#include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 
 #include <asm/tlbflush.h>
@@ -69,7 +69,6 @@ EXPORT_SYMBOL(node_states);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
-long nr_swap_pages;
 int percpu_pagelist_fraction;
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -223,24 +222,18 @@ static inline int bad_range(struct zone *zone, struct page *page)
 
 static void bad_page(struct page *page)
 {
-       void *pc = page_get_page_cgroup(page);
+       printk(KERN_EMERG "Bad page state in process %s  pfn:%05lx\n",
+               current->comm, page_to_pfn(page));
+       printk(KERN_EMERG
+               "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
+               page, (void *)page->flags, page_count(page),
+               page_mapcount(page), page->mapping, page->index);
+       printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n");
 
-       printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
-               "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-               current->comm, page, (int)(2*sizeof(unsigned long)),
-               (unsigned long)page->flags, page->mapping,
-               page_mapcount(page), page_count(page));
-       if (pc) {
-               printk(KERN_EMERG "cgroup:%p\n", pc);
-               page_reset_bad_cgroup(page);
-       }
-       printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
-               KERN_EMERG "Backtrace:\n");
        dump_stack();
-       page->flags &= ~PAGE_FLAGS_CLEAR_WHEN_BAD;
-       set_page_count(page, 0);
-       reset_page_mapcount(page);
-       page->mapping = NULL;
+
+       /* Leave bad fields for debug, except PageBuddy could make trouble */
+       __ClearPageBuddy(page);
        add_taint(TAINT_BAD_PAGE);
 }
 
@@ -268,40 +261,60 @@ void prep_compound_page(struct page *page, unsigned long order)
 {
        int i;
        int nr_pages = 1 << order;
-       struct page *p = page + 1;
 
        set_compound_page_dtor(page, free_compound_page);
        set_compound_order(page, order);
        __SetPageHead(page);
-       for (i = 1; i < nr_pages; i++, p++) {
-               if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
-                       p = pfn_to_page(page_to_pfn(page) + i);
+       for (i = 1; i < nr_pages; i++) {
+               struct page *p = page + i;
+
                __SetPageTail(p);
                p->first_page = page;
        }
 }
 
-static void destroy_compound_page(struct page *page, unsigned long order)
+#ifdef CONFIG_HUGETLBFS
+void prep_compound_gigantic_page(struct page *page, unsigned long order)
 {
        int i;
        int nr_pages = 1 << order;
        struct page *p = page + 1;
 
-       if (unlikely(compound_order(page) != order))
+       set_compound_page_dtor(page, free_compound_page);
+       set_compound_order(page, order);
+       __SetPageHead(page);
+       for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+               __SetPageTail(p);
+               p->first_page = page;
+       }
+}
+#endif
+
+static int destroy_compound_page(struct page *page, unsigned long order)
+{
+       int i;
+       int nr_pages = 1 << order;
+       int bad = 0;
+
+       if (unlikely(compound_order(page) != order) ||
+           unlikely(!PageHead(page))) {
                bad_page(page);
+               bad++;
+       }
 
-       if (unlikely(!PageHead(page)))
-                       bad_page(page);
        __ClearPageHead(page);
-       for (i = 1; i < nr_pages; i++, p++) {
-               if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
-                       p = pfn_to_page(page_to_pfn(page) + i);
 
-               if (unlikely(!PageTail(p) |
-                               (p->first_page != page)))
+       for (i = 1; i < nr_pages; i++) {
+               struct page *p = page + i;
+
+               if (unlikely(!PageTail(p) | (p->first_page != page))) {
                        bad_page(page);
+                       bad++;
+               }
                __ClearPageTail(p);
        }
+
+       return bad;
 }
 
 static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
@@ -421,7 +434,8 @@ static inline void __free_one_page(struct page *page,
        int migratetype = get_pageblock_migratetype(page);
 
        if (unlikely(PageCompound(page)))
-               destroy_compound_page(page, order);
+               if (unlikely(destroy_compound_page(page, order)))
+                       return;
 
        page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 
@@ -454,22 +468,17 @@ static inline void __free_one_page(struct page *page,
 
 static inline int free_pages_check(struct page *page)
 {
+       free_page_mlock(page);
        if (unlikely(page_mapcount(page) |
                (page->mapping != NULL)  |
-               (page_get_page_cgroup(page) != NULL) |
                (page_count(page) != 0)  |
-               (page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
+               (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) {
                bad_page(page);
-       if (PageDirty(page))
-               __ClearPageDirty(page);
-       if (PageSwapBacked(page))
-               __ClearPageSwapBacked(page);
-       /*
-        * For now, we report if PG_reserved was found set, but do not
-        * clear it, and do not free the page.  But we shall soon need
-        * to do more, for when the ZERO_PAGE count wraps negative.
-        */
-       return PageReserved(page);
+               return 1;
+       }
+       if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
+               page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+       return 0;
 }
 
 /*
@@ -514,11 +523,11 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 {
        unsigned long flags;
        int i;
-       int reserved = 0;
+       int bad = 0;
 
        for (i = 0 ; i < (1 << order) ; ++i)
-               reserved += free_pages_check(page + i);
-       if (reserved)
+               bad += free_pages_check(page + i);
+       if (bad)
                return;
 
        if (!PageHighMem(page)) {
@@ -602,21 +611,12 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 {
        if (unlikely(page_mapcount(page) |
                (page->mapping != NULL)  |
-               (page_get_page_cgroup(page) != NULL) |
                (page_count(page) != 0)  |
-               (page->flags & PAGE_FLAGS_CHECK_AT_PREP)))
+               (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) {
                bad_page(page);
-
-       /*
-        * For now, we report if PG_reserved was found set, but do not
-        * clear it, and do not allocate the page: as a safety net.
-        */
-       if (PageReserved(page))
                return 1;
+       }
 
-       page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
-                       1 << PG_referenced | 1 << PG_arch_1 |
-                       1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
        set_page_private(page, 0);
        set_page_refcounted(page);
 
@@ -1549,6 +1549,10 @@ nofail_alloc:
 
        /* We now go into synchronous reclaim */
        cpuset_memory_pressure_bump();
+       /*
+        * The task's cpuset might have expanded its set of allowable nodes
+        */
+       cpuset_update_task_memory_state();
        p->flags |= PF_MEMALLOC;
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
@@ -3365,10 +3369,8 @@ static void __init setup_usemap(struct pglist_data *pgdat,
 {
        unsigned long usemapsize = usemap_size(zonesize);
        zone->pageblock_flags = NULL;
-       if (usemapsize) {
+       if (usemapsize)
                zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
-               memset(zone->pageblock_flags, 0, usemapsize);
-       }
 }
 #else
 static void inline setup_usemap(struct pglist_data *pgdat,
@@ -3433,6 +3435,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
        pgdat->nr_zones = 0;
        init_waitqueue_head(&pgdat->kswapd_wait);
        pgdat->kswapd_max_order = 0;
+       pgdat_page_cgroup_init(pgdat);
        
        for (j = 0; j < MAX_NR_ZONES; j++) {
                struct zone *zone = pgdat->node_zones + j;
@@ -3452,9 +3455,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                        PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
                if (realsize >= memmap_pages) {
                        realsize -= memmap_pages;
-                       mminit_dprintk(MMINIT_TRACE, "memmap_init",
-                               "%s zone: %lu pages used for memmap\n",
-                               zone_names[j], memmap_pages);
+                       if (memmap_pages)
+                               printk(KERN_DEBUG
+                                      "  %s zone: %lu pages used for memmap\n",
+                                      zone_names[j], memmap_pages);
                } else
                        printk(KERN_WARNING
                                "  %s zone: %lu pages exceeds realsize %lu\n",
@@ -3463,8 +3467,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                /* Account for reserved pages */
                if (j == 0 && realsize > dma_reserve) {
                        realsize -= dma_reserve;
-                       mminit_dprintk(MMINIT_TRACE, "memmap_init",
-                                       "%s zone: %lu pages reserved\n",
+                       printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
                                        zone_names[0], dma_reserve);
                }
 
@@ -4238,7 +4241,7 @@ void setup_per_zone_pages_min(void)
        for_each_zone(zone) {
                u64 tmp;
 
-               spin_lock_irqsave(&zone->lru_lock, flags);
+               spin_lock_irqsave(&zone->lock, flags);
                tmp = (u64)pages_min * zone->present_pages;
                do_div(tmp, lowmem_pages);
                if (is_highmem(zone)) {
@@ -4270,7 +4273,7 @@ void setup_per_zone_pages_min(void)
                zone->pages_low   = zone->pages_min + (tmp >> 2);
                zone->pages_high  = zone->pages_min + (tmp >> 1);
                setup_zone_migrate_reserve(zone);
-               spin_unlock_irqrestore(&zone->lru_lock, flags);
+               spin_unlock_irqrestore(&zone->lock, flags);
        }
 
        /* update totalreserve_pages */
@@ -4300,7 +4303,7 @@ void setup_per_zone_pages_min(void)
  *    1TB     101        10GB
  *   10TB     320        32GB
  */
-void setup_per_zone_inactive_ratio(void)
+static void setup_per_zone_inactive_ratio(void)
 {
        struct zone *zone;