sparsemem: on no vmemmap path put mem_map on node high too
[safe/jmp/linux-2.6] / mm / page_alloc.c
index 0734bed..596180f 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/kmemleak.h>
 #include <linux/memory.h>
 #include <trace/events/kmem.h>
+#include <linux/ftrace_event.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -76,6 +77,31 @@ unsigned long totalreserve_pages __read_mostly;
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
+#ifdef CONFIG_PM_SLEEP
+/*
+ * The following functions are used by the suspend/hibernate code to temporarily
+ * change gfp_allowed_mask in order to avoid using I/O during memory allocations
+ * while devices are suspended.  To avoid races with the suspend/hibernate code,
+ * they should always be called with pm_mutex held (gfp_allowed_mask also should
+ * only be modified with pm_mutex held, unless the suspend/hibernate code is
+ * guaranteed not to run in parallel with that modification).
+ */
+void set_gfp_allowed_mask(gfp_t mask)
+{
+       WARN_ON(!mutex_is_locked(&pm_mutex));
+       gfp_allowed_mask = mask;
+}
+
+gfp_t clear_gfp_allowed_mask(gfp_t mask)
+{
+       gfp_t ret = gfp_allowed_mask;
+
+       WARN_ON(!mutex_is_locked(&pm_mutex));
+       gfp_allowed_mask &= ~mask;
+       return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 int pageblock_order __read_mostly;
 #endif
@@ -263,10 +289,7 @@ static void bad_page(struct page *page)
 
        printk(KERN_ALERT "BUG: Bad page state in process %s  pfn:%05lx\n",
                current->comm, page_to_pfn(page));
-       printk(KERN_ALERT
-               "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
-               page, (void *)page->flags, page_count(page),
-               page_mapcount(page), page->mapping, page->index);
+       dump_page(page);
 
        dump_stack();
 out:
@@ -452,6 +475,8 @@ static inline void __free_one_page(struct page *page,
                int migratetype)
 {
        unsigned long page_idx;
+       unsigned long combined_idx;
+       struct page *buddy;
 
        if (unlikely(PageCompound(page)))
                if (unlikely(destroy_compound_page(page, order)))
@@ -465,9 +490,6 @@ static inline void __free_one_page(struct page *page,
        VM_BUG_ON(bad_range(zone, page));
 
        while (order < MAX_ORDER-1) {
-               unsigned long combined_idx;
-               struct page *buddy;
-
                buddy = __page_find_buddy(page, page_idx, order);
                if (!page_is_buddy(page, buddy, order))
                        break;
@@ -482,8 +504,29 @@ static inline void __free_one_page(struct page *page,
                order++;
        }
        set_page_order(page, order);
-       list_add(&page->lru,
-               &zone->free_area[order].free_list[migratetype]);
+
+       /*
+        * If this is not the largest possible page, check if the buddy
+        * of the next-highest order is free. If it is, it's possible
+        * that pages are being freed that will coalesce soon. In case,
+        * that is happening, add the free page to the tail of the list
+        * so it's less likely to be used soon and more likely to be merged
+        * as a higher order page
+        */
+       if ((order < MAX_ORDER-1) && pfn_valid_within(page_to_pfn(buddy))) {
+               struct page *higher_page, *higher_buddy;
+               combined_idx = __find_combined_index(page_idx, order);
+               higher_page = page + combined_idx - page_idx;
+               higher_buddy = __page_find_buddy(higher_page, combined_idx, order + 1);
+               if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
+                       list_add_tail(&page->lru,
+                               &zone->free_area[order].free_list[migratetype]);
+                       goto out;
+               }
+       }
+
+       list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);
+out:
        zone->free_area[order].nr_free++;
 }
 
@@ -2556,7 +2599,7 @@ static int default_zonelist_order(void)
        struct zone *z;
        int average_size;
        /*
-         * ZONE_DMA and ZONE_DMA32 can be very small area in the sytem.
+         * ZONE_DMA and ZONE_DMA32 can be very small area in the system.
         * If they are really small and used heavily, the system can fall
         * into OOM very easily.
         * This function detect ZONE_DMA/DMA32 size and confgigures zone order.
@@ -3199,7 +3242,7 @@ static int __zone_pcp_update(void *data)
        int cpu;
        unsigned long batch = zone_batchsize(zone), flags;
 
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+       for_each_possible_cpu(cpu) {
                struct per_cpu_pageset *pset;
                struct per_cpu_pages *pcp;
 
@@ -4367,8 +4410,12 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
        for (i = 0; i < MAX_NR_ZONES; i++) {
                if (i == ZONE_MOVABLE)
                        continue;
-               printk("  %-8s %0#10lx -> %0#10lx\n",
-                               zone_names[i],
+               printk("  %-8s ", zone_names[i]);
+               if (arch_zone_lowest_possible_pfn[i] ==
+                               arch_zone_highest_possible_pfn[i])
+                       printk("empty\n");
+               else
+                       printk("%0#10lx -> %0#10lx\n",
                                arch_zone_lowest_possible_pfn[i],
                                arch_zone_highest_possible_pfn[i]);
        }
@@ -5154,3 +5201,80 @@ bool is_free_buddy_page(struct page *page)
        return order < MAX_ORDER;
 }
 #endif
+
+static struct trace_print_flags pageflag_names[] = {
+       {1UL << PG_locked,              "locked"        },
+       {1UL << PG_error,               "error"         },
+       {1UL << PG_referenced,          "referenced"    },
+       {1UL << PG_uptodate,            "uptodate"      },
+       {1UL << PG_dirty,               "dirty"         },
+       {1UL << PG_lru,                 "lru"           },
+       {1UL << PG_active,              "active"        },
+       {1UL << PG_slab,                "slab"          },
+       {1UL << PG_owner_priv_1,        "owner_priv_1"  },
+       {1UL << PG_arch_1,              "arch_1"        },
+       {1UL << PG_reserved,            "reserved"      },
+       {1UL << PG_private,             "private"       },
+       {1UL << PG_private_2,           "private_2"     },
+       {1UL << PG_writeback,           "writeback"     },
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+       {1UL << PG_head,                "head"          },
+       {1UL << PG_tail,                "tail"          },
+#else
+       {1UL << PG_compound,            "compound"      },
+#endif
+       {1UL << PG_swapcache,           "swapcache"     },
+       {1UL << PG_mappedtodisk,        "mappedtodisk"  },
+       {1UL << PG_reclaim,             "reclaim"       },
+       {1UL << PG_buddy,               "buddy"         },
+       {1UL << PG_swapbacked,          "swapbacked"    },
+       {1UL << PG_unevictable,         "unevictable"   },
+#ifdef CONFIG_MMU
+       {1UL << PG_mlocked,             "mlocked"       },
+#endif
+#ifdef CONFIG_ARCH_USES_PG_UNCACHED
+       {1UL << PG_uncached,            "uncached"      },
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+       {1UL << PG_hwpoison,            "hwpoison"      },
+#endif
+       {-1UL,                          NULL            },
+};
+
+static void dump_page_flags(unsigned long flags)
+{
+       const char *delim = "";
+       unsigned long mask;
+       int i;
+
+       printk(KERN_ALERT "page flags: %#lx(", flags);
+
+       /* remove zone id */
+       flags &= (1UL << NR_PAGEFLAGS) - 1;
+
+       for (i = 0; pageflag_names[i].name && flags; i++) {
+
+               mask = pageflag_names[i].mask;
+               if ((flags & mask) != mask)
+                       continue;
+
+               flags &= ~mask;
+               printk("%s%s", delim, pageflag_names[i].name);
+               delim = "|";
+       }
+
+       /* check for left over flags */
+       if (flags)
+               printk("%s%#lx", delim, flags);
+
+       printk(")\n");
+}
+
+void dump_page(struct page *page)
+{
+       printk(KERN_ALERT
+              "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
+               page, page_count(page), page_mapcount(page),
+               page->mapping, page->index);
+       dump_page_flags(page->flags);
+}