tunnels: fix netns vs proto registration ordering

[safe/jmp/linux-2.6] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 250d055..d2a8889 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
  #include <linux/page_cgroup.h>
  #include <linux/debugobjects.h>
  #include <linux/kmemleak.h>
+#include <linux/memory.h>
  #include <trace/events/kmem.h>
  
  #include <asm/tlbflush.h>
@@ -486,7 +487,6 @@ static inline void __free_one_page(struct page *page,
         zone->free_area[order].nr_free++;
  }
  
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
  /*
   * free_page_mlock() -- clean up attempts to free and mlocked() page.
   * Page should not be on lru, so no need to fix that up.
@@ -497,9 +497,6 @@ static inline void free_page_mlock(struct page *page)
         __dec_zone_page_state(page, NR_MLOCK);
         __count_vm_event(UNEVICTABLE_MLOCKFREED);
  }
-#else
-static void free_page_mlock(struct page *page) { }
-#endif
  
  static inline int free_pages_check(struct page *page)
  {
@@ -1225,10 +1222,10 @@ again:
                 }
                 spin_lock_irqsave(&zone->lock, flags);
                 page = __rmqueue(zone, order, migratetype);
-               __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
                 spin_unlock(&zone->lock);
                 if (!page)
                         goto failed;
+               __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
         }
  
         __count_zone_vm_events(PGALLOC, zone, 1 << order);
@@ -1658,12 +1655,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         if (page)
                 goto out;
  
-       /* The OOM killer will not help higher order allocs */
-       if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL))
-               goto out;
-
+       if (!(gfp_mask & __GFP_NOFAIL)) {
+               /* The OOM killer will not help higher order allocs */
+               if (order > PAGE_ALLOC_COSTLY_ORDER)
+                       goto out;
+               /*
+                * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
+                * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
+                * The caller should handle page allocation failure by itself if
+                * it specifies __GFP_THISNODE.
+                * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
+                */
+               if (gfp_mask & __GFP_THISNODE)
+                       goto out;
+       }
         /* Exhausted what can be done so it's blamo time */
-       out_of_memory(zonelist, gfp_mask, order);
+       out_of_memory(zonelist, gfp_mask, order, nodemask);
  
  out:
         clear_zonelist_oom(zonelist, gfp_mask);
@@ -1769,7 +1776,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
                  * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
                  */
                 alloc_flags &= ~ALLOC_CPUSET;
-       } else if (unlikely(rt_task(p)))
+       } else if (unlikely(rt_task(p)) && !in_interrupt())
                 alloc_flags |= ALLOC_HARDER;
  
         if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
@@ -2395,13 +2402,14 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
  {
         char saved_string[NUMA_ZONELIST_ORDER_LEN];
         int ret;
+       static DEFINE_MUTEX(zl_order_mutex);
  
+       mutex_lock(&zl_order_mutex);
         if (write)
-               strncpy(saved_string, (char*)table->data,
-                       NUMA_ZONELIST_ORDER_LEN);
+               strcpy(saved_string, (char*)table->data);
         ret = proc_dostring(table, write, buffer, length, ppos);
         if (ret)
-               return ret;
+               goto out;
         if (write) {
                 int oldval = user_zonelist_order;
                 if (__parse_numa_zonelist_order((char*)table->data)) {
@@ -2414,7 +2422,9 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
                 } else if (oldval != user_zonelist_order)
                         build_all_zonelists();
         }
-       return 0;
+out:
+       mutex_unlock(&zl_order_mutex);
+       return ret;
  }
  
  
@@ -3127,7 +3137,7 @@ static int __cpuinit process_zones(int cpu)
  
                 if (percpu_pagelist_fraction)
                         setup_pagelist_highmark(zone_pcp(zone, cpu),
-                               (zone->present_pages / percpu_pagelist_fraction));
+                           (zone->present_pages / percpu_pagelist_fraction));
         }
  
         return 0;
@@ -3573,7 +3583,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
   * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
   * then all holes in the requested range will be accounted for.
   */
-static unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __meminit __absent_pages_in_range(int nid,
                                 unsigned long range_start_pfn,
                                 unsigned long range_end_pfn)
  {
@@ -3988,7 +3998,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
                 }
  
                 /* Merge backward if suitable */
-               if (start_pfn < early_node_map[i].end_pfn &&
+               if (start_pfn < early_node_map[i].start_pfn &&
                                 end_pfn >= early_node_map[i].start_pfn) {
                         early_node_map[i].start_pfn = start_pfn;
                         return;
@@ -4102,7 +4112,7 @@ static int __init cmp_node_active_region(const void *a, const void *b)
  }
  
  /* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
+void __init sort_node_map(void)
  {
         sort(early_node_map, (size_t)nr_nodemap_entries,
                         sizeof(struct node_active_region),
@@ -5002,23 +5012,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
  int set_migratetype_isolate(struct page *page)
  {
         struct zone *zone;
-       unsigned long flags;
+       struct page *curr_page;
+       unsigned long flags, pfn, iter;
+       unsigned long immobile = 0;
+       struct memory_isolate_notify arg;
+       int notifier_ret;
         int ret = -EBUSY;
         int zone_idx;
  
         zone = page_zone(page);
         zone_idx = zone_idx(zone);
+
         spin_lock_irqsave(&zone->lock, flags);
+       if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+           zone_idx == ZONE_MOVABLE) {
+               ret = 0;
+               goto out;
+       }
+
+       pfn = page_to_pfn(page);
+       arg.start_pfn = pfn;
+       arg.nr_pages = pageblock_nr_pages;
+       arg.pages_found = 0;
+
         /*
-        * In future, more migrate types will be able to be isolation target.
+        * It may be possible to isolate a pageblock even if the
+        * migratetype is not MIGRATE_MOVABLE. The memory isolation
+        * notifier chain is used by balloon drivers to return the
+        * number of pages in a range that are held by the balloon
+        * driver to shrink memory. If all the pages are accounted for
+        * by balloons, are free, or on the LRU, isolation can continue.
+        * Later, for example, when memory hotplug notifier runs, these
+        * pages reported as "can be isolated" should be isolated(freed)
+        * by the balloon driver through the memory notifier chain.
          */
-       if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
-           zone_idx != ZONE_MOVABLE)
+       notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+       notifier_ret = notifier_to_errno(notifier_ret);
+       if (notifier_ret || !arg.pages_found)
                 goto out;
-       set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-       move_freepages_block(zone, page, MIGRATE_ISOLATE);
-       ret = 0;
+
+       for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+               if (!pfn_valid_within(pfn))
+                       continue;
+
+               curr_page = pfn_to_page(iter);
+               if (!page_count(curr_page) || PageLRU(curr_page))
+                       continue;
+
+               immobile++;
+       }
+
+       if (arg.pages_found == immobile)
+               ret = 0;
+
  out:
+       if (!ret) {
+               set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+               move_freepages_block(zone, page, MIGRATE_ISOLATE);
+       }
+
         spin_unlock_irqrestore(&zone->lock, flags);
         if (!ret)
                 drain_all_pages();
@@ -5085,3 +5137,24 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
         spin_unlock_irqrestore(&zone->lock, flags);
  }
  #endif
+
+#ifdef CONFIG_MEMORY_FAILURE
+bool is_free_buddy_page(struct page *page)     /* true if some order's aligned head passes the PageBuddy test below */
+{
+       struct zone *zone = page_zone(page);
+       unsigned long pfn = page_to_pfn(page);
+       unsigned long flags;
+       int order;
+
+       spin_lock_irqsave(&zone->lock, flags);  /* scan runs under zone->lock, irq state saved in flags */
+       for (order = 0; order < MAX_ORDER; order++) {   /* try every order from 0 up to MAX_ORDER - 1 */
+               struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+               if (PageBuddy(page_head) && page_order(page_head) >= order)     /* page_head is page stepped back by pfn's low 'order' bits */
+                       break;
+       }
+       spin_unlock_irqrestore(&zone->lock, flags);
+
+       return order < MAX_ORDER;       /* an early break leaves order below MAX_ORDER */
+}
+#endif