cassini: Only use chip checksum for ipv4 packets.

[safe/jmp/linux-2.6] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 4ccb865..6383557 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -45,6 +45,7 @@
  #include <linux/fault-inject.h>
  #include <linux/page-isolation.h>
  #include <linux/memcontrol.h>
+#include <linux/debugobjects.h>
  
  #include <asm/tlbflush.h>
  #include <asm/div64.h>
@@ -532,8 +533,11 @@ static void __free_pages_ok(struct page *page, unsigned int order)
         if (reserved)
                 return;
  
-       if (!PageHighMem(page))
+       if (!PageHighMem(page)) {
                 debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order);
+               debug_check_no_obj_freed(page_address(page),
+                                          PAGE_SIZE << order);
+       }
         arch_free_page(page, order);
         kernel_map_pages(page, 1 << order, 0);
  
@@ -546,7 +550,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
  /*
   * permit the bootmem allocator to evade page validation on high-order frees
   */
-void __init __free_pages_bootmem(struct page *page, unsigned int order)
+void __free_pages_bootmem(struct page *page, unsigned int order)
  {
         if (order == 0) {
                 __ClearPageReserved(page);
@@ -632,7 +636,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
         if (PageReserved(page))
                 return 1;
  
-       page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_readahead |
+       page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
                         1 << PG_referenced | 1 << PG_arch_1 |
                         1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
         set_page_private(page, 0);
@@ -995,8 +999,10 @@ static void free_hot_cold_page(struct page *page, int cold)
         if (free_pages_check(page))
                 return;
  
-       if (!PageHighMem(page))
+       if (!PageHighMem(page)) {
                 debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
+               debug_check_no_obj_freed(page_address(page), PAGE_SIZE);
+       }
         arch_free_page(page, 0);
         kernel_map_pages(page, 1, 0);
  
@@ -1284,7 +1290,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
         if (!zlc)
                 return NULL;
  
-       if (time_after(jiffies, zlc->last_full_zap + HZ)) {
+       if (time_after(jiffies, zlc->last_full_zap + HZ)) {
                 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
                 zlc->last_full_zap = jiffies;
         }
@@ -1317,7 +1323,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
   * We are low on memory in the second scan, and should leave no stone
   * unturned looking for a free page.
   */
-static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
+static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
                                                 nodemask_t *allowednodes)
  {
         struct zonelist_cache *zlc;     /* cached zonelist speedup info */
@@ -1328,7 +1334,7 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
         if (!zlc)
                 return 1;
  
-       i = z - zonelist->zones;
+       i = z - zonelist->_zonerefs;
         n = zlc->z_to_n[i];
  
         /* This zone is worth trying if it is allowed but not full */
@@ -1340,7 +1346,7 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
   * zlc->fullzones, so that subsequent attempts to allocate a page
   * from that zone don't waste time re-examining it.
   */
-static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
+static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
  {
         struct zonelist_cache *zlc;     /* cached zonelist speedup info */
         int i;                          /* index of *z in zonelist zones */
@@ -1349,7 +1355,7 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
         if (!zlc)
                 return;
  
-       i = z - zonelist->zones;
+       i = z - zonelist->_zonerefs;
  
         set_bit(i, zlc->fullzones);
  }
@@ -1361,13 +1367,13 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
         return NULL;
  }
  
-static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
+static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
                                 nodemask_t *allowednodes)
  {
         return 1;
  }
  
-static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
+static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
  {
  }
  #endif /* CONFIG_NUMA */
@@ -1377,10 +1383,10 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
   * a page.
   */
  static struct page *
-get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
                 struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
  {
-       struct zone **z;
+       struct zoneref *z;
         struct page *page = NULL;
         int classzone_idx;
         struct zone *zone, *preferred_zone;
@@ -1388,16 +1394,17 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
         int zlc_active = 0;             /* set if using zonelist_cache */
         int did_zlc_setup = 0;          /* just call zlc_setup() one time */
  
-       z = first_zones_zonelist(zonelist, high_zoneidx);
-       classzone_idx = zone_idx(*z);
-       preferred_zone = *z;
+       (void)first_zones_zonelist(zonelist, high_zoneidx, nodemask,
+                                                       &preferred_zone);
+       classzone_idx = zone_idx(preferred_zone);
  
  zonelist_scan:
         /*
          * Scan zonelist, looking for a zone with enough free.
          * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
          */
-       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+       for_each_zone_zonelist_nodemask(zone, z, zonelist,
+                                               high_zoneidx, nodemask) {
                 if (NUMA_BUILD && zlc_active &&
                         !zlc_zone_worth_trying(zonelist, z, allowednodes))
                                 continue;
@@ -1447,19 +1454,21 @@ try_next_zone:
  /*
   * This is the 'heart' of the zoned buddy allocator.
   */
-struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
-               struct zonelist *zonelist)
+static struct page *
+__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
+                       struct zonelist *zonelist, nodemask_t *nodemask)
  {
         const gfp_t wait = gfp_mask & __GFP_WAIT;
         enum zone_type high_zoneidx = gfp_zone(gfp_mask);
-       struct zone **z;
+       struct zoneref *z;
+       struct zone *zone;
         struct page *page;
         struct reclaim_state reclaim_state;
         struct task_struct *p = current;
         int do_retry;
         int alloc_flags;
-       int did_some_progress;
+       unsigned long did_some_progress;
+       unsigned long pages_reclaimed = 0;
  
         might_sleep_if(wait);
  
@@ -1467,9 +1476,9 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
                 return NULL;
  
  restart:
-       z = zonelist->zones;  /* the list of zones suitable for gfp_mask */
+       z = zonelist->_zonerefs;  /* the list of zones suitable for gfp_mask */
  
-       if (unlikely(*z == NULL)) {
+       if (unlikely(!z->zone)) {
                 /*
                  * Happens if we have an empty zonelist as a result of
                  * GFP_THISNODE being used on a memoryless node
@@ -1477,7 +1486,7 @@ restart:
                 return NULL;
         }
  
-       page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+       page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                         zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
         if (page)
                 goto got_pg;
@@ -1493,8 +1502,8 @@ restart:
         if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
                 goto nopage;
  
-       for (z = zonelist->zones; *z; z++)
-               wakeup_kswapd(*z, order);
+       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+               wakeup_kswapd(zone, order);
  
         /*
          * OK, we're below the kswapd watermark and have kicked background
@@ -1522,7 +1531,7 @@ restart:
          * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
          * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
          */
-       page = get_page_from_freelist(gfp_mask, order, zonelist,
+       page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
                                                 high_zoneidx, alloc_flags);
         if (page)
                 goto got_pg;
@@ -1535,7 +1544,7 @@ rebalance:
                 if (!(gfp_mask & __GFP_NOMEMALLOC)) {
  nofail_alloc:
                         /* go through the zonelist yet again, ignoring mins */
-                       page = get_page_from_freelist(gfp_mask, order,
+                       page = get_page_from_freelist(gfp_mask, nodemask, order,
                                 zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
                         if (page)
                                 goto got_pg;
@@ -1570,12 +1579,12 @@ nofail_alloc:
                 drain_all_pages();
  
         if (likely(did_some_progress)) {
-               page = get_page_from_freelist(gfp_mask, order,
+               page = get_page_from_freelist(gfp_mask, nodemask, order,
                                         zonelist, high_zoneidx, alloc_flags);
                 if (page)
                         goto got_pg;
         } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
-               if (!try_set_zone_oom(zonelist)) {
+               if (!try_set_zone_oom(zonelist, gfp_mask)) {
                         schedule_timeout_uninterruptible(1);
                         goto restart;
                 }
@@ -1586,21 +1595,22 @@ nofail_alloc:
                  * a parallel oom killing, we must fail if we're still
                  * under heavy pressure.
                  */
-               page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-                       zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+               page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
+                       order, zonelist, high_zoneidx,
+                       ALLOC_WMARK_HIGH|ALLOC_CPUSET);
                 if (page) {
-                       clear_zonelist_oom(zonelist);
+                       clear_zonelist_oom(zonelist, gfp_mask);
                         goto got_pg;
                 }
  
                 /* The OOM killer will not help higher order allocs so fail */
                 if (order > PAGE_ALLOC_COSTLY_ORDER) {
-                       clear_zonelist_oom(zonelist);
+                       clear_zonelist_oom(zonelist, gfp_mask);
                         goto nopage;
                 }
  
                 out_of_memory(zonelist, gfp_mask, order);
-               clear_zonelist_oom(zonelist);
+               clear_zonelist_oom(zonelist, gfp_mask);
                 goto restart;
         }
  
@@ -1608,14 +1618,26 @@ nofail_alloc:
          * Don't let big-order allocations loop unless the caller explicitly
          * requests that.  Wait for some write requests to complete then retry.
          *
-        * In this implementation, __GFP_REPEAT means __GFP_NOFAIL for order
-        * <= 3, but that may not be true in other implementations.
+        * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER
+        * means __GFP_NOFAIL, but that may not be true in other
+        * implementations.
+        *
+        * For order > PAGE_ALLOC_COSTLY_ORDER, if __GFP_REPEAT is
+        * specified, then we retry until we no longer reclaim any pages
+        * (above), or we've reclaimed at least (1 << order) pages, i.e.
+        * as many pages as the allocation needs. In both cases, if the
+        * allocation still fails, we stop retrying.
          */
+       pages_reclaimed += did_some_progress;
         do_retry = 0;
         if (!(gfp_mask & __GFP_NORETRY)) {
-               if ((order <= PAGE_ALLOC_COSTLY_ORDER) ||
-                                               (gfp_mask & __GFP_REPEAT))
+               if (order <= PAGE_ALLOC_COSTLY_ORDER) {
                         do_retry = 1;
+               } else {
+                       if (gfp_mask & __GFP_REPEAT &&
+                               pages_reclaimed < (1 << order))
+                                       do_retry = 1;
+               }
                 if (gfp_mask & __GFP_NOFAIL)
                         do_retry = 1;
         }
@@ -1636,6 +1658,20 @@ got_pg:
         return page;
  }
  
+struct page *
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
+               struct zonelist *zonelist)
+{
+       return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
+}
+
+struct page *
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
+               struct zonelist *zonelist, nodemask_t *nodemask)
+{
+       return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
+}
+
  EXPORT_SYMBOL(__alloc_pages);
  
  /*
@@ -1702,7 +1738,7 @@ EXPORT_SYMBOL(free_pages);
  
  static unsigned int nr_free_zone_pages(int offset)
  {
-       struct zone **z;
+       struct zoneref *z;
         struct zone *zone;
  
         /* Just pick one node, since fallback list is circular */
@@ -1879,6 +1915,12 @@ void show_free_areas(void)
         show_swap_cache_info();
  }
  
+static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
+{
+       zoneref->zone = zone;
+       zoneref->zone_idx = zone_idx(zone);
+}
+
  /*
   * Builds allocation fallback zone lists.
   *
@@ -1896,7 +1938,8 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
                 zone_type--;
                 zone = pgdat->node_zones + zone_type;
                 if (populated_zone(zone)) {
-                       zonelist->zones[nr_zones++] = zone;
+                       zoneref_set_zone(zone,
+                               &zonelist->_zonerefs[nr_zones++]);
                         check_highest_zone(zone_type);
                 }
  
@@ -2072,11 +2115,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
         struct zonelist *zonelist;
  
         zonelist = &pgdat->node_zonelists[0];
-       for (j = 0; zonelist->zones[j] != NULL; j++)
+       for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
                 ;
         j = build_zonelists_node(NODE_DATA(node), zonelist, j,
                                                         MAX_NR_ZONES - 1);
-       zonelist->zones[j] = NULL;
+       zonelist->_zonerefs[j].zone = NULL;
+       zonelist->_zonerefs[j].zone_idx = 0;
  }
  
  /*
@@ -2089,7 +2133,8 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
  
         zonelist = &pgdat->node_zonelists[1];
         j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
-       zonelist->zones[j] = NULL;
+       zonelist->_zonerefs[j].zone = NULL;
+       zonelist->_zonerefs[j].zone_idx = 0;
  }
  
  /*
@@ -2114,12 +2159,14 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
                         node = node_order[j];
                         z = &NODE_DATA(node)->node_zones[zone_type];
                         if (populated_zone(z)) {
-                               zonelist->zones[pos++] = z;
+                               zoneref_set_zone(z,
+                                       &zonelist->_zonerefs[pos++]);
                                 check_highest_zone(zone_type);
                         }
                 }
         }
-       zonelist->zones[pos] = NULL;
+       zonelist->_zonerefs[pos].zone = NULL;
+       zonelist->_zonerefs[pos].zone_idx = 0;
  }
  
  static int default_zonelist_order(void)
@@ -2196,7 +2243,8 @@ static void build_zonelists(pg_data_t *pgdat)
         /* initialize zonelists */
         for (i = 0; i < MAX_ZONELISTS; i++) {
                 zonelist = pgdat->node_zonelists + i;
-               zonelist->zones[0] = NULL;
+               zonelist->_zonerefs[0].zone = NULL;
+               zonelist->_zonerefs[0].zone_idx = 0;
         }
  
         /* NUMA-aware ordering of nodes */
@@ -2248,13 +2296,13 @@ static void build_zonelist_cache(pg_data_t *pgdat)
  {
         struct zonelist *zonelist;
         struct zonelist_cache *zlc;
-       struct zone **z;
+       struct zoneref *z;
  
         zonelist = &pgdat->node_zonelists[0];
         zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
         bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
-       for (z = zonelist->zones; *z; z++)
-               zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
+       for (z = zonelist->_zonerefs; z->zone; z++)
+               zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
  }
  
  
@@ -2297,7 +2345,8 @@ static void build_zonelists(pg_data_t *pgdat)
                                                         MAX_NR_ZONES - 1);
         }
  
-       zonelist->zones[j] = NULL;
+       zonelist->_zonerefs[j].zone = NULL;
+       zonelist->_zonerefs[j].zone_idx = 0;
  }
  
  /* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
@@ -2494,7 +2543,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
         struct page *page;
         unsigned long end_pfn = start_pfn + size;
         unsigned long pfn;
+       struct zone *z;
  
+       z = &NODE_DATA(nid)->node_zones[zone];
         for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                 /*
                  * There can be holes in boot-time mem_map[]s
@@ -2512,7 +2563,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                 init_page_count(page);
                 reset_page_mapcount(page);
                 SetPageReserved(page);
-
                 /*
                  * Mark the block movable so that blocks are reserved for
                  * movable at startup. This will force kernel allocations
@@ -2521,8 +2571,15 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                  * kernel allocations are made. Later some blocks near
                  * the start are marked MIGRATE_RESERVE by
                  * setup_zone_migrate_reserve()
+                *
+                * The bitmap only covers the zone's valid pfn range, but
+                * the memmap may also cover invalid pages (for alignment).
+                * Check the range here so that we do not call
+                * set_pageblock_migratetype() on a pfn outside the zone.
                  */
-               if ((pfn & (pageblock_nr_pages-1)))
+               if ((z->zone_start_pfn <= pfn)
+                   && (pfn < z->zone_start_pfn + z->spanned_pages)
+                   && !(pfn & (pageblock_nr_pages - 1)))
                         set_pageblock_migratetype(page, MIGRATE_MOVABLE);
  
                 INIT_LIST_HEAD(&page->lru);
@@ -2805,8 +2862,6 @@ __meminit int init_currently_empty_zone(struct zone *zone,
  
         zone->zone_start_pfn = zone_start_pfn;
  
-       memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
-
         zone_init_free_lists(zone);
  
         return 0;
@@ -3376,6 +3431,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                 ret = init_currently_empty_zone(zone, zone_start_pfn,
                                                 size, MEMMAP_EARLY);
                 BUG_ON(ret);
+               memmap_init(size, nid, j, zone_start_pfn);
                 zone_start_pfn += size;
         }
  }
@@ -4315,9 +4371,7 @@ void *__init alloc_large_system_hash(const char *tablename,
                 else if (hashdist)
                         table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
                 else {
-                       unsigned long order;
-                       for (order = 0; ((1UL << order) << PAGE_SHIFT) < size; order++)
-                               ;
+                       unsigned long order = get_order(size);
                         table = (void*) __get_free_pages(GFP_ATOMIC, order);
                         /*
                          * If bucketsize is not a power-of-two, we may free
@@ -4436,6 +4490,8 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
         pfn = page_to_pfn(page);
         bitmap = get_pageblock_bitmap(zone, pfn);
         bitidx = pfn_to_bitidx(zone, pfn);
+       VM_BUG_ON(pfn < zone->zone_start_pfn);
+       VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages);
  
         for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
                 if (flags & value)