S390: Update comments about why we don't use <asm-generic/statfs.h>

[safe/jmp/linux-2.6] / mm / vmscan.c
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 106ba10..1ff1a58 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -38,6 +38,7 @@
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  #include <linux/memcontrol.h>
+#include <linux/delayacct.h>
  
  #include <asm/tlbflush.h>
  #include <asm/div64.h>
@@ -70,13 +71,6 @@ struct scan_control {
  
         int order;
  
-       /*
-        * Pages that have (or should have) IO pending.  If we run into
-        * a lot of these, we're better off waiting a little for IO to
-        * finish rather than scanning more pages in the VM.
-        */
-       int nr_io_pages;
-
         /* Which cgroup do we reclaim from */
         struct mem_cgroup *mem_cgroup;
  
@@ -198,7 +192,7 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
                 shrinker->nr += delta;
                 if (shrinker->nr < 0) {
                         printk(KERN_ERR "%s: nr=%ld\n",
-                                       __FUNCTION__, shrinker->nr);
+                                       __func__, shrinker->nr);
                         shrinker->nr = max_pass;
                 }
  
@@ -346,7 +340,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
                 if (PagePrivate(page)) {
                         if (try_to_free_buffers(page)) {
                                 ClearPageDirty(page);
-                               printk("%s: orphaned page\n", __FUNCTION__);
+                               printk("%s: orphaned page\n", __func__);
                                 return PAGE_CLEAN;
                         }
                 }
@@ -397,17 +391,15 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
  }
  
  /*
- * Attempt to detach a locked page from its ->mapping.  If it is dirty or if
- * someone else has a ref on the page, abort and return 0.  If it was
- * successfully detached, return 1.  Assumes the caller has a single ref on
- * this page.
+ * Same as remove_mapping, but if the page is removed from the mapping, it
+ * gets returned with a refcount of 0.
   */
-int remove_mapping(struct address_space *mapping, struct page *page)
+static int __remove_mapping(struct address_space *mapping, struct page *page)
  {
         BUG_ON(!PageLocked(page));
         BUG_ON(mapping != page_mapping(page));
  
-       write_lock_irq(&mapping->tree_lock);
+       spin_lock_irq(&mapping->tree_lock);
         /*
          * The non racy check for a busy page.
          *
@@ -433,28 +425,48 @@ int remove_mapping(struct address_space *mapping, struct page *page)
          * Note that if SetPageDirty is always performed via set_page_dirty,
          * and thus under tree_lock, then this ordering is not required.
          */
-       if (unlikely(page_count(page) != 2))
+       if (!page_freeze_refs(page, 2))
                 goto cannot_free;
-       smp_rmb();
-       if (unlikely(PageDirty(page)))
+       /* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
+       if (unlikely(PageDirty(page))) {
+               page_unfreeze_refs(page, 2);
                 goto cannot_free;
+       }
  
         if (PageSwapCache(page)) {
                 swp_entry_t swap = { .val = page_private(page) };
                 __delete_from_swap_cache(page);
-               write_unlock_irq(&mapping->tree_lock);
+               spin_unlock_irq(&mapping->tree_lock);
                 swap_free(swap);
-               __put_page(page);       /* The pagecache ref */
-               return 1;
+       } else {
+               __remove_from_page_cache(page);
+               spin_unlock_irq(&mapping->tree_lock);
         }
  
-       __remove_from_page_cache(page);
-       write_unlock_irq(&mapping->tree_lock);
-       __put_page(page);
         return 1;
  
  cannot_free:
-       write_unlock_irq(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
+       return 0;
+}
+
+/*
+ * Attempt to detach a locked page from its ->mapping.  Returns 1 if the page
+ * was detached and 0 if __remove_mapping() refused (page dirty, or someone
+ * else holds a ref).  Assumes the caller has a single ref on this page;
+ * on success that is the one ref left once the count is unfrozen below.
+ */
+int remove_mapping(struct address_space *mapping, struct page *page)
+{
+       if (__remove_mapping(mapping, page)) {
+               /*
+                * __remove_mapping() succeeded, so the refcount is still
+                * frozen.  Unfreezing it with 1 rather than 2 effectively
+                * drops the pagecache ref for us without requiring another
+                * atomic operation.
+                */
+               page_unfreeze_refs(page, 1);
+               return 1;
+       }
         return 0;
  }
  
@@ -484,7 +496,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 page = lru_to_page(page_list);
                 list_del(&page->lru);
  
-               if (TestSetPageLocked(page))
+               if (!trylock_page(page))
                         goto keep;
  
                 VM_BUG_ON(PageActive(page));
@@ -512,10 +524,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                          */
                         if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
                                 wait_on_page_writeback(page);
-                       else {
-                               sc->nr_io_pages++;
+                       else
                                 goto keep_locked;
-                       }
                 }
  
                 referenced = page_referenced(page, 1, sc->mem_cgroup);
@@ -554,10 +564,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 if (PageDirty(page)) {
                         if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
                                 goto keep_locked;
-                       if (!may_enter_fs) {
-                               sc->nr_io_pages++;
+                       if (!may_enter_fs)
                                 goto keep_locked;
-                       }
                         if (!sc->may_writepage)
                                 goto keep_locked;
  
@@ -568,15 +576,13 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                         case PAGE_ACTIVATE:
                                 goto activate_locked;
                         case PAGE_SUCCESS:
-                               if (PageWriteback(page) || PageDirty(page)) {
-                                       sc->nr_io_pages++;
+                               if (PageWriteback(page) || PageDirty(page))
                                         goto keep;
-                               }
                                 /*
                                  * A synchronous write - probably a ramdisk.  Go
                                  * ahead and try to reclaim the page.
                                  */
-                               if (TestSetPageLocked(page))
+                               if (!trylock_page(page))
                                         goto keep;
                                 if (PageDirty(page) || PageWriteback(page))
                                         goto keep_locked;
@@ -610,18 +616,34 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 if (PagePrivate(page)) {
                         if (!try_to_release_page(page, sc->gfp_mask))
                                 goto activate_locked;
-                       if (!mapping && page_count(page) == 1)
-                               goto free_it;
+                       if (!mapping && page_count(page) == 1) {
+                               unlock_page(page);
+                               if (put_page_testzero(page))
+                                       goto free_it;
+                               else {
+                                       /*
+                                        * Rare race with a speculative
+                                        * reference.  Its holder will free
+                                        * this page shortly, so we may
+                                        * increment nr_reclaimed here (and
+                                        * leave it off the LRU).
+                                        */
+                                       nr_reclaimed++;
+                                       continue;
+                               }
+                       }
                 }
  
-               if (!mapping || !remove_mapping(mapping, page))
+               if (!mapping || !__remove_mapping(mapping, page))
                         goto keep_locked;
  
-free_it:
                 unlock_page(page);
+free_it:
                 nr_reclaimed++;
-               if (!pagevec_add(&freed_pvec, page))
-                       __pagevec_release_nonlru(&freed_pvec);
+               if (!pagevec_add(&freed_pvec, page)) {
+                       __pagevec_free(&freed_pvec);
+                       pagevec_reinit(&freed_pvec);
+               }
                 continue;
  
  activate_locked:
@@ -635,7 +657,7 @@ keep:
         }
         list_splice(&ret_pages, page_list);
         if (pagevec_count(&freed_pvec))
-               __pagevec_release_nonlru(&freed_pvec);
+               __pagevec_free(&freed_pvec);
         count_vm_events(PGACTIVATE, pgactivate);
         return nr_reclaimed;
  }
@@ -1128,7 +1150,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
                 ClearPageActive(page);
  
                 list_move(&page->lru, &zone->inactive_list);
-               mem_cgroup_move_lists(page_get_page_cgroup(page), false);
+               mem_cgroup_move_lists(page, false);
                 pgmoved++;
                 if (!pagevec_add(&pvec, page)) {
                         __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
@@ -1156,8 +1178,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
                 VM_BUG_ON(PageLRU(page));
                 SetPageLRU(page);
                 VM_BUG_ON(!PageActive(page));
+
                 list_move(&page->lru, &zone->active_list);
-               mem_cgroup_move_lists(page_get_page_cgroup(page), true);
+               mem_cgroup_move_lists(page, true);
                 pgmoved++;
                 if (!pagevec_add(&pvec, page)) {
                         __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
@@ -1258,17 +1281,16 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
   * If a zone is deemed to be full of pinned pages then just give it a light
   * scan then give up on it.
   */
-static unsigned long shrink_zones(int priority, struct zone **zones,
+static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
                                         struct scan_control *sc)
  {
+       enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
         unsigned long nr_reclaimed = 0;
-       int i;
-
+       struct zoneref *z;
+       struct zone *zone;
  
         sc->all_unreclaimable = 1;
-       for (i = 0; zones[i] != NULL; i++) {
-               struct zone *zone = zones[i];
-
+       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
                 if (!populated_zone(zone))
                         continue;
                 /*
@@ -1312,17 +1334,24 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
   * hope that some of these pages can be written.  But if the allocating task
   * holds filesystem locks which prevent writeout this might not work, and the
   * allocation attempt will fail.
+ *
+ * returns:    0, if no pages reclaimed
+ *             else, the number of pages reclaimed
   */
-static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
-                                         struct scan_control *sc)
+static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
+                                       struct scan_control *sc)
  {
         int priority;
-       int ret = 0;
+       unsigned long ret = 0;
         unsigned long total_scanned = 0;
         unsigned long nr_reclaimed = 0;
         struct reclaim_state *reclaim_state = current->reclaim_state;
         unsigned long lru_pages = 0;
-       int i;
+       struct zoneref *z;
+       struct zone *zone;
+       enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
+
+       delayacct_freepages_start();
  
         if (scan_global_lru(sc))
                 count_vm_event(ALLOCSTALL);
@@ -1330,8 +1359,7 @@ static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
          * mem_cgroup will not do shrink_slab.
          */
         if (scan_global_lru(sc)) {
-               for (i = 0; zones[i] != NULL; i++) {
-                       struct zone *zone = zones[i];
+               for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
  
                         if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                 continue;
@@ -1343,16 +1371,15 @@ static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
  
         for (priority = DEF_PRIORITY; priority >= 0; priority--) {
                 sc->nr_scanned = 0;
-               sc->nr_io_pages = 0;
                 if (!priority)
                         disable_swap_token();
-               nr_reclaimed += shrink_zones(priority, zones, sc);
+               nr_reclaimed += shrink_zones(priority, zonelist, sc);
                 /*
                  * Don't shrink slabs when reclaiming memory from
                  * over limit cgroups
                  */
                 if (scan_global_lru(sc)) {
-                       shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
+                       shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
                         if (reclaim_state) {
                                 nr_reclaimed += reclaim_state->reclaimed_slab;
                                 reclaim_state->reclaimed_slab = 0;
@@ -1360,7 +1387,7 @@ static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
                 }
                 total_scanned += sc->nr_scanned;
                 if (nr_reclaimed >= sc->swap_cluster_max) {
-                       ret = 1;
+                       ret = nr_reclaimed;
                         goto out;
                 }
  
@@ -1378,13 +1405,12 @@ static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
                 }
  
                 /* Take a nap, wait for some writeback to complete */
-               if (sc->nr_scanned && priority < DEF_PRIORITY - 2 &&
-                               sc->nr_io_pages > sc->swap_cluster_max)
+               if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
                         congestion_wait(WRITE, HZ/10);
         }
-       /* top priority shrink_caches still had more to do? don't OOM, then */
+       /* top priority shrink_zones still had more to do? don't OOM, then */
         if (!sc->all_unreclaimable && scan_global_lru(sc))
-               ret = 1;
+               ret = nr_reclaimed;
  out:
         /*
          * Now that we've scanned all the zones at this priority level, note
@@ -1397,8 +1423,7 @@ out:
                 priority = 0;
  
         if (scan_global_lru(sc)) {
-               for (i = 0; zones[i] != NULL; i++) {
-                       struct zone *zone = zones[i];
+               for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
  
                         if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                 continue;
@@ -1408,10 +1433,13 @@ out:
         } else
                 mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
  
+       delayacct_freepages_end();
+
         return ret;
  }
  
-unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
+                                                               gfp_t gfp_mask)
  {
         struct scan_control sc = {
                 .gfp_mask = gfp_mask,
@@ -1424,7 +1452,7 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
                 .isolate_pages = isolate_pages_global,
         };
  
-       return do_try_to_free_pages(zones, gfp_mask, &sc);
+       return do_try_to_free_pages(zonelist, &sc);
  }
  
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
@@ -1433,7 +1461,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                                                 gfp_t gfp_mask)
  {
         struct scan_control sc = {
-               .gfp_mask = gfp_mask,
                 .may_writepage = !laptop_mode,
                 .may_swap = 1,
                 .swap_cluster_max = SWAP_CLUSTER_MAX,
@@ -1442,13 +1469,12 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                 .mem_cgroup = mem_cont,
                 .isolate_pages = mem_cgroup_isolate_pages,
         };
-       struct zone **zones;
-       int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE);
+       struct zonelist *zonelist;
  
-       zones = NODE_DATA(numa_node_id())->node_zonelists[target_zone].zones;
-       if (do_try_to_free_pages(zones, sc.gfp_mask, &sc))
-               return 1;
-       return 0;
+       sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+                       (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
+       zonelist = NODE_DATA(numa_node_id())->node_zonelists;
+       return do_try_to_free_pages(zonelist, &sc);
  }
  #endif
  
@@ -1513,7 +1539,6 @@ loop_again:
                 if (!priority)
                         disable_swap_token();
  
-               sc.nr_io_pages = 0;
                 all_zones_ok = 1;
  
                 /*
@@ -1606,8 +1631,7 @@ loop_again:
                  * OK, kswapd is getting into trouble.  Take a nap, then take
                  * another pass across the zones.
                  */
-               if (total_scanned && priority < DEF_PRIORITY - 2 &&
-                                       sc.nr_io_pages > sc.swap_cluster_max)
+               if (total_scanned && priority < DEF_PRIORITY - 2)
                         congestion_wait(WRITE, HZ/10);
  
                 /*
@@ -1663,11 +1687,10 @@ static int kswapd(void *p)
         struct reclaim_state reclaim_state = {
                 .reclaimed_slab = 0,
         };
-       cpumask_t cpumask;
+       node_to_cpumask_ptr(cpumask, pgdat->node_id);
  
-       cpumask = node_to_cpumask(pgdat->node_id);
-       if (!cpus_empty(cpumask))
-               set_cpus_allowed(tsk, cpumask);
+       if (!cpus_empty(*cpumask))
+               set_cpus_allowed_ptr(tsk, cpumask);
         current->reclaim_state = &reclaim_state;
  
         /*
@@ -1896,17 +1919,16 @@ out:
  static int __devinit cpu_callback(struct notifier_block *nfb,
                                   unsigned long action, void *hcpu)
  {
-       pg_data_t *pgdat;
-       cpumask_t mask;
         int nid;
  
         if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
                 for_each_node_state(nid, N_HIGH_MEMORY) {
-                       pgdat = NODE_DATA(nid);
-                       mask = node_to_cpumask(pgdat->node_id);
-                       if (any_online_cpu(mask) != NR_CPUS)
+                       pg_data_t *pgdat = NODE_DATA(nid);
+                       node_to_cpumask_ptr(mask, pgdat->node_id);
+
+                       if (any_online_cpu(*mask) < nr_cpu_ids)
                                 /* One of our CPUs online: restore mask */
-                               set_cpus_allowed(pgdat->kswapd, mask);
+                               set_cpus_allowed_ptr(pgdat->kswapd, mask);
                 }
         }
         return NOTIFY_OK;
@@ -1957,7 +1979,7 @@ module_init(kswapd_init)
  int zone_reclaim_mode __read_mostly;
  
  #define RECLAIM_OFF 0
-#define RECLAIM_ZONE (1<<0)    /* Run shrink_cache on the zone */
+#define RECLAIM_ZONE (1<<0)    /* Run shrink_inactive_list on the zone */
  #define RECLAIM_WRITE (1<<1)   /* Writeout pages during reclaim */
  #define RECLAIM_SWAP (1<<2)    /* Swap pages out during reclaim */