nfsd: nfsd should drop CAP_MKNOD for non-root
[safe/jmp/linux-2.6] / mm / vmscan.c
index 03ca923..56ddf41 100644 (file)
@@ -125,17 +125,30 @@ static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
-#define scan_global_lru(sc)    (!(sc)->mem_cgroup)
+#define scanning_global_lru(sc)        (!(sc)->mem_cgroup)
 #else
-#define scan_global_lru(sc)    (1)
+#define scanning_global_lru(sc)        (1)
 #endif
 
 static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
                                                  struct scan_control *sc)
 {
+       if (!scanning_global_lru(sc))
+               return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
+
        return &zone->reclaim_stat;
 }
 
+static unsigned long zone_nr_pages(struct zone *zone, struct scan_control *sc,
+                                  enum lru_list lru)
+{
+       if (!scanning_global_lru(sc))
+               return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
+
+       return zone_page_state(zone, NR_LRU_BASE + lru);
+}
+
+
 /*
  * Add a shrinker callback to be called from the vm
  */
@@ -1077,17 +1090,14 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                __mod_zone_page_state(zone, NR_INACTIVE_ANON,
                                                -count[LRU_INACTIVE_ANON]);
 
-               if (scan_global_lru(sc)) {
+               if (scanning_global_lru(sc))
                        zone->pages_scanned += nr_scan;
-                       reclaim_stat->recent_scanned[0] +=
-                                                     count[LRU_INACTIVE_ANON];
-                       reclaim_stat->recent_scanned[0] +=
-                                                     count[LRU_ACTIVE_ANON];
-                       reclaim_stat->recent_scanned[1] +=
-                                                     count[LRU_INACTIVE_FILE];
-                       reclaim_stat->recent_scanned[1] +=
-                                                     count[LRU_ACTIVE_FILE];
-               }
+
+               reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
+               reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
+               reclaim_stat->recent_scanned[1] += count[LRU_INACTIVE_FILE];
+               reclaim_stat->recent_scanned[1] += count[LRU_ACTIVE_FILE];
+
                spin_unlock_irq(&zone->lru_lock);
 
                nr_scanned += nr_scan;
@@ -1119,7 +1129,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                if (current_is_kswapd()) {
                        __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
                        __count_vm_events(KSWAPD_STEAL, nr_freed);
-               } else if (scan_global_lru(sc))
+               } else if (scanning_global_lru(sc))
                        __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
 
                __count_zone_vm_events(PGSTEAL, zone, nr_freed);
@@ -1145,7 +1155,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                        SetPageLRU(page);
                        lru = page_lru(page);
                        add_page_to_lru_list(zone, page, lru);
-                       if (PageActive(page) && scan_global_lru(sc)) {
+                       if (PageActive(page)) {
                                int file = !!page_is_file_cache(page);
                                reclaim_stat->recent_rotated[file]++;
                        }
@@ -1218,10 +1228,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
         * zone->pages_scanned is used for detect zone's oom
         * mem_cgroup remembers nr_scan by itself.
         */
-       if (scan_global_lru(sc)) {
+       if (scanning_global_lru(sc)) {
                zone->pages_scanned += pgscanned;
-               reclaim_stat->recent_scanned[!!file] += pgmoved;
        }
+       reclaim_stat->recent_scanned[!!file] += pgmoved;
 
        if (file)
                __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
@@ -1252,7 +1262,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
         * Move the pages to the [file or anon] inactive list.
         */
        pagevec_init(&pvec, 1);
-       pgmoved = 0;
        lru = LRU_BASE + file * LRU_FILE;
 
        spin_lock_irq(&zone->lru_lock);
@@ -1262,9 +1271,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
         * This helps balance scan pressure between file and anonymous
         * pages in get_scan_ratio.
         */
-       if (scan_global_lru(sc))
-               reclaim_stat->recent_rotated[!!file] += pgmoved;
+       reclaim_stat->recent_rotated[!!file] += pgmoved;
 
+       pgmoved = 0;
        while (!list_empty(&l_inactive)) {
                page = lru_to_page(&l_inactive);
                prefetchw_prev_lru_page(page, &l_inactive, flags);
@@ -1303,14 +1312,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
        pagevec_release(&pvec);
 }
 
-/**
- * inactive_anon_is_low - check if anonymous pages need to be deactivated
- * @zone: zone to check
- *
- * Returns true if the zone does not have enough inactive anon pages,
- * meaning some active anon pages need to be deactivated.
- */
-static int inactive_anon_is_low(struct zone *zone)
+static int inactive_anon_is_low_global(struct zone *zone)
 {
        unsigned long active, inactive;
 
@@ -1323,6 +1325,25 @@ static int inactive_anon_is_low(struct zone *zone)
        return 0;
 }
 
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ * @sc:   scan control of this context
+ *
+ * Returns true if the zone does not have enough inactive anon pages,
+ * meaning some active anon pages need to be deactivated.
+ */
+static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
+{
+       int low;
+
+       if (scanning_global_lru(sc))
+               low = inactive_anon_is_low_global(zone);
+       else
+               low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
+       return low;
+}
+
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
        struct zone *zone, struct scan_control *sc, int priority)
 {
@@ -1333,8 +1354,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
                return 0;
        }
 
-       if (lru == LRU_ACTIVE_ANON &&
-           (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
+       if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) {
                shrink_active_list(nr_to_scan, zone, sc, priority, file);
                return 0;
        }
@@ -1365,17 +1385,20 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
                return;
        }
 
-       anon  = zone_page_state(zone, NR_ACTIVE_ANON) +
-               zone_page_state(zone, NR_INACTIVE_ANON);
-       file  = zone_page_state(zone, NR_ACTIVE_FILE) +
-               zone_page_state(zone, NR_INACTIVE_FILE);
-       free  = zone_page_state(zone, NR_FREE_PAGES);
-
-       /* If we have very few page cache pages, force-scan anon pages. */
-       if (unlikely(file + free <= zone->pages_high)) {
-               percent[0] = 100;
-               percent[1] = 0;
-               return;
+       anon  = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
+               zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
+       file  = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
+               zone_nr_pages(zone, sc, LRU_INACTIVE_FILE);
+
+       if (scanning_global_lru(sc)) {
+               free  = zone_page_state(zone, NR_FREE_PAGES);
+               /* If we have very few page cache pages,
+                  force-scan anon pages. */
+               if (unlikely(file + free <= zone->pages_high)) {
+                       percent[0] = 100;
+                       percent[1] = 0;
+                       return;
+               }
        }
 
        /*
@@ -1443,30 +1466,23 @@ static void shrink_zone(int priority, struct zone *zone,
        get_scan_ratio(zone, sc, percent);
 
        for_each_evictable_lru(l) {
-               if (scan_global_lru(sc)) {
-                       int file = is_file_lru(l);
-                       int scan;
-
-                       scan = zone_page_state(zone, NR_LRU_BASE + l);
-                       if (priority) {
-                               scan >>= priority;
-                               scan = (scan * percent[file]) / 100;
-                       }
+               int file = is_file_lru(l);
+               int scan;
+
+               scan = zone_nr_pages(zone, sc, l);
+               if (priority) {
+                       scan >>= priority;
+                       scan = (scan * percent[file]) / 100;
+               }
+               if (scanning_global_lru(sc)) {
                        zone->lru[l].nr_scan += scan;
                        nr[l] = zone->lru[l].nr_scan;
                        if (nr[l] >= swap_cluster_max)
                                zone->lru[l].nr_scan = 0;
                        else
                                nr[l] = 0;
-               } else {
-                       /*
-                        * This reclaim occurs not because zone memory shortage
-                        * but because memory controller hits its limit.
-                        * Don't modify zone reclaim related data.
-                        */
-                       nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone,
-                                                               priority, l);
-               }
+               } else
+                       nr[l] = scan;
        }
 
        while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -1499,9 +1515,7 @@ static void shrink_zone(int priority, struct zone *zone,
         * Even if we did not try to evict anon pages at all, we want to
         * rebalance the anon lru active/inactive ratio.
         */
-       if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
-               shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
-       else if (!scan_global_lru(sc))
+       if (inactive_anon_is_low(zone, sc))
                shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
        throttle_vm_writeout(sc->gfp_mask);
@@ -1536,7 +1550,7 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
                 * Take care memory controller reclaiming has small influence
                 * to global LRU.
                 */
-               if (scan_global_lru(sc)) {
+               if (scanning_global_lru(sc)) {
                        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                continue;
                        note_zone_scanning_priority(zone, priority);
@@ -1589,12 +1603,12 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
        delayacct_freepages_start();
 
-       if (scan_global_lru(sc))
+       if (scanning_global_lru(sc))
                count_vm_event(ALLOCSTALL);
        /*
         * mem_cgroup will not do shrink_slab.
         */
-       if (scan_global_lru(sc)) {
+       if (scanning_global_lru(sc)) {
                for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
                        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1613,7 +1627,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                 * Don't shrink slabs when reclaiming memory from
                 * over limit cgroups
                 */
-               if (scan_global_lru(sc)) {
+               if (scanning_global_lru(sc)) {
                        shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
                        if (reclaim_state) {
                                sc->nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -1644,7 +1658,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                        congestion_wait(WRITE, HZ/10);
        }
        /* top priority shrink_zones still had more to do? don't OOM, then */
-       if (!sc->all_unreclaimable && scan_global_lru(sc))
+       if (!sc->all_unreclaimable && scanning_global_lru(sc))
                ret = sc->nr_reclaimed;
 out:
        /*
@@ -1657,7 +1671,7 @@ out:
        if (priority < 0)
                priority = 0;
 
-       if (scan_global_lru(sc)) {
+       if (scanning_global_lru(sc)) {
                for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
                        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1693,14 +1707,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
-                                               gfp_t gfp_mask,
-                                          bool noswap)
+                                          gfp_t gfp_mask,
+                                          bool noswap,
+                                          unsigned int swappiness)
 {
        struct scan_control sc = {
                .may_writepage = !laptop_mode,
                .may_swap = 1,
                .swap_cluster_max = SWAP_CLUSTER_MAX,
-               .swappiness = vm_swappiness,
+               .swappiness = swappiness,
                .order = 0,
                .mem_cgroup = mem_cont,
                .isolate_pages = mem_cgroup_isolate_pages,
@@ -1797,7 +1812,7 @@ loop_again:
                         * Do some background aging of the anon list, to give
                         * pages a chance to be referenced before reclaiming.
                         */
-                       if (inactive_anon_is_low(zone))
+                       if (inactive_anon_is_low(zone, &sc))
                                shrink_active_list(SWAP_CLUSTER_MAX, zone,
                                                        &sc, priority, 0);
 
@@ -2042,31 +2057,31 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
                                      int pass, struct scan_control *sc)
 {
        struct zone *zone;
-       unsigned long nr_to_scan, ret = 0;
-       enum lru_list l;
+       unsigned long ret = 0;
 
        for_each_zone(zone) {
+               enum lru_list l;
 
                if (!populated_zone(zone))
                        continue;
-
                if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
                        continue;
 
                for_each_evictable_lru(l) {
+                       enum zone_stat_item ls = NR_LRU_BASE + l;
+                       unsigned long lru_pages = zone_page_state(zone, ls);
+
                        /* For pass = 0, we don't shrink the active list */
-                       if (pass == 0 &&
-                               (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE))
+                       if (pass == 0 && (l == LRU_ACTIVE_ANON ||
+                                               l == LRU_ACTIVE_FILE))
                                continue;
 
-                       zone->lru[l].nr_scan +=
-                               (zone_page_state(zone, NR_LRU_BASE + l)
-                                                               >> prio) + 1;
+                       zone->lru[l].nr_scan += (lru_pages >> prio) + 1;
                        if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
+                               unsigned long nr_to_scan;
+
                                zone->lru[l].nr_scan = 0;
-                               nr_to_scan = min(nr_pages,
-                                       zone_page_state(zone,
-                                                       NR_LRU_BASE + l));
+                               nr_to_scan = min(nr_pages, lru_pages);
                                ret += shrink_list(l, nr_to_scan, zone,
                                                                sc, prio);
                                if (ret >= nr_pages)
@@ -2074,7 +2089,6 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
                        }
                }
        }
-
        return ret;
 }
 
@@ -2097,7 +2111,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
                .may_swap = 0,
                .swap_cluster_max = nr_pages,
                .may_writepage = 1,
-               .swappiness = vm_swappiness,
                .isolate_pages = isolate_pages_global,
        };
 
@@ -2131,10 +2144,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
                int prio;
 
                /* Force reclaiming mapped pages in the passes #3 and #4 */
-               if (pass > 2) {
+               if (pass > 2)
                        sc.may_swap = 1;
-                       sc.swappiness = 100;
-               }
 
                for (prio = DEF_PRIORITY; prio >= 0; prio--) {
                        unsigned long nr_to_scan = nr_pages - ret;