X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fvmscan.c;h=777af57fd8c8c80971d7abaf4edb974e360288c9;hb=0bb38a5cdeb39f543657ec6fb9950343d2de6918;hp=ece2ecb081025dd5f1ebf74ed24c0ce70bd5a4bf;hpb=de2e7567c7ddf24f0ca80010163ed10da66a14e2;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/vmscan.c b/mm/vmscan.c index ece2ecb..777af57 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,8 +148,8 @@ static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone, return &zone->reclaim_stat; } -static unsigned long zone_nr_pages(struct zone *zone, struct scan_control *sc, - enum lru_list lru) +static unsigned long zone_nr_lru_pages(struct zone *zone, + struct scan_control *sc, enum lru_list lru) { if (!scanning_global_lru(sc)) return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru); @@ -286,7 +286,12 @@ static inline int page_mapping_inuse(struct page *page) static inline int is_page_cache_freeable(struct page *page) { - return page_count(page) - !!page_has_private(page) == 2; + /* + * A freeable page cache page is referenced only by the caller + * that isolated the page, the page cache radix tree and + * optional buffer heads at page->private. + */ + return page_count(page) - page_has_private(page) == 2; } static int may_write_to_queue(struct backing_dev_info *bdi) @@ -361,7 +366,6 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, * block, for some throttling. This happens by accident, because * swap_backing_dev_info is bust: it doesn't reflect the * congestion state of the swapdevs. Easy to fix, if needed. - * See swapfile.c:page_queue_congested(). */ if (!is_page_cache_freeable(page)) return PAGE_KEEP; @@ -531,7 +535,7 @@ redo: * unevictable page on [in]active list. * We know how to handle that. */ - lru = active + page_is_file_cache(page); + lru = active + page_lru_base_type(page); lru_cache_add_lru(page, lru); } else { /* @@ -540,6 +544,16 @@ redo: */ lru = LRU_UNEVICTABLE; add_page_to_unevictable_list(page); + /* + * When racing with an mlock clearing (page is + * unlocked), make sure that if the other thread does + * not observe our setting of PG_lru and fails + * isolation, we see PG_mlocked cleared below and move + * the page back to the evictable list. + * + * The other side is TestClearPageMlocked(). + */ + smp_mb(); } /* @@ -659,7 +673,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, * processes. Try to unmap it here. */ if (page_mapped(page) && mapping) { - switch (try_to_unmap(page, 0)) { + switch (try_to_unmap(page, TTU_UNMAP)) { case SWAP_FAIL: goto activate_locked; case SWAP_AGAIN: @@ -821,7 +835,7 @@ int __isolate_lru_page(struct page *page, int mode, int file) if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode)) return ret; - if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file)) + if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file) return ret; /* @@ -971,7 +985,7 @@ static unsigned long isolate_pages_global(unsigned long nr, if (file) lru += LRU_FILE; return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order, - mode, !!file); + mode, file); } /* @@ -986,7 +1000,7 @@ static unsigned long clear_active_flags(struct list_head *page_list, struct page *page; list_for_each_entry(page, page_list, lru) { - lru = page_is_file_cache(page); + lru = page_lru_base_type(page); if (PageActive(page)) { lru += LRU_ACTIVE; ClearPageActive(page); @@ -1084,7 +1098,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, int lumpy_reclaim = 0; while (unlikely(too_many_isolated(zone, file, sc))) { - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); /* We are about to die and free our memory. Return now. */ if (fatal_signal_pending(current)) @@ -1208,8 +1222,8 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, SetPageLRU(page); lru = page_lru(page); add_page_to_lru_list(zone, page, lru); - if (PageActive(page)) { - int file = !!page_is_file_cache(page); + if (is_active_lru(lru)) { + int file = is_file_lru(lru); reclaim_stat->recent_rotated[file]++; } if (!pagevec_add(&pvec, page)) { @@ -1273,15 +1287,10 @@ static void move_active_pages_to_lru(struct zone *zone, while (!list_empty(list)) { page = lru_to_page(list); - prefetchw_prev_lru_page(page, list, flags); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - VM_BUG_ON(!PageActive(page)); - if (!is_active_lru(lru)) - ClearPageActive(page); /* we are de-activating */ - list_move(&page->lru, &zone->lru[lru].list); mem_cgroup_add_lru_list(page, lru); pgmoved++; @@ -1324,7 +1333,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, if (scanning_global_lru(sc)) { zone->pages_scanned += pgscanned; } - reclaim_stat->recent_scanned[!!file] += nr_taken; + reclaim_stat->recent_scanned[file] += nr_taken; __count_zone_vm_events(PGREFILL, zone, pgscanned); if (file) @@ -1357,12 +1366,13 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * IO, plus JVM can create lots of anon VM_EXEC pages, * so we ignore them here. */ - if ((vm_flags & VM_EXEC) && !PageAnon(page)) { + if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) { list_add(&page->lru, &l_active); continue; } } + ClearPageActive(page); /* we are de-activating */ list_add(&page->lru, &l_inactive); } @@ -1376,7 +1386,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * helps balance scan pressure between file and anonymous pages in * get_scan_ratio. */ - reclaim_stat->recent_rotated[!!file] += nr_rotated; + reclaim_stat->recent_rotated[file] += nr_rotated; move_active_pages_to_lru(zone, &l_active, LRU_ACTIVE + file * LRU_FILE); @@ -1488,10 +1498,10 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc, unsigned long ap, fp; struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); - anon = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) + - zone_nr_pages(zone, sc, LRU_INACTIVE_ANON); - file = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) + - zone_nr_pages(zone, sc, LRU_INACTIVE_FILE); + anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) + + zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); + file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) + + zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); if (scanning_global_lru(sc)) { free = zone_page_state(zone, NR_FREE_PAGES); @@ -1585,6 +1595,7 @@ static void shrink_zone(int priority, struct zone *zone, enum lru_list l; unsigned long nr_reclaimed = sc->nr_reclaimed; unsigned long swap_cluster_max = sc->swap_cluster_max; + struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); int noswap = 0; /* If we have no swap space, do not bother scanning anon pages. */ @@ -1599,17 +1610,14 @@ static void shrink_zone(int priority, struct zone *zone, int file = is_file_lru(l); unsigned long scan; - scan = zone_nr_pages(zone, sc, l); + scan = zone_nr_lru_pages(zone, sc, l); if (priority || noswap) { scan >>= priority; scan = (scan * percent[file]) / 100; } - if (scanning_global_lru(sc)) - nr[l] = nr_scan_try_batch(scan, - &zone->lru[l].nr_saved_scan, - swap_cluster_max); - else - nr[l] = scan; + nr[l] = nr_scan_try_batch(scan, + &reclaim_stat->nr_saved_scan[l], + swap_cluster_max); } while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -1711,10 +1719,10 @@ static void shrink_zones(int priority, struct zonelist *zonelist, * * If the caller is !__GFP_FS then the probability of a failure is reasonably * high - the zone may be full of dirty or under-writeback pages, which this - * caller can't do much about. We kick pdflush and take explicit naps in the - * hope that some of these pages can be written. But if the allocating task - * holds filesystem locks which prevent writeout this might not work, and the - * allocation attempt will fail. + * caller can't do much about. We kick the writeback threads and take explicit + * naps in the hope that some of these pages can be written. But if the + * allocating task holds filesystem locks which prevent writeout this might not + * work, and the allocation attempt will fail. * * returns: 0, if no pages reclaimed * else, the number of pages reclaimed @@ -1838,11 +1846,45 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_CGROUP_MEM_RES_CTLR +unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, + gfp_t gfp_mask, bool noswap, + unsigned int swappiness, + struct zone *zone, int nid) +{ + struct scan_control sc = { + .may_writepage = !laptop_mode, + .may_unmap = 1, + .may_swap = !noswap, + .swap_cluster_max = SWAP_CLUSTER_MAX, + .swappiness = swappiness, + .order = 0, + .mem_cgroup = mem, + .isolate_pages = mem_cgroup_isolate_pages, + }; + nodemask_t nm = nodemask_of_node(nid); + + sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); + sc.nodemask = &nm; + sc.nr_reclaimed = 0; + sc.nr_scanned = 0; + /* + * NOTE: Although we can get the priority field, using it + * here is not a good idea, since it limits the pages we can scan. + * if we don't reclaim here, the shrink_zone from balance_pgdat + * will pick up pages from other mem cgroup's as well. We hack + * the priority and make it zero. + */ + shrink_zone(0, zone, &sc); + return sc.nr_reclaimed; +} + unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, gfp_t gfp_mask, bool noswap, unsigned int swappiness) { + struct zonelist *zonelist; struct scan_control sc = { .may_writepage = !laptop_mode, .may_unmap = 1, @@ -1854,7 +1896,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .isolate_pages = mem_cgroup_isolate_pages, .nodemask = NULL, /* we don't care the placement */ }; - struct zonelist *zonelist; sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); @@ -1976,6 +2017,7 @@ loop_again: for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; int nr_slab; + int nid, zid; if (!populated_zone(zone)) continue; @@ -1990,6 +2032,15 @@ loop_again: temp_priority[i] = priority; sc.nr_scanned = 0; note_zone_scanning_priority(zone, priority); + + nid = pgdat->node_id; + zid = zone_idx(zone); + /* + * Call soft limit reclaim before calling shrink_zone. + * For now we ignore the return value + */ + mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask, + nid, zid); /* * We put equal pressure on every zone, unless one * zone has way too many pages free already. @@ -2219,6 +2270,7 @@ static void shrink_all_zones(unsigned long nr_pages, int prio, { struct zone *zone; unsigned long nr_reclaimed = 0; + struct zone_reclaim_stat *reclaim_stat; for_each_populated_zone(zone) { enum lru_list l; @@ -2235,11 +2287,14 @@ static void shrink_all_zones(unsigned long nr_pages, int prio, l == LRU_ACTIVE_FILE)) continue; - zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1; - if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) { + reclaim_stat = get_reclaim_stat(zone, sc); + reclaim_stat->nr_saved_scan[l] += + (lru_pages >> prio) + 1; + if (reclaim_stat->nr_saved_scan[l] + >= nr_pages || pass > 3) { unsigned long nr_to_scan; - zone->lru[l].nr_saved_scan = 0; + reclaim_stat->nr_saved_scan[l] = 0; nr_to_scan = min(nr_pages, lru_pages); nr_reclaimed += shrink_list(l, nr_to_scan, zone, sc, prio); @@ -2656,7 +2711,7 @@ static void check_move_unevictable_page(struct page *page, struct zone *zone) retry: ClearPageUnevictable(page); if (page_evictable(page, NULL)) { - enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); + enum lru_list l = page_lru_base_type(page); __dec_zone_state(zone, NR_UNEVICTABLE); list_move(&page->lru, &zone->lru[l].list); @@ -2799,10 +2854,10 @@ static void scan_all_zones_unevictable_pages(void) unsigned long scan_unevictable_pages; int scan_unevictable_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { - proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + proc_doulongvec_minmax(table, write, buffer, length, ppos); if (write && *(unsigned long *)table->data) scan_all_zones_unevictable_pages();