X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fswap.c;h=6e83084c1f6c3bf2c1a9fa36fb5649ea181e363c;hb=03fb3d2af96c2783c3a5bc03f3d984cf422f0e69;hp=218c52a24a216831a347736cde51045563854770;hpb=d85f33855c303acfa87fa457157cef755b6087df;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/swap.c b/mm/swap.c index 218c52a..6e83084 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -5,7 +5,7 @@ */ /* - * This file contains the default values for the opereation of the + * This file contains the default values for the operation of the * Linux VM subsystem. Fine-tuning documentation can be found in * Documentation/sysctl/vm.txt. * Started 18.12.91 @@ -24,21 +24,26 @@ #include #include #include /* for try_to_release_page() */ -#include #include #include #include #include -#include +#include +#include + +#include "internal.h" /* How many pages do we try to swap or page in/out together? */ int page_cluster; +static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); +static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); + /* * This path almost never happens for VM activity - pages are normally * freed via pagevecs. But it gets used by networking. */ -static void fastcall __page_cache_release(struct page *page) +static void __page_cache_release(struct page *page) { if (PageLRU(page)) { unsigned long flags; @@ -74,12 +79,11 @@ void put_page(struct page *page) EXPORT_SYMBOL(put_page); /** - * put_pages_list(): release a list of pages + * put_pages_list() - release a list of pages + * @pages: list of pages threaded on page->lru * * Release a list of pages which are strung together on page.lru. Currently * used by read_cache_pages() and related error recovery code. - * - * @pages: list of pages threaded on page->lru */ void put_pages_list(struct list_head *pages) { @@ -94,59 +98,98 @@ void put_pages_list(struct list_head *pages) EXPORT_SYMBOL(put_pages_list); /* + * pagevec_move_tail() must be called with IRQ disabled. + * Otherwise this may cause nasty races. + */ +static void pagevec_move_tail(struct pagevec *pvec) +{ + int i; + int pgmoved = 0; + struct zone *zone = NULL; + + for (i = 0; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; + struct zone *pagezone = page_zone(page); + + if (pagezone != zone) { + if (zone) + spin_unlock(&zone->lru_lock); + zone = pagezone; + spin_lock(&zone->lru_lock); + } + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { + int lru = page_is_file_cache(page); + list_move_tail(&page->lru, &zone->lru[lru].list); + pgmoved++; + } + } + if (zone) + spin_unlock(&zone->lru_lock); + __count_vm_events(PGROTATED, pgmoved); + release_pages(pvec->pages, pvec->nr, pvec->cold); + pagevec_reinit(pvec); +} + +/* * Writeback is about to end against a page which has been marked for immediate * reclaim. If it still appears to be reclaimable, move it to the tail of the - * inactive list. The page still has PageWriteback set, which will pin it. - * - * We don't expect many pages to come through here, so don't bother batching - * things up. - * - * To avoid placing the page at the tail of the LRU while PG_writeback is still - * set, this function will clear PG_writeback before performing the page - * motion. Do that inside the lru lock because once PG_writeback is cleared - * we may not touch the page. - * - * Returns zero if it cleared PG_writeback. + * inactive list. */ -int rotate_reclaimable_page(struct page *page) +void rotate_reclaimable_page(struct page *page) { - struct zone *zone; - unsigned long flags; - - if (PageLocked(page)) - return 1; - if (PageDirty(page)) - return 1; - if (PageActive(page)) - return 1; - if (!PageLRU(page)) - return 1; - - zone = page_zone(page); - spin_lock_irqsave(&zone->lru_lock, flags); - if (PageLRU(page) && !PageActive(page)) { - list_move_tail(&page->lru, &zone->inactive_list); - __count_vm_event(PGROTATED); + if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && + !PageUnevictable(page) && PageLRU(page)) { + struct pagevec *pvec; + unsigned long flags; + + page_cache_get(page); + local_irq_save(flags); + pvec = &__get_cpu_var(lru_rotate_pvecs); + if (!pagevec_add(pvec, page)) + pagevec_move_tail(pvec); + local_irq_restore(flags); } - if (!test_clear_page_writeback(page)) - BUG(); - spin_unlock_irqrestore(&zone->lru_lock, flags); - return 0; +} + +static void update_page_reclaim_stat(struct zone *zone, struct page *page, + int file, int rotated) +{ + struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat; + struct zone_reclaim_stat *memcg_reclaim_stat; + + memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page); + + reclaim_stat->recent_scanned[file]++; + if (rotated) + reclaim_stat->recent_rotated[file]++; + + if (!memcg_reclaim_stat) + return; + + memcg_reclaim_stat->recent_scanned[file]++; + if (rotated) + memcg_reclaim_stat->recent_rotated[file]++; } /* * FIXME: speed this up? */ -void fastcall activate_page(struct page *page) +void activate_page(struct page *page) { struct zone *zone = page_zone(page); spin_lock_irq(&zone->lru_lock); - if (PageLRU(page) && !PageActive(page)) { - del_page_from_inactive_list(zone, page); + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { + int file = page_is_file_cache(page); + int lru = LRU_BASE + file; + del_page_from_lru_list(zone, page, lru); + SetPageActive(page); - add_page_to_active_list(zone, page); + lru += LRU_ACTIVE; + add_page_to_lru_list(zone, page, lru); __count_vm_event(PGACTIVATE); + + update_page_reclaim_stat(zone, page, !!file, 1); } spin_unlock_irq(&zone->lru_lock); } @@ -158,9 +201,10 @@ void fastcall activate_page(struct page *page) * inactive,referenced -> active,unreferenced * active,unreferenced -> active,referenced */ -void fastcall mark_page_accessed(struct page *page) +void mark_page_accessed(struct page *page) { - if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) { + if (!PageActive(page) && !PageUnevictable(page) && + PageReferenced(page) && PageLRU(page)) { activate_page(page); ClearPageReferenced(page); } else if (!PageReferenced(page)) { @@ -170,52 +214,90 @@ void fastcall mark_page_accessed(struct page *page) EXPORT_SYMBOL(mark_page_accessed); -/** - * lru_cache_add: add a page to the page lists - * @page: the page to add - */ -static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, }; -static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, }; - -void fastcall lru_cache_add(struct page *page) +void __lru_cache_add(struct page *page, enum lru_list lru) { - struct pagevec *pvec = &get_cpu_var(lru_add_pvecs); + struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; page_cache_get(page); if (!pagevec_add(pvec, page)) - __pagevec_lru_add(pvec); + ____pagevec_lru_add(pvec, lru); put_cpu_var(lru_add_pvecs); } -void fastcall lru_cache_add_active(struct page *page) +/** + * lru_cache_add_lru - add a page to a page list + * @page: the page to be added to the LRU. + * @lru: the LRU list to which the page is added. + */ +void lru_cache_add_lru(struct page *page, enum lru_list lru) +{ + if (PageActive(page)) { + VM_BUG_ON(PageUnevictable(page)); + ClearPageActive(page); + } else if (PageUnevictable(page)) { + VM_BUG_ON(PageActive(page)); + ClearPageUnevictable(page); + } + + VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page)); + __lru_cache_add(page, lru); +} + +/** + * add_page_to_unevictable_list - add a page to the unevictable list + * @page: the page to be added to the unevictable list + * + * Add page directly to its zone's unevictable list. To avoid races with + * tasks that might be making the page evictable, through eg. munlock, + * munmap or exit, while it's not on the lru, we want to add the page + * while it's locked or otherwise "invisible" to other tasks. This is + * difficult to do when using the pagevec cache, so bypass that. + */ +void add_page_to_unevictable_list(struct page *page) { - struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs); + struct zone *zone = page_zone(page); - page_cache_get(page); - if (!pagevec_add(pvec, page)) - __pagevec_lru_add_active(pvec); - put_cpu_var(lru_add_active_pvecs); + spin_lock_irq(&zone->lru_lock); + SetPageUnevictable(page); + SetPageLRU(page); + add_page_to_lru_list(zone, page, LRU_UNEVICTABLE); + spin_unlock_irq(&zone->lru_lock); } -static void __lru_add_drain(int cpu) +/* + * Drain pages out of the cpu's pagevecs. + * Either "cpu" is the current CPU, and preemption has already been + * disabled; or "cpu" is being hot-unplugged, and is already dead. + */ +static void drain_cpu_pagevecs(int cpu) { - struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu); - - /* CPU is dead, so no locking needed. */ - if (pagevec_count(pvec)) - __pagevec_lru_add(pvec); - pvec = &per_cpu(lru_add_active_pvecs, cpu); - if (pagevec_count(pvec)) - __pagevec_lru_add_active(pvec); + struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu); + struct pagevec *pvec; + int lru; + + for_each_lru(lru) { + pvec = &pvecs[lru - LRU_BASE]; + if (pagevec_count(pvec)) + ____pagevec_lru_add(pvec, lru); + } + + pvec = &per_cpu(lru_rotate_pvecs, cpu); + if (pagevec_count(pvec)) { + unsigned long flags; + + /* No harm done if a racing interrupt already did this */ + local_irq_save(flags); + pagevec_move_tail(pvec); + local_irq_restore(flags); + } } void lru_add_drain(void) { - __lru_add_drain(get_cpu()); + drain_cpu_pagevecs(get_cpu()); put_cpu(); } -#ifdef CONFIG_NUMA static void lru_add_drain_per_cpu(struct work_struct *dummy) { lru_add_drain(); @@ -229,18 +311,6 @@ int lru_add_drain_all(void) return schedule_on_each_cpu(lru_add_drain_per_cpu); } -#else - -/* - * Returns 0 for success - */ -int lru_add_drain_all(void) -{ - lru_add_drain(); - return 0; -} -#endif - /* * Batched page_cache_release(). Decrement the reference count on all the * passed pages. If it fell to zero then remove the page from the LRU and @@ -249,15 +319,17 @@ int lru_add_drain_all(void) * Avoid taking zone->lru_lock if possible, but if it is taken, retain it * for the remainder of the operation. * - * The locking in this function is against shrink_cache(): we recheck the - * page count inside the lock to see whether shrink_cache grabbed the page - * via the LRU. If it did, give up: shrink_cache will free it. + * The locking in this function is against shrink_inactive_list(): we recheck + * the page count inside the lock to see whether shrink_inactive_list() + * grabbed the page via the LRU. If it did, give up: shrink_inactive_list() + * will free it. */ void release_pages(struct page **pages, int nr, int cold) { int i; struct pagevec pages_to_free; struct zone *zone = NULL; + unsigned long uninitialized_var(flags); pagevec_init(&pages_to_free, cold); for (i = 0; i < nr; i++) { @@ -265,7 +337,7 @@ void release_pages(struct page **pages, int nr, int cold) if (unlikely(PageCompound(page))) { if (zone) { - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irqrestore(&zone->lru_lock, flags); zone = NULL; } put_compound_page(page); @@ -277,11 +349,13 @@ void release_pages(struct page **pages, int nr, int cold) if (PageLRU(page)) { struct zone *pagezone = page_zone(page); + if (pagezone != zone) { if (zone) - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irqrestore(&zone->lru_lock, + flags); zone = pagezone; - spin_lock_irq(&zone->lru_lock); + spin_lock_irqsave(&zone->lru_lock, flags); } VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); @@ -290,7 +364,7 @@ void release_pages(struct page **pages, int nr, int cold) if (!pagevec_add(&pages_to_free, page)) { if (zone) { - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irqrestore(&zone->lru_lock, flags); zone = NULL; } __pagevec_free(&pages_to_free); @@ -298,7 +372,7 @@ void release_pages(struct page **pages, int nr, int cold) } } if (zone) - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irqrestore(&zone->lru_lock, flags); pagevec_free(&pages_to_free); } @@ -323,66 +397,21 @@ void __pagevec_release(struct pagevec *pvec) EXPORT_SYMBOL(__pagevec_release); /* - * pagevec_release() for pages which are known to not be on the LRU - * - * This function reinitialises the caller's pagevec. - */ -void __pagevec_release_nonlru(struct pagevec *pvec) -{ - int i; - struct pagevec pages_to_free; - - pagevec_init(&pages_to_free, pvec->cold); - for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - - VM_BUG_ON(PageLRU(page)); - if (put_page_testzero(page)) - pagevec_add(&pages_to_free, page); - } - pagevec_free(&pages_to_free); - pagevec_reinit(pvec); -} - -/* * Add the passed pages to the LRU, then drop the caller's refcount * on them. Reinitialises the caller's pagevec. */ -void __pagevec_lru_add(struct pagevec *pvec) +void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) { int i; struct zone *zone = NULL; - for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - struct zone *pagezone = page_zone(page); - - if (pagezone != zone) { - if (zone) - spin_unlock_irq(&zone->lru_lock); - zone = pagezone; - spin_lock_irq(&zone->lru_lock); - } - VM_BUG_ON(PageLRU(page)); - SetPageLRU(page); - add_page_to_inactive_list(zone, page); - } - if (zone) - spin_unlock_irq(&zone->lru_lock); - release_pages(pvec->pages, pvec->nr, pvec->cold); - pagevec_reinit(pvec); -} - -EXPORT_SYMBOL(__pagevec_lru_add); - -void __pagevec_lru_add_active(struct pagevec *pvec) -{ - int i; - struct zone *zone = NULL; + VM_BUG_ON(is_unevictable_lru(lru)); for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; struct zone *pagezone = page_zone(page); + int file; + int active; if (pagezone != zone) { if (zone) @@ -390,11 +419,16 @@ void __pagevec_lru_add_active(struct pagevec *pvec) zone = pagezone; spin_lock_irq(&zone->lru_lock); } + VM_BUG_ON(PageActive(page)); + VM_BUG_ON(PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - VM_BUG_ON(PageActive(page)); - SetPageActive(page); - add_page_to_active_list(zone, page); + active = is_active_lru(lru); + file = is_file_lru(lru); + if (active) + SetPageActive(page); + update_page_reclaim_stat(zone, page, file, active); + add_page_to_lru_list(zone, page, lru); } if (zone) spin_unlock_irq(&zone->lru_lock); @@ -402,6 +436,8 @@ void __pagevec_lru_add_active(struct pagevec *pvec) pagevec_reinit(pvec); } +EXPORT_SYMBOL(____pagevec_lru_add); + /* * Try to drop buffers from the pages in a pagevec */ @@ -412,7 +448,7 @@ void pagevec_strip(struct pagevec *pvec) for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; - if (PagePrivate(page) && !TestSetPageLocked(page)) { + if (PagePrivate(page) && trylock_page(page)) { if (PagePrivate(page)) try_to_release_page(page, 0); unlock_page(page); @@ -462,7 +498,7 @@ EXPORT_SYMBOL(pagevec_lookup_tag); */ #define ACCT_THRESHOLD max(16, NR_CPUS * 2) -static DEFINE_PER_CPU(long, committed_space) = 0; +static DEFINE_PER_CPU(long, committed_space); void vm_acct_memory(long pages) { @@ -472,7 +508,7 @@ void vm_acct_memory(long pages) local = &__get_cpu_var(committed_space); *local += pages; if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { - atomic_add(*local, &vm_committed_space); + atomic_long_add(*local, &vm_committed_space); *local = 0; } preempt_enable(); @@ -488,10 +524,10 @@ static int cpu_swap_callback(struct notifier_block *nfb, long *committed; committed = &per_cpu(committed_space, (long)hcpu); - if (action == CPU_DEAD) { - atomic_add(*committed, &vm_committed_space); + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { + atomic_long_add(*committed, &vm_committed_space); *committed = 0; - __lru_add_drain((long)hcpu); + drain_cpu_pagevecs((long)hcpu); } return NOTIFY_OK; } @@ -505,6 +541,10 @@ void __init swap_setup(void) { unsigned long megs = num_physpages >> (20 - PAGE_SHIFT); +#ifdef CONFIG_SWAP + bdi_init(swapper_space.backing_dev_info); +#endif + /* Use a smaller cluster for small-memory machines */ if (megs < 16) page_cluster = 2;