X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=blobdiff_plain;f=mm%2Fswap.c;h=7cd60bf0a972a2f8371774135eb204b7692821cf;hp=154ae13d8b7e33bd9f5d929b2c7e4c16ab38590c;hb=70764a905785ebacc8d44fed7a12fba3db267ae6;hpb=7f28570185f98bfa83f775756ced79e9f22b6d93 diff --git a/mm/swap.c b/mm/swap.c index 154ae13..7cd60bf 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -5,7 +5,7 @@ */ /* - * This file contains the default values for the opereation of the + * This file contains the default values for the operation of the * Linux VM subsystem. Fine-tuning documentation can be found in * Documentation/sysctl/vm.txt. * Started 18.12.91 @@ -24,91 +24,173 @@ #include #include #include /* for try_to_release_page() */ -#include #include #include #include #include -#include +#include +#include +#include + +#include "internal.h" /* How many pages do we try to swap or page in/out together? */ int page_cluster; -#ifdef CONFIG_HUGETLB_PAGE +static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); +static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); -void put_page(struct page *page) +/* + * This path almost never happens for VM activity - pages are normally + * freed via pagevecs. But it gets used by networking. + */ +static void __page_cache_release(struct page *page) { - if (unlikely(PageCompound(page))) { - page = (struct page *)page_private(page); - if (put_page_testzero(page)) { - void (*dtor)(struct page *page); + if (PageLRU(page)) { + unsigned long flags; + struct zone *zone = page_zone(page); - dtor = (void (*)(struct page *))page[1].mapping; - (*dtor)(page); - } - return; + spin_lock_irqsave(&zone->lru_lock, flags); + VM_BUG_ON(!PageLRU(page)); + __ClearPageLRU(page); + del_page_from_lru(zone, page); + spin_unlock_irqrestore(&zone->lru_lock, flags); } - if (put_page_testzero(page)) + free_hot_cold_page(page, 0); +} + +static void put_compound_page(struct page *page) +{ + page = compound_head(page); + if (put_page_testzero(page)) { + compound_page_dtor *dtor; + + dtor = get_compound_page_dtor(page); + (*dtor)(page); + } +} + +void put_page(struct page *page) +{ + if (unlikely(PageCompound(page))) + put_compound_page(page); + else if (put_page_testzero(page)) __page_cache_release(page); } EXPORT_SYMBOL(put_page); -#endif + +/** + * put_pages_list() - release a list of pages + * @pages: list of pages threaded on page->lru + * + * Release a list of pages which are strung together on page.lru. Currently + * used by read_cache_pages() and related error recovery code. + */ +void put_pages_list(struct list_head *pages) +{ + while (!list_empty(pages)) { + struct page *victim; + + victim = list_entry(pages->prev, struct page, lru); + list_del(&victim->lru); + page_cache_release(victim); + } +} +EXPORT_SYMBOL(put_pages_list); + +/* + * pagevec_move_tail() must be called with IRQ disabled. + * Otherwise this may cause nasty races. 
+ */ +static void pagevec_move_tail(struct pagevec *pvec) +{ + int i; + int pgmoved = 0; + struct zone *zone = NULL; + + for (i = 0; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; + struct zone *pagezone = page_zone(page); + + if (pagezone != zone) { + if (zone) + spin_unlock(&zone->lru_lock); + zone = pagezone; + spin_lock(&zone->lru_lock); + } + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { + int lru = page_lru_base_type(page); + list_move_tail(&page->lru, &zone->lru[lru].list); + pgmoved++; + } + } + if (zone) + spin_unlock(&zone->lru_lock); + __count_vm_events(PGROTATED, pgmoved); + release_pages(pvec->pages, pvec->nr, pvec->cold); + pagevec_reinit(pvec); +} /* * Writeback is about to end against a page which has been marked for immediate * reclaim. If it still appears to be reclaimable, move it to the tail of the - * inactive list. The page still has PageWriteback set, which will pin it. - * - * We don't expect many pages to come through here, so don't bother batching - * things up. - * - * To avoid placing the page at the tail of the LRU while PG_writeback is still - * set, this function will clear PG_writeback before performing the page - * motion. Do that inside the lru lock because once PG_writeback is cleared - * we may not touch the page. - * - * Returns zero if it cleared PG_writeback. + * inactive list. */ -int rotate_reclaimable_page(struct page *page) +void rotate_reclaimable_page(struct page *page) { - struct zone *zone; - unsigned long flags; - - if (PageLocked(page)) - return 1; - if (PageDirty(page)) - return 1; - if (PageActive(page)) - return 1; - if (!PageLRU(page)) - return 1; - - zone = page_zone(page); - spin_lock_irqsave(&zone->lru_lock, flags); - if (PageLRU(page) && !PageActive(page)) { - list_del(&page->lru); - list_add_tail(&page->lru, &zone->inactive_list); - inc_page_state(pgrotated); + if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && + !PageUnevictable(page) && PageLRU(page)) { + struct pagevec *pvec; + unsigned long flags; + + page_cache_get(page); + local_irq_save(flags); + pvec = &__get_cpu_var(lru_rotate_pvecs); + if (!pagevec_add(pvec, page)) + pagevec_move_tail(pvec); + local_irq_restore(flags); } - if (!test_clear_page_writeback(page)) - BUG(); - spin_unlock_irqrestore(&zone->lru_lock, flags); - return 0; +} + +static void update_page_reclaim_stat(struct zone *zone, struct page *page, + int file, int rotated) +{ + struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat; + struct zone_reclaim_stat *memcg_reclaim_stat; + + memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page); + + reclaim_stat->recent_scanned[file]++; + if (rotated) + reclaim_stat->recent_rotated[file]++; + + if (!memcg_reclaim_stat) + return; + + memcg_reclaim_stat->recent_scanned[file]++; + if (rotated) + memcg_reclaim_stat->recent_rotated[file]++; } /* * FIXME: speed this up? 
*/ -void fastcall activate_page(struct page *page) +void activate_page(struct page *page) { struct zone *zone = page_zone(page); spin_lock_irq(&zone->lru_lock); - if (PageLRU(page) && !PageActive(page)) { - del_page_from_inactive_list(zone, page); + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { + int file = page_is_file_cache(page); + int lru = page_lru_base_type(page); + del_page_from_lru_list(zone, page, lru); + SetPageActive(page); - add_page_to_active_list(zone, page); - inc_page_state(pgactivate); + lru += LRU_ACTIVE; + add_page_to_lru_list(zone, page, lru); + __count_vm_event(PGACTIVATE); + + update_page_reclaim_stat(zone, page, file, 1); } spin_unlock_irq(&zone->lru_lock); } @@ -120,9 +202,10 @@ void fastcall activate_page(struct page *page) * inactive,referenced -> active,unreferenced * active,unreferenced -> active,referenced */ -void fastcall mark_page_accessed(struct page *page) +void mark_page_accessed(struct page *page) { - if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) { + if (!PageActive(page) && !PageUnevictable(page) && + PageReferenced(page) && PageLRU(page)) { activate_page(page); ClearPageReferenced(page); } else if (!PageReferenced(page)) { @@ -132,65 +215,102 @@ void fastcall mark_page_accessed(struct page *page) EXPORT_SYMBOL(mark_page_accessed); -/** - * lru_cache_add: add a page to the page lists - * @page: the page to add - */ -static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, }; -static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, }; - -void fastcall lru_cache_add(struct page *page) +void __lru_cache_add(struct page *page, enum lru_list lru) { - struct pagevec *pvec = &get_cpu_var(lru_add_pvecs); + struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; page_cache_get(page); if (!pagevec_add(pvec, page)) - __pagevec_lru_add(pvec); + ____pagevec_lru_add(pvec, lru); put_cpu_var(lru_add_pvecs); } -void fastcall lru_cache_add_active(struct page *page) +/** + * lru_cache_add_lru - add a page to a page list + * @page: the page to be added to the LRU. + * @lru: the LRU list to which the page is added. + */ +void lru_cache_add_lru(struct page *page, enum lru_list lru) { - struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs); + if (PageActive(page)) { + VM_BUG_ON(PageUnevictable(page)); + ClearPageActive(page); + } else if (PageUnevictable(page)) { + VM_BUG_ON(PageActive(page)); + ClearPageUnevictable(page); + } - page_cache_get(page); - if (!pagevec_add(pvec, page)) - __pagevec_lru_add_active(pvec); - put_cpu_var(lru_add_active_pvecs); + VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page)); + __lru_cache_add(page, lru); } -void lru_add_drain(void) +/** + * add_page_to_unevictable_list - add a page to the unevictable list + * @page: the page to be added to the unevictable list + * + * Add page directly to its zone's unevictable list. To avoid races with + * tasks that might be making the page evictable, through eg. munlock, + * munmap or exit, while it's not on the lru, we want to add the page + * while it's locked or otherwise "invisible" to other tasks. This is + * difficult to do when using the pagevec cache, so bypass that. 
+ */ +void add_page_to_unevictable_list(struct page *page) { - struct pagevec *pvec = &get_cpu_var(lru_add_pvecs); + struct zone *zone = page_zone(page); - if (pagevec_count(pvec)) - __pagevec_lru_add(pvec); - pvec = &__get_cpu_var(lru_add_active_pvecs); - if (pagevec_count(pvec)) - __pagevec_lru_add_active(pvec); - put_cpu_var(lru_add_pvecs); + spin_lock_irq(&zone->lru_lock); + SetPageUnevictable(page); + SetPageLRU(page); + add_page_to_lru_list(zone, page, LRU_UNEVICTABLE); + spin_unlock_irq(&zone->lru_lock); } /* - * This path almost never happens for VM activity - pages are normally - * freed via pagevecs. But it gets used by networking. + * Drain pages out of the cpu's pagevecs. + * Either "cpu" is the current CPU, and preemption has already been + * disabled; or "cpu" is being hot-unplugged, and is already dead. */ -void fastcall __page_cache_release(struct page *page) +static void drain_cpu_pagevecs(int cpu) { - unsigned long flags; - struct zone *zone = page_zone(page); + struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu); + struct pagevec *pvec; + int lru; + + for_each_lru(lru) { + pvec = &pvecs[lru - LRU_BASE]; + if (pagevec_count(pvec)) + ____pagevec_lru_add(pvec, lru); + } - spin_lock_irqsave(&zone->lru_lock, flags); - if (TestClearPageLRU(page)) - del_page_from_lru(zone, page); - if (page_count(page) != 0) - page = NULL; - spin_unlock_irqrestore(&zone->lru_lock, flags); - if (page) - free_hot_page(page); + pvec = &per_cpu(lru_rotate_pvecs, cpu); + if (pagevec_count(pvec)) { + unsigned long flags; + + /* No harm done if a racing interrupt already did this */ + local_irq_save(flags); + pagevec_move_tail(pvec); + local_irq_restore(flags); + } +} + +void lru_add_drain(void) +{ + drain_cpu_pagevecs(get_cpu()); + put_cpu(); +} + +static void lru_add_drain_per_cpu(struct work_struct *dummy) +{ + lru_add_drain(); } -EXPORT_SYMBOL(__page_cache_release); +/* + * Returns 0 for success + */ +int lru_add_drain_all(void) +{ + return schedule_on_each_cpu(lru_add_drain_per_cpu); +} /* * Batched page_cache_release(). Decrement the reference count on all the @@ -200,44 +320,60 @@ EXPORT_SYMBOL(__page_cache_release); * Avoid taking zone->lru_lock if possible, but if it is taken, retain it * for the remainder of the operation. * - * The locking in this function is against shrink_cache(): we recheck the - * page count inside the lock to see whether shrink_cache grabbed the page - * via the LRU. If it did, give up: shrink_cache will free it. + * The locking in this function is against shrink_inactive_list(): we recheck + * the page count inside the lock to see whether shrink_inactive_list() + * grabbed the page via the LRU. If it did, give up: shrink_inactive_list() + * will free it. 
*/ void release_pages(struct page **pages, int nr, int cold) { int i; struct pagevec pages_to_free; struct zone *zone = NULL; + unsigned long uninitialized_var(flags); pagevec_init(&pages_to_free, cold); for (i = 0; i < nr; i++) { struct page *page = pages[i]; - struct zone *pagezone; + + if (unlikely(PageCompound(page))) { + if (zone) { + spin_unlock_irqrestore(&zone->lru_lock, flags); + zone = NULL; + } + put_compound_page(page); + continue; + } if (!put_page_testzero(page)) continue; - pagezone = page_zone(page); - if (pagezone != zone) { - if (zone) - spin_unlock_irq(&zone->lru_lock); - zone = pagezone; - spin_lock_irq(&zone->lru_lock); - } - if (TestClearPageLRU(page)) - del_page_from_lru(zone, page); - if (page_count(page) == 0) { - if (!pagevec_add(&pages_to_free, page)) { - spin_unlock_irq(&zone->lru_lock); - __pagevec_free(&pages_to_free); - pagevec_reinit(&pages_to_free); - zone = NULL; /* No lock is held */ + if (PageLRU(page)) { + struct zone *pagezone = page_zone(page); + + if (pagezone != zone) { + if (zone) + spin_unlock_irqrestore(&zone->lru_lock, + flags); + zone = pagezone; + spin_lock_irqsave(&zone->lru_lock, flags); } + VM_BUG_ON(!PageLRU(page)); + __ClearPageLRU(page); + del_page_from_lru(zone, page); } + + if (!pagevec_add(&pages_to_free, page)) { + if (zone) { + spin_unlock_irqrestore(&zone->lru_lock, flags); + zone = NULL; + } + __pagevec_free(&pages_to_free); + pagevec_reinit(&pages_to_free); + } } if (zone) - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irqrestore(&zone->lru_lock, flags); pagevec_free(&pages_to_free); } @@ -262,66 +398,21 @@ void __pagevec_release(struct pagevec *pvec) EXPORT_SYMBOL(__pagevec_release); /* - * pagevec_release() for pages which are known to not be on the LRU - * - * This function reinitialises the caller's pagevec. - */ -void __pagevec_release_nonlru(struct pagevec *pvec) -{ - int i; - struct pagevec pages_to_free; - - pagevec_init(&pages_to_free, pvec->cold); - for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - - BUG_ON(PageLRU(page)); - if (put_page_testzero(page)) - pagevec_add(&pages_to_free, page); - } - pagevec_free(&pages_to_free); - pagevec_reinit(pvec); -} - -/* * Add the passed pages to the LRU, then drop the caller's refcount * on them. Reinitialises the caller's pagevec. 
*/ -void __pagevec_lru_add(struct pagevec *pvec) +void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) { int i; struct zone *zone = NULL; - for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - struct zone *pagezone = page_zone(page); - - if (pagezone != zone) { - if (zone) - spin_unlock_irq(&zone->lru_lock); - zone = pagezone; - spin_lock_irq(&zone->lru_lock); - } - if (TestSetPageLRU(page)) - BUG(); - add_page_to_inactive_list(zone, page); - } - if (zone) - spin_unlock_irq(&zone->lru_lock); - release_pages(pvec->pages, pvec->nr, pvec->cold); - pagevec_reinit(pvec); -} - -EXPORT_SYMBOL(__pagevec_lru_add); - -void __pagevec_lru_add_active(struct pagevec *pvec) -{ - int i; - struct zone *zone = NULL; + VM_BUG_ON(is_unevictable_lru(lru)); for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; struct zone *pagezone = page_zone(page); + int file; + int active; if (pagezone != zone) { if (zone) @@ -329,11 +420,16 @@ void __pagevec_lru_add_active(struct pagevec *pvec) zone = pagezone; spin_lock_irq(&zone->lru_lock); } - if (TestSetPageLRU(page)) - BUG(); - if (TestSetPageActive(page)) - BUG(); - add_page_to_active_list(zone, page); + VM_BUG_ON(PageActive(page)); + VM_BUG_ON(PageUnevictable(page)); + VM_BUG_ON(PageLRU(page)); + SetPageLRU(page); + active = is_active_lru(lru); + file = is_file_lru(lru); + if (active) + SetPageActive(page); + update_page_reclaim_stat(zone, page, file, active); + add_page_to_lru_list(zone, page, lru); } if (zone) spin_unlock_irq(&zone->lru_lock); @@ -341,6 +437,8 @@ void __pagevec_lru_add_active(struct pagevec *pvec) pagevec_reinit(pvec); } +EXPORT_SYMBOL(____pagevec_lru_add); + /* * Try to drop buffers from the pages in a pagevec */ @@ -351,8 +449,9 @@ void pagevec_strip(struct pagevec *pvec) for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; - if (PagePrivate(page) && !TestSetPageLocked(page)) { - try_to_release_page(page, 0); + if (page_has_private(page) && trylock_page(page)) { + if (page_has_private(page)) + try_to_release_page(page, 0); unlock_page(page); } } @@ -381,6 +480,8 @@ unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, return pagevec_count(pvec); } +EXPORT_SYMBOL(pagevec_lookup); + unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, int tag, unsigned nr_pages) { @@ -391,88 +492,16 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, EXPORT_SYMBOL(pagevec_lookup_tag); -#ifdef CONFIG_SMP -/* - * We tolerate a little inaccuracy to avoid ping-ponging the counter between - * CPUs - */ -#define ACCT_THRESHOLD max(16, NR_CPUS * 2) - -static DEFINE_PER_CPU(long, committed_space) = 0; - -void vm_acct_memory(long pages) -{ - long *local; - - preempt_disable(); - local = &__get_cpu_var(committed_space); - *local += pages; - if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { - atomic_add(*local, &vm_committed_space); - *local = 0; - } - preempt_enable(); -} -EXPORT_SYMBOL(vm_acct_memory); - -#ifdef CONFIG_HOTPLUG_CPU -static void lru_drain_cache(unsigned int cpu) -{ - struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu); - - /* CPU is dead, so no locking needed. */ - if (pagevec_count(pvec)) - __pagevec_lru_add(pvec); - pvec = &per_cpu(lru_add_active_pvecs, cpu); - if (pagevec_count(pvec)) - __pagevec_lru_add_active(pvec); -} - -/* Drop the CPU's cached committed space back into the central pool. 
*/ -static int cpu_swap_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - long *committed; - - committed = &per_cpu(committed_space, (long)hcpu); - if (action == CPU_DEAD) { - atomic_add(*committed, &vm_committed_space); - *committed = 0; - lru_drain_cache((long)hcpu); - } - return NOTIFY_OK; -} -#endif /* CONFIG_HOTPLUG_CPU */ -#endif /* CONFIG_SMP */ - -#ifdef CONFIG_SMP -void percpu_counter_mod(struct percpu_counter *fbc, long amount) -{ - long count; - long *pcount; - int cpu = get_cpu(); - - pcount = per_cpu_ptr(fbc->counters, cpu); - count = *pcount + amount; - if (count >= FBC_BATCH || count <= -FBC_BATCH) { - spin_lock(&fbc->lock); - fbc->count += count; - spin_unlock(&fbc->lock); - count = 0; - } - *pcount = count; - put_cpu(); -} -EXPORT_SYMBOL(percpu_counter_mod); -#endif - /* * Perform any setup for the swap system */ void __init swap_setup(void) { - unsigned long megs = num_physpages >> (20 - PAGE_SHIFT); + unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); + +#ifdef CONFIG_SWAP + bdi_init(swapper_space.backing_dev_info); +#endif /* Use a smaller cluster for small-memory machines */ if (megs < 16) @@ -483,5 +512,4 @@ void __init swap_setup(void) * Right now other parts of the system means that we * _really_ don't want to cluster much more */ - hotcpu_notifier(cpu_swap_callback, 0); }
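
Editor's note (not part of the patch): the hunks above replace the old per-CPU lru_add_pvecs / lru_add_active_pvecs pair with one pagevec per LRU list plus a lru_rotate_pvecs for deferred tail-rotation, so zone->lru_lock is taken (with interrupts disabled) once per batch of pages rather than once per page. The sketch below is an illustrative user-space model of that batching pattern only, under stated assumptions: a pthread mutex stands in for zone->lru_lock, printf() stands in for the real list manipulation, and the batch size of 14 is assumed to match the kernel's PAGEVEC_SIZE of this era. The pagevec_* names mirror the kernel API seen in the diff, but this is a simplified sketch, not the kernel implementation.

/*
 * Illustrative sketch of the per-CPU pagevec batching pattern used by
 * __lru_cache_add()/____pagevec_lru_add() in the patch above.  Pages are
 * queued in a small local "pagevec"; only when the batch fills up is the
 * (here: simulated) LRU lock taken, once for the whole batch.
 */
#include <pthread.h>
#include <stdio.h>

#define PAGEVEC_SIZE 14			/* assumed: kernel PAGEVEC_SIZE of this era */

struct page {
	int id;
};

struct pagevec {
	unsigned long nr;
	struct page *pages[PAGEVEC_SIZE];
};

/* Stand-in for zone->lru_lock; the real code uses spin_lock_irq(). */
static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

/* Queue a page; returns the space left, 0 meaning "batch is full". */
static unsigned long pagevec_add(struct pagevec *pvec, struct page *page)
{
	pvec->pages[pvec->nr++] = page;
	return PAGEVEC_SIZE - pvec->nr;
}

/* Drain the whole batch under a single lock acquisition. */
static void pagevec_lru_add(struct pagevec *pvec)
{
	unsigned long i;

	pthread_mutex_lock(&lru_lock);
	for (i = 0; i < pvec->nr; i++)
		printf("adding page %d to the LRU\n", pvec->pages[i]->id);
	pthread_mutex_unlock(&lru_lock);
	pvec->nr = 0;			/* pagevec_reinit() */
}

/* Per-page entry point, analogous to __lru_cache_add() in the patch. */
static void lru_cache_add(struct pagevec *pvec, struct page *page)
{
	if (!pagevec_add(pvec, page))
		pagevec_lru_add(pvec);
}

int main(void)
{
	static struct pagevec pvec;	/* models one CPU's lru_add pagevec */
	struct page pages[40];
	int i;

	for (i = 0; i < 40; i++) {
		pages[i].id = i;
		lru_cache_add(&pvec, &pages[i]);
	}
	pagevec_lru_add(&pvec);		/* like lru_add_drain(): flush the partial batch */
	return 0;
}

In the kernel, lru_add_drain() and drain_cpu_pagevecs() play the role of the final flush call here: whatever remains in the per-CPU pagevecs is pushed onto the zone LRU lists before reclaim or release needs to see those pages, which is why release_pages() and the rotate path in the patch can treat pagevec-held pages as the common case.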