X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fmigrate.c;h=4205b1d6049ed2f6a07da22c0a100d52d6ccd0f8;hb=cad719d86e9dbd06634eaba6401e022c8101d6b2;hp=0bc640fd68fa64c38915d40fe20db545c1784799;hpb=b925585039cf39275c2e0e57512e5df27fa73aad;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/migrate.c b/mm/migrate.c index 0bc640f..4205b1d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include #include +#include #include "internal.h" @@ -38,7 +40,8 @@ /* * migrate_prep() needs to be called before we start compiling a list of pages - * to be migrated using isolate_lru_page(). + * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is + * undesirable, use migrate_prep_local() */ int migrate_prep(void) { @@ -53,33 +56,36 @@ int migrate_prep(void) return 0; } +/* Do the necessary work of migrate_prep but not if it involves other CPUs */ +int migrate_prep_local(void) +{ + lru_add_drain(); + + return 0; +} + /* * Add isolated pages on the list back to the LRU under page lock * to avoid leaking evictable pages back onto unevictable list. - * - * returns the number of pages put back. */ -int putback_lru_pages(struct list_head *l) +void putback_lru_pages(struct list_head *l) { struct page *page; struct page *page2; - int count = 0; list_for_each_entry_safe(page, page2, l, lru) { list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); putback_lru_page(page); - count++; } - return count; } /* * Restore a potential migration pte to a working pte entry */ -static void remove_migration_pte(struct vm_area_struct *vma, - struct page *old, struct page *new) +static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, + unsigned long addr, void *old) { struct mm_struct *mm = vma->vm_mm; swp_entry_t entry; @@ -88,40 +94,37 @@ static void remove_migration_pte(struct vm_area_struct *vma, pmd_t *pmd; pte_t *ptep, pte; spinlock_t *ptl; - unsigned long addr = page_address_in_vma(new, vma); - - if (addr == -EFAULT) - return; pgd = pgd_offset(mm, addr); if (!pgd_present(*pgd)) - return; + goto out; pud = pud_offset(pgd, addr); if (!pud_present(*pud)) - return; + goto out; pmd = pmd_offset(pud, addr); if (!pmd_present(*pmd)) - return; + goto out; ptep = pte_offset_map(pmd, addr); if (!is_swap_pte(*ptep)) { pte_unmap(ptep); - return; + goto out; } ptl = pte_lockptr(mm, pmd); spin_lock(ptl); pte = *ptep; if (!is_swap_pte(pte)) - goto out; + goto unlock; entry = pte_to_swp_entry(pte); - if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) - goto out; + if (!is_migration_entry(entry) || + migration_entry_to_page(entry) != old) + goto unlock; get_page(new); pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); @@ -136,59 +139,11 @@ static void remove_migration_pte(struct vm_area_struct *vma, page_add_file_rmap(new); /* No need to invalidate - it was non-present before */ - update_mmu_cache(vma, addr, pte); - -out: + update_mmu_cache(vma, addr, ptep); +unlock: pte_unmap_unlock(ptep, ptl); -} - -/* - * Note that remove_file_migration_ptes will only work on regular mappings, - * Nonlinear mappings do not use migration entries. - */ -static void remove_file_migration_ptes(struct page *old, struct page *new) -{ - struct vm_area_struct *vma; - struct address_space *mapping = new->mapping; - struct prio_tree_iter iter; - pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - - if (!mapping) - return; - - spin_lock(&mapping->i_mmap_lock); - - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) - remove_migration_pte(vma, old, new); - - spin_unlock(&mapping->i_mmap_lock); -} - -/* - * Must hold mmap_sem lock on at least one of the vmas containing - * the page so that the anon_vma cannot vanish. - */ -static void remove_anon_migration_ptes(struct page *old, struct page *new) -{ - struct anon_vma *anon_vma; - struct vm_area_struct *vma; - unsigned long mapping; - - mapping = (unsigned long)new->mapping; - - if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0) - return; - - /* - * We hold the mmap_sem lock. So no need to call page_lock_anon_vma. - */ - anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON); - spin_lock(&anon_vma->lock); - - list_for_each_entry(vma, &anon_vma->head, anon_vma_node) - remove_migration_pte(vma, old, new); - - spin_unlock(&anon_vma->lock); +out: + return SWAP_AGAIN; } /* @@ -197,10 +152,7 @@ static void remove_anon_migration_ptes(struct page *old, struct page *new) */ static void remove_migration_ptes(struct page *old, struct page *new) { - if (PageAnon(new)) - remove_anon_migration_ptes(old, new); - else - remove_file_migration_ptes(old, new); + rmap_walk(new, remove_migration_pte, old); } /* @@ -328,8 +280,6 @@ static int migrate_page_move_mapping(struct address_space *mapping, */ static void migrate_page_copy(struct page *newpage, struct page *page) { - int anon; - copy_highpage(newpage, page); if (PageError(page)) @@ -341,8 +291,8 @@ static void migrate_page_copy(struct page *newpage, struct page *page) if (TestClearPageActive(page)) { VM_BUG_ON(PageUnevictable(page)); SetPageActive(newpage); - } else - unevictable_migrate_page(newpage, page); + } else if (TestClearPageUnevictable(page)) + SetPageUnevictable(newpage); if (PageChecked(page)) SetPageChecked(newpage); if (PageMappedToDisk(page)) @@ -361,12 +311,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page) } mlock_migrate_page(newpage, page); + ksm_migrate_page(newpage, page); ClearPageSwapCache(page); ClearPagePrivate(page); set_page_private(page, 0); - /* page->mapping contains a flag for PageAnon() */ - anon = PageAnon(page); page->mapping = NULL; /* @@ -545,7 +494,8 @@ static int fallback_migrate_page(struct address_space *mapping, * < 0 - error code * == 0 - success */ -static int move_to_new_page(struct page *newpage, struct page *page) +static int move_to_new_page(struct page *newpage, struct page *page, + int remap_swapcache) { struct address_space *mapping; int rc; @@ -580,10 +530,12 @@ static int move_to_new_page(struct page *newpage, struct page *page) else rc = fallback_migrate_page(mapping, newpage, page); - if (!rc) { - remove_migration_ptes(page, newpage); - } else + if (rc) { newpage->mapping = NULL; + } else { + if (remap_swapcache) + remove_migration_ptes(page, newpage); + } unlock_page(newpage); @@ -595,14 +547,16 @@ static int move_to_new_page(struct page *newpage, struct page *page) * to the newly allocated page in newpage. */ static int unmap_and_move(new_page_t get_new_page, unsigned long private, - struct page *page, int force) + struct page *page, int force, int offlining) { int rc = 0; int *result = NULL; struct page *newpage = get_new_page(page, private, &result); + int remap_swapcache = 1; int rcu_locked = 0; int charge = 0; struct mem_cgroup *mem = NULL; + struct anon_vma *anon_vma = NULL; if (!newpage) return -ENOMEM; @@ -621,8 +575,22 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, lock_page(page); } + /* + * Only memory hotplug's offline_pages() caller has locked out KSM, + * and can safely migrate a KSM page. The other cases have skipped + * PageKsm along with PageReserved - but it is only now when we have + * the page lock that we can be certain it will not go KSM beneath us + * (KSM will not upgrade a page from PageAnon to PageKsm when it sees + * its pagecount raised, but only here do we take the page lock which + * serializes that). + */ + if (PageKsm(page) && !offlining) { + rc = -EBUSY; + goto unlock; + } + /* charge against new page */ - charge = mem_cgroup_prepare_migration(page, &mem); + charge = mem_cgroup_prepare_migration(page, newpage, &mem); if (charge == -ENOMEM) { rc = -ENOMEM; goto unlock; @@ -645,6 +613,34 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, if (PageAnon(page)) { rcu_read_lock(); rcu_locked = 1; + + /* Determine how to safely use anon_vma */ + if (!page_mapped(page)) { + if (!PageSwapCache(page)) + goto rcu_unlock; + + /* + * We cannot be sure that the anon_vma of an unmapped + * swapcache page is safe to use because we don't + * know in advance if the VMA that this page belonged + * to still exists. If the VMA and others sharing the + * data have been freed, then the anon_vma could + * already be invalid. + * + * To avoid this possibility, swapcache pages get + * migrated but are not remapped when migration + * completes + */ + remap_swapcache = 0; + } else { + /* + * Take a reference count on the anon_vma if the + * page is mapped so that it is guaranteed to + * exist when the page is remapped later + */ + anon_vma = page_anon_vma(page); + atomic_inc(&anon_vma->external_refcount); + } } /* @@ -679,11 +675,20 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, skip_unmap: if (!page_mapped(page)) - rc = move_to_new_page(newpage, page); + rc = move_to_new_page(newpage, page, remap_swapcache); - if (rc) + if (rc && remap_swapcache) remove_migration_ptes(page, page); rcu_unlock: + + /* Drop an anon_vma reference if we took one */ + if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) { + int empty = list_empty(&anon_vma->head); + spin_unlock(&anon_vma->lock); + if (empty) + anon_vma_free(anon_vma); + } + if (rcu_locked) rcu_read_unlock(); uncharge: @@ -737,7 +742,7 @@ move_newpage: * Return: Number of pages not migrated or error code. */ int migrate_pages(struct list_head *from, - new_page_t get_new_page, unsigned long private) + new_page_t get_new_page, unsigned long private, int offlining) { int retry = 1; int nr_failed = 0; @@ -746,13 +751,6 @@ int migrate_pages(struct list_head *from, struct page *page2; int swapwrite = current->flags & PF_SWAPWRITE; int rc; - unsigned long flags; - - local_irq_save(flags); - list_for_each_entry(page, from, lru) - __inc_zone_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); - local_irq_restore(flags); if (!swapwrite) current->flags |= PF_SWAPWRITE; @@ -764,7 +762,7 @@ int migrate_pages(struct list_head *from, cond_resched(); rc = unmap_and_move(get_new_page, private, - page, pass > 2); + page, pass > 2, offlining); switch(rc) { case -ENOMEM: @@ -860,7 +858,8 @@ static int do_move_page_to_node_array(struct mm_struct *mm, if (!page) goto set_status; - if (PageReserved(page)) /* Check for zero page */ + /* Use PageReserved to check for zero page */ + if (PageReserved(page) || PageKsm(page)) goto put_and_set; pp->page = page; @@ -878,8 +877,11 @@ static int do_move_page_to_node_array(struct mm_struct *mm, goto put_and_set; err = isolate_lru_page(page); - if (!err) + if (!err) { list_add_tail(&page->lru, &pagelist); + inc_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); + } put_and_set: /* * Either remove the duplicate refcount from @@ -894,7 +896,7 @@ set_status: err = 0; if (!list_empty(&pagelist)) err = migrate_pages(&pagelist, new_page_node, - (unsigned long)pm); + (unsigned long)pm, 0); up_read(&mm->mmap_sem); return err; @@ -953,6 +955,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task, goto out_pm; err = -ENODEV; + if (node < 0 || node >= MAX_NUMNODES) + goto out_pm; + if (!node_state(node, N_HIGH_MEMORY)) goto out_pm; @@ -1015,7 +1020,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, err = -ENOENT; /* Use PageReserved to check for zero page */ - if (!page || PageReserved(page)) + if (!page || PageReserved(page) || PageKsm(page)) goto set_status; err = page_to_nid(page); @@ -1040,33 +1045,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, #define DO_PAGES_STAT_CHUNK_NR 16 const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; int chunk_status[DO_PAGES_STAT_CHUNK_NR]; - unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR; - int err; - for (i = 0; i < nr_pages; i += chunk_nr) { - if (chunk_nr > nr_pages - i) - chunk_nr = nr_pages - i; + while (nr_pages) { + unsigned long chunk_nr; - err = copy_from_user(chunk_pages, &pages[i], - chunk_nr * sizeof(*chunk_pages)); - if (err) { - err = -EFAULT; - goto out; - } + chunk_nr = nr_pages; + if (chunk_nr > DO_PAGES_STAT_CHUNK_NR) + chunk_nr = DO_PAGES_STAT_CHUNK_NR; + + if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages))) + break; do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); - err = copy_to_user(&status[i], chunk_status, - chunk_nr * sizeof(*chunk_status)); - if (err) { - err = -EFAULT; - goto out; - } - } - err = 0; + if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status))) + break; -out: - return err; + pages += chunk_nr; + status += chunk_nr; + nr_pages -= chunk_nr; + } + return nr_pages ? -EFAULT : 0; } /*