X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fmigrate.c;h=88000b89fc9a567768509926b7baf5dc44d2d7d7;hb=c62b1a3b31b5e27a6c5c2e91cc5ce05fdb6344d0;hp=2bb4e1d6352046e9030a789a9b78c385cd104c5b;hpb=938bb9f5e840eddbf54e4f62f6c5ba9b3ae12c9d;p=safe%2Fjmp%2Flinux-2.6

diff --git a/mm/migrate.c b/mm/migrate.c
index 2bb4e1d..88000b8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -21,6 +21,7 @@
 #include <linux/mm_inline.h>
 #include <linux/nsproxy.h>
 #include <linux/pagevec.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
@@ -67,6 +68,8 @@ int putback_lru_pages(struct list_head *l)
 
 	list_for_each_entry_safe(page, page2, l, lru) {
 		list_del(&page->lru);
+		dec_zone_page_state(page, NR_ISOLATED_ANON +
+				page_is_file_cache(page));
 		putback_lru_page(page);
 		count++;
 	}
@@ -76,8 +79,8 @@ int putback_lru_pages(struct list_head *l)
 /*
  * Restore a potential migration pte to a working pte entry
  */
-static void remove_migration_pte(struct vm_area_struct *vma,
-		struct page *old, struct page *new)
+static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
+				 unsigned long addr, void *old)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	swp_entry_t entry;
@@ -86,40 +89,37 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
-	unsigned long addr = page_address_in_vma(new, vma);
-
-	if (addr == -EFAULT)
-		return;
 
 	pgd = pgd_offset(mm, addr);
 	if (!pgd_present(*pgd))
-		return;
+		goto out;
 
 	pud = pud_offset(pgd, addr);
 	if (!pud_present(*pud))
-		return;
+		goto out;
 
 	pmd = pmd_offset(pud, addr);
 	if (!pmd_present(*pmd))
-		return;
+		goto out;
 
 	ptep = pte_offset_map(pmd, addr);
 
 	if (!is_swap_pte(*ptep)) {
 		pte_unmap(ptep);
-		return;
+		goto out;
 	}
 
 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
-		goto out;
+		goto unlock;
 
 	entry = pte_to_swp_entry(pte);
 
-	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
-		goto out;
+	if (!is_migration_entry(entry) ||
+	    migration_entry_to_page(entry) != old)
+		goto unlock;
 
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
@@ -134,59 +134,11 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 		page_add_file_rmap(new);
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, addr, pte);
-
-out:
+	update_mmu_cache(vma, addr, ptep);
+unlock:
 	pte_unmap_unlock(ptep, ptl);
-}
-
-/*
- * Note that remove_file_migration_ptes will only work on regular mappings,
- * Nonlinear mappings do not use migration entries.
- */
-static void remove_file_migration_ptes(struct page *old, struct page *new)
-{
-	struct vm_area_struct *vma;
-	struct address_space *mapping = page_mapping(new);
-	struct prio_tree_iter iter;
-	pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-
-	if (!mapping)
-		return;
-
-	spin_lock(&mapping->i_mmap_lock);
-
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
-		remove_migration_pte(vma, old, new);
-
-	spin_unlock(&mapping->i_mmap_lock);
-}
-
-/*
- * Must hold mmap_sem lock on at least one of the vmas containing
- * the page so that the anon_vma cannot vanish.
- */
-static void remove_anon_migration_ptes(struct page *old, struct page *new)
-{
-	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
-	unsigned long mapping;
-
-	mapping = (unsigned long)new->mapping;
-
-	if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
-		return;
-
-	/*
-	 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
-	 */
-	anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
-	spin_lock(&anon_vma->lock);
-
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
-		remove_migration_pte(vma, old, new);
-
-	spin_unlock(&anon_vma->lock);
+out:
+	return SWAP_AGAIN;
 }
 
 /*
@@ -195,10 +147,7 @@ static void remove_anon_migration_ptes(struct page *old, struct page *new)
  */
 static void remove_migration_ptes(struct page *old, struct page *new)
 {
-	if (PageAnon(new))
-		remove_anon_migration_ptes(old, new);
-	else
-		remove_file_migration_ptes(old, new);
+	rmap_walk(new, remove_migration_pte, old);
 }
 
 /*
@@ -250,7 +199,7 @@ out:
  * The number of remaining references must be:
  * 1 for anonymous pages without a mapping
  * 2 for pages with a mapping
- * 3 for pages with a mapping and PagePrivate set.
+ * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
  */
 static int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page)
@@ -270,7 +219,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	pslot = radix_tree_lookup_slot(&mapping->page_tree,
 					page_index(page));
 
-	expected_count = 2 + !!PagePrivate(page);
+	expected_count = 2 + page_has_private(page);
 	if (page_count(page) != expected_count ||
 			(struct page *)radix_tree_deref_slot(pslot) != page) {
 		spin_unlock_irq(&mapping->tree_lock);
@@ -312,7 +261,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	 */
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	__inc_zone_page_state(newpage, NR_FILE_PAGES);
-
+	if (PageSwapBacked(page)) {
+		__dec_zone_page_state(page, NR_SHMEM);
+		__inc_zone_page_state(newpage, NR_SHMEM);
+	}
 	spin_unlock_irq(&mapping->tree_lock);
 
 	return 0;
@@ -323,8 +275,6 @@ static int migrate_page_move_mapping(struct address_space *mapping,
  */
 static void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	int anon;
-
 	copy_highpage(newpage, page);
 
 	if (PageError(page))
@@ -336,8 +286,8 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 	if (TestClearPageActive(page)) {
 		VM_BUG_ON(PageUnevictable(page));
 		SetPageActive(newpage);
-	} else
-		unevictable_migrate_page(newpage, page);
+	} else if (TestClearPageUnevictable(page))
+		SetPageUnevictable(newpage);
 	if (PageChecked(page))
 		SetPageChecked(newpage);
 	if (PageMappedToDisk(page))
@@ -356,12 +306,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 	}
 
 	mlock_migrate_page(newpage, page);
+	ksm_migrate_page(newpage, page);
 
 	ClearPageSwapCache(page);
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
-	/* page->mapping contains a flag for PageAnon() */
-	anon = PageAnon(page);
 	page->mapping = NULL;
 
 	/*
@@ -386,7 +335,7 @@ EXPORT_SYMBOL(fail_migrate_page);
 
 /*
  * Common logic to directly migrate a single page suitable for
- * pages that do not use PagePrivate.
+ * pages that do not use PagePrivate/PagePrivate2.
  *
  * Pages are locked upon entry and exit.
  */
@@ -522,7 +471,7 @@ static int fallback_migrate_page(struct address_space *mapping,
 	 * Buffers may be managed in a filesystem specific way.
	 * We must have no buffers or drop them.
 	 */
-	if (PagePrivate(page) &&
+	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
 
@@ -575,9 +524,9 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	else
 		rc = fallback_migrate_page(mapping, newpage, page);
 
-	if (!rc) {
+	if (!rc)
 		remove_migration_ptes(page, newpage);
-	} else
+	else
 		newpage->mapping = NULL;
 
 	unlock_page(newpage);
@@ -590,14 +539,14 @@ static int move_to_new_page(struct page *newpage, struct page *page)
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force)
+			struct page *page, int force, int offlining)
 {
 	int rc = 0;
 	int *result = NULL;
 	struct page *newpage = get_new_page(page, private, &result);
 	int rcu_locked = 0;
 	int charge = 0;
-	struct mem_cgroup *mem;
+	struct mem_cgroup *mem = NULL;
 
 	if (!newpage)
 		return -ENOMEM;
@@ -616,6 +565,20 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		lock_page(page);
 	}
 
+	/*
+	 * Only memory hotplug's offline_pages() caller has locked out KSM,
+	 * and can safely migrate a KSM page.  The other cases have skipped
+	 * PageKsm along with PageReserved - but it is only now when we have
+	 * the page lock that we can be certain it will not go KSM beneath us
+	 * (KSM will not upgrade a page from PageAnon to PageKsm when it sees
+	 * its pagecount raised, but only here do we take the page lock which
+	 * serializes that).
+	 */
+	if (PageKsm(page) && !offlining) {
+		rc = -EBUSY;
+		goto unlock;
+	}
+
 	/* charge against new page */
 	charge = mem_cgroup_prepare_migration(page, &mem);
 	if (charge == -ENOMEM) {
@@ -655,7 +618,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	 * free the metadata, so the page can be freed.
 	 */
 	if (!page->mapping) {
-		if (!PageAnon(page) && PagePrivate(page)) {
+		if (!PageAnon(page) && page_has_private(page)) {
 			/*
 			 * Go direct to try_to_free_buffers() here because
 			 * a) that's what try_to_release_page() would do anyway
@@ -664,13 +627,15 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 			 * needs to be effective.
 			 */
 			try_to_free_buffers(page);
+			goto rcu_unlock;
 		}
-		goto rcu_unlock;
+		goto skip_unmap;
 	}
 
 	/* Establish migration ptes or remove ptes */
-	try_to_unmap(page, 1);
+	try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
 
+skip_unmap:
 	if (!page_mapped(page))
 		rc = move_to_new_page(newpage, page);
 
@@ -693,6 +658,8 @@ unlock:
 		 * restored.
 		 */
 		list_del(&page->lru);
+		dec_zone_page_state(page, NR_ISOLATED_ANON +
+				page_is_file_cache(page));
 		putback_lru_page(page);
 	}
 
@@ -728,7 +695,7 @@ move_newpage:
  * Return: Number of pages not migrated or error code.
 */
 int migrate_pages(struct list_head *from,
-		new_page_t get_new_page, unsigned long private)
+		new_page_t get_new_page, unsigned long private, int offlining)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -748,7 +715,7 @@ int migrate_pages(struct list_head *from,
 			cond_resched();
 
 			rc = unmap_and_move(get_new_page, private,
-						page, pass > 2);
+						page, pass > 2, offlining);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -802,7 +769,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
 
 	*result = &pm->status;
 
-	return alloc_pages_node(pm->node,
+	return alloc_pages_exact_node(pm->node,
 				GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
 }
 
@@ -820,7 +787,6 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 	struct page_to_node *pp;
 	LIST_HEAD(pagelist);
 
-	migrate_prep();
 	down_read(&mm->mmap_sem);
 
 	/*
@@ -845,7 +811,8 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 		if (!page)
 			goto set_status;
 
-		if (PageReserved(page))		/* Check for zero page */
+		/* Use PageReserved to check for zero page */
+		if (PageReserved(page) || PageKsm(page))
 			goto put_and_set;
 		pp->page = page;
 		err = page_to_nid(page);
@@ -863,8 +830,11 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 			goto put_and_set;
 
 		err = isolate_lru_page(page);
-		if (!err)
+		if (!err) {
 			list_add_tail(&page->lru, &pagelist);
+			inc_zone_page_state(page, NR_ISOLATED_ANON +
+					page_is_file_cache(page));
+		}
 put_and_set:
 		/*
 		 * Either remove the duplicate refcount from
@@ -879,7 +849,7 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist))
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm);
+				(unsigned long)pm, 0);
 
 	up_read(&mm->mmap_sem);
 	return err;
@@ -907,6 +877,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
 	if (!pm)
 		goto out;
+
+	migrate_prep();
+
 	/*
 	 * Store a chunk of page_to_node array in a page,
 	 * but keep the last one as a marker
@@ -935,6 +908,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 				goto out_pm;
 
 			err = -ENODEV;
+			if (node < 0 || node >= MAX_NUMNODES)
+				goto out_pm;
+
 			if (!node_state(node, N_HIGH_MEMORY))
 				goto out_pm;
 
@@ -997,7 +973,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
 
 		err = -ENOENT;
 		/* Use PageReserved to check for zero page */
-		if (!page || PageReserved(page))
+		if (!page || PageReserved(page) || PageKsm(page))
 			goto set_status;
 
 		err = page_to_nid(page);
@@ -1022,33 +998,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
 #define DO_PAGES_STAT_CHUNK_NR 16
 	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
 	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
-	unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
-	int err;
 
-	for (i = 0; i < nr_pages; i += chunk_nr) {
-		if (chunk_nr + i > nr_pages)
-			chunk_nr = nr_pages - i;
+	while (nr_pages) {
+		unsigned long chunk_nr;
 
-		err = copy_from_user(chunk_pages, &pages[i],
-				chunk_nr * sizeof(*chunk_pages));
-		if (err) {
-			err = -EFAULT;
-			goto out;
-		}
+		chunk_nr = nr_pages;
+		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
+			chunk_nr = DO_PAGES_STAT_CHUNK_NR;
+
+		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
+			break;
 
 		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
 
-		err = copy_to_user(&status[i], chunk_status,
-				chunk_nr * sizeof(*chunk_status));
-		if (err) {
-			err = -EFAULT;
-			goto out;
-		}
-	}
-	err = 0;
+		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
+			break;
 
-out:
-	return err;
+		pages += chunk_nr;
+		status += chunk_nr;
+		nr_pages -= chunk_nr;
+	}
+	return nr_pages ? -EFAULT : 0;
 }
 
 /*
@@ -1129,7 +1099,7 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
 	struct vm_area_struct *vma;
 	int err = 0;
 
-	for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) {
+	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
 		if (vma->vm_ops && vma->vm_ops->migrate) {
 			err = vma->vm_ops->migrate(vma, to, from, flags);
 			if (err)
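
Most of the hunks above adjust kernel-internal migration paths, but the do_pages_move()/do_pages_stat() changes sit directly behind the move_pages(2) syscall. As a point of reference only (not part of the patch), here is a minimal userspace sketch that exercises both paths: a NULL nodes array queries the node of each page via do_pages_stat(), and a non-NULL nodes array requests migration via do_pages_move() and migrate_pages(). It assumes a NUMA-enabled kernel, libnuma's <numaif.h> wrapper (link with -lnuma), and that node 0 is a valid target; the single-page buffer and target node are illustrative values, not anything taken from the patch.

/*
 * Illustrative only -- not part of mm/migrate.c or this patch.
 * Query the node of one anonymous page, then ask the kernel to
 * migrate it to node 0.  Build: gcc move_pages_demo.c -o demo -lnuma
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <numaif.h>		/* move_pages(), MPOL_MF_MOVE (libnuma) */

int main(void)
{
	long psize = sysconf(_SC_PAGESIZE);
	void *buf;

	/* One page-aligned, faulted-in anonymous page. */
	if (posix_memalign(&buf, psize, psize))
		return 1;
	memset(buf, 0, psize);

	void *pages[1] = { buf };
	int status[1] = { -1 };

	/* nodes == NULL: only report where each page currently is
	 * (the do_pages_stat() chunked-copy path touched above). */
	if (move_pages(0, 1, pages, NULL, status, 0) == 0)
		printf("page is on node %d\n", status[0]);
	else
		perror("move_pages(stat)");

	/* Non-NULL nodes: request migration of the page to node 0
	 * (do_pages_move() -> migrate_pages() in the patched kernel). */
	int nodes[1] = { 0 };
	if (move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE) == 0)
		printf("after move request, status = %d\n", status[0]);
	else
		perror("move_pages(move)");

	free(buf);
	return 0;
}

Passing pid 0 targets the calling process; per-page results land in status[] (a node number, or a negative errno), which is the array the reworked chunked copy_to_user() loop in do_pages_stat() fills sixteen entries at a time.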