X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fmigrate.c;h=ba2453f9483dfe4b03b3df1c089607da36ebb60c;hb=35ae61a0f43ebbabc3cb4345136ca529fc4d6700;hp=d3a1810a4c9fe01bd32e46c763ccaea86fdbfcef;hpb=aaa994b300a172afafab47938804836b923e5ef7;p=safe%2Fjmp%2Flinux-2.6

diff --git a/mm/migrate.c b/mm/migrate.c
index d3a1810..ba2453f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -25,12 +25,12 @@
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/writeback.h>
+#include <linux/mempolicy.h>
+#include <linux/vmalloc.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
-/* The maximum number of pages to take off the LRU for migration */
-#define MIGRATE_CHUNK_SIZE 256
-
 #define lru_to_page(_head)	(list_entry((_head)->prev, struct page, lru))
 
 /*
@@ -65,9 +65,8 @@ int isolate_lru_page(struct page *page, struct list_head *pagelist)
 }
 
 /*
- * migrate_prep() needs to be called after we have compiled the list of pages
- * to be migrated using isolate_lru_page() but before we begin a series of calls
- * to migrate_pages().
+ * migrate_prep() needs to be called before we start compiling a list of pages
+ * to be migrated using isolate_lru_page().
  */
 int migrate_prep(void)
 {
@@ -410,6 +409,7 @@ int migrate_page(struct address_space *mapping,
 }
 EXPORT_SYMBOL(migrate_page);
 
+#ifdef CONFIG_BLOCK
 /*
  * Migration function for pages with buffers. This function can only be used
  * if the underlying filesystem guarantees that no other references to "page"
@@ -467,6 +467,7 @@ int buffer_migrate_page(struct address_space *mapping,
 	return 0;
 }
 EXPORT_SYMBOL(buffer_migrate_page);
+#endif
 
 /*
  * Writeback a page to clean the dirty state
@@ -526,7 +527,7 @@ static int fallback_migrate_page(struct address_space *mapping,
 	 * Buffers may be managed in a filesystem specific way.
	 * We must have no buffers or drop them.
 	 */
-	if (page_has_buffers(page) &&
+	if (PagePrivate(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
 
@@ -587,18 +588,24 @@ static int move_to_new_page(struct page *newpage, struct page *page)
  * Obtain the lock on page, remove all ptes and migrate the page
  * to the newly allocated page in newpage.
  */
-static int unmap_and_move(struct page *newpage, struct page *page, int force)
+static int unmap_and_move(new_page_t get_new_page, unsigned long private,
+			struct page *page, int force)
 {
 	int rc = 0;
+	int *result = NULL;
+	struct page *newpage = get_new_page(page, private, &result);
+
+	if (!newpage)
+		return -ENOMEM;
 
 	if (page_count(page) == 1)
 		/* page was freed from under us. So we are done. */
-		goto ret;
+		goto move_newpage;
 
 	rc = -EAGAIN;
 	if (TestSetPageLocked(page)) {
 		if (!force)
-			goto ret;
+			goto move_newpage;
 		lock_page(page);
 	}
 
@@ -611,18 +618,16 @@ static int unmap_and_move(struct page *newpage, struct page *page, int force)
 	/*
 	 * Establish migration ptes or remove ptes
 	 */
-	if (try_to_unmap(page, 1) != SWAP_FAIL) {
-		if (!page_mapped(page))
-			rc = move_to_new_page(newpage, page);
-	} else
-		/* A vma has VM_LOCKED set -> permanent failure */
-		rc = -EPERM;
+	try_to_unmap(page, 1);
+	if (!page_mapped(page))
+		rc = move_to_new_page(newpage, page);
 
 	if (rc)
 		remove_migration_ptes(page, page);
+
 unlock:
 	unlock_page(page);
-ret:
+
 	if (rc != -EAGAIN) {
 		/*
 		 * A page that has been migrated has all references
@@ -632,9 +637,19 @@ ret:
 		 */
 		list_del(&page->lru);
 		move_to_lru(page);
+	}
 
-		list_del(&newpage->lru);
-		move_to_lru(newpage);
+move_newpage:
+	/*
+	 * Move the new page to the LRU. If migration was not successful
+	 * then this will free the page.
+	 */
+	move_to_lru(newpage);
+	if (result) {
+		if (rc)
+			*result = rc;
+		else
+			*result = page_to_nid(newpage);
 	}
 	return rc;
 }
@@ -642,19 +657,19 @@ ret:
 /*
  * migrate_pages
  *
- * Two lists are passed to this function. The first list
- * contains the pages isolated from the LRU to be migrated.
- * The second list contains new pages that the isolated pages
- * can be moved to.
+ * The function takes one list of pages to migrate and a function
+ * that determines from the page to be migrated and the private data
+ * the target of the move and allocates the page.
 *
 * The function returns after 10 attempts or if no pages
 * are movable anymore because the list has become empty
 * or no retryable pages exist anymore. All pages will be
 * returned to the LRU or freed.
 *
- * Return: Number of pages not migrated.
+ * Return: Number of pages not migrated or error code.
 */
-int migrate_pages(struct list_head *from, struct list_head *to)
+int migrate_pages(struct list_head *from,
+		new_page_t get_new_page, unsigned long private)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -671,15 +686,14 @@ int migrate_pages(struct list_head *from, struct list_head *to)
 		retry = 0;
 
 		list_for_each_entry_safe(page, page2, from, lru) {
-
-			if (list_empty(to))
-				break;
-
 			cond_resched();
 
-			rc = unmap_and_move(lru_to_page(to), page, pass > 2);
+			rc = unmap_and_move(get_new_page, private,
+						page, pass > 2);
 
 			switch(rc) {
+			case -ENOMEM:
+				goto out;
 			case -EAGAIN:
 				retry++;
 				break;
@@ -692,72 +706,293 @@
 			}
 		}
 	}
-
+	rc = 0;
+out:
 	if (!swapwrite)
 		current->flags &= ~PF_SWAPWRITE;
 
 	putback_lru_pages(from);
+
+	if (rc)
+		return rc;
+
 	return nr_failed + retry;
 }
 
+#ifdef CONFIG_NUMA
 /*
- * Migrate the list 'pagelist' of pages to a certain destination.
- *
- * Specify destination with either non-NULL vma or dest_node >= 0
- * Return the number of pages not migrated or error code
+ * Move a list of individual pages
 */
-int migrate_pages_to(struct list_head *pagelist,
-			struct vm_area_struct *vma, int dest)
-{
-	LIST_HEAD(newlist);
-	int err = 0;
-	unsigned long offset = 0;
-	int nr_pages;
-	int nr_failed = 0;
+struct page_to_node {
+	unsigned long addr;
 	struct page *page;
-	struct list_head *p;
+	int node;
+	int status;
+};
+
+static struct page *new_page_node(struct page *p, unsigned long private,
+		int **result)
+{
+	struct page_to_node *pm = (struct page_to_node *)private;
+
+	while (pm->node != MAX_NUMNODES && pm->page != p)
+		pm++;
+
+	if (pm->node == MAX_NUMNODES)
+		return NULL;
 
-redo:
-	nr_pages = 0;
-	list_for_each(p, pagelist) {
-		if (vma) {
+	*result = &pm->status;
+
+	return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0);
+}
+
+/*
+ * Move a set of pages as indicated in the pm array. The addr
+ * field must be set to the virtual address of the page to be moved
+ * and the node number must contain a valid target node.
+ */
+static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
+				int migrate_all)
+{
+	int err;
+	struct page_to_node *pp;
+	LIST_HEAD(pagelist);
+
+	down_read(&mm->mmap_sem);
+
+	/*
+	 * Build a list of pages to migrate
+	 */
+	migrate_prep();
+	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
+		struct vm_area_struct *vma;
+		struct page *page;
+
+		/*
+		 * A valid page pointer that will not match any of the
+		 * pages that will be moved.
+		 */
+		pp->page = ZERO_PAGE(0);
+
+		err = -EFAULT;
+		vma = find_vma(mm, pp->addr);
+		if (!vma)
+			goto set_status;
+
+		page = follow_page(vma, pp->addr, FOLL_GET);
+		err = -ENOENT;
+		if (!page)
+			goto set_status;
+
+		if (PageReserved(page))		/* Check for zero page */
+			goto put_and_set;
+
+		pp->page = page;
+		err = page_to_nid(page);
+
+		if (err == pp->node)
 			/*
-			 * The address passed to alloc_page_vma is used to
-			 * generate the proper interleave behavior. We fake
-			 * the address here by an increasing offset in order
-			 * to get the proper distribution of pages.
-			 *
-			 * No decision has been made as to which page
-			 * a certain old page is moved to so we cannot
-			 * specify the correct address.
+			 * Node already in the right place
 			 */
-			page = alloc_page_vma(GFP_HIGHUSER, vma,
-					offset + vma->vm_start);
-			offset += PAGE_SIZE;
-		}
-		else
-			page = alloc_pages_node(dest, GFP_HIGHUSER, 0);
+			goto put_and_set;
 
-		if (!page) {
-			err = -ENOMEM;
+		err = -EACCES;
+		if (page_mapcount(page) > 1 &&
+				!migrate_all)
+			goto put_and_set;
+
+		err = isolate_lru_page(page, &pagelist);
+put_and_set:
+		/*
+		 * Either remove the duplicate refcount from
+		 * isolate_lru_page() or drop the page ref if it was
+		 * not isolated.
+		 */
+		put_page(page);
+set_status:
+		pp->status = err;
+	}
+
+	if (!list_empty(&pagelist))
+		err = migrate_pages(&pagelist, new_page_node,
+				(unsigned long)pm);
+	else
+		err = -ENOENT;
+
+	up_read(&mm->mmap_sem);
+	return err;
+}
+
+/*
+ * Determine the nodes of a list of pages. The addr in the pm array
+ * must have been set to the virtual address of the page whose node
+ * number we want to determine.
+ */
+static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
+{
+	down_read(&mm->mmap_sem);
+
+	for ( ; pm->node != MAX_NUMNODES; pm++) {
+		struct vm_area_struct *vma;
+		struct page *page;
+		int err;
+
+		err = -EFAULT;
+		vma = find_vma(mm, pm->addr);
+		if (!vma)
+			goto set_status;
+
+		page = follow_page(vma, pm->addr, 0);
+		err = -ENOENT;
+		/* Use PageReserved to check for zero page */
+		if (!page || PageReserved(page))
+			goto set_status;
+
+		err = page_to_nid(page);
+set_status:
+		pm->status = err;
+	}
+
+	up_read(&mm->mmap_sem);
+	return 0;
+}
+
+/*
+ * Move a list of pages in the address space of the currently executing
+ * process.
+ */
+asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
+			const void __user * __user *pages,
+			const int __user *nodes,
+			int __user *status, int flags)
+{
+	int err = 0;
+	int i;
+	struct task_struct *task;
+	nodemask_t task_nodes;
+	struct mm_struct *mm;
+	struct page_to_node *pm = NULL;
+
+	/* Check flags */
+	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
+		return -EINVAL;
+
+	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	/* Find the mm_struct */
+	read_lock(&tasklist_lock);
+	task = pid ? find_task_by_pid(pid) : current;
+	if (!task) {
+		read_unlock(&tasklist_lock);
+		return -ESRCH;
+	}
+	mm = get_task_mm(task);
+	read_unlock(&tasklist_lock);
+
+	if (!mm)
+		return -EINVAL;
+
+	/*
+	 * Check if this process has the right to modify the specified
+	 * process. The right exists if the process has administrative
+	 * capabilities, superuser privileges or the same
+	 * userid as the target process.
+	 */
+	if ((current->euid != task->suid) && (current->euid != task->uid) &&
+	    (current->uid != task->suid) && (current->uid != task->uid) &&
+	    !capable(CAP_SYS_NICE)) {
+		err = -EPERM;
+		goto out2;
+	}
+
+	err = security_task_movememory(task);
+	if (err)
+		goto out2;
+
+
+	task_nodes = cpuset_mems_allowed(task);
+
+	/* Limit nr_pages so that the multiplication may not overflow */
+	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
+		err = -E2BIG;
+		goto out2;
+	}
+
+	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
+	if (!pm) {
+		err = -ENOMEM;
+		goto out2;
+	}
+
+	/*
+	 * Get parameters from user space and initialize the pm
+	 * array. Return various errors if the user did something wrong.
+	 */
+	for (i = 0; i < nr_pages; i++) {
+		const void __user *p;
+
+		err = -EFAULT;
+		if (get_user(p, pages + i))
 			goto out;
+
+		pm[i].addr = (unsigned long)p;
+		if (nodes) {
+			int node;
+
+			if (get_user(node, nodes + i))
+				goto out;
+
+			err = -ENODEV;
+			if (!node_online(node))
+				goto out;
+
+			err = -EACCES;
+			if (!node_isset(node, task_nodes))
+				goto out;
+
+			pm[i].node = node;
 		}
-		list_add_tail(&page->lru, &newlist);
-		nr_pages++;
-		if (nr_pages > MIGRATE_CHUNK_SIZE)
-			break;
 	}
-	err = migrate_pages(pagelist, &newlist);
 
+	/* End marker */
+	pm[nr_pages].node = MAX_NUMNODES;
+
+	if (nodes)
+		err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
+	else
+		err = do_pages_stat(mm, pm);
+
+	if (err >= 0)
+		/* Return status information */
+		for (i = 0; i < nr_pages; i++)
+			if (put_user(pm[i].status, status + i))
+				err = -EFAULT;
+
-	if (err >= 0) {
-		nr_failed += err;
-		if (list_empty(&newlist) && !list_empty(pagelist))
-			goto redo;
-	}
 out:
+	vfree(pm);
+out2:
+	mmput(mm);
+	return err;
+}
+#endif
 
-	/* Calculate number of leftover pages */
-	list_for_each(p, pagelist)
-		nr_failed++;
-	return nr_failed;
+/*
+ * Call migration functions in the vm_ops that may prepare
+ * memory in a vm for migration. Migration functions may perform
+ * the migration for vmas that do not have an underlying page struct.
+ */
+int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
+	const nodemask_t *from, unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	int err = 0;
+
+	for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) {
+		if (vma->vm_ops && vma->vm_ops->migrate) {
+			err = vma->vm_ops->migrate(vma, to, from, flags);
+			if (err)
+				break;
+		}
+	}
+	return err;
 }
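
The hunks above replace the old two-list interface of migrate_pages() with an allocator callback: the caller now passes a new_page_t function plus an opaque private value, and migrate_pages() itself puts failed pages back on the LRU, so no second page list is needed. A minimal in-kernel sketch of a caller of the new interface follows; new_node_page() and migrate_list_to_node() are hypothetical names used for illustration only and are not part of this patch.

#include <linux/migrate.h>
#include <linux/gfp.h>
#include <linux/mm.h>

/* Hypothetical new_page_t callback: "private" carries the target node. */
static struct page *new_node_page(struct page *page, unsigned long private,
		int **result)
{
	/* Per-page status reporting (*result) is not used by this caller. */
	return alloc_pages_node((int)private, GFP_HIGHUSER, 0);
}

/*
 * Hypothetical caller: "pagelist" is assumed to have been filled via
 * isolate_lru_page() after migrate_prep(), per the comment fixed above.
 */
static int migrate_list_to_node(struct list_head *pagelist, int nid)
{
	/*
	 * Returns the number of pages that could not be migrated, or an
	 * error code such as -ENOMEM; leftover pages are already back on
	 * the LRU when migrate_pages() returns.
	 */
	return migrate_pages(pagelist, new_node_page, (unsigned long)nid);
}

do_move_pages() in the patch follows exactly this pattern, with new_page_node() recovering the per-page target node from the page_to_node array passed through "private".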
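The new sys_move_pages() entry point is what userspace later sees as move_pages(2). A hedged userspace sketch, assuming libnuma's move_pages() wrapper and MPOL_MF_MOVE from <numaif.h> (link with -lnuma); the buffer layout, target node 0 and error handling are illustrative only:

#include <numaif.h>	/* move_pages(), MPOL_MF_MOVE (libnuma) */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	void *buf;
	void *pages[2];
	int nodes[2] = { 0, 0 };	/* desired target node per page */
	int status[2];

	if (posix_memalign(&buf, psz, 2 * psz))
		return 1;
	((char *)buf)[0] = ((char *)buf)[psz] = 1;	/* fault pages in */
	pages[0] = buf;
	pages[1] = (char *)buf + psz;

	/* pid 0 means the calling process. */
	if (move_pages(0, 2, pages, nodes, status, MPOL_MF_MOVE) < 0) {
		perror("move_pages");
		return 1;
	}
	/* status[i]: node the page now resides on, or a negative errno. */
	printf("status: %d %d\n", status[0], status[1]);
	free(buf);
	return 0;
}

Passing NULL for the nodes array exercises the do_pages_stat() path in the patch instead, filling status[] with each page's current node without moving anything.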