X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fksm.c;h=56a0da1f9979d7eaa9d85cbc0b20ef76215e4abf;hb=dd508ae2dbff0cfc7401eb6e278339fc56bc5033;hp=37cc92f83a8d04cd33b3e6c02e7b60e930ed8a16;hpb=4035c07a895974d0ac06a56fe870ad293fc451a7;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/ksm.c b/mm/ksm.c index 37cc92f..56a0da1 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -108,14 +109,14 @@ struct ksm_scan { /** * struct stable_node - node of the stable rbtree - * @page: pointer to struct page of the ksm page * @node: rb node of this ksm page in the stable tree * @hlist: hlist head of rmap_items using this ksm page + * @kpfn: page frame number of this ksm page */ struct stable_node { - struct page *page; struct rb_node node; struct hlist_head hlist; + unsigned long kpfn; }; /** @@ -178,9 +179,6 @@ static unsigned long ksm_pages_unshared; /* The number of rmap_items in use: to calculate pages_volatile */ static unsigned long ksm_rmap_items; -/* Limit on the number of unswappable pages used */ -static unsigned long ksm_max_kernel_pages; - /* Number of pages ksmd should scan in one batch */ static unsigned int ksm_thread_pages_to_scan = 100; @@ -515,7 +513,7 @@ static struct page *get_ksm_page(struct stable_node *stable_node) struct page *page; void *expected_mapping; - page = stable_node->page; + page = pfn_to_page(stable_node->kpfn); expected_mapping = (void *)stable_node + (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); rcu_read_lock(); @@ -831,7 +829,8 @@ out: * try_to_merge_one_page - take two pages and merge them into one * @vma: the vma that holds the pte pointing to page * @page: the PageAnon page that we want to replace with kpage - * @kpage: the PageKsm page that we want to map instead of page + * @kpage: the PageKsm page that we want to map instead of page, + * or NULL the first time when we want to use page as kpage. * * This function returns 0 if the pages were merged, -EFAULT otherwise. */ @@ -864,15 +863,24 @@ static int try_to_merge_one_page(struct vm_area_struct *vma, * ptes are necessarily already write-protected. But in either * case, we need to lock and check page_count is not raised. */ - if (write_protect_page(vma, page, &orig_pte) == 0 && - pages_identical(page, kpage)) - err = replace_page(vma, page, kpage, orig_pte); + if (write_protect_page(vma, page, &orig_pte) == 0) { + if (!kpage) { + /* + * While we hold page lock, upgrade page from + * PageAnon+anon_vma to PageKsm+NULL stable_node: + * stable_tree_insert() will update stable_node. + */ + set_page_stable_node(page, NULL); + mark_page_accessed(page); + err = 0; + } else if (pages_identical(page, kpage)) + err = replace_page(vma, page, kpage, orig_pte); + } - if ((vma->vm_flags & VM_LOCKED) && !err) { + if ((vma->vm_flags & VM_LOCKED) && kpage && !err) { munlock_vma_page(page); if (!PageMlocked(kpage)) { unlock_page(page); - lru_add_drain(); lock_page(kpage); mlock_vma_page(kpage); page = kpage; /* for final unlock */ @@ -922,7 +930,7 @@ out: * This function returns the kpage if we successfully merged two identical * pages into one ksm page, NULL otherwise. * - * Note that this function allocates a new kernel page: if one of the pages + * Note that this function upgrades page to ksm page: if one of the pages * is already a ksm page, try_to_merge_with_ksm_page should be used. */ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item, @@ -930,50 +938,12 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item, struct rmap_item *tree_rmap_item, struct page *tree_page) { - struct mm_struct *mm = rmap_item->mm; - struct vm_area_struct *vma; - struct page *kpage; - int err = -EFAULT; - - /* - * The number of nodes in the stable tree - * is the number of kernel pages that we hold. - */ - if (ksm_max_kernel_pages && - ksm_max_kernel_pages <= ksm_pages_shared) - return NULL; - - kpage = alloc_page(GFP_HIGHUSER); - if (!kpage) - return NULL; - - down_read(&mm->mmap_sem); - if (ksm_test_exit(mm)) - goto up; - vma = find_vma(mm, rmap_item->address); - if (!vma || vma->vm_start > rmap_item->address) - goto up; - - copy_user_highpage(kpage, page, rmap_item->address, vma); - - SetPageDirty(kpage); - __SetPageUptodate(kpage); - SetPageSwapBacked(kpage); - set_page_stable_node(kpage, NULL); /* mark it PageKsm */ - lru_cache_add_lru(kpage, LRU_ACTIVE_ANON); - - err = try_to_merge_one_page(vma, page, kpage); - if (err) - goto up; - - /* Must get reference to anon_vma while still holding mmap_sem */ - hold_anon_vma(rmap_item, vma->anon_vma); -up: - up_read(&mm->mmap_sem); + int err; + err = try_to_merge_with_ksm_page(rmap_item, page, NULL); if (!err) { err = try_to_merge_with_ksm_page(tree_rmap_item, - tree_page, kpage); + tree_page, page); /* * If that fails, we have a ksm page with only one pte * pointing to it: so break it. @@ -981,11 +951,7 @@ up: if (err) break_cow(rmap_item); } - if (err) { - put_page(kpage); - kpage = NULL; - } - return kpage; + return err ? NULL : page; } /* @@ -997,7 +963,7 @@ up: * This function returns the stable tree node of identical content if found, * NULL otherwise. */ -static struct stable_node *stable_tree_search(struct page *page) +static struct page *stable_tree_search(struct page *page) { struct rb_node *node = root_stable_tree.rb_node; struct stable_node *stable_node; @@ -1005,7 +971,7 @@ static struct stable_node *stable_tree_search(struct page *page) stable_node = page_stable_node(page); if (stable_node) { /* ksm page forked */ get_page(page); - return stable_node; + return page; } while (node) { @@ -1027,7 +993,7 @@ static struct stable_node *stable_tree_search(struct page *page) put_page(tree_page); node = node->rb_right; } else - return stable_node; + return tree_page; } return NULL; @@ -1083,7 +1049,7 @@ static struct stable_node *stable_tree_insert(struct page *kpage) INIT_HLIST_HEAD(&stable_node->hlist); - stable_node->page = kpage; + stable_node->kpfn = page_to_pfn(kpage); set_page_stable_node(kpage, stable_node); return stable_node; @@ -1194,9 +1160,8 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) remove_rmap_item_from_tree(rmap_item); /* We first start with searching the page inside the stable tree */ - stable_node = stable_tree_search(page); - if (stable_node) { - kpage = stable_node->page; + kpage = stable_tree_search(page); + if (kpage) { err = try_to_merge_with_ksm_page(rmap_item, page, kpage); if (!err) { /* @@ -1204,7 +1169,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) * add its rmap_item to the stable tree. */ lock_page(kpage); - stable_tree_append(rmap_item, stable_node); + stable_tree_append(rmap_item, page_stable_node(kpage)); unlock_page(kpage); } put_page(kpage); @@ -1244,7 +1209,6 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) stable_tree_append(rmap_item, stable_node); } unlock_page(kpage); - put_page(kpage); /* * If we fail to insert the page into the stable tree, @@ -1681,6 +1645,122 @@ out: return ret; } +#ifdef CONFIG_MIGRATION +int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *, + struct vm_area_struct *, unsigned long, void *), void *arg) +{ + struct stable_node *stable_node; + struct hlist_node *hlist; + struct rmap_item *rmap_item; + int ret = SWAP_AGAIN; + int search_new_forks = 0; + + VM_BUG_ON(!PageKsm(page)); + VM_BUG_ON(!PageLocked(page)); + + stable_node = page_stable_node(page); + if (!stable_node) + return ret; +again: + hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { + struct anon_vma *anon_vma = rmap_item->anon_vma; + struct vm_area_struct *vma; + + spin_lock(&anon_vma->lock); + list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { + if (rmap_item->address < vma->vm_start || + rmap_item->address >= vma->vm_end) + continue; + /* + * Initially we examine only the vma which covers this + * rmap_item; but later, if there is still work to do, + * we examine covering vmas in other mms: in case they + * were forked from the original since ksmd passed. + */ + if ((rmap_item->mm == vma->vm_mm) == search_new_forks) + continue; + + ret = rmap_one(page, vma, rmap_item->address, arg); + if (ret != SWAP_AGAIN) { + spin_unlock(&anon_vma->lock); + goto out; + } + } + spin_unlock(&anon_vma->lock); + } + if (!search_new_forks++) + goto again; +out: + return ret; +} + +void ksm_migrate_page(struct page *newpage, struct page *oldpage) +{ + struct stable_node *stable_node; + + VM_BUG_ON(!PageLocked(oldpage)); + VM_BUG_ON(!PageLocked(newpage)); + VM_BUG_ON(newpage->mapping != oldpage->mapping); + + stable_node = page_stable_node(newpage); + if (stable_node) { + VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage)); + stable_node->kpfn = page_to_pfn(newpage); + } +} +#endif /* CONFIG_MIGRATION */ + +#ifdef CONFIG_MEMORY_HOTREMOVE +static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn, + unsigned long end_pfn) +{ + struct rb_node *node; + + for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) { + struct stable_node *stable_node; + + stable_node = rb_entry(node, struct stable_node, node); + if (stable_node->kpfn >= start_pfn && + stable_node->kpfn < end_pfn) + return stable_node; + } + return NULL; +} + +static int ksm_memory_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + struct memory_notify *mn = arg; + struct stable_node *stable_node; + + switch (action) { + case MEM_GOING_OFFLINE: + /* + * Keep it very simple for now: just lock out ksmd and + * MADV_UNMERGEABLE while any memory is going offline. + */ + mutex_lock(&ksm_thread_mutex); + break; + + case MEM_OFFLINE: + /* + * Most of the work is done by page migration; but there might + * be a few stable_nodes left over, still pointing to struct + * pages which have been offlined: prune those from the tree. + */ + while ((stable_node = ksm_check_stable_tree(mn->start_pfn, + mn->start_pfn + mn->nr_pages)) != NULL) + remove_node_from_stable_tree(stable_node); + /* fallthrough */ + + case MEM_CANCEL_OFFLINE: + mutex_unlock(&ksm_thread_mutex); + break; + } + return NOTIFY_OK; +} +#endif /* CONFIG_MEMORY_HOTREMOVE */ + #ifdef CONFIG_SYSFS /* * This all compiles without CONFIG_SYSFS, but is a waste of space. @@ -1759,8 +1839,8 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, /* * KSM_RUN_MERGE sets ksmd running, and 0 stops it running. * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items, - * breaking COW to free the unswappable pages_shared (but leaves - * mm_slots on the list for when ksmd may be set running again). + * breaking COW to free the pages_shared (but leaves mm_slots + * on the list for when ksmd may be set running again). */ mutex_lock(&ksm_thread_mutex); @@ -1785,29 +1865,6 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, } KSM_ATTR(run); -static ssize_t max_kernel_pages_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - int err; - unsigned long nr_pages; - - err = strict_strtoul(buf, 10, &nr_pages); - if (err) - return -EINVAL; - - ksm_max_kernel_pages = nr_pages; - - return count; -} - -static ssize_t max_kernel_pages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%lu\n", ksm_max_kernel_pages); -} -KSM_ATTR(max_kernel_pages); - static ssize_t pages_shared_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -1857,7 +1914,6 @@ static struct attribute *ksm_attrs[] = { &sleep_millisecs_attr.attr, &pages_to_scan_attr.attr, &run_attr.attr, - &max_kernel_pages_attr.attr, &pages_shared_attr.attr, &pages_sharing_attr.attr, &pages_unshared_attr.attr, @@ -1877,8 +1933,6 @@ static int __init ksm_init(void) struct task_struct *ksm_thread; int err; - ksm_max_kernel_pages = totalram_pages / 4; - err = ksm_slab_init(); if (err) goto out; @@ -1906,6 +1960,13 @@ static int __init ksm_init(void) #endif /* CONFIG_SYSFS */ +#ifdef CONFIG_MEMORY_HOTREMOVE + /* + * Choose a high priority since the callback takes ksm_thread_mutex: + * later callbacks could only be taking locks which nest within that. + */ + hotplug_memory_notifier(ksm_memory_callback, 100); +#endif return 0; out_free2: