#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/mempolicy.h>
+#include <linux/page-isolation.h>
#include <linux/hugetlb.h>
#include <linux/sched.h>
+#include <linux/ksm.h>
/*
* Any behaviour which results in changes to the vma->vm_flags needs to
struct mm_struct * mm = vma->vm_mm;
int error = 0;
pgoff_t pgoff;
- int new_flags = vma->vm_flags;
+ unsigned long new_flags = vma->vm_flags;
switch (behavior) {
case MADV_NORMAL:
new_flags |= VM_DONTCOPY;
break;
case MADV_DOFORK:
+ if (vma->vm_flags & VM_IO) {
+ error = -EINVAL;
+ goto out;
+ }
new_flags &= ~VM_DONTCOPY;
break;
+ case MADV_MERGEABLE:
+ case MADV_UNMERGEABLE:
+ error = ksm_madvise(vma, start, end, behavior, &new_flags);
+ if (error)
+ goto out;
+ break;
}
if (new_flags == vma->vm_flags) {
end = vma->vm_end;
end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
- force_page_cache_readahead(file->f_mapping,
- file, start, max_sane_readahead(end - start));
+ force_page_cache_readahead(file->f_mapping, file, start, end - start);
return 0;
}
* Application no longer needs these pages. If the pages are dirty,
* it's OK to just throw them away. The app will be more careful about
* data it wants to keep. Be sure to free swap resources too. The
- * zap_page_range call sets things up for refill_inactive to actually free
+ * zap_page_range call sets things up for shrink_active_list to actually free
* these pages later if no one else has touched them in the meantime,
* although we could add these pages to a global reuse list for
- * refill_inactive to pick up before reclaiming other pages.
+ * shrink_active_list to pick up before reclaiming other pages.
*
* NB: This interface discards data rather than pushes it out to swap,
* as some implementations do. This has performance implications for
return error;
}
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Error injection support for memory error handling.
+ */
+static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
+{
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ for (; start < end; start += PAGE_SIZE) {
+ struct page *p;
+ int ret = get_user_pages_fast(start, 1, 0, &p);
+ if (ret != 1)
+ return ret;
+ if (bhv == MADV_SOFT_OFFLINE) {
+ printk(KERN_INFO "Soft offlining page %lx at %lx\n",
+ page_to_pfn(p), start);
+ ret = soft_offline_page(p, MF_COUNT_INCREASED);
+ if (ret)
+ break;
+ continue;
+ }
+ printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
+ page_to_pfn(p), start);
+ /* Ignore return value for now */
+ __memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
+ }
+ return ret;
+}
+#endif
+
static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
unsigned long start, unsigned long end, int behavior)
{
- long error;
+ switch (behavior) {
+ case MADV_REMOVE:
+ return madvise_remove(vma, prev, start, end);
+ case MADV_WILLNEED:
+ return madvise_willneed(vma, prev, start, end);
+ case MADV_DONTNEED:
+ return madvise_dontneed(vma, prev, start, end);
+ default:
+ return madvise_behavior(vma, prev, start, end, behavior);
+ }
+}
+static int
+madvise_behavior_valid(int behavior)
+{
switch (behavior) {
case MADV_DOFORK:
- if (vma->vm_flags & VM_IO) {
- error = -EINVAL;
- break;
- }
case MADV_DONTFORK:
case MADV_NORMAL:
case MADV_SEQUENTIAL:
case MADV_RANDOM:
- error = madvise_behavior(vma, prev, start, end, behavior);
- break;
case MADV_REMOVE:
- error = madvise_remove(vma, prev, start, end);
- break;
-
case MADV_WILLNEED:
- error = madvise_willneed(vma, prev, start, end);
- break;
-
case MADV_DONTNEED:
- error = madvise_dontneed(vma, prev, start, end);
- break;
+#ifdef CONFIG_KSM
+ case MADV_MERGEABLE:
+ case MADV_UNMERGEABLE:
+#endif
+ return 1;
default:
- error = -EINVAL;
- break;
+ return 0;
}
- return error;
}
/*
* so the kernel can free resources associated with it.
* MADV_REMOVE - the application wants to free up the given range of
* pages and associated backing store.
+ * MADV_DONTFORK - omit this area from child's address space when forking:
+ * typically, to avoid COWing pages pinned by get_user_pages().
+ * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
+ * MADV_MERGEABLE - the application recommends that KSM try to merge pages in
+ * this area with pages of identical content from other such areas.
+ * MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others.
*
* return values:
* zero - success
* -EBADF - map exists, but area maps something that isn't a file.
* -EAGAIN - a kernel resource was temporarily unavailable.
*/
-asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
+SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
{
unsigned long end, tmp;
struct vm_area_struct * vma, *prev;
int write;
size_t len;
+#ifdef CONFIG_MEMORY_FAILURE
+ if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
+ return madvise_hwpoison(behavior, start, start+len_in);
+#endif
+ if (!madvise_behavior_valid(behavior))
+ return error;
+
write = madvise_need_mmap_write(behavior);
if (write)
down_write(¤t->mm->mmap_sem);