HWPOISON: return early on non-LRU pages

[safe/jmp/linux-2.6] / mm / mremap.c
diff --git a/mm/mremap.c b/mm/mremap.c

index b535438..97bff25 100644 (file)
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -3,7 +3,7 @@
   *
   *     (C) Copyright 1996 Linus Torvalds
   *
- *     Address space accounting code   <alan@redhat.com>
+ *     Address space accounting code   <alan@lxorguk.ukuu.org.uk>
   *     (C) Copyright 2002 Red Hat Inc, All Rights Reserved
   */
  
@@ -11,17 +11,22 @@
  #include <linux/hugetlb.h>
  #include <linux/slab.h>
  #include <linux/shm.h>
+#include <linux/ksm.h>
  #include <linux/mman.h>
  #include <linux/swap.h>
+#include <linux/capability.h>
  #include <linux/fs.h>
  #include <linux/highmem.h>
  #include <linux/security.h>
  #include <linux/syscalls.h>
+#include <linux/mmu_notifier.h>
  
  #include <asm/uaccess.h>
  #include <asm/cacheflush.h>
  #include <asm/tlbflush.h>
  
+#include "internal.h"
+
  static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
  {
         pgd_t *pgd;
@@ -73,12 +78,16 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
         struct mm_struct *mm = vma->vm_mm;
         pte_t *old_pte, *new_pte, pte;
         spinlock_t *old_ptl, *new_ptl;
+       unsigned long old_start;
  
+       old_start = old_addr;
+       mmu_notifier_invalidate_range_start(vma->vm_mm,
+                                           old_start, old_end);
         if (vma->vm_file) {
                 /*
                  * Subtle point from Rajesh Venkatasubramanian: before
-                * moving file-based ptes, we must lock vmtruncate out,
-                * since it might clean the dst vma before the src vma,
+                * moving file-based ptes, we must lock truncate_pagecache
+                * out, since it might clean the dst vma before the src vma,
                  * and we propagate stale pages into the dst afterward.
                  */
                 mapping = vma->vm_file->f_mapping;
@@ -96,29 +105,31 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
         new_pte = pte_offset_map_nested(new_pmd, new_addr);
         new_ptl = pte_lockptr(mm, new_pmd);
         if (new_ptl != old_ptl)
-               spin_lock(new_ptl);
+               spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+       arch_enter_lazy_mmu_mode();
  
         for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
                                    new_pte++, new_addr += PAGE_SIZE) {
                 if (pte_none(*old_pte))
                         continue;
                 pte = ptep_clear_flush(vma, old_addr, old_pte);
-               /* ZERO_PAGE can be dependant on virtual addr */
                 pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
                 set_pte_at(mm, new_addr, new_pte, pte);
         }
  
+       arch_leave_lazy_mmu_mode();
         if (new_ptl != old_ptl)
                 spin_unlock(new_ptl);
         pte_unmap_nested(new_pte - 1);
         pte_unmap_unlock(old_pte - 1, old_ptl);
         if (mapping)
                 spin_unlock(&mapping->i_mmap_lock);
+       mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end);
  }
  
  #define LATENCY_LIMIT  (64 * PAGE_SIZE)
  
-static unsigned long move_page_tables(struct vm_area_struct *vma,
+unsigned long move_page_tables(struct vm_area_struct *vma,
                 unsigned long old_addr, struct vm_area_struct *new_vma,
                 unsigned long new_addr, unsigned long len)
  {
@@ -164,6 +175,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
         unsigned long excess = 0;
         unsigned long hiwater_vm;
         int split = 0;
+       int err;
  
         /*
          * We'd prefer to avoid failure later on in do_munmap:
@@ -172,6 +184,18 @@ static unsigned long move_vma(struct vm_area_struct *vma,
         if (mm->map_count >= sysctl_max_map_count - 3)
                 return -ENOMEM;
  
+       /*
+        * Advise KSM to break any KSM pages in the area to be moved:
+        * it would be confusing if they were to turn up at the new
+        * location, where they happen to coincide with different KSM
+        * pages recently unmapped.  But leave vma->vm_flags as it was,
+        * so KSM can come around to merge on vma and new_vma afterwards.
+        */
+       err = ksm_madvise(vma, old_addr, old_addr + old_len,
+                                               MADV_UNMERGEABLE, &vm_flags);
+       if (err)
+               return err;
+
         new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
         new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
         if (!new_vma)
@@ -230,8 +254,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
         if (vm_flags & VM_LOCKED) {
                 mm->locked_vm += new_len >> PAGE_SHIFT;
                 if (new_len > old_len)
-                       make_pages_present(new_addr + old_len,
-                                          new_addr + new_len);
+                       mlock_vma_pages_range(new_vma, new_addr + old_len,
+                                                      new_addr + new_len);
         }
  
         return new_addr;
@@ -289,6 +313,10 @@ unsigned long do_mremap(unsigned long addr,
                 if ((addr <= new_addr) && (addr+old_len) > new_addr)
                         goto out;
  
+               ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
+               if (ret)
+                       goto out;
+
                 ret = do_munmap(mm, new_addr, new_len);
                 if (ret)
                         goto out;
@@ -323,7 +351,7 @@ unsigned long do_mremap(unsigned long addr,
         /* We can't remap across vm area boundaries */
         if (old_len > vma->vm_end - addr)
                 goto out;
-       if (vma->vm_flags & VM_DONTEXPAND) {
+       if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) {
                 if (new_len > old_len)
                         goto out;
         }
@@ -367,7 +395,7 @@ unsigned long do_mremap(unsigned long addr,
                         vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
                         if (vma->vm_flags & VM_LOCKED) {
                                 mm->locked_vm += pages;
-                               make_pages_present(addr + old_len,
+                               mlock_vma_pages_range(vma, addr + old_len,
                                                    addr + new_len);
                         }
                         ret = addr;
@@ -388,8 +416,13 @@ unsigned long do_mremap(unsigned long addr,
  
                         new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
                                                 vma->vm_pgoff, map_flags);
-                       ret = new_addr;
-                       if (new_addr & ~PAGE_MASK)
+                       if (new_addr & ~PAGE_MASK) {
+                               ret = new_addr;
+                               goto out;
+                       }
+
+                       ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
+                       if (ret)
                                 goto out;
                 }
                 ret = move_vma(vma, addr, old_len, new_len, new_addr);
@@ -401,9 +434,9 @@ out_nc:
         return ret;
  }
  
-asmlinkage unsigned long sys_mremap(unsigned long addr,
-       unsigned long old_len, unsigned long new_len,
-       unsigned long flags, unsigned long new_addr)
+SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
+               unsigned long, new_len, unsigned long, flags,
+               unsigned long, new_addr)
  {
         unsigned long ret;