#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
-#include <linux/ima.h>
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/module.h>
if (!(flags & MAP_FIXED))
addr = round_hint_to_min(addr);
- error = arch_mmap_check(addr, len, flags);
- if (error)
- return error;
-
/* Careful about overflows.. */
len = PAGE_ALIGN(len);
- if (!len || len > TASK_SIZE)
+ if (!len)
return -ENOMEM;
/* offset overflow? */
error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
if (error)
return error;
- error = ima_file_mmap(file, prot);
- if (error)
- return error;
return mmap_region(file, addr, len, flags, vm_flags, pgoff);
}
EXPORT_SYMBOL(do_mmap_pgoff);
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, unsigned long, pgoff)
+{
+ struct file *file = NULL;
+ unsigned long retval = -EBADF;
+
+ if (!(flags & MAP_ANONYMOUS)) {
+ if (unlikely(flags & MAP_HUGETLB))
+ return -EINVAL;
+ file = fget(fd);
+ if (!file)
+ goto out;
+ } else if (flags & MAP_HUGETLB) {
+ struct user_struct *user = NULL;
+ /*
+ * VM_NORESERVE is used because the reservations will be
+ * taken when vm_ops->mmap() is called.
+ * A dummy user value is used because we are not locking
+ * memory, so no accounting is necessary.
+ */
+ len = ALIGN(len, huge_page_size(&default_hstate));
+ file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
+ &user, HUGETLB_ANONHUGE_INODE);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+ }
+
+ flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+ down_write(&current->mm->mmap_sem);
+ retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+ up_write(&current->mm->mmap_sem);
+
+ if (file)
+ fput(file);
+out:
+ return retval;
+}
+
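For context, a minimal userspace sketch (not part of the patch) that exercises the new MAP_HUGETLB path above. It assumes huge pages have been reserved (e.g. via /proc/sys/vm/nr_hugepages) and a 2 MiB huge page size, and it defines MAP_HUGETLB by hand since older libc headers may not carry it; 0x40000 is the x86 value.

#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000	/* x86 value; arch-specific */
#endif

int main(void)
{
	size_t len = 2 * 1024 * 1024;	/* one 2 MiB huge page */

	/* fd is -1: the kernel builds the anonymous hugetlb file itself. */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	return munmap(p, len);
}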
/*
* Some shared mappings will want the pages marked read-only
* to track write events. If so, we'll downgrade vm_page_prot
goto unmap_and_free_vma;
if (vm_flags & VM_EXECUTABLE)
added_exe_file_vma(mm);
+
+ /* Can addr have changed??
+ *
+ * Answer: Yes, several device drivers can do it in their
+ * f_op->mmap method. -DaveM
+ */
+ addr = vma->vm_start;
+ pgoff = vma->vm_pgoff;
+ vm_flags = vma->vm_flags;
} else if (vm_flags & VM_SHARED) {
error = shmem_zero_setup(vma);
if (error)
goto free_vma;
}
- /* Can addr have changed??
- *
- * Answer: Yes, several device drivers can do it in their
- * f_op->mmap method. -DaveM
- */
- addr = vma->vm_start;
- pgoff = vma->vm_pgoff;
- vm_flags = vma->vm_flags;
- if (vma_wants_writenotify(vma))
+ if (vma_wants_writenotify(vma)) {
+ pgprot_t pprot = vma->vm_page_prot;
+
+ /* Can vma->vm_page_prot have changed??
+ *
+ * Answer: Yes, drivers may have changed it in their
+ * f_op->mmap method.
+ *
+ * Ensures that vmas marked as uncached stay that way.
+ */
vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
+ if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ }
vma_link(mm, vma, prev, rb_link, rb_parent);
file = vma->vm_file;
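To see why vm_page_prot can change under mmap_region(), consider a driver mmap method that remaps MMIO uncached. The sketch below is illustrative only (mydrv_mmap and mydev_phys are invented names); the hunk above exists so that exactly this kind of mapping stays uncached when writenotify downgrades protections.

static phys_addr_t mydev_phys;	/* hypothetical MMIO base, set at probe time */

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* Mark the mapping uncached before establishing the PTEs. */
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	return remap_pfn_range(vma, vma->vm_start,
			       mydev_phys >> PAGE_SHIFT,
			       vma->vm_end - vma->vm_start,
			       vma->vm_page_prot);
}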
unsigned long (*get_area)(struct file *, unsigned long,
unsigned long, unsigned long, unsigned long);
+ unsigned long error = arch_mmap_check(addr, len, flags);
+ if (error)
+ return error;
+
+ /* Careful about overflows.. */
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
get_area = current->mm->get_unmapped_area;
if (file && file->f_op && file->f_op->get_unmapped_area)
get_area = file->f_op->get_unmapped_area;
}
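Centralizing arch_mmap_check() and the TASK_SIZE check here means every path, including file-specific hooks, inherits the validation. A hedged sketch of such a hook (myfs_get_unmapped_area is invented; real filesystems such as hugetlbfs supply their own):

static unsigned long myfs_get_unmapped_area(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	/* Apply filesystem-specific placement, then defer to the mm default. */
	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
}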
/*
- * Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the tail.
+ * __split_vma() bypasses sysctl_max_map_count checking. We use this on the
+ * munmap path where it doesn't make sense to fail.
*/
-int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
unsigned long addr, int new_below)
{
struct mempolicy *pol;
~(huge_page_mask(hstate_vma(vma)))))
return -EINVAL;
- if (mm->map_count >= sysctl_max_map_count)
- return -ENOMEM;
-
new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!new)
return -ENOMEM;
return 0;
}
+/*
+ * Split a vma into two pieces at address 'addr'; a new vma is allocated
+ * either for the first part or the tail.
+ */
+int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, int new_below)
+{
+ if (mm->map_count >= sysctl_max_map_count)
+ return -ENOMEM;
+
+ return __split_vma(mm, vma, addr, new_below);
+}
+
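Callers outside the munmap path keep using the checked wrapper. A sketch of the usual pattern (clamp_vma_start is a made-up helper modeled on the mprotect/madvise callers):

static int clamp_vma_start(struct mm_struct *mm, struct vm_area_struct *vma,
			   unsigned long start)
{
	/* Splitting here can raise map_count, so the limit must be enforced. */
	if (start > vma->vm_start)
		return split_vma(mm, vma, start, 1);
	return 0;
}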
/* Munmap is split into 2 main parts -- this part which finds
* what needs doing, and the areas themselves, which do the
* work. This now handles partial unmappings.
* places tmp vma above, and higher split_vma places tmp vma below.
*/
if (start > vma->vm_start) {
- int error = split_vma(mm, vma, start, 0);
+ int error;
+
+ /*
+ * Make sure that map_count on return from munmap() will
+ * not exceed its limit; but let map_count go just above
+ * its limit temporarily, to help free resources as expected.
+ */
+ if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+ return -ENOMEM;
+
+ error = __split_vma(mm, vma, start, 0);
if (error)
return error;
prev = vma;
/* Does it split the last one? */
last = find_vma(mm, end);
if (last && end > last->vm_start) {
- int error = split_vma(mm, last, end, 1);
+ int error = __split_vma(mm, last, end, 1);
if (error)
return error;
}
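The situation the map_count comment above guards against is a hole punched in the middle of a mapping, which needs a split at each end. A minimal userspace illustration:

#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	/* Unmap the middle page: one vma briefly becomes three, then two. */
	return munmap(p + page, page);
}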
if (!len)
return addr;
- if ((addr + len) > TASK_SIZE || (addr + len) < addr)
- return -EINVAL;
-
- if (is_hugepage_only_range(mm, addr, len))
- return -EINVAL;
-
error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
if (error)
return error;
flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
- error = arch_mmap_check(addr, len, flags);
- if (error)
+ error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+ if (error & ~PAGE_MASK)
return error;
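The test against ~PAGE_MASK works because get_unmapped_area() returns either a page-aligned address or a negative errno, and an errno value always has low bits set. A hedged illustration (gua_failed is not in the tree):

/* True when a get_unmapped_area() return value encodes an errno. */
static inline int gua_failed(unsigned long ret)
{
	return (ret & ~PAGE_MASK) != 0;
}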
/*
static void special_mapping_close(struct vm_area_struct *vma)
{
}
-static struct vm_operations_struct special_mapping_vmops = {
+static const struct vm_operations_struct special_mapping_vmops = {
.close = special_mapping_close,
.fault = special_mapping_fault,
};