mmap: don't return ENOMEM when mapcount is temporarily exceeded in munmap()

author KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

Tue, 15 Dec 2009 01:57:56 +0000 (17:57 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 15 Dec 2009 16:53:11 +0000 (08:53 -0800)
author KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tue, 15 Dec 2009 01:57:56 +0000 (17:57 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Dec 2009 16:53:11 +0000 (08:53 -0800)
diff --git a/mm/mmap.c b/mm/mmap.c

index ed70a68..02c09f3 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1811,10 +1811,10 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
  }
  
  /*
- * Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the tail.
+ * __split_vma() bypasses sysctl_max_map_count checking.  We use this on the
+ * munmap path where it doesn't make sense to fail.
   */
-int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
               unsigned long addr, int new_below)
  {
         struct mempolicy *pol;
@@ -1824,9 +1824,6 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
                                         ~(huge_page_mask(hstate_vma(vma)))))
                 return -EINVAL;
  
-       if (mm->map_count >= sysctl_max_map_count)
-               return -ENOMEM;
-
         new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
         if (!new)
                 return -ENOMEM;
@@ -1866,6 +1863,19 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
         return 0;
  }
  
+/*
+ * Split a vma at 'addr'; a new vma is allocated for the first part or the
+ * tail.  Returns -ENOMEM if mm->map_count is at sysctl_max_map_count.
+ */
+int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+             unsigned long addr, int new_below)
+{
+       if (mm->map_count >= sysctl_max_map_count)
+               return -ENOMEM;
+
+       return __split_vma(mm, vma, addr, new_below);
+}
+
  /* Munmap is split into 2 main parts -- this part which finds
   * what needs doing, and the areas themselves, which do the
   * work.  This now handles partial unmappings.
@@ -1901,7 +1911,17 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
          * places tmp vma above, and higher split_vma places tmp vma below.
          */
         if (start > vma->vm_start) {
-               int error = split_vma(mm, vma, start, 0);
+               int error;
+
+               /*
+                * Make sure that map_count on return from munmap() will
+                * not exceed its limit; but let map_count go just above
+                * its limit temporarily, to help free resources as expected.
+                */
+               if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+                       return -ENOMEM;
+
+               error = __split_vma(mm, vma, start, 0);
                 if (error)
                         return error;
                 prev = vma;
@@ -1910,7 +1930,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
         /* Does it split the last one? */
         last = find_vma(mm, end);
         if (last && end > last->vm_start) {
-               int error = split_vma(mm, last, end, 1);
+               int error = __split_vma(mm, last, end, 1);
                 if (error)
                         return error;
         }
author	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
	Tue, 15 Dec 2009 01:57:56 +0000 (17:57 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 15 Dec 2009 16:53:11 +0000 (08:53 -0800)