ksm: fix deadlock with munlock in exit_mmap
[safe/jmp/linux-2.6] / mm / nommu.c
index 0c3e7d2..66e81e7 100644 (file)
@@ -62,14 +62,14 @@ void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
 unsigned long num_physpages;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
-int sysctl_nr_trim_pages = 1; /* page trimming behaviour */
+int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
 int heap_stack_gap = 0;
 
-atomic_t mmap_pages_allocated;
+atomic_long_t mmap_pages_allocated;
 
 EXPORT_SYMBOL(mem_map);
 EXPORT_SYMBOL(num_physpages);
@@ -170,8 +170,8 @@ unsigned int kobjsize(const void *objp)
 }
 
 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-                    unsigned long start, int len, int flags,
-               struct page **pages, struct vm_area_struct **vmas)
+                    unsigned long start, int nr_pages, int flags,
+                    struct page **pages, struct vm_area_struct **vmas)
 {
        struct vm_area_struct *vma;
        unsigned long vm_flags;
@@ -186,7 +186,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
        vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
        vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
 
-       for (i = 0; i < len; i++) {
+       for (i = 0; i < nr_pages; i++) {
                vma = find_vma(mm, start);
                if (!vma)
                        goto finish_or_fault;
@@ -221,7 +221,7 @@ finish_or_fault:
  * - don't permit access to VMAs that don't support it, such as I/O mappings
  */
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-       unsigned long start, int len, int write, int force,
+       unsigned long start, int nr_pages, int write, int force,
        struct page **pages, struct vm_area_struct **vmas)
 {
        int flags = 0;
@@ -231,12 +231,31 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
        if (force)
                flags |= GUP_FLAGS_FORCE;
 
-       return __get_user_pages(tsk, mm,
-                               start, len, flags,
-                               pages, vmas);
+       return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
 }
 EXPORT_SYMBOL(get_user_pages);
 
+/**
+ * follow_pfn - look up PFN at a user virtual address
+ * @vma: memory mapping
+ * @address: user virtual address
+ * @pfn: location to store found PFN
+ *
+ * Only IO mappings and raw PFN mappings are allowed.
+ *
+ * Returns zero and the pfn at @pfn on success, -ve otherwise.
+ */
+int follow_pfn(struct vm_area_struct *vma, unsigned long address,
+       unsigned long *pfn)
+{
+       if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+               return -EINVAL;
+
+       *pfn = address >> PAGE_SHIFT;
+       return 0;
+}
+EXPORT_SYMBOL(follow_pfn);
+
 DEFINE_RWLOCK(vmlist_lock);
 struct vm_struct *vmlist;
 
@@ -463,12 +482,11 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
  */
 void __init mmap_init(void)
 {
-       vm_region_jar = kmem_cache_create("vm_region_jar",
-                                         sizeof(struct vm_region), 0,
-                                         SLAB_PANIC, NULL);
-       vm_area_cachep = kmem_cache_create("vm_area_struct",
-                                          sizeof(struct vm_area_struct), 0,
-                                          SLAB_PANIC, NULL);
+       int ret;
+
+       ret = percpu_counter_init(&vm_committed_as, 0);
+       VM_BUG_ON(ret);
+       vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC);
 }
 
 /*
@@ -486,27 +504,24 @@ static noinline void validate_nommu_regions(void)
                return;
 
        last = rb_entry(lastp, struct vm_region, vm_rb);
-       if (unlikely(last->vm_end <= last->vm_start))
-               BUG();
-       if (unlikely(last->vm_top < last->vm_end))
-               BUG();
+       BUG_ON(unlikely(last->vm_end <= last->vm_start));
+       BUG_ON(unlikely(last->vm_top < last->vm_end));
 
        while ((p = rb_next(lastp))) {
                region = rb_entry(p, struct vm_region, vm_rb);
                last = rb_entry(lastp, struct vm_region, vm_rb);
 
-               if (unlikely(region->vm_end <= region->vm_start))
-                       BUG();
-               if (unlikely(region->vm_top < region->vm_end))
-                       BUG();
-               if (unlikely(region->vm_start < last->vm_top))
-                       BUG();
+               BUG_ON(unlikely(region->vm_end <= region->vm_start));
+               BUG_ON(unlikely(region->vm_top < region->vm_end));
+               BUG_ON(unlikely(region->vm_start < last->vm_top));
 
                lastp = p;
        }
 }
 #else
-#define validate_nommu_regions() do {} while(0)
+static void validate_nommu_regions(void)
+{
+}
 #endif
 
 /*
@@ -519,8 +534,6 @@ static void add_nommu_region(struct vm_region *region)
 
        validate_nommu_regions();
 
-       BUG_ON(region->vm_start & ~PAGE_MASK);
-
        parent = NULL;
        p = &nommu_region_tree.rb_node;
        while (*p) {
@@ -563,16 +576,17 @@ static void free_page_series(unsigned long from, unsigned long to)
                struct page *page = virt_to_page(from);
 
                kdebug("- free %lx", from);
-               atomic_dec(&mmap_pages_allocated);
+               atomic_long_dec(&mmap_pages_allocated);
                if (page_count(page) != 1)
-                       kdebug("free page %p [%d]", page, page_count(page));
+                       kdebug("free page %p: refcount not one: %d",
+                              page, page_count(page));
                put_page(page);
        }
 }
 
 /*
  * release a reference to a region
- * - the caller must hold the region semaphore, which this releases
+ * - the caller must hold the region semaphore for writing, which this releases
  * - the region may not have been added to the tree yet, in which case vm_top
  *   will equal vm_start
  */
@@ -905,6 +919,10 @@ static int validate_mmap_request(struct file *file,
                if (!file->f_op->read)
                        capabilities &= ~BDI_CAP_MAP_COPY;
 
+               /* The file shall have been opened with read permission. */
+               if (!(file->f_mode & FMODE_READ))
+                       return -EACCES;
+
                if (flags & MAP_SHARED) {
                        /* do checks for writing, appending and locking */
                        if ((prot & PROT_WRITE) &&
@@ -1096,7 +1114,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
                goto enomem;
 
        total = 1 << order;
-       atomic_add(total, &mmap_pages_allocated);
+       atomic_long_add(total, &mmap_pages_allocated);
 
        point = rlen >> PAGE_SHIFT;
 
@@ -1107,7 +1125,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
                        order = ilog2(total - point);
                        n = 1 << order;
                        kdebug("shave %lu/%lu @%lu", n, total - point, total);
-                       atomic_sub(n, &mmap_pages_allocated);
+                       atomic_long_sub(n, &mmap_pages_allocated);
                        total -= n;
                        set_page_refcounted(pages + total);
                        __free_pages(pages + total, order);
@@ -1161,8 +1179,8 @@ error_free:
        return ret;
 
 enomem:
-       printk("Allocation of length %lu from process %d failed\n",
-              len, current->pid);
+       printk("Allocation of length %lu from process %d (%s) failed\n",
+              len, current->pid, current->comm);
        show_free_areas();
        return -ENOMEM;
 }
@@ -1334,6 +1352,7 @@ unsigned long do_mmap_pgoff(struct file *file,
        }
 
        vma->vm_region = region;
+       add_nommu_region(region);
 
        /* set up the mapping */
        if (file && vma->vm_flags & VM_SHARED)
@@ -1343,8 +1362,6 @@ unsigned long do_mmap_pgoff(struct file *file,
        if (ret < 0)
                goto error_put_region;
 
-       add_nommu_region(region);
-
        /* okay... we have a mapping; now we have to register it */
        result = vma->vm_start;
 
@@ -1536,10 +1553,15 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
        /* find the first potentially overlapping VMA */
        vma = find_vma(mm, start);
        if (!vma) {
-               printk(KERN_WARNING
-                      "munmap of memory not mmapped by process %d (%s):"
-                      " 0x%lx-0x%lx\n",
-                      current->pid, current->comm, start, start + len - 1);
+               static int limit = 0;
+               if (limit < 5) {
+                       printk(KERN_WARNING
+                              "munmap of memory not mmapped by process %d"
+                              " (%s): 0x%lx-0x%lx\n",
+                              current->pid, current->comm,
+                              start, start + len - 1);
+                       limit++;
+               }
                return -EINVAL;
        }
 
@@ -1849,12 +1871,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
        if (mm)
                allowed -= mm->total_vm / 32;
 
-       /*
-        * cast `allowed' as a signed long because vm_committed_space
-        * sometimes has a negative value
-        */
-       if (atomic_long_read(&vm_committed_space) < (long)allowed)
+       if (percpu_counter_read_positive(&vm_committed_as) < allowed)
                return 0;
+
 error:
        vm_unacct_memory(pages);