mm: show free swap as signed
index 6aaf657..339cf5c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 
+#include "internal.h"
+
 #ifndef arch_mmap_check
 #define arch_mmap_check(addr, len, flags)      (0)
 #endif
@@ -72,15 +75,16 @@ pgprot_t protection_map[16] = {
 
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
-       return protection_map[vm_flags &
-                               (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+       return __pgprot(pgprot_val(protection_map[vm_flags &
+                               (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
+                       pgprot_val(arch_vm_get_page_prot(vm_flags)));
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;      /* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-atomic_t vm_committed_space = ATOMIC_INIT(0);
+atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
 
 /*
  * Check that a process has enough memory to allocate a new virtual
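
The vm_get_page_prot() hunk above folds architecture-specific protection bits into the generic protection_map[] lookup. A minimal sketch of how arch_vm_get_page_prot() is assumed to be wired up, following the usual pattern of a no-op generic fallback that an architecture may override from its own headers (illustrative only):

	/* sketch: generic fallback, assumed to live in a shared header */
	#ifndef arch_vm_get_page_prot
	#define arch_vm_get_page_prot(vm_flags)	__pgprot(0)
	#endif

With that default, vm_get_page_prot() behaves exactly as before; an architecture that defines the macro can translate one of the VM_* flags into an extra page-table bit and have it ORed into every pgprot computed here.
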
@@ -177,7 +181,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
         * cast `allowed' as a signed long because vm_committed_space
         * sometimes has a negative value
         */
-       if (atomic_read(&vm_committed_space) < (long)allowed)
+       if (atomic_long_read(&vm_committed_space) < (long)allowed)
                return 0;
 error:
        vm_unacct_memory(pages);
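
The switch of vm_committed_space to atomic_long_t matters on 64-bit machines: a 32-bit counter wraps once about 2^31 pages (8 TiB with 4 KiB pages) have been committed, and as the comment above notes the value is read as a signed long because accounting can drive it transiently negative. A sketch of the counterpart accounting helpers this implies, roughly as in the non-SMP variant of the shared header (shown for illustration only):

	extern atomic_long_t vm_committed_space;

	/* sketch: charge or uncharge `pages' worth of committed address space */
	static inline void vm_acct_memory(long pages)
	{
		atomic_long_add(pages, &vm_committed_space);
	}

	static inline void vm_unacct_memory(long pages)
	{
		vm_acct_memory(-pages);	/* may legitimately swing the counter negative */
	}
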
@@ -230,9 +234,12 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
        might_sleep();
        if (vma->vm_ops && vma->vm_ops->close)
                vma->vm_ops->close(vma);
-       if (vma->vm_file)
+       if (vma->vm_file) {
                fput(vma->vm_file);
-       mpol_free(vma_policy(vma));
+               if (vma->vm_flags & VM_EXECUTABLE)
+                       removed_exe_file_vma(vma->vm_mm);
+       }
+       mpol_put(vma_policy(vma));
        kmem_cache_free(vm_area_cachep, vma);
        return next;
 }
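
remove_vma() now drops a reference on the mm's record of its executable file whenever a VM_EXECUTABLE mapping disappears. A sketch of what the added_exe_file_vma()/removed_exe_file_vma() counterparts are assumed to do (reference counting of mm->exe_file for /proc/<pid>/exe; field names and bodies are illustrative):

	void added_exe_file_vma(struct mm_struct *mm)
	{
		mm->num_exe_file_vmas++;
	}

	void removed_exe_file_vma(struct mm_struct *mm)
	{
		mm->num_exe_file_vmas--;

		/* last VM_EXECUTABLE mapping gone: release the cached file */
		if (mm->num_exe_file_vmas == 0 && mm->exe_file) {
			fput(mm->exe_file);
			mm->exe_file = NULL;
		}
	}

The callers are expected to be serialized by mmap_sem (or by the mm being torn down), which is why the sketch uses a plain counter rather than atomics.
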
@@ -242,10 +249,16 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
        unsigned long rlim, retval;
        unsigned long newbrk, oldbrk;
        struct mm_struct *mm = current->mm;
+       unsigned long min_brk;
 
        down_write(&mm->mmap_sem);
 
-       if (brk < mm->start_brk)
+#ifdef CONFIG_COMPAT_BRK
+       min_brk = mm->end_code;
+#else
+       min_brk = mm->start_brk;
+#endif
+       if (brk < min_brk)
                goto out;
 
        /*
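
The CONFIG_COMPAT_BRK case lowers the smallest address brk() will accept from start_brk to end_code: with heap randomization, start_brk sits a random distance above the binary, and ancient binaries that derive their break from the end of their own image would otherwise be refused. A small userspace illustration of the boundary involved (not kernel code; purely to show what such programs assume):

	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Legacy programs compute their heap relative to the end of
		 * their text/data (the _end symbol); with brk randomization
		 * the real break starts higher, which is why the compat case
		 * must keep accepting values down toward mm->end_code. */
		void *initial_break = sbrk(0);

		printf("initial program break: %p\n", initial_break);
		return 0;
	}
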
@@ -357,7 +370,7 @@ find_vma_prepare(struct mm_struct *mm, unsigned long addr,
                if (vma_tmp->vm_end > addr) {
                        vma = vma_tmp;
                        if (vma_tmp->vm_start <= addr)
-                               return vma;
+                               break;
                        __rb_link = &__rb_parent->rb_left;
                } else {
                        rb_prev = __rb_parent;
@@ -623,10 +636,13 @@ again:                    remove_next = 1 + (end > next->vm_end);
                spin_unlock(&mapping->i_mmap_lock);
 
        if (remove_next) {
-               if (file)
+               if (file) {
                        fput(file);
+                       if (next->vm_flags & VM_EXECUTABLE)
+                               removed_exe_file_vma(mm);
+               }
                mm->map_count--;
-               mpol_free(vma_policy(next));
+               mpol_put(vma_policy(next));
                kmem_cache_free(vm_area_cachep, next);
                /*
                 * In mprotect's case 6 (see comments on vma_merge),
@@ -1095,6 +1111,9 @@ munmap_back:
        if (!may_expand_vm(mm, len >> PAGE_SHIFT))
                return -ENOMEM;
 
+       if (flags & MAP_NORESERVE)
+               vm_flags |= VM_NORESERVE;
+
        if (accountable && (!(flags & MAP_NORESERVE) ||
                            sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
                if (vm_flags & VM_SHARED) {
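
Recording MAP_NORESERVE as VM_NORESERVE on the vma makes the caller's opt-out of swap reservation visible to later code that only sees vm_flags (hugetlb reservation tracking is a likely consumer). A minimal userspace example of a mapping that requests this behaviour, using the standard mmap(2) flag:

	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		size_t len = 1UL << 30;	/* 1 GiB of address space, no swap reserved */
		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);

		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		/* Touching pages can still fail later (SIGSEGV/OOM) under
		 * memory pressure, since no backing store was reserved. */
		munmap(p, len);
		return 0;
	}
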
@@ -1154,6 +1173,8 @@ munmap_back:
                error = file->f_op->mmap(file, vma);
                if (error)
                        goto unmap_and_free_vma;
+               if (vm_flags & VM_EXECUTABLE)
+                       added_exe_file_vma(mm);
        } else if (vm_flags & VM_SHARED) {
                error = shmem_zero_setup(vma);
                if (error)
@@ -1182,9 +1203,11 @@ munmap_back:
 
        if (file && vma_merge(mm, prev, addr, vma->vm_end,
                        vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
-               mpol_free(vma_policy(vma));
+               mpol_put(vma_policy(vma));
                kmem_cache_free(vm_area_cachep, vma);
                fput(file);
+               if (vm_flags & VM_EXECUTABLE)
+                       removed_exe_file_vma(mm);
        } else {
                vma_link(mm, vma, prev, rb_link, rb_parent);
                file = vma->vm_file;
@@ -1746,7 +1769,7 @@ static void unmap_region(struct mm_struct *mm,
        update_hiwater_rss(mm);
        unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
        vm_unacct_memory(nr_accounted);
-       free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+       free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
                                 next? next->vm_start: 0);
        tlb_finish_mmu(tlb, start, end);
 }
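
The &tlb → tlb change reflects free_pgtables() losing one level of indirection on its mmu_gather argument, while unmap_vmas() keeps the double pointer because it may swap the gather during the walk. A sketch of the prototypes implied by these call sites (declarations reconstructed for illustration, not quoted from the headers):

	void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
			   unsigned long floor, unsigned long ceiling);

	unsigned long unmap_vmas(struct mmu_gather **tlbp, struct vm_area_struct *vma,
				 unsigned long start_addr, unsigned long end_addr,
				 unsigned long *nr_accounted, struct zap_details *details);
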
@@ -1790,7 +1813,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
        struct mempolicy *pol;
        struct vm_area_struct *new;
 
-       if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+       if (is_vm_hugetlb_page(vma) && (addr &
+                                       ~(huge_page_mask(hstate_vma(vma)))))
                return -EINVAL;
 
        if (mm->map_count >= sysctl_max_map_count)
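
With multiple hugepage sizes, the split address must be aligned to the page size of this particular hugetlb vma rather than to a single global HPAGE_MASK. A small sketch of the arithmetic the check performs (the helper and its comments are illustrative; hstate_vma()/huge_page_mask() come from the multi-size hugetlb interface):

	#include <linux/hugetlb.h>

	/* sketch: for a 2 MiB hstate, huge_page_mask() is ~0x1fffffUL, so
	 * ~huge_page_mask() keeps exactly the offset bits inside one huge
	 * page; any nonzero offset means the split point is misaligned. */
	static int hugetlb_split_addr_ok(struct vm_area_struct *vma, unsigned long addr)
	{
		struct hstate *h = hstate_vma(vma);

		return (addr & ~huge_page_mask(h)) == 0;
	}
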
@@ -1810,15 +1834,18 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
                new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
        }
 
-       pol = mpol_copy(vma_policy(vma));
+       pol = mpol_dup(vma_policy(vma));
        if (IS_ERR(pol)) {
                kmem_cache_free(vm_area_cachep, new);
                return PTR_ERR(pol);
        }
        vma_set_policy(new, pol);
 
-       if (new->vm_file)
+       if (new->vm_file) {
                get_file(new->vm_file);
+               if (vma->vm_flags & VM_EXECUTABLE)
+                       added_exe_file_vma(mm);
+       }
 
        if (new->vm_ops && new->vm_ops->open)
                new->vm_ops->open(new);
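
Throughout the file, mpol_copy()/mpol_free() become mpol_dup()/mpol_put(), matching the move to reference-counted NUMA policies. The pairing assumed by these call sites, sketched as prototypes (bodies omitted; semantics inferred from the usage above):

	/* sketch: duplicate a vma's policy for a new vma; returns ERR_PTR() on failure */
	struct mempolicy *mpol_dup(struct mempolicy *pol);

	/* sketch: drop one reference, freeing the policy when none remain */
	void mpol_put(struct mempolicy *pol);
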
@@ -2035,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm)
 
        /* mm's last user has gone, and its about to be pulled down */
        arch_exit_mmap(mm);
+       mmu_notifier_release(mm);
 
        lru_add_drain();
        flush_cache_mm(mm);
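
exit_mmap() now notifies registered MMU notifiers before the address space is torn down, so users with secondary MMUs can stop referencing its pages. A hedged sketch of the consumer side, using the registration API introduced together with this hook (the ops instance and callback body are illustrative only):

	#include <linux/mmu_notifier.h>
	#include <linux/sched.h>

	static void my_mmu_release(struct mmu_notifier *mn, struct mm_struct *mm)
	{
		/* illustrative: tear down any secondary-MMU mappings of this mm */
	}

	static const struct mmu_notifier_ops my_mmu_ops = {
		.release = my_mmu_release,
	};

	static struct mmu_notifier my_mmu_notifier = {
		.ops = &my_mmu_ops,
	};

	/* illustrative registration against current->mm; real users embed the
	 * notifier in their own per-mm state. */
	static int my_register(void)
	{
		return mmu_notifier_register(&my_mmu_notifier, current->mm);
	}
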
@@ -2043,7 +2071,7 @@ void exit_mmap(struct mm_struct *mm)
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
        end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
        vm_unacct_memory(nr_accounted);
-       free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+       free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
        tlb_finish_mmu(tlb, 0, end);
 
        /*
@@ -2126,7 +2154,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
                if (new_vma) {
                        *new_vma = *vma;
-                       pol = mpol_copy(vma_policy(vma));
+                       pol = mpol_dup(vma_policy(vma));
                        if (IS_ERR(pol)) {
                                kmem_cache_free(vm_area_cachep, new_vma);
                                return NULL;
@@ -2135,8 +2163,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                        new_vma->vm_start = addr;
                        new_vma->vm_end = addr + len;
                        new_vma->vm_pgoff = pgoff;
-                       if (new_vma->vm_file)
+                       if (new_vma->vm_file) {
                                get_file(new_vma->vm_file);
+                               if (vma->vm_flags & VM_EXECUTABLE)
+                                       added_exe_file_vma(mm);
+                       }
                        if (new_vma->vm_ops && new_vma->vm_ops->open)
                                new_vma->vm_ops->open(new_vma);
                        vma_link(mm, new_vma, prev, rb_link, rb_parent);
@@ -2239,3 +2270,167 @@ int install_special_mapping(struct mm_struct *mm,
 
        return 0;
 }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
+{
+       if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+               /*
+                * The LSB of head.next can't change from under us
+                * because we hold the mm_all_locks_mutex.
+                */
+               spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
+               /*
+                * We can safely modify head.next after taking the
+                * anon_vma->lock. If some other vma in this mm shares
+                * the same anon_vma we won't take it again.
+                *
+                * No need of atomic instructions here, head.next
+                * can't change from under us thanks to the
+                * anon_vma->lock.
+                */
+               if (__test_and_set_bit(0, (unsigned long *)
+                                      &anon_vma->head.next))
+                       BUG();
+       }
+}
+
+static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
+{
+       if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+               /*
+                * AS_MM_ALL_LOCKS can't change from under us because
+                * we hold the mm_all_locks_mutex.
+                *
+                * Operations on ->flags have to be atomic because
+                * even if AS_MM_ALL_LOCKS is stable thanks to the
+                * mm_all_locks_mutex, there may be other cpus
+                * changing other bitflags in parallel to us.
+                */
+               if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+                       BUG();
+               spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+       }
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to avoid new
+ * anon_vmas to be associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid to take the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks are expensive operations
+ * that may have to take thousand of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+       struct vm_area_struct *vma;
+       int ret = -EINTR;
+
+       BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+       mutex_lock(&mm_all_locks_mutex);
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (signal_pending(current))
+                       goto out_unlock;
+               if (vma->vm_file && vma->vm_file->f_mapping)
+                       vm_lock_mapping(mm, vma->vm_file->f_mapping);
+       }
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (signal_pending(current))
+                       goto out_unlock;
+               if (vma->anon_vma)
+                       vm_lock_anon_vma(mm, vma->anon_vma);
+       }
+
+       ret = 0;
+
+out_unlock:
+       if (ret)
+               mm_drop_all_locks(mm);
+
+       return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+       if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+               /*
+                * The LSB of head.next can't change to 0 from under
+                * us because we hold the mm_all_locks_mutex.
+                *
+                * We must however clear the bitflag before unlocking
+                * the vma so the users using the anon_vma->head will
+                * never see our bitflag.
+                *
+                * No need of atomic instructions here, head.next
+                * can't change from under us until we release the
+                * anon_vma->lock.
+                */
+               if (!__test_and_clear_bit(0, (unsigned long *)
+                                         &anon_vma->head.next))
+                       BUG();
+               spin_unlock(&anon_vma->lock);
+       }
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+       if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+               /*
+                * AS_MM_ALL_LOCKS can't change to 0 from under us
+                * because we hold the mm_all_locks_mutex.
+                */
+               spin_unlock(&mapping->i_mmap_lock);
+               if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+                                       &mapping->flags))
+                       BUG();
+       }
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+       struct vm_area_struct *vma;
+
+       BUG_ON(down_read_trylock(&mm->mmap_sem));
+       BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (vma->anon_vma)
+                       vm_unlock_anon_vma(vma->anon_vma);
+               if (vma->vm_file && vma->vm_file->f_mapping)
+                       vm_unlock_mapping(vma->vm_file->f_mapping);
+       }
+
+       mutex_unlock(&mm_all_locks_mutex);
+}
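
Per the comments above, mm_take_all_locks() must be entered with mmap_sem held for writing, and mmap_sem may not be released until mm_drop_all_locks() has run; on failure the function has already dropped whatever it took. A minimal usage sketch obeying those rules (the critical-section body is illustrative, with MMU notifier registration the likely real caller):

	static int do_something_with_all_vm_locks(struct mm_struct *mm)
	{
		int ret;

		down_write(&mm->mmap_sem);
		ret = mm_take_all_locks(mm);	/* -EINTR if a signal is pending */
		if (ret)
			goto out;

		/* illustrative: with every i_mmap_lock and anon_vma->lock held,
		 * nothing can change the ptes of this mm behind our back. */

		mm_drop_all_locks(mm);
	out:
		up_write(&mm->mmap_sem);
		return ret;
	}
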