mm: show free swap as signed
index 6aaf657..339cf5c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 
+#include "internal.h"
+
 #ifndef arch_mmap_check
 #define arch_mmap_check(addr, len, flags)      (0)
 #endif
@@ -72,15 +75,16 @@ pgprot_t protection_map[16] = {
 
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
-       return protection_map[vm_flags &
-                               (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+       return __pgprot(pgprot_val(protection_map[vm_flags &
+                               (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
+                       pgprot_val(arch_vm_get_page_prot(vm_flags)));
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;      /* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-atomic_t vm_committed_space = ATOMIC_INIT(0);
+atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
 
 /*
  * Check that a process has enough memory to allocate a new virtual
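
The vm_get_page_prot() hunk above folds architecture-specific protection bits into the generic protection_map[] lookup. A minimal sketch of how arch_vm_get_page_prot() is assumed to be wired up, following the usual pattern of a no-op generic fallback that an architecture may override from its own headers (illustrative only):

	/* sketch: generic fallback, assumed to live in a shared header */
	#ifndef arch_vm_get_page_prot
	#define arch_vm_get_page_prot(vm_flags)	__pgprot(0)
	#endif

With that default, vm_get_page_prot() behaves exactly as before; an architecture that defines the macro can translate one of the VM_* flags into an extra page-table bit and have it ORed into every pgprot computed here.
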
@@ -177,7 +181,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
         * cast `allowed' as a signed long because vm_committed_space
         * sometimes has a negative value
         */
-       if (atomic_read(&vm_committed_space) < (long)allowed)
+       if (atomic_long_read(&vm_committed_space) < (long)allowed)
                return 0;
 error:
        vm_unacct_memory(pages);
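
The switch of vm_committed_space to atomic_long_t matters on 64-bit machines: a 32-bit counter wraps once about 2^31 pages (8 TiB with 4 KiB pages) have been committed, and as the comment above notes the value is read as a signed long because accounting can drive it transiently negative. A sketch of the counterpart accounting helpers this implies, roughly as in the non-SMP variant of the shared header (shown for illustration only):

	extern atomic_long_t vm_committed_space;

	/* sketch: charge or uncharge `pages' worth of committed address space */
	static inline void vm_acct_memory(long pages)
	{
		atomic_long_add(pages, &vm_committed_space);
	}

	static inline void vm_unacct_memory(long pages)
	{
		vm_acct_memory(-pages);	/* may legitimately swing the counter negative */
	}
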
@@ -230,9 +234,12 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
        might_sleep();
        if (vma->vm_ops && vma->vm_ops->close)
                vma->vm_ops->close(vma);
-       if (vma->vm_file)
+       if (vma->vm_file) {
                fput(vma->vm_file);
-       mpol_free(vma_policy(vma));
+               if (vma->vm_flags & VM_EXECUTABLE)
+                       removed_exe_file_vma(vma->vm_mm);
+       }
+       mpol_put(vma_policy(vma));
        kmem_cache_free(vm_area_cachep, vma);
        return next;
 }
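
remove_vma() now drops a reference on the mm's record of its executable file whenever a VM_EXECUTABLE mapping disappears. A sketch of what the added_exe_file_vma()/removed_exe_file_vma() counterparts are assumed to do (reference counting of mm->exe_file for /proc/<pid>/exe; field names and bodies are illustrative):

	void added_exe_file_vma(struct mm_struct *mm)
	{
		mm->num_exe_file_vmas++;
	}

	void removed_exe_file_vma(struct mm_struct *mm)
	{
		mm->num_exe_file_vmas--;

		/* last VM_EXECUTABLE mapping gone: release the cached file */
		if (mm->num_exe_file_vmas == 0 && mm->exe_file) {
			fput(mm->exe_file);
			mm->exe_file = NULL;
		}
	}

The callers are expected to be serialized by mmap_sem (or by the mm being torn down), which is why the sketch uses a plain counter rather than atomics.
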
@@ -242,10 +249,16 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
        unsigned long rlim, retval;
        unsigned long newbrk, oldbrk;
        struct mm_struct *mm = current->mm;
+       unsigned long min_brk;
 
        down_write(&mm->mmap_sem);
 
-       if (brk < mm->start_brk)
+#ifdef CONFIG_COMPAT_BRK
+       min_brk = mm->end_code;
+#else
+       min_brk = mm->start_brk;
+#endif
+       if (brk < min_brk)
                goto out;
 
        /*
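
The CONFIG_COMPAT_BRK case lowers the smallest address brk() will accept from start_brk to end_code: with heap randomization, start_brk sits a random distance above the binary, and ancient binaries that derive their break from the end of their own image would otherwise be refused. A small userspace illustration of the boundary involved (not kernel code; purely to show what such programs assume):

	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Legacy programs compute their heap relative to the end of
		 * their text/data (the _end symbol); with brk randomization
		 * the real break starts higher, which is why the compat case
		 * must keep accepting values down toward mm->end_code. */
		void *initial_break = sbrk(0);

		printf("initial program break: %p\n", initial_break);
		return 0;
	}
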
@@ -357,7 +370,7 @@ find_vma_prepare(struct mm_struct *mm, unsigned long addr,
                if (vma_tmp->vm_end > addr) {
                        vma = vma_tmp;
                        if (vma_tmp->vm_start <= addr)
-                               return vma;
+                               break;
                        __rb_link = &__rb_parent->rb_left;
                } else {
                        rb_prev = __rb_parent;
@@ -623,10 +636,13 @@ again:                    remove_next = 1 + (end > next->vm_end);
                spin_unlock(&mapping->i_mmap_lock);
 
        if (remove_next) {
-               if (file)
+               if (file) {
                        fput(file);
+                       if (next->vm_flags & VM_EXECUTABLE)
+                               removed_exe_file_vma(mm);
+               }
                mm->map_count--;
-               mpol_free(vma_policy(next));
+               mpol_put(vma_policy(next));
                kmem_cache_free(vm_area_cachep, next);
                /*
                 * In mprotect's case 6 (see comments on vma_merge),
@@ -1095,6 +1111,9 @@ munmap_back:
        if (!may_expand_vm(mm, len >> PAGE_SHIFT))
                return -ENOMEM;
 
+       if (flags & MAP_NORESERVE)
+               vm_flags |= VM_NORESERVE;
+
        if (accountable && (!(flags & MAP_NORESERVE) ||
                            sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
                if (vm_flags & VM_SHARED) {
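
Recording MAP_NORESERVE as VM_NORESERVE on the vma makes the caller's opt-out of swap reservation visible to later code that only sees vm_flags (hugetlb reservation tracking is a likely consumer). A minimal userspace example of a mapping that requests this behaviour, using the standard mmap(2) flag:

	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		size_t len = 1UL << 30;	/* 1 GiB of address space, no swap reserved */
		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);

		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		/* Touching pages can still fail later (SIGSEGV/OOM) under
		 * memory pressure, since no backing store was reserved. */
		munmap(p, len);
		return 0;
	}
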
@@ -1154,6 +1173,8 @@ munmap_back:
                error = file->f_op->mmap(file, vma);
                if (error)
                        goto unmap_and_free_vma;
+               if (vm_flags & VM_EXECUTABLE)
+                       added_exe_file_vma(mm);
        } else if (vm_flags & VM_SHARED) {
                error = shmem_zero_setup(vma);
                if (error)
@@ -1182,9 +1203,11 @@ munmap_back:
 
        if (file && vma_merge(mm, prev, addr, vma->vm_end,
                        vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
-               mpol_free(vma_policy(vma));
+               mpol_put(vma_policy(vma));
                kmem_cache_free(vm_area_cachep, vma);
                fput(file);
+               if (vm_flags & VM_EXECUTABLE)
+                       removed_exe_file_vma(mm);
        } else {
                vma_link(mm, vma, prev, rb_link, rb_parent);
                file = vma->vm_file;
@@ -1746,7 +1769,7 @@ static void unmap_region(struct mm_struct *mm,
        update_hiwater_rss(mm);
        unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
        vm_unacct_memory(nr_accounted);
-       free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+       free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
                                 next? next->vm_start: 0);
        tlb_finish_mmu(tlb, start, end);
 }
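
The &tlb → tlb change reflects free_pgtables() losing one level of indirection on its mmu_gather argument, while unmap_vmas() keeps the double pointer because it may swap the gather during the walk. A sketch of the prototypes implied by these call sites (declarations reconstructed for illustration, not quoted from the headers):

	void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
			   unsigned long floor, unsigned long ceiling);

	unsigned long unmap_vmas(struct mmu_gather **tlbp, struct vm_area_struct *vma,
				 unsigned long start_addr, unsigned long end_addr,
				 unsigned long *nr_accounted, struct zap_details *details);
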
@@ -1790,7 +1813,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
        struct mempolicy *pol;
        struct vm_area_struct *new;
 
-       if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+       if (is_vm_hugetlb_page(vma) && (addr &
+                                       ~(huge_page_mask(hstate_vma(vma)))))
                return -EINVAL;
 
        if (mm->map_count >= sysctl_max_map_count)
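
With multiple hugepage sizes, the split address must be aligned to the page size of this particular hugetlb vma rather than to a single global HPAGE_MASK. A small sketch of the arithmetic the check performs (the helper and its comments are illustrative; hstate_vma()/huge_page_mask() come from the multi-size hugetlb interface):

	#include <linux/hugetlb.h>

	/* sketch: for a 2 MiB hstate, huge_page_mask() is ~0x1fffffUL, so
	 * ~huge_page_mask() keeps exactly the offset bits inside one huge
	 * page; any nonzero offset means the split point is misaligned. */
	static int hugetlb_split_addr_ok(struct vm_area_struct *vma, unsigned long addr)
	{
		struct hstate *h = hstate_vma(vma);

		return (addr & ~huge_page_mask(h)) == 0;
	}
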
@@ -1810,15 +1834,18 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
                new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
        }
 
-       pol = mpol_copy(vma_policy(vma));
+       pol = mpol_dup(vma_policy(vma));
        if (IS_ERR(pol)) {
                kmem_cache_free(vm_area_cachep, new);
                return PTR_ERR(pol);
        }
        vma_set_policy(new, pol);
 
-       if (new->vm_file)
+       if (new->vm_file) {
                get_file(new->vm_file);
+               if (vma->vm_flags & VM_EXECUTABLE)
+                       added_exe_file_vma(mm);
+       }
 
        if (new->vm_ops && new->vm_ops->open)
                new->vm_ops->open(new);
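
Throughout the file, mpol_copy()/mpol_free() become mpol_dup()/mpol_put(), matching the move to reference-counted NUMA policies. The pairing assumed by these call sites, sketched as prototypes (bodies omitted; semantics inferred from the usage above):

	/* sketch: duplicate a vma's policy for a new vma; returns ERR_PTR() on failure */
	struct mempolicy *mpol_dup(struct mempolicy *pol);

	/* sketch: drop one reference, freeing the policy when none remain */
	void mpol_put(struct mempolicy *pol);
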
@@ -2035,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm)
 
        /* mm's last user has gone, and its about to be pulled down */
        arch_exit_mmap(mm);
+       mmu_notifier_release(mm);
 
        lru_add_drain();
        flush_cache_mm(mm);
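
exit_mmap() now notifies registered MMU notifiers before the address space is torn down, so users with secondary MMUs can stop referencing its pages. A hedged sketch of the consumer side, using the registration API introduced together with this hook (the ops instance and callback body are illustrative only):

	#include <linux/mmu_notifier.h>
	#include <linux/sched.h>

	static void my_mmu_release(struct mmu_notifier *mn, struct mm_struct *mm)
	{
		/* illustrative: tear down any secondary-MMU mappings of this mm */
	}

	static const struct mmu_notifier_ops my_mmu_ops = {
		.release = my_mmu_release,
	};

	static struct mmu_notifier my_mmu_notifier = {
		.ops = &my_mmu_ops,
	};

	/* illustrative registration against current->mm; real users embed the
	 * notifier in their own per-mm state. */
	static int my_register(void)
	{
		return mmu_notifier_register(&my_mmu_notifier, current->mm);
	}
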
@@ -2043,7 +2071,7 @@ void exit_mmap(struct mm_struct *mm)
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
        end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
        vm_unacct_memory(nr_accounted);
-       free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+       free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
        tlb_finish_mmu(tlb, 0, end);
 
        /*
@@ -2126,7 +2154,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
                if (new_vma) {
                        *new_vma = *vma;
-                       pol = mpol_copy(vma_policy(vma));
+                       pol = mpol_dup(vma_policy(vma));
                        if (IS_ERR(pol)) {
                                kmem_cache_free(vm_area_cachep, new_vma);
                                return NULL;
@@ -2135,8 +2163,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                        new_vma->vm_start = addr;
                        new_vma->vm_end = addr + len;
                        new_vma->vm_pgoff = pgoff;
-                       if (new_vma->vm_file)
+                       if (new_vma->vm_file) {
                                get_file(new_vma->vm_file);
+                               if (vma->vm_flags & VM_EXECUTABLE)
+                                       added_exe_file_vma(mm);
+                       }
                        if (new_vma->vm_ops && new_vma->vm_ops->open)
                                new_vma->vm_ops->open(new_vma);
                        vma_link(mm, new_vma, prev, rb_link, rb_parent);
@@ -2239,3 +2270,167 @@ int install_special_mapping(struct mm_struct *mm,
 
        return 0;
 }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
+{
+       if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+               /*
+                * The LSB of head.next can't change from under us
+                * because we hold the mm_all_locks_mutex.
+                */
+               spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
+               /*
+                * We can safely modify head.next after taking the
+                * anon_vma->lock. If some other vma in this mm shares
+                * the same anon_vma we won't take it again.
+                *
+                * No need of atomic instructions here, head.next
+                * can't change from under us thanks to the
+                * anon_vma->lock.
+                */
+               if (__test_and_set_bit(0, (unsigned long *)
+                                      &anon_vma->head.next))
+                       BUG();
+       }
+}
+
+static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
+{
+       if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+               /*
+                * AS_MM_ALL_LOCKS can't change from under us because
+                * we hold the mm_all_locks_mutex.
+                *
+                * Operations on ->flags have to be atomic because
+                * even if AS_MM_ALL_LOCKS is stable thanks to the
+                * mm_all_locks_mutex, there may be other cpus
+                * changing other bitflags in parallel to us.
+                */
+               if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+                       BUG();
+               spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+       }
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to avoid new
+ * anon_vmas to be associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid to take the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks are expensive operations
+ * that may have to take thousand of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+       struct vm_area_struct *vma;
+       int ret = -EINTR;
+
+       BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+       mutex_lock(&mm_all_locks_mutex);
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (signal_pending(current))
+                       goto out_unlock;
+               if (vma->vm_file && vma->vm_file->f_mapping)
+                       vm_lock_mapping(mm, vma->vm_file->f_mapping);
+       }
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (signal_pending(current))
+                       goto out_unlock;
+               if (vma->anon_vma)
+                       vm_lock_anon_vma(mm, vma->anon_vma);
+       }
+
+       ret = 0;
+
+out_unlock:
+       if (ret)
+               mm_drop_all_locks(mm);
+
+       return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+       if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+               /*
+                * The LSB of head.next can't change to 0 from under
+                * us because we hold the mm_all_locks_mutex.
+                *
+                * We must however clear the bitflag before unlocking
+                * the vma so the users using the anon_vma->head will
+                * never see our bitflag.
+                *
+                * No need of atomic instructions here, head.next
+                * can't change from under us until we release the
+                * anon_vma->lock.
+                */
+               if (!__test_and_clear_bit(0, (unsigned long *)
+                                         &anon_vma->head.next))
+                       BUG();
+               spin_unlock(&anon_vma->lock);
+       }
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+       if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+               /*
+                * AS_MM_ALL_LOCKS can't change to 0 from under us
+                * because we hold the mm_all_locks_mutex.
+                */
+               spin_unlock(&mapping->i_mmap_lock);
+               if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+                                       &mapping->flags))
+                       BUG();
+       }
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+       struct vm_area_struct *vma;
+
+       BUG_ON(down_read_trylock(&mm->mmap_sem));
+       BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (vma->anon_vma)
+                       vm_unlock_anon_vma(vma->anon_vma);
+               if (vma->vm_file && vma->vm_file->f_mapping)
+                       vm_unlock_mapping(vma->vm_file->f_mapping);
+       }
+
+       mutex_unlock(&mm_all_locks_mutex);
+}
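
Per the comments above, mm_take_all_locks() must be entered with mmap_sem held for writing, and mmap_sem may not be released until mm_drop_all_locks() has run; on failure the function has already dropped whatever it took. A minimal usage sketch obeying those rules (the critical-section body is illustrative, with MMU notifier registration the likely real caller):

	static int do_something_with_all_vm_locks(struct mm_struct *mm)
	{
		int ret;

		down_write(&mm->mmap_sem);
		ret = mm_take_all_locks(mm);	/* -EINTR if a signal is pending */
		if (ret)
			goto out;

		/* illustrative: with every i_mmap_lock and anon_vma->lock held,
		 * nothing can change the ptes of this mm behind our back. */

		mm_drop_all_locks(mm);
	out:
		up_write(&mm->mmap_sem);
		return ret;
	}
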