mmu-notifiers: add mm_take_all_locks() operation

author Andrea Arcangeli <andrea@qumranet.com>

Mon, 28 Jul 2008 22:46:26 +0000 (15:46 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 28 Jul 2008 23:30:21 +0000 (16:30 -0700)
author Andrea Arcangeli <andrea@qumranet.com>
Mon, 28 Jul 2008 22:46:26 +0000 (15:46 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 28 Jul 2008 23:30:21 +0000 (16:30 -0700)
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 6e695ea..866a3db 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1104,6 +1104,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
         unsigned long addr, unsigned long len, pgoff_t pgoff);
  extern void exit_mmap(struct mm_struct *);
  
+extern int mm_take_all_locks(struct mm_struct *mm);
+extern void mm_drop_all_locks(struct mm_struct *mm);
+
  #ifdef CONFIG_PROC_FS
  /* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */
  extern void added_exe_file_vma(struct mm_struct *mm);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h

index a81d818..a39b38c 100644 (file)
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -20,6 +20,7 @@
   */
  #define        AS_EIO          (__GFP_BITS_SHIFT + 0)  /* IO error on async write */
  #define AS_ENOSPC      (__GFP_BITS_SHIFT + 1)  /* ENOSPC on async write */
+#define AS_MM_ALL_LOCKS        (__GFP_BITS_SHIFT + 2)  /* under mm_take_all_locks() */
  
  static inline void mapping_set_error(struct address_space *mapping, int error)
  {
diff --git a/include/linux/rmap.h b/include/linux/rmap.h

index 1383692..69407f8 100644 (file)
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -26,6 +26,14 @@
   */
  struct anon_vma {
         spinlock_t lock;        /* Serialize access to vma list */
+       /*
+        * NOTE: the LSB of the head.next is set by
+        * mm_take_all_locks() _after_ taking the above lock. So the
+        * head must only be read/written after taking the above lock
+        * to be sure to see a valid next pointer. The LSB bit itself
+        * is serialized by a system wide lock only visible to
+        * mm_take_all_locks() (mm_all_locks_mutex).
+        */
         struct list_head head;  /* List of private "related" vmas */
  };
  
diff --git a/mm/mmap.c b/mm/mmap.c

index 5e0cc99..e5f9cb8 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2268,3 +2268,161 @@ int install_special_mapping(struct mm_struct *mm,
  
         return 0;
  }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct anon_vma *anon_vma)
+{
+       if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+               /*
+                * The LSB of head.next can't change from under us
+                * because we hold the mm_all_locks_mutex.
+                */
+               spin_lock(&anon_vma->lock);
+               /*
+                * We can safely modify head.next after taking the
+                * anon_vma->lock. If some other vma in this mm shares
+                * the same anon_vma we won't take it again.
+                *
+                * No need of atomic instructions here, head.next
+                * can't change from under us thanks to the
+                * anon_vma->lock.
+                */
+               if (__test_and_set_bit(0, (unsigned long *)
+                                      &anon_vma->head.next))
+                       BUG();
+       }
+}
+
+static void vm_lock_mapping(struct address_space *mapping)
+{
+       if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+               /*
+                * AS_MM_ALL_LOCKS can't change from under us because
+                * we hold the mm_all_locks_mutex.
+                *
+                * Operations on ->flags have to be atomic because
+                * even if AS_MM_ALL_LOCKS is stable thanks to the
+                * mm_all_locks_mutex, there may be other cpus
+                * changing other bitflags in parallel to us.
+                */
+               if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+                       BUG();
+               spin_lock(&mapping->i_mmap_lock);
+       }
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to avoid new
+ * anon_vmas to be associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid to take the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks are expensive operations
+ * that may have to take thousand of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+       struct vm_area_struct *vma;
+       int ret = -EINTR;
+
+       BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+       mutex_lock(&mm_all_locks_mutex);
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (signal_pending(current))
+                       goto out_unlock;
+               if (vma->anon_vma)
+                       vm_lock_anon_vma(vma->anon_vma);
+               if (vma->vm_file && vma->vm_file->f_mapping)
+                       vm_lock_mapping(vma->vm_file->f_mapping);
+       }
+       ret = 0;
+
+out_unlock:
+       if (ret)
+               mm_drop_all_locks(mm);
+
+       return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+       if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+               /*
+                * The LSB of head.next can't change to 0 from under
+                * us because we hold the mm_all_locks_mutex.
+                *
+                * We must however clear the bitflag before unlocking
+                * the vma so the users using the anon_vma->head will
+                * never see our bitflag.
+                *
+                * No need of atomic instructions here, head.next
+                * can't change from under us until we release the
+                * anon_vma->lock.
+                */
+               if (!__test_and_clear_bit(0, (unsigned long *)
+                                         &anon_vma->head.next))
+                       BUG();
+               spin_unlock(&anon_vma->lock);
+       }
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+       if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+               /*
+                * AS_MM_ALL_LOCKS can't change to 0 from under us
+                * because we hold the mm_all_locks_mutex.
+                */
+               spin_unlock(&mapping->i_mmap_lock);
+               if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+                                       &mapping->flags))
+                       BUG();
+       }
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+       struct vm_area_struct *vma;
+
+       BUG_ON(down_read_trylock(&mm->mmap_sem));
+       BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (vma->anon_vma)
+                       vm_unlock_anon_vma(vma->anon_vma);
+               if (vma->vm_file && vma->vm_file->f_mapping)
+                       vm_unlock_mapping(vma->vm_file->f_mapping);
+       }
+
+       mutex_unlock(&mm_all_locks_mutex);
+}
author	Andrea Arcangeli <andrea@qumranet.com>
	Mon, 28 Jul 2008 22:46:26 +0000 (15:46 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 28 Jul 2008 23:30:21 +0000 (16:30 -0700)
include/linux/mm.h		patch \| blob \| history
include/linux/pagemap.h		patch \| blob \| history
include/linux/rmap.h		patch \| blob \| history
mm/mmap.c		patch \| blob \| history