[PATCH] mm: unmap_vmas with inner ptlock

author    Hugh Dickins <hugh@veritas.com>
          Sun, 30 Oct 2005 01:16:30 +0000 (18:16 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
          Sun, 30 Oct 2005 04:40:41 +0000 (21:40 -0700)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index 3a9b6d1..a826a8a 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -92,7 +92,7 @@ out:
  }
  
  /*
- * Called under down_write(mmap_sem), page_table_lock is not held
+ * Called under down_write(mmap_sem).
   */
  
  #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
@@ -308,7 +308,6 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
  
         vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
                 unsigned long h_vm_pgoff;
-               unsigned long v_length;
                 unsigned long v_offset;
  
                 h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
@@ -319,11 +318,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
                 if (h_vm_pgoff >= h_pgoff)
                         v_offset = 0;
  
-               v_length = vma->vm_end - vma->vm_start;
-
-               zap_hugepage_range(vma,
-                               vma->vm_start + v_offset,
-                               v_length - v_offset);
+               unmap_hugepage_range(vma,
+                               vma->vm_start + v_offset, vma->vm_end);
         }
  }
  
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index d664330..0cea162 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -16,7 +16,6 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
  int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
  int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
  int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
-void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
  void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
  int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
  int hugetlb_report_meminfo(char *);
@@ -87,7 +86,6 @@ static inline unsigned long hugetlb_total_pages(void)
  #define follow_huge_addr(mm, addr, write)      ERR_PTR(-EINVAL)
  #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
  #define hugetlb_prefault(mapping, vma)         ({ BUG(); 0; })
-#define zap_hugepage_range(vma, start, len)    BUG()
  #define unmap_hugepage_range(vma, start, end)  BUG()
  #define is_hugepage_mem_enough(size)           0
  #define hugetlb_report_meminfo(buf)            0
diff --git a/include/linux/mm.h b/include/linux/mm.h

index d4c3512..972e2ce 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -682,7 +682,7 @@ struct zap_details {
  
  unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
                 unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlb,
                 struct vm_area_struct *start_vma, unsigned long start_addr,
                 unsigned long end_addr, unsigned long *nr_accounted,
                 struct zap_details *);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index ea0826f..f29b7dc 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -314,6 +314,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
         BUG_ON(start & ~HPAGE_MASK);
         BUG_ON(end & ~HPAGE_MASK);
  
+       spin_lock(&mm->page_table_lock);
+
         /* Update high watermark before we lower rss */
         update_hiwater_rss(mm);
  
@@ -333,17 +335,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
                 put_page(page);
                 add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE));
         }
-       flush_tlb_range(vma, start, end);
-}
  
-void zap_hugepage_range(struct vm_area_struct *vma,
-                       unsigned long start, unsigned long length)
-{
-       struct mm_struct *mm = vma->vm_mm;
-
-       spin_lock(&mm->page_table_lock);
-       unmap_hugepage_range(vma, start, start + length);
         spin_unlock(&mm->page_table_lock);
+       flush_tlb_range(vma, start, end);
  }
  
  int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
diff --git a/mm/memory.c b/mm/memory.c

index 4ea89a2..622a4ef 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -551,10 +551,11 @@ static void zap_pte_range(struct mmu_gather *tlb,
  {
         struct mm_struct *mm = tlb->mm;
         pte_t *pte;
+       spinlock_t *ptl;
         int file_rss = 0;
         int anon_rss = 0;
  
-       pte = pte_offset_map(pmd, addr);
+       pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
         do {
                 pte_t ptent = *pte;
                 if (pte_none(ptent))
@@ -621,7 +622,7 @@ static void zap_pte_range(struct mmu_gather *tlb,
         } while (pte++, addr += PAGE_SIZE, addr != end);
  
         add_mm_rss(mm, file_rss, anon_rss);
-       pte_unmap(pte - 1);
+       pte_unmap_unlock(pte - 1, ptl);
  }
  
  static inline void zap_pmd_range(struct mmu_gather *tlb,
@@ -690,7 +691,6 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
  /**
   * unmap_vmas - unmap a range of memory covered by a list of vma's
   * @tlbp: address of the caller's struct mmu_gather
- * @mm: the controlling mm_struct
   * @vma: the starting vma
   * @start_addr: virtual address at which to start unmapping
   * @end_addr: virtual address at which to end unmapping
@@ -699,10 +699,10 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
   *
   * Returns the end address of the unmapping (restart addr if interrupted).
   *
- * Unmap all pages in the vma list.  Called under page_table_lock.
+ * Unmap all pages in the vma list.
   *
- * We aim to not hold page_table_lock for too long (for scheduling latency
- * reasons).  So zap pages in ZAP_BLOCK_SIZE bytecounts.  This means we need to
+ * We aim to not hold locks for too long (for scheduling latency reasons).
+ * So zap pages in ZAP_BLOCK_SIZE bytecounts.  This means we need to
   * return the ending mmu_gather to the caller.
   *
   * Only addresses between `start' and `end' will be unmapped.
@@ -714,7 +714,7 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
   * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
   * drops the lock and schedules.
   */
-unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlbp,
                 struct vm_area_struct *vma, unsigned long start_addr,
                 unsigned long end_addr, unsigned long *nr_accounted,
                 struct zap_details *details)
@@ -764,19 +764,15 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                         tlb_finish_mmu(*tlbp, tlb_start, start);
  
                         if (need_resched() ||
-                               need_lockbreak(&mm->page_table_lock) ||
                                 (i_mmap_lock && need_lockbreak(i_mmap_lock))) {
                                 if (i_mmap_lock) {
-                                       /* must reset count of rss freed */
-                                       *tlbp = tlb_gather_mmu(mm, fullmm);
+                                       *tlbp = NULL;
                                         goto out;
                                 }
-                               spin_unlock(&mm->page_table_lock);
                                 cond_resched();
-                               spin_lock(&mm->page_table_lock);
                         }
  
-                       *tlbp = tlb_gather_mmu(mm, fullmm);
+                       *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
                         tlb_start_valid = 0;
                         zap_bytes = ZAP_BLOCK_SIZE;
                 }
@@ -800,18 +796,12 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
         unsigned long end = address + size;
         unsigned long nr_accounted = 0;
  
-       if (is_vm_hugetlb_page(vma)) {
-               zap_hugepage_range(vma, address, size);
-               return end;
-       }
-
         lru_add_drain();
         tlb = tlb_gather_mmu(mm, 0);
         update_hiwater_rss(mm);
-       spin_lock(&mm->page_table_lock);
-       end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
-       spin_unlock(&mm->page_table_lock);
-       tlb_finish_mmu(tlb, address, end);
+       end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+       if (tlb)
+               tlb_finish_mmu(tlb, address, end);
         return end;
  }
  
@@ -1434,13 +1424,6 @@ again:
  
         restart_addr = zap_page_range(vma, start_addr,
                                         end_addr - start_addr, details);
-
-       /*
-        * We cannot rely on the break test in unmap_vmas:
-        * on the one hand, we don't want to restart our loop
-        * just because that broke out for the page_table_lock;
-        * on the other hand, it does no test when vma is small.
-        */
         need_break = need_resched() ||
                         need_lockbreak(details->i_mmap_lock);
  
diff --git a/mm/mmap.c b/mm/mmap.c

index fa35323..5ecc2cf 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1673,9 +1673,7 @@ static void unmap_region(struct mm_struct *mm,
         lru_add_drain();
         tlb = tlb_gather_mmu(mm, 0);
         update_hiwater_rss(mm);
-       spin_lock(&mm->page_table_lock);
-       unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
-       spin_unlock(&mm->page_table_lock);
+       unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
         free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
                                  next? next->vm_start: 0);
@@ -1958,9 +1956,7 @@ void exit_mmap(struct mm_struct *mm)
         tlb = tlb_gather_mmu(mm, 1);
         /* Don't update_hiwater_rss(mm) here, do_exit already did */
         /* Use -1 here to ensure all VMAs in the mm are unmapped */
-       spin_lock(&mm->page_table_lock);
-       end = unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
-       spin_unlock(&mm->page_table_lock);
+       end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
         free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
         tlb_finish_mmu(tlb, 0, end);
author	Hugh Dickins <hugh@veritas.com>
	Sun, 30 Oct 2005 01:16:30 +0000 (18:16 -0700)
committer	Linus Torvalds <torvalds@g5.osdl.org>
	Sun, 30 Oct 2005 04:40:41 +0000 (21:40 -0700)
fs/hugetlbfs/inode.c		patch | blob | history
include/linux/hugetlb.h		patch | blob | history
include/linux/mm.h		patch | blob | history
mm/hugetlb.c		patch | blob | history
mm/memory.c		patch | blob | history
mm/mmap.c		patch | blob | history