mm: add_to_swap_cache() must not sleep
[safe/jmp/linux-2.6] / mm / rmap.c
index 3209825..720fc03 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -14,7 +14,7 @@
  * Original design by Rik van Riel <riel@conectiva.com.br> 2001
  * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
  * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
- * Contributions by Hugh Dickins <hugh@veritas.com> 2003, 2004
+ * Contributions by Hugh Dickins 2003, 2004
  */
 
 /*
@@ -333,7 +333,9 @@ static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
 static int page_referenced_one(struct page *page,
-       struct vm_area_struct *vma, unsigned int *mapcount)
+                              struct vm_area_struct *vma,
+                              unsigned int *mapcount,
+                              unsigned long *vm_flags)
 {
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
@@ -356,6 +358,7 @@ static int page_referenced_one(struct page *page,
         */
        if (vma->vm_flags & VM_LOCKED) {
                *mapcount = 1;  /* break early from loop */
+               *vm_flags |= VM_LOCKED;
                goto out_unmap;
        }
 
@@ -381,11 +384,14 @@ out_unmap:
        (*mapcount)--;
        pte_unmap_unlock(pte, ptl);
 out:
+       if (referenced)
+               *vm_flags |= vma->vm_flags;
        return referenced;
 }
 
 static int page_referenced_anon(struct page *page,
-                               struct mem_cgroup *mem_cont)
+                               struct mem_cgroup *mem_cont,
+                               unsigned long *vm_flags)
 {
        unsigned int mapcount;
        struct anon_vma *anon_vma;
@@ -405,7 +411,8 @@ static int page_referenced_anon(struct page *page,
                 */
                if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
                        continue;
-               referenced += page_referenced_one(page, vma, &mapcount);
+               referenced += page_referenced_one(page, vma,
+                                                 &mapcount, vm_flags);
                if (!mapcount)
                        break;
        }
@@ -418,6 +425,7 @@ static int page_referenced_anon(struct page *page,
  * page_referenced_file - referenced check for object-based rmap
  * @page: the page we're checking references on.
  * @mem_cont: target memory controller
+ * @vm_flags: collects vm_flags of each vma that actually referenced the page
  *
  * For an object-based mapped page, find all the places it is mapped and
  * check/clear the referenced flag.  This is done by following the page->mapping
@@ -427,7 +435,8 @@ static int page_referenced_anon(struct page *page,
  * This function is only called from page_referenced for object-based pages.
  */
 static int page_referenced_file(struct page *page,
-                               struct mem_cgroup *mem_cont)
+                               struct mem_cgroup *mem_cont,
+                               unsigned long *vm_flags)
 {
        unsigned int mapcount;
        struct address_space *mapping = page->mapping;
@@ -467,7 +476,8 @@ static int page_referenced_file(struct page *page,
                 */
                if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
                        continue;
-               referenced += page_referenced_one(page, vma, &mapcount);
+               referenced += page_referenced_one(page, vma,
+                                                 &mapcount, vm_flags);
                if (!mapcount)
                        break;
        }
@@ -481,29 +491,35 @@ static int page_referenced_file(struct page *page,
  * @page: the page to test
  * @is_locked: caller holds lock on the page
  * @mem_cont: target memory controller
+ * @vm_flags: collects vm_flags of each vma that actually referenced the page
  *
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of ptes which referenced the page.
  */
-int page_referenced(struct page *page, int is_locked,
-                       struct mem_cgroup *mem_cont)
+int page_referenced(struct page *page,
+                   int is_locked,
+                   struct mem_cgroup *mem_cont,
+                   unsigned long *vm_flags)
 {
        int referenced = 0;
 
        if (TestClearPageReferenced(page))
                referenced++;
 
+       *vm_flags = 0;
        if (page_mapped(page) && page->mapping) {
                if (PageAnon(page))
-                       referenced += page_referenced_anon(page, mem_cont);
+                       referenced += page_referenced_anon(page, mem_cont,
+                                                               vm_flags);
                else if (is_locked)
-                       referenced += page_referenced_file(page, mem_cont);
+                       referenced += page_referenced_file(page, mem_cont,
+                                                               vm_flags);
                else if (!trylock_page(page))
                        referenced++;
                else {
                        if (page->mapping)
-                               referenced +=
-                                       page_referenced_file(page, mem_cont);
+                               referenced += page_referenced_file(page,
+                                                       mem_cont, vm_flags);
                        unlock_page(page);
                }
        }
@@ -688,68 +704,51 @@ void page_add_new_anon_rmap(struct page *page,
  */
 void page_add_file_rmap(struct page *page)
 {
-       if (atomic_inc_and_test(&page->_mapcount))
+       if (atomic_inc_and_test(&page->_mapcount)) {
                __inc_zone_page_state(page, NR_FILE_MAPPED);
+               mem_cgroup_update_mapped_file_stat(page, 1);
+       }
 }
 
-#ifdef CONFIG_DEBUG_VM
-/**
- * page_dup_rmap - duplicate pte mapping to a page
- * @page:      the page to add the mapping to
- * @vma:       the vm area being duplicated
- * @address:   the user virtual address mapped
- *
- * For copy_page_range only: minimal extract from page_add_file_rmap /
- * page_add_anon_rmap, avoiding unnecessary tests (already checked) so it's
- * quicker.
- *
- * The caller needs to hold the pte lock.
- */
-void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
-{
-       BUG_ON(page_mapcount(page) == 0);
-       if (PageAnon(page))
-               __page_check_anon_rmap(page, vma, address);
-       atomic_inc(&page->_mapcount);
-}
-#endif
-
 /**
  * page_remove_rmap - take down pte mapping from a page
  * @page: page to remove mapping from
- * @vma: the vm area in which the mapping is removed
  *
  * The caller needs to hold the pte lock.
  */
-void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
+void page_remove_rmap(struct page *page)
 {
-       if (atomic_add_negative(-1, &page->_mapcount)) {
-               /*
-                * Now that the last pte has gone, s390 must transfer dirty
-                * flag from storage key to struct page.  We can usually skip
-                * this if the page is anon, so about to be freed; but perhaps
-                * not if it's in swapcache - there might be another pte slot
-                * containing the swap entry, but page not yet written to swap.
-                */
-               if ((!PageAnon(page) || PageSwapCache(page)) &&
-                   page_test_dirty(page)) {
-                       page_clear_dirty(page);
-                       set_page_dirty(page);
-               }
-               if (PageAnon(page))
-                       mem_cgroup_uncharge_page(page);
-               __dec_zone_page_state(page,
-                       PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
-               /*
-                * It would be tidy to reset the PageAnon mapping here,
-                * but that might overwrite a racing page_add_anon_rmap
-                * which increments mapcount after us but sets mapping
-                * before us: so leave the reset to free_hot_cold_page,
-                * and remember that it's only reliable while mapped.
-                * Leaving it set also helps swapoff to reinstate ptes
-                * faster for those pages still in swapcache.
-                */
+       /* page still mapped by someone else? */
+       if (!atomic_add_negative(-1, &page->_mapcount))
+               return;
+
+       /*
+        * Now that the last pte has gone, s390 must transfer dirty
+        * flag from storage key to struct page.  We can usually skip
+        * this if the page is anon, so about to be freed; but perhaps
+        * not if it's in swapcache - there might be another pte slot
+        * containing the swap entry, but page not yet written to swap.
+        */
+       if ((!PageAnon(page) || PageSwapCache(page)) && page_test_dirty(page)) {
+               page_clear_dirty(page);
+               set_page_dirty(page);
+       }
+       if (PageAnon(page)) {
+               mem_cgroup_uncharge_page(page);
+               __dec_zone_page_state(page, NR_ANON_PAGES);
+       } else {
+               __dec_zone_page_state(page, NR_FILE_MAPPED);
        }
+       mem_cgroup_update_mapped_file_stat(page, -1);
+       /*
+        * It would be tidy to reset the PageAnon mapping here,
+        * but that might overwrite a racing page_add_anon_rmap
+        * which increments mapcount after us but sets mapping
+        * before us: so leave the reset to free_hot_cold_page,
+        * and remember that it's only reliable while mapped.
+        * Leaving it set also helps swapoff to reinstate ptes
+        * faster for those pages still in swapcache.
+        */
 }
 
 /*
@@ -837,7 +836,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                dec_mm_counter(mm, file_rss);
 
 
-       page_remove_rmap(page, vma);
+       page_remove_rmap(page);
        page_cache_release(page);
 
 out_unmap:
@@ -952,7 +951,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
                if (pte_dirty(pteval))
                        set_page_dirty(page);
 
-               page_remove_rmap(page, vma);
+               page_remove_rmap(page);
                page_cache_release(page);
                dec_mm_counter(mm, file_rss);
                (*mapcount)--;
@@ -1074,7 +1073,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
        spin_lock(&mapping->i_mmap_lock);
        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
                if (MLOCK_PAGES && unlikely(unlock)) {
-                       if (!(vma->vm_flags & VM_LOCKED))
+                       if (!((vma->vm_flags & VM_LOCKED) &&
+                                               page_mapped_in_vma(page, vma)))
                                continue;       /* must visit all vmas */
                        ret = SWAP_MLOCK;
                } else {
@@ -1203,7 +1203,6 @@ int try_to_unmap(struct page *page, int migration)
        return ret;
 }
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 /**
  * try_to_munlock - try to munlock a page
  * @page: the page to be munlocked
@@ -1227,4 +1226,4 @@ int try_to_munlock(struct page *page)
        else
                return try_to_unmap_file(page, 1, 0);
 }
-#endif
+