memcg: fix deadlock between lock_page_cgroup and mapping tree_lock
authorDaisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Thu, 28 May 2009 21:34:28 +0000 (14:34 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 29 May 2009 15:40:02 +0000 (08:40 -0700)
mapping->tree_lock can be acquired from interrupt context.  Then,
following dead lock can occur.

Assume "A" as a page.

 CPU0:
       lock_page_cgroup(A)
interrupted
-> take mapping->tree_lock.
 CPU1:
       take mapping->tree_lock
-> lock_page_cgroup(A)

This patch tries to fix above deadlock by moving memcg's hook to out of
mapping->tree_lock.  charge/uncharge of pagecache/swapcache is protected
by page lock, not tree_lock.

After this patch, lock_page_cgroup() is not called under mapping->tree_lock.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/swap.h
mm/filemap.c
mm/memcontrol.c
mm/swap_state.c
mm/truncate.c
mm/vmscan.c

index 62d8143..d476aad 100644 (file)
@@ -437,6 +437,11 @@ static inline int mem_cgroup_cache_charge_swapin(struct page *page,
        return 0;
 }
 
+static inline void
+mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
+{
+}
+
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
index 379ff0b..1b60f30 100644 (file)
@@ -121,7 +121,6 @@ void __remove_from_page_cache(struct page *page)
        mapping->nrpages--;
        __dec_zone_page_state(page, NR_FILE_PAGES);
        BUG_ON(page_mapped(page));
-       mem_cgroup_uncharge_cache_page(page);
 
        /*
         * Some filesystems seem to re-dirty the page even after
@@ -145,6 +144,7 @@ void remove_from_page_cache(struct page *page)
        spin_lock_irq(&mapping->tree_lock);
        __remove_from_page_cache(page);
        spin_unlock_irq(&mapping->tree_lock);
+       mem_cgroup_uncharge_cache_page(page);
 }
 
 static int sync_page(void *word)
@@ -476,13 +476,13 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
                if (likely(!error)) {
                        mapping->nrpages++;
                        __inc_zone_page_state(page, NR_FILE_PAGES);
+                       spin_unlock_irq(&mapping->tree_lock);
                } else {
                        page->mapping = NULL;
+                       spin_unlock_irq(&mapping->tree_lock);
                        mem_cgroup_uncharge_cache_page(page);
                        page_cache_release(page);
                }
-
-               spin_unlock_irq(&mapping->tree_lock);
                radix_tree_preload_end();
        } else
                mem_cgroup_uncharge_cache_page(page);
index 01c2d8f..4a747a2 100644 (file)
@@ -1488,8 +1488,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
        __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
 }
 
+#ifdef CONFIG_SWAP
 /*
- * called from __delete_from_swap_cache() and drop "page" account.
+ * called after __delete_from_swap_cache() and drop "page" account.
  * memcg information is recorded to swap_cgroup of "ent"
  */
 void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
@@ -1506,6 +1507,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
        if (memcg)
                css_put(&memcg->css);
 }
+#endif
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 /*
index 3ecea98..1416e7e 100644 (file)
@@ -109,8 +109,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
  */
 void __delete_from_swap_cache(struct page *page)
 {
-       swp_entry_t ent = {.val = page_private(page)};
-
        VM_BUG_ON(!PageLocked(page));
        VM_BUG_ON(!PageSwapCache(page));
        VM_BUG_ON(PageWriteback(page));
@@ -121,7 +119,6 @@ void __delete_from_swap_cache(struct page *page)
        total_swapcache_pages--;
        __dec_zone_page_state(page, NR_FILE_PAGES);
        INC_CACHE_INFO(del_total);
-       mem_cgroup_uncharge_swapcache(page, ent);
 }
 
 /**
@@ -191,6 +188,7 @@ void delete_from_swap_cache(struct page *page)
        __delete_from_swap_cache(page);
        spin_unlock_irq(&swapper_space.tree_lock);
 
+       mem_cgroup_uncharge_swapcache(page, entry);
        swap_free(entry);
        page_cache_release(page);
 }
index 55206fa..12e1579 100644 (file)
@@ -359,6 +359,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
        BUG_ON(page_has_private(page));
        __remove_from_page_cache(page);
        spin_unlock_irq(&mapping->tree_lock);
+       mem_cgroup_uncharge_cache_page(page);
        page_cache_release(page);       /* pagecache ref */
        return 1;
 failed:
index 5fa3eda..d254306 100644 (file)
@@ -470,10 +470,12 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
                swp_entry_t swap = { .val = page_private(page) };
                __delete_from_swap_cache(page);
                spin_unlock_irq(&mapping->tree_lock);
+               mem_cgroup_uncharge_swapcache(page, swap);
                swap_free(swap);
        } else {
                __remove_from_page_cache(page);
                spin_unlock_irq(&mapping->tree_lock);
+               mem_cgroup_uncharge_cache_page(page);
        }
 
        return 1;