memcg: fix race in file_mapped accounting
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tue, 6 Apr 2010 21:35:05 +0000 (14:35 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 7 Apr 2010 15:38:05 +0000 (08:38 -0700)
Presently, memcg's FILE_MAPPED accounting has following race with
move_account (happens at rmdir()).

    increment page->mapcount (rmap.c)
    mem_cgroup_update_file_mapped()           move_account()
      lock_page_cgroup()
      check page_mapped() if
      page_mapped(page)>1 {
FILE_MAPPED -1 from old memcg
FILE_MAPPED +1 to old memcg
      }
      .....
      overwrite pc->mem_cgroup
      unlock_page_cgroup()
    lock_page_cgroup()
    FILE_MAPPED + 1 to pc->mem_cgroup
    unlock_page_cgroup()

Then,
old memcg (-1 file mapped)
new memcg (+2 file mapped)

This happens because move_account see page_mapped() which is not guarded
by lock_page_cgroup().  This patch adds FILE_MAPPED flag to page_cgroup
and move account information based on it.  Now, all checks are synchronous
with lock_page_cgroup().

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Balbir Singh <balbir@in.ibm.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Andrea Righi <arighi@develer.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/page_cgroup.h
mm/memcontrol.c

index 30b0813..aef22ae 100644 (file)
@@ -39,6 +39,7 @@ enum {
        PCG_CACHE, /* charged as cache */
        PCG_USED, /* this object is in use. */
        PCG_ACCT_LRU, /* page has been accounted for */
+       PCG_FILE_MAPPED, /* page is accounted as "mapped" */
 };
 
 #define TESTPCGFLAG(uname, lname)                      \
@@ -73,6 +74,11 @@ CLEARPCGFLAG(AcctLRU, ACCT_LRU)
 TESTPCGFLAG(AcctLRU, ACCT_LRU)
 TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
 
+
+SETPCGFLAG(FileMapped, FILE_MAPPED)
+CLEARPCGFLAG(FileMapped, FILE_MAPPED)
+TESTPCGFLAG(FileMapped, FILE_MAPPED)
+
 static inline int page_cgroup_nid(struct page_cgroup *pc)
 {
        return page_to_nid(pc->page);
index 9ed760d..f4ede99 100644 (file)
@@ -1359,16 +1359,19 @@ void mem_cgroup_update_file_mapped(struct page *page, int val)
 
        lock_page_cgroup(pc);
        mem = pc->mem_cgroup;
-       if (!mem)
-               goto done;
-
-       if (!PageCgroupUsed(pc))
+       if (!mem || !PageCgroupUsed(pc))
                goto done;
 
        /*
         * Preemption is already disabled. We can use __this_cpu_xxx
         */
-       __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], val);
+       if (val > 0) {
+               __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+               SetPageCgroupFileMapped(pc);
+       } else {
+               __this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+               ClearPageCgroupFileMapped(pc);
+       }
 
 done:
        unlock_page_cgroup(pc);
@@ -1801,16 +1804,13 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 static void __mem_cgroup_move_account(struct page_cgroup *pc,
        struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
 {
-       struct page *page;
-
        VM_BUG_ON(from == to);
        VM_BUG_ON(PageLRU(pc->page));
        VM_BUG_ON(!PageCgroupLocked(pc));
        VM_BUG_ON(!PageCgroupUsed(pc));
        VM_BUG_ON(pc->mem_cgroup != from);
 
-       page = pc->page;
-       if (page_mapped(page) && !PageAnon(page)) {
+       if (PageCgroupFileMapped(pc)) {
                /* Update mapped_file data for mem_cgroup */
                preempt_disable();
                __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);